summaryrefslogtreecommitdiffstats
path: root/vendor/rustc_apfloat
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:59:35 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:59:35 +0000
commitd1b2d29528b7794b41e66fc2136e395a02f8529b (patch)
treea4a17504b260206dec3cf55b2dca82929a348ac2 /vendor/rustc_apfloat
parentReleasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff)
downloadrustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz
rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/rustc_apfloat')
-rw-r--r--vendor/rustc_apfloat/.cargo-checksum.json1
-rw-r--r--vendor/rustc_apfloat/Cargo.toml37
-rw-r--r--vendor/rustc_apfloat/LICENSE-DETAILS.md80
-rw-r--r--vendor/rustc_apfloat/LICENSE.txt222
-rw-r--r--vendor/rustc_apfloat/README.md117
-rw-r--r--vendor/rustc_apfloat/benches/decimal.rs114
-rw-r--r--vendor/rustc_apfloat/build.rs41
-rw-r--r--vendor/rustc_apfloat/rustfmt.toml7
-rw-r--r--vendor/rustc_apfloat/src/ieee.rs3211
-rw-r--r--vendor/rustc_apfloat/src/lib.rs739
-rw-r--r--vendor/rustc_apfloat/src/ppc.rs453
-rw-r--r--vendor/rustc_apfloat/tests/downstream.rs410
-rw-r--r--vendor/rustc_apfloat/tests/ieee.rs4927
-rw-r--r--vendor/rustc_apfloat/tests/ppc.rs470
14 files changed, 10829 insertions, 0 deletions
diff --git a/vendor/rustc_apfloat/.cargo-checksum.json b/vendor/rustc_apfloat/.cargo-checksum.json
new file mode 100644
index 000000000..0a0985b25
--- /dev/null
+++ b/vendor/rustc_apfloat/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"42b594d78c4289cce2f2021fd03ee3514753e5bc5c792524bd9c89d538e92284","LICENSE-DETAILS.md":"5998f303e26191363f591e04bdd0f829b2000afc843c67326a9b7efd66850416","LICENSE.txt":"981f4155fbd55dcf13745e2ed508e6fa30aa90f9f668c4ef0f7686980e5d8521","README.md":"10858c435fb226798bf53052cfd0caa65f5f7cc17f09447157557f3b8b812598","benches/decimal.rs":"94db2414aaf2c5c82ca12a5a5a2423809e2175f8782900b4d041159b099eaab8","build.rs":"5534a113df3edb85288eaa611ae41af3c4218bb9a7e95b5ea39fe433fdcdaca3","rustfmt.toml":"caf2e8eb750ae147f31b4920902479ff0b77df4ee40fc07f85855724c355747c","src/ieee.rs":"5400237c3ca6876874ea04909465d4bb65788ed918f8279ef6f74a7e4dc795da","src/lib.rs":"61d3015ce93b707648073dea3f87bd50f52f50a4bdcc05e41f700403dc47b675","src/ppc.rs":"09860735d5e9286504377f9aeabc54c5f113d8af69c65d9088912f61797fcf11","tests/downstream.rs":"531f91181c1dadeb29db6a6679bb82d07ec899546d5341b34f9ac8a644eb7026","tests/ieee.rs":"4aee82c83673a900032672eb2708329dc4ae1de4b266ff4beefcad5253fa47b4","tests/ppc.rs":"dd0ded7a3ce4922623f5f4eb233f38e3563513abf0dbb854e4b8bb1661345113"},"package":"465187772033a5ee566f69fe008df03628fce549a0899aae76f0a0c2e34696be"} \ No newline at end of file
diff --git a/vendor/rustc_apfloat/Cargo.toml b/vendor/rustc_apfloat/Cargo.toml
new file mode 100644
index 000000000..571a6ed16
--- /dev/null
+++ b/vendor/rustc_apfloat/Cargo.toml
@@ -0,0 +1,37 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "rustc_apfloat"
+version = "0.2.0+llvm-462a31f5a5ab"
+description = "Rust port of C++ llvm::APFloat library"
+readme = "README.md"
+license = "Apache-2.0 WITH LLVM-exception"
+repository = "https://github.com/rust-lang/rustc_apfloat"
+
+[[bench]]
+name = "decimal"
+harness = false
+
+[dependencies.bitflags]
+version = "1.3.2"
+
+[dependencies.smallvec]
+version = "1.11.0"
+features = [
+ "const_generics",
+ "union",
+]
+
+[dev-dependencies.criterion]
+version = "0.5.1"
+features = ["html_reports"]
diff --git a/vendor/rustc_apfloat/LICENSE-DETAILS.md b/vendor/rustc_apfloat/LICENSE-DETAILS.md
new file mode 100644
index 000000000..46c530136
--- /dev/null
+++ b/vendor/rustc_apfloat/LICENSE-DETAILS.md
@@ -0,0 +1,80 @@
+# Detailed Licensing Information
+
+This crate started as a port of LLVM's APFloat and (parts of) APInt code (revision [`f3598e8fca83ccfb11f58ec7957c229e349765e3`](https://github.com/llvm/llvm-project/commit/f3598e8fca83ccfb11f58ec7957c229e349765e3)).
+At that time, LLVM was licensed under the [University of Illinois/NCSA Open Source License](https://spdx.org/licenses/NCSA.html).
+LLVM has since worked to [relicense](https://foundation.llvm.org/docs/relicensing/) their project under the [Apache 2](https://spdx.org/licenses/Apache-2.0.html) with [LLVM Exception](https://spdx.org/licenses/LLVM-exception.html) license.
+
+Reviewing the history of the APFloat/APInt code in LLVM, of the 795 commits which modify the code, only the following 7 have yet to be relicensed:
+
+- [`cb4029110040c3655a66b5f423d328c749ba6a49`](https://github.com/llvm/llvm-project/commit/cb4029110040c3655a66b5f423d328c749ba6a49)
+- [`f907b891da1641034f0603b0c6bc00b7aa4d1f4a`](https://github.com/llvm/llvm-project/commit/f907b891da1641034f0603b0c6bc00b7aa4d1f4a)
+- [`bf19e0a5561593d3d25924693a20a9bbe7771a3f`](https://github.com/llvm/llvm-project/commit/bf19e0a5561593d3d25924693a20a9bbe7771a3f)
+- [`49c758b769b9a787d415a9c2ce0e40fa0e482412`](https://github.com/llvm/llvm-project/commit/49c758b769b9a787d415a9c2ce0e40fa0e482412)
+- [`4e69e29a72a1ffcbf755f13ed909b51cfbcafd60`](https://github.com/llvm/llvm-project/commit/4e69e29a72a1ffcbf755f13ed909b51cfbcafd60)
+- [`8710d9831b1a78e3ecb1f49da24447ee27f73096`](https://github.com/llvm/llvm-project/commit/8710d9831b1a78e3ecb1f49da24447ee27f73096)
+- [`1c419ff50d35c4cab442f5a1c8f5e82812921633`](https://github.com/llvm/llvm-project/commit/1c419ff50d35c4cab442f5a1c8f5e82812921633)
+
+However, as the [LLVM blog mentions](https://blog.llvm.org/posts/2022-08-14-relicensing-update/):
+
+> Some pieces of code are not covered by copyright law.
+> For example, copyright law contains a concept called [“Threshold of originality”](https://en.wikipedia.org/wiki/Threshold_of_originality).
+> It means that a work needs to be “sufficiently original” for it to be considered to be covered by copyright.
+> There could be a lot of different interpretations into what it means for a code contribution to be sufficiently original for it to be covered by copyright.
+> A threshold that is often used in open source projects that use [contributor license agreements (CLA)](https://en.wikipedia.org/wiki/Contributor_License_Agreement) is to assume that any contribution that’s 10 lines of code or less does not meet the threshold of originality and therefore copyright does not apply.
+> In [their May 2022](https://discourse.llvm.org/t/board-meeting-minutes-may-2022/63628) board meeting, the LLVM Foundation decided to make the same assumption for the relicensing project: contributions of 10 lines of code or less are assumed to not be covered by copyright.
+> Therefore, we don’t need relicensing agreements for those.
+>
+> Furthermore, there are a few commits that don’t seem to meet the “threshold-of-originality” even though they’re changing/adding more than 10 lines.
+> We also consider those to not needing a relicensing agreement.
+> One example is [this commit](https://github.com/llvm/llvm-project/commit/cd13ef01a21e), which only removes the full stop at the end of a few sentences.
+
+With that in mind, a review of the commits in question shows:
+
+### [`cb4029110040c3655a66b5f423d328c749ba6a49`](https://github.com/llvm/llvm-project/commit/cb4029110040c3655a66b5f423d328c749ba6a49)
+
+This commit is the result of running a spellchecker on the code base.
+While it is larger than 10 lines of code, it is very similar to the example commit mentioned above which mechanically removed the full stop from the end of various sentences in the codebase.
+As such, it does not seem "sufficiently original" to be copyrightable.
+
+### [`f907b891da1641034f0603b0c6bc00b7aa4d1f4a`](https://github.com/llvm/llvm-project/commit/f907b891da1641034f0603b0c6bc00b7aa4d1f4a)
+
+This commit fixes hyphenation of words mostly in code comments or documentation.
+Again, while it is larger than 10 lines of code, it is very similar to the example commit mentioned above and does not seem "sufficiently original" to be copyrightable.
+
+### [`bf19e0a5561593d3d25924693a20a9bbe7771a3f`](https://github.com/llvm/llvm-project/commit/bf19e0a5561593d3d25924693a20a9bbe7771a3f)
+
+This commit fixes a comparison to work properly with the MSVC compiler.
+As the total diff is only 4 lines, it does not meet the established threshold of originality.
+
+### [`49c758b769b9a787d415a9c2ce0e40fa0e482412`](https://github.com/llvm/llvm-project/commit/49c758b769b9a787d415a9c2ce0e40fa0e482412)
+
+This commit changes some uses of raw pointers to use `unique_ptr`.
+While the commit is larger than 10 lines of code, it is reverted in the next commit which landed the following day.
+As such, the combination of this commit and the following commit are zero lines of code changed in the codebase.
+
+### [`4e69e29a72a1ffcbf755f13ed909b51cfbcafd60`](https://github.com/llvm/llvm-project/commit/4e69e29a72a1ffcbf755f13ed909b51cfbcafd60)
+
+This commit is a mechanical revert of the prior commit.
+
+### [`8710d9831b1a78e3ecb1f49da24447ee27f73096`](https://github.com/llvm/llvm-project/commit/8710d9831b1a78e3ecb1f49da24447ee27f73096)
+
+This commit changes a `struct` definition to be a `class`.
+As the total diff is only 2 lines, it does not meet the established threshold of originality.
+
+### [`1c419ff50d35c4cab442f5a1c8f5e82812921633`](https://github.com/llvm/llvm-project/commit/1c419ff50d35c4cab442f5a1c8f5e82812921633)
+
+This commit fixes an assertion in the code which had not been correctly updated in a prior change.
+As the total diff is only 6 lines of code (excluding changes to APInt unit tests which we did not port to Rust), it does not meet the established threshold of originality.
+
+# Conclusion
+
+The original LLVM code appears to be available as Apache 2 with LLVM Exception and so our port of this code is licensed as such.
+
+A few additional patches (code cleanups and performance improvements) have been made on top of the initial port.
+The authors of these patches have also agreed to allow their code to be used under the Apache 2 with LLVM Exception license.
+
+Subsequent work on this crate has advanced the state of the port from the original commit.
+Reviewing the set of upstream LLVM changes after revision `f3598e8fca83ccfb11f58ec7957c229e349765e3` and before the relicensing on 2019-01-19 reveals 41 changes all of which LLVM has relicensing agreements with their authors.
+As such, these changes and all changes made to LLVM after the relicensing date are available under the Apache 2 with LLVM Exception license.
+
+Therefore, the whole of this crate is Apache 2 with LLVM Exception licensed.
diff --git a/vendor/rustc_apfloat/LICENSE.txt b/vendor/rustc_apfloat/LICENSE.txt
new file mode 100644
index 000000000..b941b9166
--- /dev/null
+++ b/vendor/rustc_apfloat/LICENSE.txt
@@ -0,0 +1,222 @@
+==============================================================================
+This project is under the Apache License v2.0 with LLVM Exceptions:
+==============================================================================
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
+---- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
diff --git a/vendor/rustc_apfloat/README.md b/vendor/rustc_apfloat/README.md
new file mode 100644
index 000000000..60ddd5b85
--- /dev/null
+++ b/vendor/rustc_apfloat/README.md
@@ -0,0 +1,117 @@
+# `rustc_apfloat`<br><sub>(Rust port of the C++ `llvm::APFloat` "softfloat" library)</sub>
+
+## History
+
+LLVM's `APFloat` (aka `llvm::APFloat`) software floating-point (or "softfloat")
+library was first ported to Rust (and named `rustc_apfloat`) back in 2017,
+in the Rust pull request [`rust-lang/rust#43554`](https://github.com/rust-lang/rust/pull/43554),
+as part of an effort to expand Rust compile-time capabilities without sacrificing
+determinism (and therefore soundness, if the type-system was involved).
+
+<sub>Note: while using the original C++ `llvm::APFloat` directly would've been an option,
+certain high-level API design differences made in the Rust port, without behavioral impact
+(C++ raw pointers and dynamic allocations vs Rust generics, traits and `#![no_std]`),
+made the Rust port more appealing from a determinism standpoint (mostly thanks to
+lacking all 3 of: `unsafe` code, host floating-point use, `std` access - and only
+allocating to handle the arbitrary precision needed for conversions to/from decimal),
+*even though there was a chance it had correctness issues unique to it*.</sub>
+
+However, that port had a fatal flaw: it was added to the `rust-lang/rust` repository
+without its unique licensing status (as a port of a C++ library with its own license)
+being properly tracked, communicated, taken into account, etc.
+The end result was years of limbo, mostly chronicled in the Rust issue
+[`rust-lang/rust#55993`](https://github.com/rust-lang/rust/issues/55993), in which
+the in-tree port couldn't really receive proper updates or even maintenance,
+due to its unclear status.
+
+### Revival (as `rust-lang/rustc_apfloat`)
+
+This repository (`rust-lang/rustc_apfloat`) is the result of a 2022 plan on
+[the relevant Zulip topic](https://rust-lang.zulipchat.com/#narrow/stream/231349-t-core.2Flicensing/topic/apfloat), fully put into motion during 2023:
+* the `git` history of the in-tree `compiler/rustc_apfloat` library was extracted
+ (see the separate [`rustc_apfloat-git-history-extraction`](https://github.com/LykenSol/rustc_apfloat-git-history-extraction) repository for more details)
+* only commits that were *both* necessary *and* had clear copyright status, were kept
+* any missing functionality or bug fixes, would have to either be re-contributed,
+ or rebuilt from the ground up (mostly the latter ended up being done, see below)
+
+Most changes since the original port had been aesthetic (e.g. spell-checking, `rustfmt`),
+so little was lost in the process.
+
+Starting from that much smaller "trusted" base:
+* everything could use LLVM's new (since 2019) license, "`Apache-2.0 WITH LLVM-exception`"
+ (see the ["Licensing"](#licensing) section below and/or [LICENSE-DETAILS.md](./LICENSE-DETAILS.md) for more details)
+* new facilities were built (benchmarks, and [a fuzzer comparing Rust/C++/hardware](#fuzzing))
+* excessive testing was performed (via a combination of fuzzing and bruteforce search)
+* latent bugs were discovered (e.g. LLVM issues
+[#63895](https://github.com/llvm/llvm-project/issues/63895) and
+[#63938](https://github.com/llvm/llvm-project/issues/63938))
+* the port has been forwarded in time, to include upstream (`llvm/llvm-project`) changes
+ to `llvm::APFloat` over the years (since 2017), removing the need for selective backports
+
+## Versioning
+
+As this is, for the time being, a "living port", tracking upstream (`llvm/llvm-project`)
+`llvm::APFloat` changes, the `rustc_apfloat` crate will have versions of the form:
+
+```
+0.X.Y+llvm-ZZZZZZZZZZZZ
+```
+* `X` is always bumped after semver-incompatible API changes,
+ or when updating the upstream (`llvm/llvm-project`) commit the port is based on
+* `Y` is only bumped when other parts of the version don't need to be (e.g. for bug fixes)
+* `+llvm-ZZZZZZZZZZZZ` is ["version metadata"](https://doc.rust-lang.org/cargo/reference/resolver.html#version-metadata) (which Cargo itself ignores),
+ and `ZZZZZZZZZZZZ` always holds the first 12 hexadecimal digits of
+ the upstream (`llvm/llvm-project`) `git` commit hash the port is based on
+
+
+## Testing
+
+`rustc_apfloat` contains ports of all the tests from the C++ `llvm::APFloat` code,
+in `tests/ieee.rs` and `tests/ppc.rs`.
+
+For tests specific to `rustc_apfloat` (without C++ equivalents), `tests/downstream.rs`
+is used (which mainly contains tests for now-fixed bugs, found by fuzzing).
+
+### Fuzzing
+
+As `llvm::APFloat` tests are far from comprehensive, the only option for in-depth
+comparisons between the original C++ code and the Rust port (and between them and
+hardware floating-point behavior) is to employ *fuzzing*.
+
+The fuzzing infrastructure lives in `fuzz/` and requires `cargo-afl`, but also
+involves an automated build of the original C++ `llvm::APFloat` code with `clang`
+(to be able to instrument it via LLVM, in the same way `cargo-afl` does for the
+Rust code), and has been prototyped and tested on Linux (and is unlikely to work
+on other platforms, or even some Linux distros, though it mostly assumes UNIX).
+
+Example usage:
+<sub>(**TODO**: maybe move this to `fuzz/README.md` and/or expand on it)</sub>
+
+```sh
+# Install `cargo-afl` (used below to build/run the fuzzing binary).
+cargo install afl
+
+# Build the fuzzing binary (`target/release/rustc_apfloat-fuzz`).
+cargo afl build -p rustc_apfloat-fuzz --release
+
+# Seed the inputs for a run `foo` (while not ideal, even this one minimal input works).
+mkdir fuzz/in-foo && echo > fuzz/in-foo/empty
+
+# Start the fuzzing run `foo`, which should bring up the AFL++ progress TUI
+# (see also `cargo run -p rustc_apfloat-fuzz` for extra flags available).
+cargo afl fuzz -i fuzz/in-foo -o fuzz/out-foo target/release/rustc_apfloat-fuzz
+```
+
+To visualize the fuzzing testcases, you can use the `decode` subcommand:
+```sh
+cargo run -p rustc_apfloat-fuzz decode fuzz/out-foo/default/crashes/*
+```
+(this will work even while `cargo afl fuzz`, i.e. AFL++, is running)
+
+## Licensing
+
+This project is licensed under the [Apache 2] with [LLVM exception] license.
+For a more complete discussion of this project's licensing, see [LICENSE-DETAILS.md](./LICENSE-DETAILS.md).
+
+[Apache 2]: https://spdx.org/licenses/Apache-2.0.html
+[LLVM Exception]: https://spdx.org/licenses/LLVM-exception.html
diff --git a/vendor/rustc_apfloat/benches/decimal.rs b/vendor/rustc_apfloat/benches/decimal.rs
new file mode 100644
index 000000000..26e06e91d
--- /dev/null
+++ b/vendor/rustc_apfloat/benches/decimal.rs
@@ -0,0 +1,114 @@
+//! Benchmarks for converting from/to (decimal) strings, the only operations
+//! that (may) need to allocate, and also some of the few that aren't `O(1)`
+//! (alongside e.g. div/mod, but even those likely have a better bound).
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use std::fmt::{self, Write as _};
+
+struct Sample { // One benchmark input: a decimal literal plus an optional label.
+ name: &'static str, // Label printed after " aka " by the `Display` impl; empty when unnamed.
+ decimal_str: &'static str, // Decimal text that the benchmarks parse.
+}
+
+impl fmt::Display for Sample { // Renders "[len=NN] <decimal_str>", plus " aka <name>" when a name is set.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // HACK(eddyb) this is mostly to sort criterion's output correctly.
+ write!(f, "[len={:02}] ", self.decimal_str.len())?; // Length is zero-padded to two digits.
+ f.write_str(self.decimal_str)?;
+ if !self.name.is_empty() { // Unnamed samples get no suffix.
+ write!(f, " aka {}", self.name)?;
+ }
+ Ok(())
+ }
+}
+
+impl Sample { // `const` constructors so samples can be built inside a `const` table.
+ const fn new(decimal_str: &'static str) -> Self { // Creates an unnamed sample (empty `name`).
+ Self { name: "", decimal_str }
+ }
+
+ const fn named(self, name: &'static str) -> Self { // Consumes `self` and returns it with `name` replaced.
+ Self { name, ..self } // Struct update syntax keeps `decimal_str` unchanged.
+ }
+}
+
+const DOUBLE_SAMPLES: &[Sample] = &[ // Inputs iterated by both `double_from_str` and `double_to_str`.
+ Sample::new("0.0"),
+ Sample::new("1.0"),
+ Sample::new("1234.56789"),
+ Sample::new("3.14159265358979323846264338327950288").named("π"),
+ Sample::new("0.693147180559945309417232121458176568").named("ln(2)"),
+];
+
+fn double_from_str(c: &mut Criterion) { // Benchmarks parsing each sample's decimal string into `ieee::Double`.
+ let mut group = c.benchmark_group("Double::from_str");
+ for sample in DOUBLE_SAMPLES {
+ group.bench_with_input(BenchmarkId::from_parameter(sample), sample.decimal_str, |b, s| { // The sample itself is passed as the benchmark ID parameter.
+ b.iter(|| s.parse::<rustc_apfloat::ieee::Double>().unwrap()); // Panics if a sample fails to parse.
+ });
+ }
+ group.finish();
+}
+
+/// `fmt::Write` implementation that does not need to allocate at all,
+/// but instead asserts that what's written matches a known string exactly.
+struct CheckerFmtSink<'a> {
+ remaining: &'a str, // Part of the expected output not yet matched by `write_str` calls.
+}
+
+impl fmt::Write for CheckerFmtSink<'_> {
+ fn write_str(&mut self, s: &str) -> fmt::Result { // Accepts `s` only if it is a prefix of the remaining expected text.
+ self.remaining = self.remaining.strip_prefix(s).ok_or(fmt::Error)?; // Consume the matched prefix; a mismatch becomes `fmt::Error`.
+ Ok(())
+ }
+}
+
+impl CheckerFmtSink<'_> {
+ fn finish(self) -> fmt::Result { // Final check: succeeds only when nothing of the expected text is left over.
+ if self.remaining.is_empty() {
+ Ok(())
+ } else {
+ Err(fmt::Error) // Output ended before the whole expected string was written.
+ }
+ }
+}
+
+fn double_to_str(c: &mut Criterion) { // Benchmarks formatting each sample's parsed `Double` back into text.
+ let mut group = c.benchmark_group("Double::to_str");
+ for sample in DOUBLE_SAMPLES {
+ let value = sample.decimal_str.parse::<rustc_apfloat::ieee::Double>().unwrap(); // Parsed once per sample, outside the timed closure.
+
+ // `CheckerFmtSink` is used later to ensure the formatting doesn't get
+ // optimized away, but without allocating - we can, however, allocate
+ // the expected output here, ahead of time, and also sanity-check it
+ // in a more convenient (and user-friendly) way, ensuring that benching
+ // itself never panics (though not in a way the optimizer would know of).
+ let value_to_string = &value.to_string();
+
+ // NOTE(eddyb) we only check that we get back the same floating-point
+ // `value`, without comparing `value_to_string` and `sample.decimal_str`,
+ // because `rustc_apfloat` (correctly) considers "natural precision" can
+ // be shorter than our samples, and also it always strips trailing `.0`
+ // (outside of scientific notation) - while it is possible to approximate
+ // "is this plausibly close enough", it's an irrelevant complication here.
+ assert_eq!(value_to_string.parse::<rustc_apfloat::ieee::Double>().unwrap(), value); // Round-trip check: formatted text must parse back to `value`.
+
+ group.bench_with_input(
+ BenchmarkId::from_parameter(sample),
+ &(value, value_to_string),
+ |b, &(value, sample_to_string)| { // `sample_to_string` is the precomputed `value_to_string`.
+ b.iter(|| {
+ let mut checker = CheckerFmtSink {
+ remaining: sample_to_string,
+ };
+ write!(checker, "{value}").unwrap(); // Formats straight into the checker; a mismatch with the expected text panics.
+ checker.finish().unwrap(); // Panics if less than the full expected text was written.
+ });
+ },
+ );
+ }
+ group.finish();
+}
+
+criterion_group!(benches, double_from_str, double_to_str); // Groups both benchmark functions under the name `benches`.
+criterion_main!(benches); // NOTE(review): presumably supplies `main`, since Cargo.toml sets `harness = false` for this bench — confirm against criterion docs.
diff --git a/vendor/rustc_apfloat/build.rs b/vendor/rustc_apfloat/build.rs
new file mode 100644
index 000000000..a7398a28a
--- /dev/null
+++ b/vendor/rustc_apfloat/build.rs
@@ -0,0 +1,41 @@
+// HACK(eddyb) easier dep-tracking if we let `rustc` do it.
+const SRC_LIB_RS_CONTENTS: &str = include_str!("src/lib.rs");
+
+const EXPECTED_SRC_LIB_RS_PREFIX: &str = "\
+//! Port of LLVM's APFloat software floating-point implementation from the
+//! following C++ sources (please update commit hash when backporting):
+//! https://github.com/llvm/llvm-project/commit/";
+
+fn main() {
+ // HACK(eddyb) disable the default of re-running the build script on *any*
+ // change to *the entire source tree* (i.e. the default is roughly `./`).
+ println!("cargo:rerun-if-changed=build.rs");
+
+ let llvm_commit_hash = SRC_LIB_RS_CONTENTS
+ .strip_prefix(EXPECTED_SRC_LIB_RS_PREFIX)
+ .ok_or(())
+ .map_err(|_| format!("expected `src/lib.rs` to start with:\n\n{EXPECTED_SRC_LIB_RS_PREFIX}"))
+ .and_then(|commit_hash_plus_rest_of_file| {
+ Ok(commit_hash_plus_rest_of_file
+ .split_once('\n')
+ .ok_or("expected `src/lib.rs` to have more than 3 lines")?)
+ })
+ .and_then(|(commit_hash, _)| {
+ if commit_hash.len() != 40 || !commit_hash.chars().all(|c| matches!(c, '0'..='9'|'a'..='f')) {
+ Err(format!("expected `src/lib.rs` to have a valid commit hash, found {commit_hash:?}"))
+ } else {
+ Ok(commit_hash)
+ }
+ })
+ .unwrap_or_else(|e| {
+ eprintln!("\n{e}\n");
+ panic!("failed to validate `src/lib.rs`'s commit hash (see above)")
+ });
+
+ let expected_version_metadata = format!("+llvm-{}", &llvm_commit_hash[..12]);
+ let actual_version = env!("CARGO_PKG_VERSION");
+ if !actual_version.ends_with(&expected_version_metadata) {
+ eprintln!("\nexpected version ending in `{expected_version_metadata}`, found `{actual_version}`\n");
+ panic!("failed to validate Cargo package version (see above)");
+ }
+}
diff --git a/vendor/rustc_apfloat/rustfmt.toml b/vendor/rustc_apfloat/rustfmt.toml
new file mode 100644
index 000000000..d0a6474fb
--- /dev/null
+++ b/vendor/rustc_apfloat/rustfmt.toml
@@ -0,0 +1,7 @@
+max_width = 120
+
+# HACK(eddyb) this seems to be used for tuple literals too, and tests use
+# those a lot, in a way really meant to have one (tuple) value per line
+# (also, isn't allowed to be `max_width`, or even one less, but rather
+# depends on indentation levels?! thankfully it's just a warning, though)
+fn_call_width = 120
diff --git a/vendor/rustc_apfloat/src/ieee.rs b/vendor/rustc_apfloat/src/ieee.rs
new file mode 100644
index 000000000..06f2f081a
--- /dev/null
+++ b/vendor/rustc_apfloat/src/ieee.rs
@@ -0,0 +1,3211 @@
+use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
+use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd};
+
+use core::cmp::{self, Ordering};
+use core::convert::TryFrom;
+use core::fmt::{self, Write};
+use core::marker::PhantomData;
+use core::mem;
+use core::ops::Neg;
+
+#[must_use]
+pub struct IeeeFloat<S> {
+ /// Absolute significand value (including the integer bit).
+ sig: [Limb; 1],
+
+ /// The signed unbiased exponent of the value.
+ exp: ExpInt,
+
+ /// What kind of floating point number this is.
+ //
+ // HACK(eddyb) because mutating this without accounting for `exp`/`sig`
+ // can break some subtle edge cases, it should be only read through the
+ // `.category()` method, and only set during initialization, either for
+ // one of the special value constants, or for conversion from bits.
+ read_only_category_do_not_mutate: Category,
+
+ /// Sign bit of the number.
+ //
+ // HACK(eddyb) because mutating this without accounting for `category`
+ // can break some subtle edge cases, it should be only read through the
+ // `.is_negative()` method, and only set through negation (which can be
+ // more easily used through e.g. `copy_sign`/`negate_if`/`with_sign`).
+ read_only_sign_do_not_mutate: bool,
+
+ marker: PhantomData<S>,
+}
+
+/// Fundamental unit of big integer arithmetic, but also
+/// large enough to store the largest significands by itself.
+type Limb = u128;
+const LIMB_BITS: usize = 128;
+#[inline(always)]
+fn limbs_for_bits(bits: usize) -> usize {
+ (bits + LIMB_BITS - 1) / LIMB_BITS
+}
+
+/// Growable `[Limb]` (i.e. heap-allocated and typically `Vec`/`SmallVec`/etc.),
+/// used only by algorithms that may require dynamically arbitrary precision,
+/// i.e. conversions from/to decimal strings.
+///
+/// Note: the specific type was chosen by starting with `SmallVec<[_; 1]>` and
+/// increasing the inline length as long as benchmarks were showing improvements,
+/// or at least the `Double::from_str` ones, which roughly had these behaviors:
+/// * `Vec<_>` -> `SmallVec<[_; 1]>`: ~15% speedup, but only for shorter inputs
+/// * `SmallVec<[_; 1]>` -> `SmallVec<[_; 2]>`: ~10% speedup for longer inputs
+/// * `SmallVec<[_; 2]>` -> `SmallVec<[_; 3]>`: noise and/or diminishing returns
+///
+/// Note: the choice of type described above, and the factors in its decision,
+/// are specific to `Limb` being `u128`, so if `Limb` changes, this should too.
+type DynPrecisionLimbVec = smallvec::SmallVec<[Limb; 2]>;
+
+/// Enum that represents what fraction of one LSB the truncated (lost) bits of
+/// an fp number amount to.
+///
+/// This essentially combines the roles of guard and sticky bits.
+#[must_use]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+enum Loss {
+ // Example of truncated bits:
+ ExactlyZero, // 000000
+ LessThanHalf, // 0xxxxx x's not all zero
+ ExactlyHalf, // 100000
+ MoreThanHalf, // 1xxxxx x's not all zero
+}
+
+/// How the nonfinite values Inf and NaN are represented.
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum NonfiniteBehavior {
+ /// Represents standard IEEE 754 behavior. A value is nonfinite if the
+ /// exponent field is all 1s. In such cases, a value is Inf if the
+ /// significand bits are all zero, and NaN otherwise.
+ IEEE754,
+
+ /// Only the Float8E4M3FN has this behavior. There is no Inf representation. A
+ /// value is NaN if the exponent field and the mantissa field are all 1s.
+ /// This behavior matches the FP8 E4M3 type described in
+ /// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
+ /// as non-signalling, although the paper does not state whether the NaN
+ /// values are signalling or not.
+ NanOnly,
+}
+
+// HACK(eddyb) extension method flipping/changing the sign based on `bool`s.
+trait NegExt: Neg<Output = Self> + Sized {
+ fn negate_if(self, negate: bool) -> Self {
+ if negate {
+ -self
+ } else {
+ self
+ }
+ }
+
+ fn with_sign(self, sign: bool) -> Self
+ where
+ Self: Float,
+ {
+ self.negate_if(self.is_negative() != sign)
+ }
+}
+impl<T: Neg<Output = Self>> NegExt for T {}
+
+/// Represents floating point arithmetic semantics.
+pub trait Semantics: Sized {
+ /// Total number of bits in the interchange format.
+ const BITS: usize;
+
+ /// Number of exponent bits in the interchange format.
+ const EXP_BITS: usize;
+
+ /// Number of bits in the significand. This includes the integer bit.
+ const PRECISION: usize = (Self::BITS - 1 - Self::EXP_BITS) + 1;
+
+ /// How the nonfinite values Inf and NaN are represented.
+ const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::IEEE754;
+
+ /// The largest E such that 2^E is representable; this matches the
+ /// definition of IEEE 754.
+ const MAX_EXP: ExpInt = {
+ let ieee_inf_and_nan_replaced_with_extra_normals = match Self::NONFINITE_BEHAVIOR {
+ NonfiniteBehavior::IEEE754 => false,
+ NonfiniteBehavior::NanOnly => true,
+ };
+ Self::IEEE_MAX_EXP
+ + (Self::MIN_EXP - Self::IEEE_MIN_EXP)
+ + (ieee_inf_and_nan_replaced_with_extra_normals as ExpInt)
+ };
+ const IEEE_MAX_EXP: ExpInt = -Self::IEEE_MIN_EXP + 1;
+
+ /// The smallest E such that 2^E is a normalized number; this
+ /// matches the definition of IEEE 754.
+ const MIN_EXP: ExpInt = Self::IEEE_MIN_EXP;
+ const IEEE_MIN_EXP: ExpInt = -(1 << (Self::EXP_BITS - 1)) + 2;
+
+ /// The base significand bitpattern of NaNs, i.e. the bits that must always
+ /// be set in all NaNs, with other significand bits being either used for
+ /// payload bits (if `NAN_PAYLOAD_MASK` covers them) or always unset.
+ const NAN_SIGNIFICAND_BASE: Limb = match Self::NONFINITE_BEHAVIOR {
+ NonfiniteBehavior::IEEE754 => 0,
+ NonfiniteBehavior::NanOnly => (1 << (Self::PRECISION - 1)) - 1,
+ };
+
+ /// The significand bitmask for the payload of a NaN (if supported),
+ /// including the "quiet bit" (telling QNaNs apart from SNaNs).
+ const NAN_PAYLOAD_MASK: Limb = match Self::NONFINITE_BEHAVIOR {
+ NonfiniteBehavior::IEEE754 => (1 << (Self::PRECISION - 1)) - 1,
+ NonfiniteBehavior::NanOnly => 0,
+ };
+
+ /// The significand bitpattern to mark a NaN as quiet (if supported).
+ ///
+ /// NOTE: for X87DoubleExtended we need to set two bits instead of one.
+ ///
+ /// NOTE: all NaNs are quiet if unsupported (see `NonfiniteBehavior::NanOnly`).
+ const QNAN_SIGNIFICAND: Limb = match Self::NONFINITE_BEHAVIOR {
+ NonfiniteBehavior::IEEE754 => 1 << (Self::PRECISION - 2),
+ NonfiniteBehavior::NanOnly => 0,
+ };
+
+ fn from_bits(bits: u128) -> IeeeFloat<Self> {
+ assert!(Self::BITS > Self::PRECISION);
+
+ let sign = bits & (1 << (Self::BITS - 1));
+ let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1);
+ let mut r = IeeeFloat {
+ sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
+ // Convert the exponent from its bias representation to a signed integer.
+ exp: (exponent as ExpInt) + (Self::MIN_EXP - 1),
+ read_only_category_do_not_mutate: Category::Zero,
+ read_only_sign_do_not_mutate: sign != 0,
+ marker: PhantomData,
+ };
+
+ // NOTE(eddyb) unlike the original C++ code, this doesn't check for
+ // specific exponent/significand combinations, but instead relies on
+ // being able to construct known-good special values to compare to.
+ let try_category_from_special = |mut special: IeeeFloat<Self>| {
+ special = special.copy_sign(r);
+
+ // Ignore NaN payload to avoid needing a separate NaN check.
+ let sig_mask = if special.is_nan() { !Self::NAN_PAYLOAD_MASK } else { !0 };
+
+ if special.is_negative() == r.is_negative()
+ && special.exp == r.exp
+ && special.sig[0] & sig_mask == r.sig[0] & sig_mask
+ {
+ Some(special.category())
+ } else {
+ None
+ }
+ };
+
+ // NOTE(eddyb) the order here matters, i.e. `NAN` needs to be last, as
+ // its relaxed check (see above) overlaps `INFINITY`, for IEEE NaNs.
+ let specials = [
+ IeeeFloat::<Self>::ZERO,
+ IeeeFloat::<Self>::INFINITY,
+ IeeeFloat::<Self>::NAN,
+ ];
+
+ let category = specials
+ .into_iter()
+ .find_map(try_category_from_special)
+ .unwrap_or_else(|| {
+ if r.exp == Self::MIN_EXP - 1 {
+ // Denormal.
+ r.exp = Self::MIN_EXP;
+ } else {
+ // Set integer bit.
+ sig::set_bit(&mut r.sig, Self::PRECISION - 1);
+ }
+ Category::Normal
+ });
+
+ r.read_only_category_do_not_mutate = category;
+
+ r
+ }
+
+ fn to_bits(x: IeeeFloat<Self>) -> u128 {
+ assert!(Self::BITS > Self::PRECISION);
+
+ // Split integer bit from significand.
+ let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
+ let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1);
+ let mut exponent = x.exp;
+ match x.category() {
+ Category::Normal => {
+ if exponent == Self::MIN_EXP && !integer_bit {
+ // Denormal.
+ exponent -= 1;
+ }
+ }
+ Category::Zero => {
+ // FIXME(eddyb) Maybe we should guarantee an invariant instead?
+ IeeeFloat::<Self> {
+ sig: [significand],
+ exp: exponent,
+ ..
+ } = Float::ZERO;
+ }
+ Category::Infinity => {
+ // FIXME(eddyb) Maybe we should guarantee an invariant instead?
+ IeeeFloat::<Self> {
+ sig: [significand],
+ exp: exponent,
+ ..
+ } = Float::INFINITY;
+ }
+ Category::NaN => {
+ IeeeFloat::<Self> { exp: exponent, .. } = Float::NAN;
+ }
+ }
+
+ // Convert the exponent from a signed integer to its bias representation.
+ let exponent = (exponent - (Self::MIN_EXP - 1)) as u128;
+
+ ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand
+ }
+}
+
+impl<S> Copy for IeeeFloat<S> {}
+impl<S> Clone for IeeeFloat<S> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+macro_rules! ieee_semantics {
+ ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt) $({ $($extra:tt)* })?),* $(,)?) => {
+ $(pub struct $sem;)*
+ $(pub type $name = IeeeFloat<$sem>;)*
+ $(impl Semantics for $sem {
+ const BITS: usize = $bits;
+ const EXP_BITS: usize = $exp_bits;
+
+ $($($extra)*)?
+ })*
+ }
+}
+
+ieee_semantics! {
+ Half = HalfS(16:5),
+ Single = SingleS(32:8),
+ Double = DoubleS(64:11),
+ Quad = QuadS(128:15),
+
+ // Non-standard IEEE-like semantics:
+
+ // FIXME(eddyb) document this as "Brain Float 16" (C++ didn't have docs).
+ BFloat = BFloatS(16:8),
+
+ // 8-bit floating point number following IEEE-754 conventions with bit
+ // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
+ Float8E5M2 = Float8E5M2S(8:5),
+
+ // 8-bit floating point number mostly following IEEE-754 conventions with
+ // bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
+ // Unlike IEEE-754 types, there are no infinity values, and NaN is
+ // represented with the exponent and mantissa bits set to all 1s.
+ Float8E4M3FN = Float8E4M3FNS(8:4) {
+ const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly;
+ },
+}
+
+// FIXME(eddyb) consider moving X87-specific logic to a "has explicit integer bit"
+// associated `const` on `Semantics` itself.
+pub struct X87DoubleExtendedS;
+pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>;
+impl Semantics for X87DoubleExtendedS {
+ const BITS: usize = 80;
+ const EXP_BITS: usize = 15;
+
+ // HACK(eddyb) overriding `PRECISION` because its default is incorrect.
+ // FIXME(eddyb) get rid of this by allowing `Semantics` to generically have
+ // the concept of "explicitly included integer bit", which is the main way
+ // in which the 80-bit X87 format differs from standard IEEE encodings.
+ const PRECISION: usize = 64;
+
+ /// For x87 extended precision, we want to make a NaN, not a
+ /// pseudo-NaN. Maybe we should expose the ability to make
+ /// pseudo-NaNs?
+ const QNAN_SIGNIFICAND: Limb = 0b11 << (Self::PRECISION - 2);
+
+ /// Integer bit is explicit in this format. Intel hardware (387 and later)
+ /// does not support these bit patterns:
+ /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
+ /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
+ /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
+ /// exponent = 0, integer bit 1 ("pseudodenormal")
+ /// At the moment, the first three are treated as NaNs, the last one as Normal.
+ #[inline]
+ fn from_bits(bits: u128) -> IeeeFloat<Self> {
+ let sign = bits & (1 << (Self::BITS - 1));
+ let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1);
+ let mut r = IeeeFloat {
+ sig: [bits & ((1 << Self::PRECISION) - 1)],
+ // Convert the exponent from its bias representation to a signed integer.
+ exp: (exponent as ExpInt) + (Self::MIN_EXP - 1),
+ read_only_category_do_not_mutate: Category::Zero,
+ read_only_sign_do_not_mutate: sign != 0,
+ marker: PhantomData,
+ };
+
+ let integer_bit = r.sig[0] >> (Self::PRECISION - 1);
+
+ let category = if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
+ Category::Zero
+ } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] {
+ Category::Infinity
+ } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)]
+ || r.exp != Self::MAX_EXP + 1 && r.exp != Self::MIN_EXP - 1 && integer_bit == 0
+ {
+ r.exp = Self::MAX_EXP + 1;
+ Category::NaN
+ } else {
+ if r.exp == Self::MIN_EXP - 1 {
+ // Denormal.
+ r.exp = Self::MIN_EXP;
+ }
+ Category::Normal
+ };
+
+ r.read_only_category_do_not_mutate = category;
+
+ r
+ }
+
+ #[inline]
+ fn to_bits(x: IeeeFloat<Self>) -> u128 {
+ // Get integer bit from significand.
+ let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
+ let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1);
+ let exponent = match x.category() {
+ Category::Normal => {
+ if x.exp == Self::MIN_EXP && !integer_bit {
+ // Denormal.
+ Self::MIN_EXP - 1
+ } else {
+ x.exp
+ }
+ }
+ Category::Zero => {
+ // FIXME(eddyb) Maybe we should guarantee an invariant instead?
+ significand = 0;
+ Self::MIN_EXP - 1
+ }
+ Category::Infinity => {
+ // FIXME(eddyb) Maybe we should guarantee an invariant instead?
+ significand = 1 << (Self::PRECISION - 1);
+ Self::MAX_EXP + 1
+ }
+ Category::NaN => Self::MAX_EXP + 1,
+ };
+
+ // Convert the exponent from a signed integer to its bias representation.
+ let exponent = (exponent - (Self::MIN_EXP - 1)) as u128;
+
+ ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand
+ }
+}
+
+float_common_impls!(IeeeFloat<S>);
+
+impl<S: Semantics> PartialEq for IeeeFloat<S> {
+ fn eq(&self, rhs: &Self) -> bool {
+ self.partial_cmp(rhs) == Some(Ordering::Equal)
+ }
+}
+
+impl<S: Semantics> PartialOrd for IeeeFloat<S> {
+ fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
+ match (self.category(), rhs.category()) {
+ (Category::NaN, _) | (_, Category::NaN) => None,
+
+ (Category::Infinity, Category::Infinity) => Some((!self.is_negative()).cmp(&(!rhs.is_negative()))),
+
+ (Category::Zero, Category::Zero) => Some(Ordering::Equal),
+
+ (Category::Infinity, _) | (Category::Normal, Category::Zero) => {
+ Some((!self.is_negative()).cmp(&self.is_negative()))
+ }
+
+ (_, Category::Infinity) | (Category::Zero, Category::Normal) => {
+ Some(rhs.is_negative().cmp(&(!rhs.is_negative())))
+ }
+
+ (Category::Normal, Category::Normal) => {
+ // Two normal numbers. Do they have the same sign?
+ Some((!self.is_negative()).cmp(&(!rhs.is_negative())).then_with(|| {
+ // Compare absolute values; invert result if negative.
+ let result = self.cmp_abs_normal(*rhs);
+
+ if self.is_negative() {
+ result.reverse()
+ } else {
+ result
+ }
+ }))
+ }
+ }
+ }
+}
+
+impl<S: Semantics> Neg for IeeeFloat<S> {
+ type Output = Self;
+ fn neg(mut self) -> Self {
+ self.read_only_sign_do_not_mutate = !self.is_negative();
+ self
+ }
+}
+
+/// Prints this value as a decimal string.
+///
+/// The formatter's precision (e.g. `{:.5}`) is the maximum number of digits
+/// of precision to output. If there are fewer digits available,
+/// zero padding will not be used unless the value is
+/// integral and small enough to be expressed in
+/// precision digits. If no precision is given, the natural
+/// precision of the number is used.
+/// The formatter's width (e.g. `{:2}`) is the maximum number of zeros to
+/// consider inserting before falling back to scientific notation
+/// (3 if no width is given). 0 means to always use scientific notation.
+///
+/// The formatter's alternate flag (`{:#}`) makes scientific notation pad the
+/// fraction part with zeros up to the precision instead of truncating them,
+/// producing output more similar to default printf behavior.
+/// Specifically the lower e is used as exponent delimiter and exponent
+/// always contains no less than two digits.
+///
+/// Number precision width Result
+/// ------ --------- ----- ------
+/// 1.01E+4 5 2 10100
+/// 1.01E+4 4 2 1.01E+4
+/// 1.01E+4 5 1 1.01E+4
+/// 1.01E-2 5 2 0.0101
+/// 1.01E-2 4 2 0.0101
+/// 1.01E-2 4 1 1.01E-2
+impl<S: Semantics> fmt::Display for IeeeFloat<S> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let width = f.width().unwrap_or(3);
+ let alternate = f.alternate();
+
+ match self.category() {
+ Category::Infinity => {
+ if self.is_negative() {
+ return f.write_str("-Inf");
+ } else {
+ return f.write_str("+Inf");
+ }
+ }
+
+ Category::NaN => return f.write_str("NaN"),
+
+ Category::Zero => {
+ if self.is_negative() {
+ f.write_char('-')?;
+ }
+
+ if width == 0 {
+ if alternate {
+ f.write_str("0.0")?;
+ if let Some(n) = f.precision() {
+ for _ in 1..n {
+ f.write_char('0')?;
+ }
+ }
+ f.write_str("e+00")?;
+ } else {
+ f.write_str("0.0E+0")?;
+ }
+ } else {
+ f.write_char('0')?;
+ }
+ return Ok(());
+ }
+
+ Category::Normal => {}
+ }
+
+ if self.is_negative() {
+ f.write_char('-')?;
+ }
+
+ // We use enough digits so the number can be round-tripped back to an
+ // APFloat. The formula comes from "How to Print Floating-Point Numbers
+ // Accurately" by Steele and White.
+ // FIXME: Using a formula based purely on the precision is conservative;
+ // we can print fewer digits depending on the actual value being printed.
+
+ // precision = 2 + floor(S::PRECISION / lg_2(10))
+ let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196);
+
+ // Decompose the number into an APInt and an exponent.
+ let mut exp = self.exp - (S::PRECISION as ExpInt - 1);
+ let mut sig: DynPrecisionLimbVec = [self.sig[0]].into_iter().collect();
+
+ // Ignore trailing binary zeros.
+ let trailing_zeros = sig[0].trailing_zeros();
+ let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize);
+
+ // Change the exponent from 2^e to 10^e.
+ if exp == 0 {
+ // Nothing to do.
+ } else if exp > 0 {
+ // Just shift left.
+ let shift = exp as usize;
+ sig.resize(limbs_for_bits(S::PRECISION + shift), 0);
+ sig::shift_left(&mut sig, &mut exp, shift);
+ } else {
+ // exp < 0
+ let mut texp = -exp as usize;
+
+ // We transform this using the identity:
+ // (N)(2^-e) == (N)(5^e)(10^-e)
+
+ // Multiply significand by 5^e.
+ // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
+ let mut sig_scratch = DynPrecisionLimbVec::new();
+ let mut p5 = DynPrecisionLimbVec::new();
+ let mut p5_scratch = DynPrecisionLimbVec::new();
+ while texp != 0 {
+ if p5.is_empty() {
+ p5.push(5);
+ } else {
+ p5_scratch.resize(p5.len() * 2, 0);
+ let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
+ while p5_scratch.last() == Some(&0) {
+ p5_scratch.pop();
+ }
+ mem::swap(&mut p5, &mut p5_scratch);
+ }
+ if texp & 1 != 0 {
+ sig_scratch.resize(sig.len() + p5.len(), 0);
+ let _: Loss = sig::mul(&mut sig_scratch, &mut 0, &sig, &p5, (sig.len() + p5.len()) * LIMB_BITS);
+ while sig_scratch.last() == Some(&0) {
+ sig_scratch.pop();
+ }
+ mem::swap(&mut sig, &mut sig_scratch);
+ }
+ texp >>= 1;
+ }
+ }
+
+ // Fill the buffer.
+ let mut buffer = smallvec::SmallVec::<[u8; 64]>::new();
+
+ // Ignore digits from the significand until it is no more
+ // precise than is required for the desired precision.
+ // 196/59 is a very slight overestimate of lg_2(10).
+ let required = (precision * 196 + 58) / 59;
+ let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196;
+ let mut in_trail = true;
+ while !sig.is_empty() {
+ // Perform short division by 10 to extract the rightmost digit.
+ // rem <- sig % 10
+ // sig <- sig / 10
+ let mut rem = 0;
+
+ // Use 64-bit division and remainder, with 32-bit chunks from sig.
+ sig::each_chunk(&mut sig, 32, |chunk| {
+ let chunk = chunk as u32;
+ let combined = ((rem as u64) << 32) | (chunk as u64);
+ rem = (combined % 10) as u8;
+ (combined / 10) as u32 as Limb
+ });
+
+ // Reduce the significand to avoid wasting time dividing 0's.
+ while sig.last() == Some(&0) {
+ sig.pop();
+ }
+
+ let digit = rem;
+
+ // Ignore digits we don't need.
+ if discard_digits > 0 {
+ discard_digits -= 1;
+ exp += 1;
+ continue;
+ }
+
+ // Drop trailing zeros.
+ if in_trail && digit == 0 {
+ exp += 1;
+ } else {
+ in_trail = false;
+ buffer.push(b'0' + digit);
+ }
+ }
+
+ assert!(!buffer.is_empty(), "no characters in buffer!");
+
+ // Drop down to precision.
+ // FIXME: don't do more precise calculations above than are required.
+ if buffer.len() > precision {
+ // The most significant figures are the last ones in the buffer.
+ let mut first_sig = buffer.len() - precision;
+
+ // Round.
+ // FIXME: this probably shouldn't use 'round half up'.
+
+ // Rounding down is just a truncation, except we also want to drop
+ // trailing zeros from the new result.
+ if buffer[first_sig - 1] < b'5' {
+ while first_sig < buffer.len() && buffer[first_sig] == b'0' {
+ first_sig += 1;
+ }
+ } else {
+ // Rounding up requires a decimal add-with-carry. If we continue
+ // the carry, the newly-introduced zeros will just be truncated.
+ for x in &mut buffer[first_sig..] {
+ if *x == b'9' {
+ first_sig += 1;
+ } else {
+ *x += 1;
+ break;
+ }
+ }
+ }
+
+ exp += first_sig as ExpInt;
+ buffer.drain(..first_sig);
+
+ // If we carried through, we have exactly one digit of precision.
+ if buffer.is_empty() {
+ buffer.push(b'1');
+ }
+ }
+
+ let digits = buffer.len();
+
+ // Check whether we should use scientific notation.
+ let scientific = if width == 0 {
+ true
+ } else {
+ if exp >= 0 {
+ // 765e3 --> 765000
+ // ^^^
+ // But we shouldn't make the number look more precise than it is.
+ exp as usize > width || digits + exp as usize > precision
+ } else {
+ // Power of the most significant digit.
+ let msd = exp + (digits - 1) as ExpInt;
+ if msd >= 0 {
+ // 765e-2 == 7.65
+ false
+ } else {
+ // 765e-5 == 0.00765
+ // ^ ^^
+ -msd as usize > width
+ }
+ }
+ };
+
+ // Scientific formatting is pretty straightforward.
+ if scientific {
+ exp += digits as ExpInt - 1;
+
+ f.write_char(buffer[digits - 1] as char)?;
+ f.write_char('.')?;
+ let truncate_zero = !alternate;
+ if digits == 1 && truncate_zero {
+ f.write_char('0')?;
+ } else {
+ for &d in buffer[..digits - 1].iter().rev() {
+ f.write_char(d as char)?;
+ }
+ }
+ // Fill with zeros up to precision.
+ if !truncate_zero && precision > digits - 1 {
+ for _ in 0..precision - digits + 1 {
+ f.write_char('0')?;
+ }
+ }
+ // For alternate we use lower 'e'.
+ f.write_char(if alternate { 'e' } else { 'E' })?;
+
+ // Exponent always at least two digits if we do not truncate zeros.
+ if truncate_zero {
+ write!(f, "{:+}", exp)?;
+ } else {
+ write!(f, "{:+03}", exp)?;
+ }
+
+ return Ok(());
+ }
+
+ // Non-scientific, positive exponents.
+ if exp >= 0 {
+ for &d in buffer.iter().rev() {
+ f.write_char(d as char)?;
+ }
+ for _ in 0..exp {
+ f.write_char('0')?;
+ }
+ return Ok(());
+ }
+
+ // Non-scientific, negative exponents.
+ let unit_place = -exp as usize;
+ if unit_place < digits {
+ for &d in buffer[unit_place..].iter().rev() {
+ f.write_char(d as char)?;
+ }
+ f.write_char('.')?;
+ for &d in buffer[..unit_place].iter().rev() {
+ f.write_char(d as char)?;
+ }
+ } else {
+ f.write_str("0.")?;
+ for _ in digits..unit_place {
+ f.write_char('0')?;
+ }
+ for &d in buffer.iter().rev() {
+ f.write_char(d as char)?;
+ }
+ }
+
+ Ok(())
+ }
+}
+
+impl<S: Semantics> fmt::Debug for IeeeFloat<S> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(
+ f,
+ "{}({:?} | {}{:?} * 2^{})",
+ self,
+ self.category(),
+ if self.is_negative() { "-" } else { "+" },
+ self.sig,
+ self.exp
+ )
+ }
+}
+
+// HACK(eddyb) this logic is duplicated throughout the original C++ code,
+// but it's a bit too long to keep repeating in the Rust port for all ops.
+// FIXME(eddyb) find a better name/organization for all of this functionality
+// (`IeeeDefaultExceptionHandling` doesn't have a counterpart in the C++ code).
+struct IeeeDefaultExceptionHandling;
+impl IeeeDefaultExceptionHandling {
+ fn result_from_nan<S: Semantics>(mut r: IeeeFloat<S>) -> StatusAnd<IeeeFloat<S>> {
+ assert!(r.is_nan());
+
+ let status = if r.is_signaling() {
+ // [IEEE Std 754-2008 6.2]:
+ // Under default exception handling, any operation signaling an invalid
+ // operation exception and for which a floating-point result is to be
+ // delivered shall deliver a quiet NaN.
+ let [sig] = &mut r.sig;
+ *sig |= if S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND {
+ // HACK(eddyb) remain bug-compatible with the original C++ code
+ // which doesn't appear to attempt avoiding creating pseudo-NaNs
+ // (see https://github.com/llvm/llvm-project/issues/63938).
+ S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK
+ } else {
+ S::QNAN_SIGNIFICAND
+ };
+
+ // [IEEE Std 754-2008 6.2]:
+ // Signaling NaNs shall be reserved operands that, under default exception
+ // handling, signal the invalid operation exception(see 7.2) for every
+ // general-computational and signaling-computational operation except for
+ // the conversions described in 5.12.
+ Status::INVALID_OP
+ } else {
+ // [IEEE Std 754-2008 6.2]:
+ // For an operation with quiet NaN inputs, other than maximum and minimum
+ // operations, if a floating-point result is to be delivered the result
+ // shall be a quiet NaN which should be one of the input NaNs.
+ // ...
+ // Every general-computational and quiet-computational operation involving
+ // one or more input NaNs, none of them signaling, shall signal no
+ // exception, except fusedMultiplyAdd might signal the invalid operation
+ // exception(see 7.2).
+ Status::OK
+ };
+ status.and(r)
+ }
+
+ fn binop_result_from_either_nan<S: Semantics>(a: IeeeFloat<S>, b: IeeeFloat<S>) -> StatusAnd<IeeeFloat<S>> {
+ let r = match (a.category(), b.category()) {
+ (Category::NaN, _) => a,
+ (_, Category::NaN) => b,
+ _ => unreachable!(),
+ };
+ let mut status_and_r = Self::result_from_nan(r);
+ if b.is_signaling() {
+ status_and_r.status |= Status::INVALID_OP;
+ }
+ status_and_r
+ }
+}
+
+impl<S: Semantics> IeeeFloat<S> {
+ // HACK(eddyb) allow `Self::qnan` to be used from `IeeeFloat::NAN`.
+ // FIXME(eddyb) move back to the trait impl when that can be `const fn`.
+ const fn qnan(payload: Option<u128>) -> Self {
+ let sig = [S::NAN_SIGNIFICAND_BASE
+ | S::QNAN_SIGNIFICAND
+ | match payload {
+ // Zero out the excess bits of the significand.
+ Some(payload) => payload & S::NAN_PAYLOAD_MASK,
+ None => 0,
+ }];
+
+ let exp = match S::NONFINITE_BEHAVIOR {
+ NonfiniteBehavior::IEEE754 => S::MAX_EXP + 1,
+ NonfiniteBehavior::NanOnly => S::MAX_EXP,
+ };
+
+ IeeeFloat {
+ sig,
+ exp,
+ read_only_category_do_not_mutate: Category::NaN,
+ read_only_sign_do_not_mutate: false,
+ marker: PhantomData,
+ }
+ }
+}
+
+impl<S: Semantics> Float for IeeeFloat<S> {
+ const BITS: usize = S::BITS;
+ const PRECISION: usize = S::PRECISION;
+ const MAX_EXP: ExpInt = S::MAX_EXP;
+ const MIN_EXP: ExpInt = S::MIN_EXP;
+
+ const ZERO: Self = IeeeFloat {
+ sig: [0],
+ exp: S::MIN_EXP - 1,
+ read_only_category_do_not_mutate: Category::Zero,
+ read_only_sign_do_not_mutate: false,
+ marker: PhantomData,
+ };
+
+ const INFINITY: Self = match S::NONFINITE_BEHAVIOR {
+ NonfiniteBehavior::IEEE754 => IeeeFloat {
+ sig: [0],
+ exp: S::MAX_EXP + 1,
+ read_only_category_do_not_mutate: Category::Infinity,
+ read_only_sign_do_not_mutate: false,
+ marker: PhantomData,
+ },
+
+ // There is no Inf, so make NaN instead.
+ NonfiniteBehavior::NanOnly => Self::NAN,
+ };
+
+ const NAN: Self = Self::qnan(None);
+
+ fn qnan(payload: Option<u128>) -> Self {
+ // NOTE(eddyb) this is not a recursive self-call, but rather it calls
+ // the `const fn` inherent method (see above).
+ Self::qnan(payload)
+ }
+
+ fn snan(payload: Option<u128>) -> Self {
+ let mut snan = Self::qnan(payload);
+
+ let [sig] = &mut snan.sig;
+
+ // We always have to clear the QNaN bit to make it an SNaN.
+ *sig &= !(S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK);
+
+ // If there are no bits set in the payload, we have to set
+ // *something* to make it a NaN instead of an infinity;
+ // conventionally, this is the next bit down from the QNaN bit.
+ if *sig & S::NAN_PAYLOAD_MASK == 0 {
+ *sig |= (S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK) >> 1;
+ }
+
+ snan
+ }
+
+ fn largest() -> Self {
+ // We want (in interchange format):
+ // exponent = 1..10
+ // significand = 1..1
+ IeeeFloat {
+ sig: [((1 << S::PRECISION) - 1)
+ & match S::NONFINITE_BEHAVIOR {
+ // The largest number by magnitude in our format will be the floating point
+ // number with maximum exponent and with significand that is all ones.
+ NonfiniteBehavior::IEEE754 => !0,
+
+ // The largest number by magnitude in our format will be the floating point
+ // number with maximum exponent and with significand that is all ones except
+ // the LSB.
+ NonfiniteBehavior::NanOnly => !1,
+ }],
+ exp: S::MAX_EXP,
+ read_only_category_do_not_mutate: Category::Normal,
+ read_only_sign_do_not_mutate: false,
+ marker: PhantomData,
+ }
+ }
+
+ // We want (in interchange format):
+ // exponent = 0..0
+ // significand = 0..01
+ const SMALLEST: Self = IeeeFloat {
+ sig: [1],
+ exp: S::MIN_EXP,
+ read_only_category_do_not_mutate: Category::Normal,
+ read_only_sign_do_not_mutate: false,
+ marker: PhantomData,
+ };
+
+ fn smallest_normalized() -> Self {
+     // Target encoding (in interchange format):
+     //   exponent    = 0..0
+     //   significand = 10..0
+     // i.e. only the integer bit is set, at the minimum exponent.
+     let integer_bit = 1 << (S::PRECISION - 1);
+
+     IeeeFloat {
+         exp: S::MIN_EXP,
+         sig: [integer_bit],
+         read_only_sign_do_not_mutate: false,
+         read_only_category_do_not_mutate: Category::Normal,
+         marker: PhantomData,
+     }
+ }
+
+ fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { // Computes `self + rhs`, rounding with `round`.
+ let status = match (self.category(), rhs.category()) {
+ (Category::NaN, _) | (_, Category::NaN) => {
+ return IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs);
+ }
+
+ (Category::Infinity, Category::Infinity) => {
+ // Differently signed infinities can only be validly
+ // subtracted.
+ if self.is_negative() != rhs.is_negative() {
+ self = Self::NAN;
+ Status::INVALID_OP
+ } else {
+ Status::OK
+ }
+ }
+
+ // `self` is the result as-is. Sign may depend on rounding mode; handled below.
+ (_, Category::Zero) | (Category::Infinity, Category::Normal) => Status::OK,
+
+ (Category::Zero, _) | (_, Category::Infinity) => {
+ self = rhs; // the result is `rhs` unchanged
+ Status::OK
+ }
+
+ (Category::Normal, Category::Normal) => {
+ let mut sign = self.is_negative();
+ let loss = sig::add_or_sub( // updates `self.sig`, `self.exp` and `sign` in place
+ &mut self.sig,
+ &mut self.exp,
+ &mut sign,
+ &mut [rhs.sig[0]],
+ rhs.exp,
+ rhs.is_negative(),
+ );
+ self = self.with_sign(sign);
+
+ let status;
+ self = unpack!(status=, self.normalize(round, loss));
+
+ // Can only be zero if we lost no fraction.
+ assert!(self.category() != Category::Zero || loss == Loss::ExactlyZero);
+
+ status
+ }
+ };
+
+ // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ // positive zero unless rounding to minus infinity, except that
+ // adding two like-signed zeroes gives that zero.
+ if self.category() == Category::Zero
+ && (rhs.category() != Category::Zero || self.is_negative() != rhs.is_negative())
+ {
+ self = self.with_sign(round == Round::TowardNegative); // negative zero only when rounding toward negative
+ }
+
+ status.and(self)
+ }
+
+ // NOTE(eddyb) the `sub_r` default implementation can't be used here:
+ // NaN handling must see the original `rhs` (i.e. before it is negated).
+ fn sub_r(self, rhs: Self, round: Round) -> StatusAnd<Self> {
+     let either_is_nan = self.category() == Category::NaN || rhs.category() == Category::NaN;
+     if either_is_nan {
+         return IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs);
+     }
+
+     // Everything else is plain addition of the negated right-hand side.
+     self.add_r(-rhs, round)
+ }
+
+ fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { // Computes `self * rhs`, rounding with `round`.
+ self = self.negate_if(rhs.is_negative()); // `self` now carries the sign of the product
+
+ match (self.category(), rhs.category()) {
+ (Category::NaN, _) | (_, Category::NaN) => {
+ self = self.negate_if(rhs.is_negative()); // restore the original sign
+
+ IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
+ }
+
+ (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => {
+ Status::INVALID_OP.and(Self::NAN)
+ }
+
+ (Category::Infinity, _) | (_, Category::Infinity) => Status::OK.and(Self::INFINITY.copy_sign(self)),
+
+ (Category::Zero, _) | (_, Category::Zero) => Status::OK.and(Self::ZERO.copy_sign(self)),
+
+ (Category::Normal, Category::Normal) => {
+ self.exp += rhs.exp;
+ let mut wide_sig = [0; 2]; // two limbs receive the product
+ let loss = sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION);
+ self.sig = [wide_sig[0]]; // only the low limb is kept
+ let mut status;
+ self = unpack!(status=, self.normalize(round, loss));
+ if loss != Loss::ExactlyZero {
+ status |= Status::INEXACT;
+ }
+ status.and(self)
+ }
+ }
+ }
+
+ fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { // Computes `self * multiplicand + addend`.
+ // If and only if all arguments are normal do we need to do an
+ // extended-precision calculation.
+ if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() {
+ let mut status;
+ if self.is_finite_non_zero() && multiplicand.is_finite_non_zero() {
+ // HACK(eddyb) this corresponds to the case where the C++ code
+ // (`multiplySpecials`) is effectively a noop: no multiplication
+ // is actually performed, because we're really only here to handle
+ // the "infinite/NaN `addend`" special-case, which needs to ignore
+ // the "finite * finite" multiplication entirely, instead of letting
+ // it e.g. overflow into infinity (and trample over `addend`).
+ assert!(!addend.is_finite());
+ status = Status::OK;
+ } else {
+ self = unpack!(status=, self.mul_r(multiplicand, round));
+ }
+
+ // `status` can only be Status::OK or Status::INVALID_OP. There is no more work
+ // to do in the latter case. The IEEE-754R standard says it is
+ // implementation-defined in this case whether, if ADDEND is a
+ // quiet NaN, we raise invalid op; this implementation does so.
+ //
+ // If we need to do the addition we can do so with normal
+ // precision.
+ if status == Status::OK {
+ self = unpack!(status=, self.add_r(addend, round));
+ }
+ return status.and(self);
+ }
+
+ // Post-multiplication sign, before addition.
+ self = self.negate_if(multiplicand.is_negative());
+
+ // Allocate space for twice as many bits as the original significand, plus one
+ // extra bit for the addition to overflow into.
+ assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2);
+ let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]);
+
+ let mut loss = Loss::ExactlyZero;
+ let mut omsb = sig::omsb(&wide_sig);
+ self.exp += multiplicand.exp;
+
+ // Assume the operands involved in the multiplication are single-precision
+ // FP, and the two multiplicands are:
+ // lhs = a23 . a22 ... a0 * 2^e1
+ // rhs = b23 . b22 ... b0 * 2^e2
+ // the result of multiplication is:
+ // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
+ // Note that there are three significant bits at the left-hand side of the
+ // radix point: two for the multiplication, and an overflow bit for the
+ // addition (that will always be zero at this point). Move the radix point
+ // toward left by two bits, and adjust exponent accordingly.
+ self.exp += 2;
+
+ if addend.is_non_zero() {
+ // Normalize our MSB to one below the top bit to allow for overflow.
+ let ext_precision = 2 * S::PRECISION + 1;
+ if omsb != ext_precision - 1 {
+ assert!(ext_precision > omsb);
+ sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb);
+ }
+
+ // The intermediate result of the multiplication has "2 * S::PRECISION"
+ // significant bits; adjust the addend to be consistent with mul result.
+ let mut ext_addend_sig = [addend.sig[0], 0];
+
+ // Extend the addend significand to ext_precision - 1. This guarantees
+ // that the high bit of the significand is zero (same as wide_sig),
+ // so the addition will overflow (if it does overflow at all) into the top bit.
+ sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION);
+
+ let mut sign = self.is_negative();
+ loss = sig::add_or_sub(
+ &mut wide_sig,
+ &mut self.exp,
+ &mut sign,
+ &mut ext_addend_sig,
+ addend.exp + 1,
+ addend.is_negative(),
+ );
+ self = self.with_sign(sign);
+
+ omsb = sig::omsb(&wide_sig);
+ }
+
+ // Convert the result having "2 * S::PRECISION" significant-bits back to the one
+ // having "S::PRECISION" significant-bits. First, move the radix point from
+ // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent needs to be
+ // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
+ self.exp -= S::PRECISION as ExpInt + 1; // NOTE(review): subtracts PRECISION + 1, one more than derived above; presumably tied to the `+= 2` earlier — confirm
+
+ // In case MSB resides at the left-hand side of radix point, shift the
+ // mantissa right by some amount to make sure the MSB reside right before
+ // the radix point (i.e. "MSB . rest-significant-bits").
+ if omsb > S::PRECISION {
+ let bits = omsb - S::PRECISION;
+ loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss);
+ }
+
+ self.sig[0] = wide_sig[0]; // keep only the low limb
+
+ let mut status;
+ self = unpack!(status=, self.normalize(round, loss));
+ if loss != Loss::ExactlyZero {
+ status |= Status::INEXACT;
+ }
+
+ // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ // positive zero unless rounding to minus infinity, except that
+ // adding two like-signed zeroes gives that zero.
+ if self.category() == Category::Zero
+ && !status.intersects(Status::UNDERFLOW)
+ && self.is_negative() != addend.is_negative()
+ {
+ self = self.with_sign(round == Round::TowardNegative);
+ }
+
+ status.and(self)
+ }
+
+ fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { // Computes `self / rhs`, rounding with `round`.
+ self = self.negate_if(rhs.is_negative()); // `self` now carries the sign of the quotient
+
+ match (self.category(), rhs.category()) {
+ (Category::NaN, _) | (_, Category::NaN) => {
+ self = self.negate_if(rhs.is_negative()); // restore the original sign
+
+ IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
+ }
+
+ (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => {
+ Status::INVALID_OP.and(Self::NAN)
+ }
+
+ (Category::Infinity | Category::Zero, _) => Status::OK.and(self), // inf / x and 0 / x keep the dividend's category
+
+ (_, Category::Infinity) => Status::OK.and(Self::ZERO.copy_sign(self)),
+
+ (_, Category::Zero) => Status::DIV_BY_ZERO.and(Self::INFINITY.copy_sign(self)),
+
+ (Category::Normal, Category::Normal) => {
+ self.exp -= rhs.exp;
+ let dividend = self.sig[0]; // copied out because `self.sig` receives the quotient
+ let loss = sig::div(&mut self.sig, &mut self.exp, &mut [dividend], &mut [rhs.sig[0]], S::PRECISION);
+ let mut status;
+ self = unpack!(status=, self.normalize(round, loss));
+ if loss != Loss::ExactlyZero {
+ status |= Status::INEXACT;
+ }
+ status.and(self)
+ }
+ }
+ }
+
+ fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> { // IEEE remainder: `self - r * rhs` with `r` = `self / rhs` rounded to nearest, ties to even.
+ match (self.category(), rhs.category()) {
+ (Category::NaN, _) | (_, Category::NaN) => {
+ IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
+ }
+
+ (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
+
+ (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self),
+
+ (Category::Normal, Category::Normal) => {
+ let mut status;
+
+ let mut x = self;
+ let mut p = rhs;
+
+ // Make sure the current value is less than twice the denom. If the addition
+ // did not succeed (an overflow has happened), it means that the finite
+ // value we currently possess must be less than twice the denom (as we are
+ // using the same semantics).
+ let p2 = unpack!(status=, p + p);
+ if status == Status::OK {
+ x = unpack!(status=, x.c_fmod(p2));
+ assert_eq!(status, Status::OK);
+ }
+
+ // Let's work with absolute numbers.
+ p = p.abs();
+ x = x.abs();
+
+ //
+ // To calculate the remainder we use the following scheme.
+ //
+ // The remainder is defined as follows:
+ //
+ // remainder = numer - rquot * denom = x - r * p
+ //
+ // Where r is the result of: x/p, rounded toward the nearest integral value
+ // (with halfway cases rounded toward the even number).
+ //
+ // Currently, (after x mod 2p):
+ // r is the number of 2p's present inside x, which is inherently, an even
+ // number of p's.
+ //
+ // We may split the remaining calculation into 4 options:
+ // - if x < 0.5p then we round to the nearest number which is 0, and are done.
+ // - if x == 0.5p then we round to the nearest even number which is 0, and we
+ // are done as well.
+ // - if 0.5p < x < p then we round to nearest number which is 1, and we have
+ // to subtract 1p at least once.
+ // - if x >= p then we must subtract p at least once, as x must be a
+ // remainder.
+ //
+ // By now, we were done, or we added 1 to r, which is in turn now an odd number.
+ //
+ // We can now split the remaining calculation to the following 3 options:
+ // - if x < 0.5p then we round to the nearest number which is 0, and are done.
+ // - if x == 0.5p then we round to the nearest even number. As r is odd, we
+ // must round up to the next even number. so we must subtract p once more.
+ // - if x > 0.5p (and inherently x < p) then we must round r up to the next
+ // integral, and subtract p once more.
+ //
+
+ // Return `x * 2` at no loss of precision (i.e. no overflow).
+ //
+ // HACK(eddyb) this may seem a bit sketchy because it can return
+ // values that `normalize` would've replaced with `overflow_result`
+ // (e.g. overflowing to infinity), but the result is only used for
+ // comparisons, where both sides of such comparison can be seen
+ // as transiently having a larger *effective* exponent range.
+ let lossless_2x = |mut x: Self| {
+ x.exp += 1; // doubling is a bump of the exponent
+
+ if x.exp >= Self::MAX_EXP {
+ // HACK(eddyb) skip lossy `normalize` (see above).
+ } else {
+ let status;
+ x = unpack!(status=, x.normalize(Round::NearestTiesToEven, Loss::ExactlyZero));
+ assert_eq!(status, Status::OK);
+ }
+
+ x
+ };
+
+ if lossless_2x(x) > p { // x > 0.5p: subtract p once
+ x = unpack!(status=, x - p);
+ assert_eq!(status, Status::OK);
+
+ if lossless_2x(x) >= p { // still x >= 0.5p with odd r: subtract p once more
+ x = unpack!(status=, x - p);
+ assert_eq!(status, Status::OK);
+ }
+ }
+
+ if x.is_zero() {
+ Status::OK.and(x.copy_sign(self)) // IEEE754 requires this
+ } else {
+ Status::OK.and(x.negate_if(self.is_negative())) // undo the earlier `abs` for a negative dividend
+ }
+ }
+ }
+ }
+
+ fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> { // Remainder by repeated subtraction of scaled copies of `rhs`.
+ match (self.category(), rhs.category()) {
+ (Category::NaN, _) | (_, Category::NaN) => {
+ IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
+ }
+
+ (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
+
+ (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self),
+
+ (Category::Normal, Category::Normal) => {
+ let orig = self; // kept so a zero result can take the original sign
+
+ while self.is_finite_non_zero()
+ && rhs.is_finite_non_zero()
+ && self.cmp_abs_normal(rhs) != Ordering::Less
+ {
+ let exp = self.ilogb() - rhs.ilogb();
+ let mut v = rhs.scalbn(exp); // `rhs` scaled up to the binade of `self`
+ // `v` can overflow to NaN with `NonfiniteBehavior::NanOnly`, so explicitly
+ // check for it.
+ if v.is_nan() || self.cmp_abs_normal(v) == Ordering::Less {
+ v = rhs.scalbn(exp - 1); // one binade lower so that |v| <= |self|
+ }
+ v = v.copy_sign(self);
+
+ let status;
+ self = unpack!(status=, self - v);
+ assert_eq!(status, Status::OK);
+ }
+ if self.is_zero() {
+ self = self.copy_sign(orig);
+ }
+ Status::OK.and(self)
+ }
+ }
+ }
+
+ fn round_to_integral(self, round: Round) -> StatusAnd<Self> { // Rounds `self` to an integral value using `round`.
+ match self.category() {
+ Category::NaN => IeeeDefaultExceptionHandling::result_from_nan(self),
+
+ // [IEEE Std 754-2008 6.1]:
+ // The behavior of infinity in floating-point arithmetic is derived from the
+ // limiting cases of real arithmetic with operands of arbitrarily
+ // large magnitude, when such a limit exists.
+ // ...
+ // Operations on infinite operands are usually exact and therefore signal no
+ // exceptions ...
+ Category::Infinity => Status::OK.and(self),
+
+ // [IEEE Std 754-2008 6.3]:
+ // ... the sign of the result of conversions, the quantize operation, the
+ // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
+ // the sign of the first or only operand.
+ Category::Zero => Status::OK.and(self),
+
+ Category::Normal => {
+ // If the exponent is large enough, we know that this value is already
+ // integral, and the arithmetic below would potentially cause it to saturate
+ // to +/-Inf. Bail out early instead.
+ if self.exp + 1 >= S::PRECISION as ExpInt {
+ return Status::OK.and(self);
+ }
+
+ // The algorithm here is quite simple: we add 2^(p-1), where p is the
+ // precision of our format, and then subtract it back off again. The choice
+ // of rounding modes for the addition/subtraction determines the rounding mode
+ // for our integral rounding as well.
+ // NOTE: When the input value is negative, we do subtraction followed by
+ // addition instead.
+ assert!(S::PRECISION <= 128);
+ let mut status;
+ let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1)));
+ assert_eq!(status, Status::OK);
+ let magic_const = magic_const.copy_sign(self); // same sign as the input, see NOTE above
+
+ let mut r = self;
+ r = unpack!(status=, r.add_r(magic_const, round)); // this status is the one returned
+
+ // Current value and 'MagicConstant' are both integers, so the result of the
+ // subtraction is always exact according to Sterbenz' lemma.
+ r = r.sub_r(magic_const, round).value;
+
+ // Restore the input sign to handle the case of zero result
+ // correctly.
+ status.and(r.copy_sign(self))
+ }
+ }
+ }
+
+ fn next_up(mut self) -> StatusAnd<Self> { // Returns the next representable value above `self`.
+ // Compute nextUp(x), handling each float category separately.
+ match self.category() {
+ Category::Infinity => {
+ if self.is_negative() {
+ // nextUp(-inf) = -largest
+ Status::OK.and(-Self::largest())
+ } else {
+ // nextUp(+inf) = +inf
+ Status::OK.and(self)
+ }
+ }
+ Category::NaN => {
+ // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
+ // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
+ // change the payload.
+ if self.is_signaling() {
+ // For consistency, propagate the sign of the sNaN to the qNaN.
+ Status::INVALID_OP.and(Self::NAN.copy_sign(self))
+ } else {
+ Status::OK.and(self)
+ }
+ }
+ Category::Zero => {
+ // nextUp(pm 0) = +smallest
+ Status::OK.and(Self::SMALLEST)
+ }
+ Category::Normal => {
+ // nextUp(-smallest) = -0
+ if self.is_smallest() && self.is_negative() {
+ return Status::OK.and(-Self::ZERO);
+ }
+
+ // nextUp(largest) == INFINITY
+ if self.is_largest() && !self.is_negative() {
+ return Status::OK.and(Self::INFINITY);
+ }
+
+ // Excluding the integral bit. This allows us to test for binade boundaries.
+ let sig_mask = (1 << (S::PRECISION - 1)) - 1;
+
+ // nextUp(normal) == normal + inc.
+ if self.is_negative() {
+ // If we are negative, we need to decrement the significand.
+
+ // We only cross a binade boundary that requires adjusting the exponent
+ // if:
+ // 1. exponent != S::MIN_EXP. This implies we are not in the
+ // smallest binade or are dealing with denormals.
+ // 2. Our significand excluding the integral bit is all zeros.
+ let crossing_binade_boundary = self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0;
+
+ // Decrement the significand.
+ //
+ // We always do this since:
+ // 1. If we are dealing with a non-binade decrement, by definition we
+ // just decrement the significand.
+ // 2. If we are dealing with a normal -> normal binade decrement, since
+ // we have an explicit integral bit the fact that all bits but the
+ // integral bit are zero implies that subtracting one will yield a
+ // significand with 0 integral bit and 1 in all other spots. Thus we
+ // must just adjust the exponent and set the integral bit to 1.
+ // 3. If we are dealing with a normal -> denormal binade decrement,
+ // since we set the integral bit to 0 when we represent denormals, we
+ // just decrement the significand.
+ sig::decrement(&mut self.sig);
+
+ if crossing_binade_boundary {
+ // Our result is a normal number. Do the following:
+ // 1. Set the integral bit to 1.
+ // 2. Decrement the exponent.
+ sig::set_bit(&mut self.sig, S::PRECISION - 1);
+ self.exp -= 1;
+ }
+ } else {
+ // If we are positive, we need to increment the significand.
+
+ // We only cross a binade boundary that requires adjusting the exponent if
+ // the input is not a denormal and all of said input's significand bits
+ // are set. If all of said conditions are true: clear the significand, set
+ // the integral bit to 1, and increment the exponent. If we have a
+ // denormal always increment since moving denormals and the numbers in the
+ // smallest normal binade have the same exponent in our representation.
+ let crossing_binade_boundary = !self.is_denormal() && self.sig[0] & sig_mask == sig_mask;
+
+ if crossing_binade_boundary {
+ self.sig = [0];
+ sig::set_bit(&mut self.sig, S::PRECISION - 1);
+ assert_ne!(
+ self.exp,
+ S::MAX_EXP,
+ "We can not increment an exponent beyond the MAX_EXP \
+ allowed by the given floating point semantics."
+ );
+ self.exp += 1;
+ } else {
+ sig::increment(&mut self.sig);
+ }
+ }
+ Status::OK.and(self)
+ }
+ }
+ }
+
+ fn from_bits(input: u128) -> Self { // Builds a float from the bit pattern `input`.
+ // Dispatch to semantics: decoding is implemented by `S::from_bits`.
+ S::from_bits(input)
+ }
+
+ fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
+     // Place the integer verbatim in the significand, with the exponent that
+     // makes the integer bit worth 2^(PRECISION - 1); `normalize` then
+     // produces the properly rounded value.
+     let integer_exp = S::PRECISION as ExpInt - 1;
+     let unnormalized: Self = IeeeFloat {
+         sig: [input],
+         exp: integer_exp,
+         read_only_category_do_not_mutate: Category::Normal,
+         read_only_sign_do_not_mutate: false,
+         marker: PhantomData,
+     };
+
+     unnormalized.normalize(round, Loss::ExactlyZero)
+ }
+
+ fn from_str_r(s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> { // Parses `s`: optional sign, then Inf/NaN spellings, hex (`0x`) or decimal digits.
+ if s.is_empty() {
+ return Err(ParseError("Invalid string length"));
+ }
+
+ // Handle a leading minus sign.
+ let (minus, s) = s.strip_prefix("-").map(|s| (true, s)).unwrap_or((false, s));
+ let from_abs = |r: Self| r.negate_if(minus); // re-applies the parsed sign to an absolute value
+
+ // Handle a leading plus sign (mutually exclusive with minus).
+ let (explicit_plus, s) = s
+ .strip_prefix("+")
+ .filter(|_| !minus)
+ .map(|s| (true, s))
+ .unwrap_or((false, s));
+
+ // Handle special cases.
+ let special = match s {
+ "Inf" if minus || explicit_plus => Some(Self::INFINITY), // "Inf" is accepted only with an explicit sign
+
+ "inf" | "INFINITY" if !explicit_plus => Some(Self::INFINITY),
+
+ _ if !explicit_plus => {
+ // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
+ let (is_signaling, s) = s.strip_prefix(['s', 'S']).map_or((false, s), |s| (true, s));
+
+ s.strip_prefix("nan").or_else(|| s.strip_prefix("NaN")).and_then(|s| {
+ // Allow the payload to be inside parentheses.
+ let s = s
+ .strip_prefix("(")
+ .and_then(|s| {
+ // Parentheses should be balanced (and not empty).
+ s.strip_suffix(")").filter(|s| !s.is_empty())
+ })
+ .unwrap_or(s);
+
+ let payload = if s.is_empty() {
+ // A NaN without payload.
+ None
+ } else {
+ // Determine the payload number's radix: "0x"/"0X" is hex, a leading "0" is octal, otherwise decimal.
+ let (radix, s) = s
+ .strip_prefix("0")
+ .filter(|s| !s.is_empty())
+ .map(|s| s.strip_prefix(['x', 'X']).map(|s| (16, s)).unwrap_or((8, s)))
+ .unwrap_or((10, s));
+
+ // Parse the payload and make the NaN; an unparsable payload means "not a special value".
+ Some(u128::from_str_radix(s, radix).ok()?)
+ };
+
+ Some(if is_signaling {
+ Self::snan(payload)
+ } else {
+ Self::qnan(payload)
+ })
+ })
+ }
+
+ _ => None,
+ };
+ if let Some(r) = special {
+ return Ok(Status::OK.and(from_abs(r)));
+ }
+
+ if s.is_empty() {
+ return Err(ParseError("String has no digits"));
+ }
+
+ // Adjust the rounding mode for the absolute value below.
+ round = round.negate_if(minus);
+
+ let (is_hex, s) = s
+ .strip_prefix("0")
+ .and_then(|s| s.strip_prefix(['x', 'X']))
+ .map(|s| (true, s))
+ .unwrap_or((false, s));
+
+ let r = if is_hex {
+ if s.is_empty() {
+ return Err(ParseError("Invalid string"));
+ }
+ Self::from_hexadecimal_string(s, round)?
+ } else {
+ Self::from_decimal_string(s, round)?
+ };
+
+ Ok(r.map(from_abs))
+ }
+
+ fn to_bits(self) -> u128 { // Returns the bit pattern of `self`.
+ // Dispatch to semantics: encoding is implemented by `S::to_bits`.
+ S::to_bits(self)
+ }
+
+ fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { // Converts to an unsigned integer of `width` bits; `*is_exact` reports whether no fraction was lost.
+ // The result of trying to convert a number too large.
+ let overflow = if self.is_negative() {
+ // Negative numbers cannot be represented as unsigned.
+ 0
+ } else {
+ // Largest unsigned integer of the given width.
+ !0 >> (128 - width) // NOTE(review): needs 1 <= width <= 128 for the shift amount to be in range — confirm callers
+ };
+
+ *is_exact = false;
+
+ match self.category() {
+ Category::NaN => Status::INVALID_OP.and(0),
+
+ Category::Infinity => Status::INVALID_OP.and(overflow),
+
+ Category::Zero => {
+ // Negative zero can't be represented as an int.
+ *is_exact = !self.is_negative();
+ Status::OK.and(0)
+ }
+
+ Category::Normal => {
+ let mut r = 0;
+
+ // Step 1: place our absolute value, with any fraction truncated, in
+ // the destination.
+ let truncated_bits = if self.exp < 0 {
+ // Our absolute value is less than one; truncate everything.
+ // For exponent -1 the integer bit represents .5, look at that.
+ // For smaller exponents leftmost truncated bit is 0.
+ S::PRECISION - 1 + (-self.exp) as usize
+ } else {
+ // We want the most significant (exponent + 1) bits; the rest are
+ // truncated.
+ let bits = self.exp as usize + 1;
+
+ // Hopelessly large in magnitude?
+ if bits > width {
+ return Status::INVALID_OP.and(overflow);
+ }
+
+ if bits < S::PRECISION {
+ // We truncate (S::PRECISION - bits) bits.
+ r = self.sig[0] >> (S::PRECISION - bits);
+ S::PRECISION - bits
+ } else {
+ // We want at least as many bits as are available.
+ r = self.sig[0] << (bits - S::PRECISION);
+ 0
+ }
+ };
+
+ // Step 2: work out any lost fraction, and increment the absolute
+ // value if we would round away from zero.
+ let mut loss = Loss::ExactlyZero;
+ if truncated_bits > 0 {
+ loss = Loss::through_truncation(&self.sig, truncated_bits);
+ if loss != Loss::ExactlyZero && self.round_away_from_zero(round, loss, truncated_bits) {
+ r = r.wrapping_add(1);
+ if r == 0 {
+ return Status::INVALID_OP.and(overflow); // Overflow.
+ }
+ }
+ }
+
+ // Step 3: check if we fit in the destination.
+ if r > overflow {
+ return Status::INVALID_OP.and(overflow);
+ }
+
+ if loss == Loss::ExactlyZero {
+ *is_exact = true;
+ Status::OK.and(r)
+ } else {
+ Status::INEXACT.and(r)
+ }
+ }
+ }
+ }
+
+ fn cmp_abs_normal(self, rhs: Self) -> Ordering {
+     // Only meaningful for finite, non-zero operands.
+     assert!(self.is_finite_non_zero());
+     assert!(rhs.is_finite_non_zero());
+
+     // The exponent decides first; only when the exponents tie do we fall
+     // back to an unsigned comparison of the significands.
+     match self.exp.cmp(&rhs.exp) {
+         Ordering::Equal => sig::cmp(&self.sig, &rhs.sig),
+         by_exponent => by_exponent,
+     }
+ }
+
+ fn bitwise_eq(self, rhs: Self) -> bool {
+     // Category and sign must agree before anything else is looked at.
+     let same_class = self.category() == rhs.category() && self.is_negative() == rhs.is_negative();
+     if !same_class {
+         return false;
+     }
+
+     match self.category() {
+         // Zeros and infinities carry no further state.
+         Category::Zero | Category::Infinity => true,
+
+         // Otherwise the significands must match, and for finite non-zero
+         // values the exponents as well.
+         _ => (!self.is_finite_non_zero() || self.exp == rhs.exp) && self.sig == rhs.sig,
+     }
+ }
+
+ fn is_negative(self) -> bool { // Reads the stored sign flag.
+ self.read_only_sign_do_not_mutate
+ }
+
+ fn is_denormal(self) -> bool {
+     // A denormal is a finite non-zero value at the minimum exponent...
+     if !self.is_finite_non_zero() || self.exp != S::MIN_EXP {
+         return false;
+     }
+
+     // ...whose integer bit (the top bit of the significand) is clear.
+     let integer_bit_set = sig::get_bit(&self.sig, S::PRECISION - 1);
+     !integer_bit_set
+ }
+
+ fn is_signaling(self) -> bool {
+     // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
+     // first bit of the trailing significand being 0. So a NaN is signaling
+     // whenever the quiet-NaN bits are not all present in its significand.
+     let quiet_bits = self.sig[0] & S::QNAN_SIGNIFICAND;
+     self.is_nan() && quiet_bits != S::QNAN_SIGNIFICAND
+ }
+
+ fn category(self) -> Category { // Reads the stored category.
+ self.read_only_category_do_not_mutate
+ }
+
+ fn get_exact_inverse(self) -> Option<Self> {
+     // The significand of a normalized power of two: only the integer bit set.
+     let power_of_two_sig = [1 << (S::PRECISION - 1)];
+
+     // Special floats and denormals have no exact inverse, and neither does
+     // anything that is not a power of two.
+     if !self.is_finite_non_zero() || self.sig != power_of_two_sig {
+         return None;
+     }
+
+     // Compute 1 / self.
+     let one = Self::from_u128(1).value;
+     let status;
+     let reciprocal = unpack!(status=, one / self);
+
+     // Reject an inexact division. Also avoid multiplication with a denormal:
+     // it is not safe on all platforms and may be slower than a normal division.
+     if status != Status::OK || reciprocal.is_denormal() {
+         return None;
+     }
+
+     assert!(reciprocal.is_finite_non_zero());
+     assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]);
+
+     Some(reciprocal)
+ }
+
+ fn ilogb(mut self) -> ExpInt {
+     // Special categories map to dedicated sentinel values; any value that
+     // is not a denormal simply reports its stored exponent.
+     if self.is_nan() {
+         IEK_NAN
+     } else if self.is_zero() {
+         IEK_ZERO
+     } else if self.is_infinite() {
+         IEK_INF
+     } else if !self.is_denormal() {
+         self.exp
+     } else {
+         // Denormal: raise the exponent by the number of fraction bits so
+         // that `normalize` can shift the significand without loss, then
+         // take the same amount off the normalized exponent.
+         let fraction_bits = (S::PRECISION - 1) as ExpInt;
+         self.exp += fraction_bits;
+         let normalized = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value;
+         normalized.exp - fraction_bits
+     }
+ }
+
+ fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self {
+     // Adding a wildly out-of-scale `exp` to `self.exp` would overflow, so
+     // it is clamped first. The clamp range must be wide enough not to
+     // change the result: it spans the distance between the largest possible
+     // exponent and the normalized exponent of half the smallest denormal.
+     let fraction_bits = (S::PRECISION - 1) as i32;
+     let lowest_exp = S::MIN_EXP as i32 - fraction_bits;
+     let max_change = S::MAX_EXP as i32 - lowest_exp + 1;
+
+     // Clamp to one past the range ends to let normalize handle overflow.
+     let clamped = (exp as i32).max(-max_change - 1).min(max_change);
+
+     self.exp = self.exp.saturating_add(clamped as ExpInt);
+     let scaled = self.normalize(round, Loss::ExactlyZero).value;
+
+     // A NaN goes through the default NaN handling before being returned.
+     if scaled.is_nan() {
+         IeeeDefaultExceptionHandling::result_from_nan(scaled).value
+     } else {
+         scaled
+     }
+ }
+
+ fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self {
+     let unbiased = self.ilogb();
+     *exp = unbiased;
+
+     // Quiet signalling nans.
+     if unbiased == IEK_NAN {
+         self = IeeeDefaultExceptionHandling::result_from_nan(self).value;
+     }
+
+     // NaNs and infinities are returned without any scaling; `*exp` keeps
+     // the sentinel reported by `ilogb`.
+     if unbiased == IEK_NAN || unbiased == IEK_INF {
+         return self;
+     }
+
+     // 1 is added because frexp is defined to return a normalized fraction in
+     // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). Zero reports 0.
+     *exp = if unbiased == IEK_ZERO { 0 } else { unbiased + 1 };
+
+     self.scalbn_r(-*exp, round)
+ }
+}
+
+impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> { // Conversion from semantics `S` to semantics `T`.
+ fn convert_r(mut self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> { // `*loses_info` is set when the conversion is not exact.
+ // FIXME(eddyb) move this into the return result.
+ *loses_info = false;
+
+ // x86 has some unusual NaNs which cannot be represented in any other
+ // format; note them here.
+ fn is_x87_double_extended<S: Semantics>() -> bool {
+ S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND
+ }
+ let loses_x87_pseudo_nan = is_x87_double_extended::<S>()
+ && !is_x87_double_extended::<T>()
+ && self.category() == Category::NaN
+ && (self.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND;
+
+ // NOTE(eddyb) this is done early because the target semantics may not
+ // actually support encoding the distinction between SNaN and QNaN.
+ //
+ // Convert of sNaN creates qNaN and raises an exception (invalid op).
+ // This also guarantees that a sNaN does not become Inf on a truncation
+ // that loses all payload bits.
+ let mut status = Status::OK;
+ if self.is_nan() {
+ self = unpack!(status|=, IeeeDefaultExceptionHandling::result_from_nan(self));
+ }
+
+ let Self { mut sig, mut exp, .. } = self; // local working copies; `self` itself stays untouched below
+
+ // If this is a truncation of a denormal number, and the target semantics
+ // has larger exponent range than the source semantics (this can happen
+ // when truncating from PowerPC double-double to double format), the
+ // right shift could lose result mantissa bits. Adjust exponent instead
+ // of performing excessive shift.
+ // Also do a similar trick in case shifting denormal would produce zero
+ // significand as this case isn't handled correctly by normalize.
+ let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt; // negative: truncation, positive: extension
+ if shift < 0 && self.is_finite_non_zero() {
+ let omsb = sig::omsb(&sig) as ExpInt;
+ let mut exp_change = omsb - S::PRECISION as ExpInt;
+ if exp + exp_change < T::MIN_EXP {
+ exp_change = T::MIN_EXP - exp;
+ }
+ if exp_change < shift {
+ exp_change = shift;
+ }
+ if exp_change < 0 {
+ shift -= exp_change;
+ exp += exp_change;
+ } else if omsb <= -shift {
+ exp_change = omsb + shift - 1; // leave at least one bit set
+ shift -= exp_change;
+ exp += exp_change;
+ }
+ }
+
+ // If this is a truncation, perform the shift.
+ let mut loss = Loss::ExactlyZero;
+ if shift < 0 && (self.is_finite_non_zero() || self.category() == Category::NaN && S::NAN_PAYLOAD_MASK != 0) {
+ loss = sig::shift_right(&mut sig, &mut 0, -shift as usize);
+ }
+
+ // If this is an extension, perform the shift.
+ if shift > 0 && (self.is_finite_non_zero() || self.category() == Category::NaN) {
+ sig::shift_left(&mut sig, &mut 0, shift as usize);
+ }
+
+ let r = match self.category() {
+ Category::Normal => {
+ let r = IeeeFloat::<T> {
+ sig,
+ exp,
+ read_only_category_do_not_mutate: self.category(),
+ read_only_sign_do_not_mutate: self.is_negative(),
+ marker: PhantomData,
+ };
+ unpack!(status|=, r.normalize(round, loss))
+ }
+ Category::NaN => {
+ *loses_info = loss != Loss::ExactlyZero
+ || loses_x87_pseudo_nan
+ || S::NAN_PAYLOAD_MASK != 0 && T::NAN_PAYLOAD_MASK == 0;
+
+ IeeeFloat::<T>::qnan(Some(sig[0])).with_sign(self.is_negative()) // the shifted significand becomes the payload
+ }
+ Category::Infinity => IeeeFloat::<T>::INFINITY.with_sign(self.is_negative()),
+ Category::Zero => IeeeFloat::<T>::ZERO.with_sign(self.is_negative()),
+ };
+
+ // NOTE(eddyb) this catches all cases of e.g. ±Inf turning into NaN,
+ // because of `T::NONFINITE_BEHAVIOR` not being `IEEE754`.
+ if matches!(self.category(), Category::Infinity | Category::Zero)
+ && (r.category() != self.category() || r.is_negative() != self.is_negative())
+ {
+ status |= Status::INEXACT;
+ }
+
+ *loses_info |= (status - Status::INVALID_OP) != Status::OK; // any status flag other than INVALID_OP counts as lost information
+
+ status.and(r)
+ }
+}
+
+impl<S: Semantics> IeeeFloat<S> {
+ /// Produces the result of a positive overflow: either infinity or the
+ /// largest finite number, depending on `round`. For a negative overflow,
+ /// negate the `round` argument before calling.
+ fn overflow_result(round: Round) -> StatusAnd<Self> {
+     // Modes that never round a positive value upward stop at the largest
+     // finite number; every other mode overflows to infinity.
+     let saturates = matches!(round, Round::TowardNegative | Round::TowardZero);
+
+     if saturates {
+         Status::INEXACT.and(Self::largest())
+     } else {
+         (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY)
+     }
+ }
+
+ /// Decide whether truncating the current number must be followed by
+ /// rounding away from zero (i.e., by increasing the significand).
+ ///
+ /// `bit` is the position of the new LSB, `loss` the fraction lost by the
+ /// truncation and `round` the rounding mode. This routine must work for
+ /// Category::Zero of both signs, and Category::Normal numbers.
+ ///
+ /// Panics if `self` is a NaN or an infinity, or if `loss` is
+ /// `Loss::ExactlyZero`.
+ fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool {
+     // NaNs and infinities should not have lost fractions.
+     assert!(self.is_finite_non_zero() || self.is_zero());
+
+     // Current callers never pass this so we don't handle it.
+     assert_ne!(loss, Loss::ExactlyZero);
+
+     match (round, loss) {
+         // Directed modes only look at the sign.
+         (Round::TowardZero, _) => false,
+         (Round::TowardPositive, _) => !self.is_negative(),
+         (Round::TowardNegative, _) => self.is_negative(),
+
+         // Ties (and anything above a tie) go away from zero.
+         (Round::NearestTiesToAway, Loss::ExactlyHalf) => true,
+         (Round::NearestTiesToAway, Loss::MoreThanHalf) => true,
+         (Round::NearestTiesToAway, _) => false,
+
+         (Round::NearestTiesToEven, Loss::MoreThanHalf) => true,
+         // On an exact tie round to even, i.e. away only if the new LSB is
+         // set. Our zeros don't have a significand to test.
+         (Round::NearestTiesToEven, Loss::ExactlyHalf) => {
+             self.category() != Category::Zero && sig::get_bit(&self.sig, bit)
+         }
+         (Round::NearestTiesToEven, _) => false,
+     }
+ }
+
+ /// Normalize the exponent of a finite non-zero value and round it
+ /// according to `round`, given the fraction `loss` that was already
+ /// discarded below the least significant bit of the significand.
+ ///
+ /// Values that are not finite non-zero are returned unchanged with
+ /// `Status::OK`. Otherwise the returned status is:
+ /// * `OK` when the result is exact,
+ /// * `INEXACT` when rounding happened (with `OVERFLOW` when the result
+ ///   overflowed to infinity, or `UNDERFLOW` when the result is a denormal
+ ///   or was flushed to zero).
+ fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> {
+     if !self.is_finite_non_zero() {
+         return Status::OK.and(self);
+     }
+
+     // Before rounding normalize the exponent of Category::Normal numbers.
+     let mut omsb = sig::omsb(&self.sig);
+
+     if omsb > 0 {
+         // OMSB is numbered from 1. We want to place it in the integer
+         // bit numbered PRECISION if possible, with a compensating change in
+         // the exponent.
+         let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt);
+
+         // If the resulting exponent is too high, overflow according to
+         // the rounding mode.
+         if final_exp > S::MAX_EXP {
+             let round = round.negate_if(self.is_negative());
+             return Self::overflow_result(round).map(|r| r.copy_sign(self));
+         }
+
+         // Subnormal numbers have exponent MIN_EXP, and their MSB
+         // is forced based on that.
+         if final_exp < S::MIN_EXP {
+             final_exp = S::MIN_EXP;
+         }
+
+         // Shifting left is easy as we don't lose precision.
+         if final_exp < self.exp {
+             assert_eq!(loss, Loss::ExactlyZero);
+
+             let exp_change = (self.exp - final_exp) as usize;
+             sig::shift_left(&mut self.sig, &mut self.exp, exp_change);
+
+             return Status::OK.and(self);
+         }
+
+         // Shift right and capture any new lost fraction.
+         if final_exp > self.exp {
+             let exp_change = (final_exp - self.exp) as usize;
+             loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss);
+
+             // Keep OMSB up-to-date.
+             omsb = omsb.saturating_sub(exp_change);
+         }
+     }
+
+     // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
+     // the largest significand of `MAX_EXP` (which also has normals), though
+     // comparing significands needs to ignore the integer bit `NAN` lacks.
+     if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly
+         && self.exp == Self::NAN.exp
+         && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig
+     {
+         // `overflow_result` handles positive overflow only, so the rounding
+         // mode has to be mirrored for negative values (same as the
+         // `MAX_EXP` overflow above).
+         let round = round.negate_if(self.is_negative());
+         return Self::overflow_result(round).map(|r| r.copy_sign(self));
+     }
+
+     // Now round the number according to round given the lost
+     // fraction.
+
+     // As specified in IEEE 754, since we do not trap we do not report
+     // underflow for exact results.
+     if loss == Loss::ExactlyZero {
+         // Canonicalize zeros.
+         if omsb == 0 {
+             self = Self::ZERO.copy_sign(self);
+         }
+
+         return Status::OK.and(self);
+     }
+
+     // Increment the significand if we're rounding away from zero.
+     if self.round_away_from_zero(round, loss, 0) {
+         if omsb == 0 {
+             self.exp = S::MIN_EXP;
+         }
+
+         // We should never overflow.
+         assert_eq!(sig::increment(&mut self.sig), 0);
+         omsb = sig::omsb(&self.sig);
+
+         // Did the significand increment overflow?
+         if omsb == S::PRECISION + 1 {
+             // Renormalize by incrementing the exponent and shifting our
+             // significand right one. However if we already have the
+             // maximum exponent we overflow to infinity.
+             if self.exp == S::MAX_EXP {
+                 return (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY.copy_sign(self));
+             }
+
+             let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1);
+
+             return Status::INEXACT.and(self);
+         }
+
+         // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
+         // the largest significand of `MAX_EXP` (which also has normals), though
+         // comparing significands needs to ignore the integer bit `NAN` lacks.
+         if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly
+             && self.exp == Self::NAN.exp
+             && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig
+         {
+             // Mirror the rounding mode for negative values, see above.
+             let round = round.negate_if(self.is_negative());
+             return Self::overflow_result(round).map(|r| r.copy_sign(self));
+         }
+     }
+
+     // The normal case - we were and are not denormal, and any
+     // significand increment above didn't overflow.
+     if omsb == S::PRECISION {
+         return Status::INEXACT.and(self);
+     }
+
+     // We have a non-zero denormal.
+     assert!(omsb < S::PRECISION);
+
+     // Canonicalize zeros.
+     if omsb == 0 {
+         self = Self::ZERO.copy_sign(self);
+     }
+
+     // The Category::Zero case is a denormal that underflowed to zero.
+     (Status::UNDERFLOW | Status::INEXACT).and(self)
+ }
+
+ /// Parse the hexadecimal significand and mandatory binary exponent in `s`
+ /// (hex digits with an optional `.`, then `p`/`P`, an optional sign and
+ /// decimal digits) and round the value with `round`.
+ ///
+ /// Returns a `ParseError` when `s` contains more than one dot, has no
+ /// significand digits, has no exponent (or one without digits), or
+ /// contains a character that is invalid at its position. A significand
+ /// consisting only of zeros yields `Self::ZERO` whatever the exponent is.
+ fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
+ let mut r = IeeeFloat {
+ sig: [0],
+ exp: 0,
+ read_only_category_do_not_mutate: Category::Normal,
+ read_only_sign_do_not_mutate: false,
+ marker: PhantomData,
+ };
+
+ let mut any_digits = false;
+ let mut has_exp = false;
+ // Bit position (within `r.sig[0]`) just above where the next nibble is
+ // stored; nibbles are written from the most significant end downwards.
+ let mut bit_pos = LIMB_BITS as isize;
+ // Fraction lost by digits that no longer fit in the limb, if any.
+ let mut loss = None;
+
+ // Without leading or trailing zeros, irrespective of the dot.
+ let mut first_sig_digit = None;
+ // `s.len()` doubles as the "no dot seen yet" marker.
+ let mut dot = s.len();
+
+ for (p, c) in s.char_indices() {
+ // Skip leading zeros and any (hexa)decimal point.
+ if c == '.' {
+ if dot != s.len() {
+ return Err(ParseError("String contains multiple dots"));
+ }
+ dot = p;
+ } else if let Some(hex_value) = c.to_digit(16) {
+ any_digits = true;
+
+ if first_sig_digit.is_none() {
+ if hex_value == 0 {
+ continue;
+ }
+ first_sig_digit = Some(p);
+ }
+
+ // Store the number while we have space.
+ bit_pos -= 4;
+ if bit_pos >= 0 {
+ r.sig[0] |= (hex_value as Limb) << bit_pos;
+ } else {
+ // If zero or one-half (the hexadecimal digit 8) are followed
+ // by non-zero, they're a little more than zero or one-half.
+ if let Some(ref mut loss) = loss {
+ if hex_value != 0 {
+ if *loss == Loss::ExactlyZero {
+ *loss = Loss::LessThanHalf;
+ }
+ if *loss == Loss::ExactlyHalf {
+ *loss = Loss::MoreThanHalf;
+ }
+ }
+ } else {
+ // The first digit that does not fit decides the initial loss.
+ loss = Some(match hex_value {
+ 0 => Loss::ExactlyZero,
+ 1..=7 => Loss::LessThanHalf,
+ 8 => Loss::ExactlyHalf,
+ 9..=15 => Loss::MoreThanHalf,
+ _ => unreachable!(),
+ });
+ }
+ }
+ } else if c == 'p' || c == 'P' {
+ if !any_digits {
+ return Err(ParseError("Significand has no digits"));
+ }
+
+ // Without an explicit dot, the dot sits right before the exponent.
+ if dot == s.len() {
+ dot = p;
+ }
+
+ let mut chars = s[p + 1..].chars().peekable();
+
+ // Adjust for the given exponent.
+ let exp_minus = chars.peek() == Some(&'-');
+ if exp_minus || chars.peek() == Some(&'+') {
+ chars.next();
+ }
+
+ // Accumulate the decimal exponent, saturating instead of overflowing.
+ for c in chars {
+ if let Some(value) = c.to_digit(10) {
+ has_exp = true;
+ r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt);
+ } else {
+ return Err(ParseError("Invalid character in exponent"));
+ }
+ }
+ if !has_exp {
+ return Err(ParseError("Exponent has no digits"));
+ }
+
+ r.exp = r.exp.negate_if(exp_minus);
+
+ // The exponent was consumed up to the end of the string.
+ break;
+ } else {
+ return Err(ParseError("Invalid character in significand"));
+ }
+ }
+ if !any_digits {
+ return Err(ParseError("Significand has no digits"));
+ }
+
+ // Hex floats require an exponent but not a hexadecimal point.
+ if !has_exp {
+ return Err(ParseError("Hex strings require an exponent"));
+ }
+
+ // Ignore the exponent if we are zero.
+ let first_sig_digit = match first_sig_digit {
+ Some(p) => p,
+ None => return Ok(Status::OK.and(Self::ZERO)),
+ };
+
+ // Calculate the exponent adjustment implicit in the number of
+ // significant digits and adjust for writing the significand starting
+ // at the most significant nibble.
+ let exp_adjustment = if dot > first_sig_digit {
+ ExpInt::try_from(dot - first_sig_digit).unwrap()
+ } else {
+ -ExpInt::try_from(first_sig_digit - dot - 1).unwrap()
+ };
+ // Each hexadecimal digit is worth four bits.
+ let exp_adjustment = exp_adjustment
+ .saturating_mul(4)
+ .saturating_sub(1)
+ .saturating_add(S::PRECISION as ExpInt)
+ .saturating_sub(LIMB_BITS as ExpInt);
+ r.exp = r.exp.saturating_add(exp_adjustment);
+
+ Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero)))
+ }
+
+ /// Parse the decimal number in `s` (digits with an optional `.` and an
+ /// optional `e`/`E` exponent) and round the value with `round`.
+ ///
+ /// Returns a `ParseError` when `s` contains more than one dot, has no
+ /// significand digits, or contains a character that is invalid at its
+ /// position. A significand consisting only of zeros yields `Self::ZERO`
+ /// whatever the exponent is. Exponents that are obviously out of range
+ /// short-circuit to an overflow or underflow result; everything else is
+ /// computed as `significand * 10^exponent` with increasing working
+ /// precision until the result is guaranteed to round correctly.
+ fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
+ // Given a normal decimal floating point number of the form
+ //
+ // dddd.dddd[eE][+-]ddd
+ //
+ // where the decimal point and exponent are optional, fill out the
+ // variables below. Exponent is appropriate if the significand is
+ // treated as an integer, and normalized_exp if the significand
+ // is taken to have the decimal point after a single leading
+ // non-zero digit.
+ //
+ // If the value is zero, first_sig_digit is None.
+
+ let mut any_digits = false;
+ let mut dec_exp = 0i32;
+
+ // Without leading or trailing zeros, irrespective of the dot.
+ let mut first_sig_digit = None;
+ let mut last_sig_digit = 0;
+ // `s.len()` doubles as the "no dot seen yet" marker.
+ let mut dot = s.len();
+
+ for (p, c) in s.char_indices() {
+ if c == '.' {
+ if dot != s.len() {
+ return Err(ParseError("String contains multiple dots"));
+ }
+ dot = p;
+ } else if let Some(dec_value) = c.to_digit(10) {
+ any_digits = true;
+
+ if dec_value != 0 {
+ if first_sig_digit.is_none() {
+ first_sig_digit = Some(p);
+ }
+ last_sig_digit = p;
+ }
+ } else if c == 'e' || c == 'E' {
+ if !any_digits {
+ return Err(ParseError("Significand has no digits"));
+ }
+
+ // Without an explicit dot, the dot sits right before the exponent.
+ if dot == s.len() {
+ dot = p;
+ }
+
+ let mut chars = s[p + 1..].chars().peekable();
+
+ // Adjust for the given exponent.
+ let exp_minus = chars.peek() == Some(&'-');
+ if exp_minus || chars.peek() == Some(&'+') {
+ chars.next();
+ }
+
+ let mut any_exp_digits = false;
+ for c in chars {
+ if let Some(value) = c.to_digit(10) {
+ any_exp_digits = true;
+ dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32);
+ } else {
+ return Err(ParseError("Invalid character in exponent"));
+ }
+ }
+ // Treat no exponent as 0 to match binutils
+ if !any_exp_digits {
+ assert_eq!(dec_exp, 0);
+ }
+
+ dec_exp = dec_exp.negate_if(exp_minus);
+
+ // The exponent was consumed up to the end of the string.
+ break;
+ } else {
+ return Err(ParseError("Invalid character in significand"));
+ }
+ }
+ if !any_digits {
+ return Err(ParseError("Significand has no digits"));
+ }
+
+ // Test if we have a zero number allowing for non-zero exponents.
+ let first_sig_digit = match first_sig_digit {
+ Some(p) => p,
+ None => return Ok(Status::OK.and(Self::ZERO)),
+ };
+
+ // Adjust the exponents for any decimal point.
+ if dot > last_sig_digit {
+ dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32);
+ } else {
+ dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32);
+ }
+ // Number of digits between the first and last significant digit
+ // (inclusive), not counting a dot that lies between them.
+ let significand_digits =
+ last_sig_digit - first_sig_digit + 1 - (dot > first_sig_digit && dot < last_sig_digit) as usize;
+ let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1);
+
+ // Handle the cases where exponents are obviously too large or too
+ // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
+ // definitely overflows if
+ //
+ // (dec_exp - 1) * L >= MAX_EXP
+ //
+ // and definitely underflows to zero where
+ //
+ // (dec_exp + 1) * L <= MIN_EXP - PRECISION
+ //
+ // With integer arithmetic the tightest bounds for L are
+ //
+ // 93/28 < L < 196/59 [ numerator <= 256 ]
+ // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
+
+ // Check for MAX_EXP.
+ if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 {
+ // Overflow and round.
+ return Ok(Self::overflow_result(round));
+ }
+
+ // Check for MIN_EXP.
+ if normalized_exp.saturating_add(1).saturating_mul(28738) <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32) {
+ // Underflow to zero and round.
+ let r = if round == Round::TowardPositive {
+ IeeeFloat::SMALLEST
+ } else {
+ IeeeFloat::ZERO
+ };
+ return Ok((Status::UNDERFLOW | Status::INEXACT).and(r));
+ }
+
+ // A tight upper bound on number of bits required to hold an
+ // N-digit decimal integer is N * 196 / 59. Allocate enough space
+ // to hold the full significand, and an extra limb required by
+ // tcMultiplyPart.
+ let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59);
+ let mut dec_sig = DynPrecisionLimbVec::with_capacity(max_limbs);
+
+ // Convert to binary efficiently - we do almost all multiplication
+ // in a Limb. When this would overflow we do a single
+ // bignum multiplication, and then revert again to multiplication
+ // in a Limb.
+ let mut chars = s[first_sig_digit..last_sig_digit + 1].chars();
+ loop {
+ // `val` collects as many decimal digits as fit in one Limb;
+ // `multiplier` is 10^(number of digits collected).
+ let mut val = 0;
+ let mut multiplier = 1;
+
+ loop {
+ let dec_value = match chars.next() {
+ Some('.') => continue,
+ Some(c) => c.to_digit(10).unwrap(),
+ None => break,
+ };
+
+ multiplier *= 10;
+ val = val * 10 + dec_value as Limb;
+
+ // The maximum number that can be multiplied by ten with any
+ // digit added without overflowing a Limb.
+ if multiplier > (!0 - 9) / 10 {
+ break;
+ }
+ }
+
+ // If we've consumed no digits, we're done.
+ if multiplier == 1 {
+ break;
+ }
+
+ // Multiply out the current limb.
+ let mut carry = val;
+ for x in &mut dec_sig {
+ let [low, mut high] = sig::widening_mul(*x, multiplier);
+
+ // Now add carry.
+ let (low, overflow) = low.overflowing_add(carry);
+ high += overflow as Limb;
+
+ *x = low;
+ carry = high;
+ }
+
+ // If we had carry, we need another limb (likely but not guaranteed).
+ if carry > 0 {
+ dec_sig.push(carry);
+ }
+ }
+
+ // Calculate pow(5, abs(dec_exp)) into `pow5_full`.
+ // The *_calc Vec's are reused scratch space, as an optimization.
+ let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = {
+ let mut power = dec_exp.abs() as usize;
+
+ const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
+
+ // `p5` holds the repeated squares of 5^4; `r` accumulates the result,
+ // starting from 5^(power mod 8).
+ let mut p5_scratch = DynPrecisionLimbVec::new();
+ let mut p5: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[4]].into_iter().collect();
+
+ let mut r_scratch = DynPrecisionLimbVec::new();
+ let mut r: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[power & 7]].into_iter().collect();
+ power >>= 3;
+
+ while power > 0 {
+ // Calculate pow(5,pow(2,n+3)).
+ p5_scratch.resize(p5.len() * 2, 0);
+ let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
+ // Drop the high limbs that the product did not need.
+ while p5_scratch.last() == Some(&0) {
+ p5_scratch.pop();
+ }
+ mem::swap(&mut p5, &mut p5_scratch);
+
+ if power & 1 != 0 {
+ r_scratch.resize(r.len() + p5.len(), 0);
+ let _: Loss = sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS);
+ while r_scratch.last() == Some(&0) {
+ r_scratch.pop();
+ }
+ mem::swap(&mut r, &mut r_scratch);
+ }
+
+ power >>= 1;
+ }
+
+ (r, r_scratch, p5, p5_scratch)
+ };
+
+ // Attempt dec_sig * 10^dec_exp with increasing precision.
+ let mut attempt = 0;
+ loop {
+ // The working precision doubles with every attempt.
+ let calc_precision = (LIMB_BITS << attempt) - 1;
+ attempt += 1;
+
+ // Load `limbs` into `sig` as a normalized `calc_precision`-bit
+ // significand, rounding to nearest (ties to even), and return its
+ // exponent. The status is `INEXACT` if any rounding happened.
+ let calc_normal_from_limbs = |sig: &mut DynPrecisionLimbVec, limbs: &[Limb]| -> StatusAnd<ExpInt> {
+ sig.resize(limbs_for_bits(calc_precision), 0);
+ let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision);
+
+ // Before rounding normalize the exponent of Category::Normal numbers.
+ let mut omsb = sig::omsb(sig);
+
+ assert_ne!(omsb, 0);
+
+ // OMSB is numbered from 1. We want to place it in the integer
+ // bit numbered PRECISION if possible, with a compensating change in
+ // the exponent.
+ let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt);
+
+ // Shifting left is easy as we don't lose precision.
+ if final_exp < exp {
+ assert_eq!(loss, Loss::ExactlyZero);
+
+ let exp_change = (exp - final_exp) as usize;
+ sig::shift_left(sig, &mut exp, exp_change);
+
+ return Status::OK.and(exp);
+ }
+
+ // Shift right and capture any new lost fraction.
+ if final_exp > exp {
+ let exp_change = (final_exp - exp) as usize;
+ loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss);
+
+ // Keep OMSB up-to-date.
+ omsb = omsb.saturating_sub(exp_change);
+ }
+
+ assert_eq!(omsb, calc_precision);
+
+ // Now round the number according to round given the lost
+ // fraction.
+
+ // As specified in IEEE 754, since we do not trap we do not report
+ // underflow for exact results.
+ if loss == Loss::ExactlyZero {
+ return Status::OK.and(exp);
+ }
+
+ // Increment the significand if we're rounding away from zero.
+ if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) {
+ // We should never overflow.
+ assert_eq!(sig::increment(sig), 0);
+ omsb = sig::omsb(sig);
+
+ // Did the significand increment overflow?
+ if omsb == calc_precision + 1 {
+ let _: Loss = sig::shift_right(sig, &mut exp, 1);
+
+ return Status::INEXACT.and(exp);
+ }
+ }
+
+ // The normal case - we were and are not denormal, and any
+ // significand increment above didn't overflow.
+ Status::INEXACT.and(exp)
+ };
+
+ let status;
+ let mut exp = unpack!(status=,
+ calc_normal_from_limbs(&mut sig_calc, &dec_sig));
+ let pow5_status;
+ let pow5_exp = unpack!(pow5_status=,
+ calc_normal_from_limbs(&mut pow5_calc, &pow5_full));
+
+ // Add dec_exp, as 10^n = 5^n * 2^n.
+ exp += dec_exp as ExpInt;
+
+ let mut used_bits = S::PRECISION;
+ let mut truncated_bits = calc_precision - used_bits;
+
+ // Errors of the two operands, in half-ulps of the working precision.
+ let half_ulp_err1 = (status != Status::OK) as Limb;
+ let (calc_loss, half_ulp_err2);
+ if dec_exp >= 0 {
+ // Non-negative decimal exponent: multiply by 5^dec_exp.
+ exp += pow5_exp;
+
+ sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0);
+ calc_loss = sig::mul(&mut sig_scratch_calc, &mut exp, &sig_calc, &pow5_calc, calc_precision);
+ mem::swap(&mut sig_calc, &mut sig_scratch_calc);
+
+ half_ulp_err2 = (pow5_status != Status::OK) as Limb;
+ } else {
+ // Negative decimal exponent: divide by 5^|dec_exp|.
+ exp -= pow5_exp;
+
+ sig_scratch_calc.resize(sig_calc.len(), 0);
+ calc_loss = sig::div(&mut sig_scratch_calc, &mut exp, &mut sig_calc, &mut pow5_calc, calc_precision);
+ mem::swap(&mut sig_calc, &mut sig_scratch_calc);
+
+ // Denormal numbers have less precision.
+ if exp < S::MIN_EXP {
+ truncated_bits += (S::MIN_EXP - exp) as usize;
+ used_bits = calc_precision.saturating_sub(truncated_bits);
+ }
+ // Extra half-ulp lost in reciprocal of exponent.
+ half_ulp_err2 = 2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb;
+ }
+
+ // Both sig::mul and sig::div return the
+ // result with the integer bit set.
+ assert!(sig::get_bit(&sig_calc, calc_precision - 1));
+
+ // The error from the true value, in half-ulps, on multiplying two
+ // floating point numbers, which differ from the value they
+ // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
+ // than the returned value.
+ //
+ // See "How to Read Floating Point Numbers Accurately" by William D Clinger.
+ assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8));
+
+ let inexact = (calc_loss != Loss::ExactlyZero) as Limb;
+ let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 {
+ inexact * 2 // <= inexact half-ulps.
+ } else {
+ inexact + 2 * (half_ulp_err1 + half_ulp_err2)
+ };
+
+ // Distance of the bits that are going to be truncated from the
+ // nearest rounding boundary (half-way for the round-to-nearest
+ // modes, zero for the directed modes).
+ let ulps_from_boundary = {
+ let bits = calc_precision - used_bits - 1;
+
+ let i = bits / LIMB_BITS;
+ let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS));
+ let boundary = match round {
+ Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS),
+ _ => 0,
+ };
+ if i == 0 {
+ let delta = limb.wrapping_sub(boundary);
+ cmp::min(delta, delta.wrapping_neg())
+ } else if limb == boundary {
+ if !sig::is_all_zeros(&sig_calc[1..i]) {
+ !0 // A lot.
+ } else {
+ sig_calc[0]
+ }
+ } else if limb == boundary.wrapping_sub(1) {
+ if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) {
+ !0 // A lot.
+ } else {
+ sig_calc[0].wrapping_neg()
+ }
+ } else {
+ !0 // A lot.
+ }
+ };
+
+ // Are we guaranteed to round correctly if we truncate?
+ if ulps_from_boundary.saturating_mul(2) >= half_ulp_err {
+ let mut r = IeeeFloat {
+ sig: [0],
+ exp,
+ read_only_category_do_not_mutate: Category::Normal,
+ read_only_sign_do_not_mutate: false,
+ marker: PhantomData,
+ };
+ sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits);
+ // If we extracted less bits above we must adjust our exponent
+ // to compensate for the implicit right shift.
+ r.exp += (S::PRECISION - used_bits) as ExpInt;
+ let loss = Loss::through_truncation(&sig_calc, truncated_bits);
+ return Ok(r.normalize(round, loss));
+ }
+ // Otherwise retry with a higher working precision.
+ }
+ }
+}
+
+ impl Loss {
+     /// Merge `self` with the loss of a less significant part of the
+     /// same value.
+     ///
+     /// A non-zero `less_significant` loss nudges an exact zero up to
+     /// `LessThanHalf` and an exact half up to `MoreThanHalf`; everything
+     /// else is returned unchanged.
+     #[inline]
+     fn combine(self, less_significant: Loss) -> Loss {
+         if less_significant == Loss::ExactlyZero {
+             return self;
+         }
+
+         match self {
+             Loss::ExactlyZero => Loss::LessThanHalf,
+             Loss::ExactlyHalf => Loss::MoreThanHalf,
+             unchanged => unchanged,
+         }
+     }
+
+     /// Return the fraction that would be lost if the bignum `limbs` were
+     /// truncated by dropping its least significant `bits` bits.
+     #[inline]
+     fn through_truncation(limbs: &[Limb], bits: usize) -> Loss {
+         if bits == 0 {
+             return Loss::ExactlyZero;
+         }
+
+         // The most significant dropped bit is worth exactly one half.
+         let half_bit = bits - 1;
+         let half_index = half_bit / LIMB_BITS;
+
+         // Split into the limb holding the half bit and the limbs below it.
+         // If the half bit lies beyond the end, all limbs are "below".
+         let (half_limb, below) = match limbs.get(half_index) {
+             Some(&limb) => (limb, &limbs[..half_index]),
+             None => (0, limbs),
+         };
+
+         let half_mask = 1 << (half_bit % LIMB_BITS);
+         let has_half = half_limb & half_mask != 0;
+         let has_rest = half_limb & (half_mask - 1) != 0 || !sig::is_all_zeros(below);
+
+         if has_half {
+             if has_rest {
+                 Loss::MoreThanHalf
+             } else {
+                 Loss::ExactlyHalf
+             }
+         } else if has_rest {
+             Loss::LessThanHalf
+         } else {
+             Loss::ExactlyZero
+         }
+     }
+ }
+
+/// Implementation details of IeeeFloat significands, such as big integer arithmetic.
+/// As a rule of thumb, no functions in this module should dynamically allocate.
+mod sig {
+ use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS};
+ use core::cmp::Ordering;
+ use core::mem;
+
+ /// Returns `true` if no limb has a bit set (this includes an empty slice).
+ #[inline]
+ pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool {
+     !limbs.iter().any(|&limb| limb != 0)
+ }
+
+ /// Position of the least significant set bit, counted from one.
+ /// A zeroed significand yields 0.
+ #[inline]
+ pub(super) fn olsb(limbs: &[Limb]) -> usize {
+     match limbs.iter().position(|&limb| limb != 0) {
+         Some(index) => index * LIMB_BITS + limbs[index].trailing_zeros() as usize + 1,
+         None => 0,
+     }
+ }
+
+ /// Position of the most significant set bit, counted from one.
+ /// A zeroed significand yields 0.
+ #[inline]
+ pub(super) fn omsb(limbs: &[Limb]) -> usize {
+     match limbs.iter().rposition(|&limb| limb != 0) {
+         Some(index) => (index + 1) * LIMB_BITS - limbs[index].leading_zeros() as usize,
+         None => 0,
+     }
+ }
+
+ /// Unsigned comparison of two significands of the same length.
+ ///
+ /// Panics if the lengths differ.
+ #[inline]
+ pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering {
+     assert_eq!(a.len(), b.len());
+
+     // Limbs are stored least significant first, so a lexicographic
+     // comparison from the back is a numeric comparison.
+     a.iter().rev().cmp(b.iter().rev())
+ }
+
+ /// Returns whether bit number `bit` (counted from zero at the least
+ /// significant end) is set.
+ #[inline]
+ pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool {
+     let index = bit / LIMB_BITS;
+     let offset = bit % LIMB_BITS;
+     (limbs[index] >> offset) & 1 != 0
+ }
+
+ /// Sets bit number `bit` (counted from zero at the least significant end).
+ #[inline]
+ pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) {
+     let mask: Limb = 1 << (bit % LIMB_BITS);
+     limbs[bit / LIMB_BITS] |= mask;
+ }
+
+ /// Shift `dst` left by `bits` bits and subtract `bits` from `exp`.
+ ///
+ /// Bits shifted out at the top are discarded. Panics if the exponent
+ /// would underflow.
+ #[inline]
+ pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) {
+     if bits == 0 {
+         return;
+     }
+
+     // Our exponent should not underflow.
+     *exp = exp.checked_sub(bits as ExpInt).unwrap();
+
+     // Jump is the inter-limb jump; shift is the intra-limb shift.
+     let jump = bits / LIMB_BITS;
+     let shift = bits % LIMB_BITS;
+
+     // Walk from the top so every source limb is read before it is
+     // overwritten.
+     for i in (0..dst.len()).rev() {
+         dst[i] = if i < jump {
+             0
+         } else {
+             // dst[i] comes from the two limbs src[i - jump] and, if we have
+             // an intra-limb shift, src[i - jump - 1].
+             let mut limb = dst[i - jump];
+             if shift > 0 {
+                 limb <<= shift;
+                 if i > jump {
+                     limb |= dst[i - jump - 1] >> (LIMB_BITS - shift);
+                 }
+             }
+             limb
+         };
+     }
+ }
+
+ /// Shift `dst` right by `bits` bits and add `bits` to `exp`, returning
+ /// the fraction that was shifted out.
+ ///
+ /// Panics if the exponent would overflow.
+ #[inline]
+ pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss {
+     // The loss has to be determined before the bits are gone.
+     let loss = Loss::through_truncation(dst, bits);
+     if bits == 0 {
+         return loss;
+     }
+
+     // Our exponent should not overflow.
+     *exp = exp.checked_add(bits as ExpInt).unwrap();
+
+     // Jump is the inter-limb jump; shift is the intra-limb shift.
+     let jump = bits / LIMB_BITS;
+     let shift = bits % LIMB_BITS;
+
+     // Perform the shift. This leaves the most significant `bits` bits
+     // of the result at zero.
+     let len = dst.len();
+     for i in 0..len {
+         let from = i + jump;
+         dst[i] = if from >= len {
+             0
+         } else {
+             let mut limb = dst[from];
+             if shift > 0 {
+                 limb >>= shift;
+                 if from + 1 < len {
+                     limb |= dst[from + 1] << (LIMB_BITS - shift);
+                 }
+             }
+             limb
+         };
+     }
+
+     loss
+ }
+
+ /// Copy the bit vector of width `src_bits` from `src`, starting at bit `src_lsb`,
+ /// to `dst`, such that the bit `src_lsb` becomes the least significant bit of `dst`.
+ /// All high bits above `src_bits` in `dst` are zero-filled.
+ ///
+ /// `dst` is left untouched when `src_bits` is zero. Panics if `dst` has
+ /// fewer limbs than `src_bits` bits require.
+ #[inline]
+ pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) {
+ if src_bits == 0 {
+ return;
+ }
+
+ let dst_limbs = limbs_for_bits(src_bits);
+ assert!(dst_limbs <= dst.len());
+
+ // Copy whole limbs starting at the limb that contains `src_lsb`, then
+ // shift the unwanted low bits of that limb out.
+ let src = &src[src_lsb / LIMB_BITS..];
+ dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]);
+
+ let shift = src_lsb % LIMB_BITS;
+ let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift);
+
+ // We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
+ // in `dst`. If this is less than src_bits, append the rest, else
+ // clear the high bits.
+ let n = dst_limbs * LIMB_BITS - shift;
+ if n < src_bits {
+ let mask = (1 << (src_bits - n)) - 1;
+ dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << n % LIMB_BITS;
+ } else if n > src_bits && src_bits % LIMB_BITS > 0 {
+ dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1;
+ }
+
+ // Clear high limbs.
+ for x in &mut dst[dst_limbs..] {
+ *x = 0;
+ }
+ }
+
+ /// Extract the most significant `precision` bits of `src` into `dst`.
+ /// There may not be that many; extract what we can.
+ ///
+ /// Returns the fraction lost by dropping the lower bits of `src`
+ /// together with the exponent belonging to the extracted significand.
+ #[inline]
+ pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) {
+     let src_omsb = omsb(src);
+
+     match src_omsb.checked_sub(precision) {
+         // `src` has at least `precision` bits: drop the lowest ones.
+         Some(dropped) => {
+             extract(dst, src, precision, dropped);
+             (Loss::through_truncation(src, dropped), src_omsb as ExpInt - 1)
+         }
+         // `src` is narrower than `precision`: take all of it.
+         None => {
+             extract(dst, src, src_omsb, 0);
+             (Loss::ExactlyZero, precision as ExpInt - 1)
+         }
+     }
+ }
+
+ /// Transform `limbs` chunk by chunk: every consecutive group of `bits`
+ /// bits, visited from the most significant to the least significant
+ /// end, is passed to `f` and replaced by the value `f` returns.
+ ///
+ /// Panics if `bits` does not evenly divide `LIMB_BITS`.
+ #[inline]
+ pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) {
+     assert_eq!(LIMB_BITS % bits, 0);
+     let chunks_per_limb = LIMB_BITS / bits;
+
+     for limb in limbs.iter_mut().rev() {
+         let current = *limb;
+         let mut rebuilt = 0;
+         for offset in (0..chunks_per_limb).rev().map(|i| i * bits) {
+             let chunk = (current >> offset) & ((1 << bits) - 1);
+             rebuilt |= f(chunk) << offset;
+         }
+         *limb = rebuilt;
+     }
+ }
+
+ /// Add one to `dst` in place. Returns the carry out of the top limb
+ /// (1 if every limb wrapped around, including for an empty `dst`).
+ #[inline]
+ pub(super) fn increment(dst: &mut [Limb]) -> Limb {
+     for limb in dst {
+         let (next, wrapped) = limb.overflowing_add(1);
+         *limb = next;
+         // Without a wrap-around the carry stops here.
+         if !wrapped {
+             return 0;
+         }
+     }
+
+     1
+ }
+
+ /// Subtract one from `dst` in place. Returns the borrow out of the top
+ /// limb (1 if every limb wrapped around, including for an empty `dst`).
+ #[inline]
+ pub(super) fn decrement(dst: &mut [Limb]) -> Limb {
+     for limb in dst {
+         let (next, wrapped) = limb.overflowing_sub(1);
+         *limb = next;
+         // Without a wrap-around the borrow stops here.
+         if !wrapped {
+             return 0;
+         }
+     }
+
+     1
+ }
+
+ /// `a += b + c` where the incoming carry `c` is zero or one.
+ /// Returns the carry flag. Panics if `c` is greater than one.
+ #[inline]
+ pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
+     assert!(c <= 1);
+
+     for (lhs, rhs) in a.iter_mut().zip(b.iter().copied()) {
+         let (partial, carry_from_rhs) = lhs.overflowing_add(rhs);
+         let (total, carry_from_c) = partial.overflowing_add(c);
+         *lhs = total;
+         c = (carry_from_rhs || carry_from_c) as Limb;
+     }
+
+     c
+ }
+
+ /// `a -= b + c` where the incoming borrow `c` is zero or one.
+ /// Returns the borrow flag. Panics if `c` is greater than one.
+ #[inline]
+ pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
+     assert!(c <= 1);
+
+     for (lhs, rhs) in a.iter_mut().zip(b.iter().copied()) {
+         let (partial, borrow_from_rhs) = lhs.overflowing_sub(rhs);
+         let (total, borrow_from_c) = partial.overflowing_sub(c);
+         *lhs = total;
+         c = (borrow_from_rhs || borrow_from_c) as Limb;
+     }
+
+     c
+ }
+
+ /// `a += b` or `a -= b`. Does not preserve `b`.
+ ///
+ /// Whether the magnitudes are added or subtracted is decided by the signs:
+ /// equal signs add, different signs subtract. `a_sig`, `a_exp` and
+ /// `a_sign` receive the result; `b_sig` is used as scratch space.
+ /// Returns the fraction lost while aligning the two exponents.
+ #[inline]
+ pub(super) fn add_or_sub(
+ a_sig: &mut [Limb],
+ a_exp: &mut ExpInt,
+ a_sign: &mut bool,
+ b_sig: &mut [Limb],
+ b_exp: ExpInt,
+ b_sign: bool,
+ ) -> Loss {
+ // Are we bigger exponent-wise than the RHS?
+ let bits = *a_exp - b_exp;
+
+ // Determine if the operation on the absolute values is effectively
+ // an addition or subtraction.
+ // Subtraction is more subtle than one might naively expect.
+ if *a_sign ^ b_sign {
+ let loss;
+
+ // Align the exponents: the operand with the smaller exponent is
+ // shifted right by one bit less than the difference, and the other
+ // one is shifted left by one bit instead.
+ if bits == 0 {
+ loss = Loss::ExactlyZero;
+ } else if bits > 0 {
+ loss = shift_right(b_sig, &mut 0, (bits - 1) as usize);
+ shift_left(a_sig, a_exp, 1);
+ } else {
+ loss = shift_right(a_sig, a_exp, (-bits - 1) as usize);
+ shift_left(b_sig, &mut 0, 1);
+ }
+
+ // A non-zero lost fraction is accounted for as an incoming borrow.
+ let borrow = (loss != Loss::ExactlyZero) as Limb;
+
+ // Should we reverse the subtraction.
+ if cmp(a_sig, b_sig) == Ordering::Less {
+ // The code above is intended to ensure that no borrow is necessary.
+ assert_eq!(sub(b_sig, a_sig, borrow), 0);
+ a_sig.copy_from_slice(b_sig);
+ *a_sign = !*a_sign;
+ } else {
+ // The code above is intended to ensure that no borrow is necessary.
+ assert_eq!(sub(a_sig, b_sig, borrow), 0);
+ }
+
+ // Invert the lost fraction - it was on the RHS and subtracted.
+ match loss {
+ Loss::LessThanHalf => Loss::MoreThanHalf,
+ Loss::MoreThanHalf => Loss::LessThanHalf,
+ _ => loss,
+ }
+ } else {
+ // Addition: shift the operand with the smaller exponent right by
+ // the full difference.
+ let loss = if bits > 0 {
+ shift_right(b_sig, &mut 0, bits as usize)
+ } else {
+ shift_right(a_sig, a_exp, -bits as usize)
+ };
+ // We have a guard bit; generating a carry cannot happen.
+ assert_eq!(add(a_sig, b_sig, 0), 0);
+ loss
+ }
+ }
+
+ /// Full-width product of two limbs: `[low, high] = a * b`.
+ ///
+ /// The product is assembled from the four products of the half-limbs
+ /// of `a` and `b`. This cannot overflow, because
+ ///
+ /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
+ ///
+ /// which is less than n^2.
+ #[inline]
+ pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] {
+     const HALF_BITS: usize = LIMB_BITS / 2;
+     const HALF_MASK: Limb = (1 << HALF_BITS) - 1;
+
+     let mut product = [0, 0];
+     if a == 0 || b == 0 {
+         return product;
+     }
+
+     // Index 0 is the low half, index 1 the high half.
+     let a_halves = [a & HALF_MASK, (a >> HALF_BITS) & HALF_MASK];
+     let b_halves = [b & HALF_MASK, (b >> HALF_BITS) & HALF_MASK];
+
+     for (i, &a_half) in a_halves.iter().enumerate() {
+         for (j, &b_half) in b_halves.iter().enumerate() {
+             // Move each partial product to its position and accumulate.
+             let mut partial = [a_half * b_half, 0];
+             shift_left(&mut partial, &mut 0, (i + j) * HALF_BITS);
+             assert_eq!(add(&mut product, &partial, 0), 0);
+         }
+     }
+
+     product
+ }
+
+ /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
+ ///
+ /// `dst` must provide at least `a.len() + b.len()` limbs. `exp` is
+ /// adjusted for the position of the radix point in the product, and the
+ /// product is shifted right so that at most `precision` bits remain.
+ #[inline]
+ pub(super) fn mul<'a>(
+ dst: &mut [Limb],
+ exp: &mut ExpInt,
+ mut a: &'a [Limb],
+ mut b: &'a [Limb],
+ precision: usize,
+ ) -> Loss {
+ // Put the narrower number on the `a` for less loops below.
+ if a.len() > b.len() {
+ mem::swap(&mut a, &mut b);
+ }
+
+ // Only the low `b.len()` limbs are read before being written below;
+ // the remaining ones are assigned by the carry stores.
+ for x in &mut dst[..b.len()] {
+ *x = 0;
+ }
+
+ // Schoolbook multiplication: accumulate `a[i] * b` into `dst[i..]`.
+ for i in 0..a.len() {
+ let mut carry = 0;
+ for j in 0..b.len() {
+ let [low, mut high] = widening_mul(a[i], b[j]);
+
+ // Now add carry.
+ let (low, overflow) = low.overflowing_add(carry);
+ high += overflow as Limb;
+
+ // And now `dst[i + j]`, and store the new low part there.
+ let (low, overflow) = low.overflowing_add(dst[i + j]);
+ high += overflow as Limb;
+
+ dst[i + j] = low;
+ carry = high;
+ }
+ dst[i + b.len()] = carry;
+ }
+
+ // Assume the operands involved in the multiplication are single-precision
+ // FP, and the two multiplicands are:
+ // a = a23 . a22 ... a0 * 2^e1
+ // b = b23 . b22 ... b0 * 2^e2
+ // the result of multiplication is:
+ // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
+ // Note that there are three significant bits at the left-hand side of the
+ // radix point: two for the multiplication, and an overflow bit for the
+ // addition (that will always be zero at this point). Move the radix point
+ // toward left by two bits, and adjust exponent accordingly.
+ *exp += 2;
+
+ // Convert the result having "2 * precision" significant-bits back to the one
+ // having "precision" significant-bits. First, move the radix point from
+ // position "2*precision - 1" to "precision - 1". The exponent needs to be
+ // adjusted by "2*precision - 1" - "precision - 1" = "precision".
+ *exp -= precision as ExpInt + 1;
+
+ // In case MSB resides at the left-hand side of radix point, shift the
+ // mantissa right by some amount to make sure the MSB reside right before
+ // the radix point (i.e. "MSB . rest-significant-bits").
+ //
+ // Note that the result is not normalized when "omsb < precision". So, the
+ // caller needs to call IeeeFloat::normalize() if normalized value is
+ // expected.
+ let omsb = omsb(dst);
+ if omsb <= precision {
+ Loss::ExactlyZero
+ } else {
+ shift_right(dst, exp, omsb - precision)
+ }
+ }
+
+ /// `quotient = dividend / divisor`. Returns the lost fraction.
+ /// Does not preserve `dividend` or `divisor`.
+ #[inline]
+ pub(super) fn div(
+ quotient: &mut [Limb],
+ exp: &mut ExpInt,
+ dividend: &mut [Limb],
+ divisor: &mut [Limb],
+ precision: usize,
+ ) -> Loss {
+ // Normalize the divisor.
+ let bits = precision - omsb(divisor);
+ shift_left(divisor, &mut 0, bits);
+ *exp += bits as ExpInt;
+
+ // Normalize the dividend.
+ let bits = precision - omsb(dividend);
+ shift_left(dividend, exp, bits);
+
+ // Division by 1.
+ let olsb_divisor = olsb(divisor);
+ if olsb_divisor == precision {
+ quotient.copy_from_slice(dividend);
+ return Loss::ExactlyZero;
+ }
+
+ // Ensure the dividend >= divisor initially for the loop below.
+ // Incidentally, this means that the division loop below is
+ // guaranteed to set the integer bit to one.
+ if cmp(dividend, divisor) == Ordering::Less {
+ shift_left(dividend, exp, 1);
+ assert_ne!(cmp(dividend, divisor), Ordering::Less)
+ }
+
+ // Helper for figuring out the lost fraction.
+ let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) {
+ Ordering::Greater => Loss::MoreThanHalf,
+ Ordering::Equal => Loss::ExactlyHalf,
+ Ordering::Less => {
+ if is_all_zeros(dividend) {
+ Loss::ExactlyZero
+ } else {
+ Loss::LessThanHalf
+ }
+ }
+ };
+
+ // Try to perform a (much faster) short division for small divisors.
+ let divisor_bits = precision - (olsb_divisor - 1);
+ macro_rules! try_short_div {
+ ($W:ty, $H:ty, $half:expr) => {
+ if divisor_bits * 2 <= $half {
+ // Extract the small divisor.
+ let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1);
+ let divisor = divisor[0] as $H as $W;
+
+ // Shift the dividend to produce a quotient with the unit bit set.
+ let top_limb = *dividend.last().unwrap();
+ let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H;
+ shift_left(dividend, &mut 0, divisor_bits - 1);
+
+ // Apply short division in place on $H (of $half bits) chunks.
+ each_chunk(dividend, $half, |chunk| {
+ let chunk = chunk as $H;
+ let combined = ((rem as $W) << $half) | (chunk as $W);
+ rem = (combined % divisor) as $H;
+ (combined / divisor) as $H as Limb
+ });
+ quotient.copy_from_slice(dividend);
+
+ return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]);
+ }
+ };
+ }
+
+ try_short_div!(u32, u16, 16);
+ try_short_div!(u64, u32, 32);
+ try_short_div!(u128, u64, 64);
+
+ // Zero the quotient before setting bits in it.
+ for x in &mut quotient[..limbs_for_bits(precision)] {
+ *x = 0;
+ }
+
+ // Long division.
+ for bit in (0..precision).rev() {
+ if cmp(dividend, divisor) != Ordering::Less {
+ sub(dividend, divisor, 0);
+ set_bit(quotient, bit);
+ }
+ shift_left(dividend, &mut 0, 1);
+ }
+
+ lost_fraction(dividend, divisor)
+ }
+}
diff --git a/vendor/rustc_apfloat/src/lib.rs b/vendor/rustc_apfloat/src/lib.rs
new file mode 100644
index 000000000..90c47dae0
--- /dev/null
+++ b/vendor/rustc_apfloat/src/lib.rs
@@ -0,0 +1,739 @@
+//! Port of LLVM's APFloat software floating-point implementation from the
+//! following C++ sources (please update commit hash when backporting):
+//! https://github.com/llvm/llvm-project/commit/462a31f5a5abb905869ea93cc49b096079b11aa4
+//! * `llvm/include/llvm/ADT/APFloat.h` -> `Float` and `FloatConvert` traits
+//! * `llvm/lib/Support/APFloat.cpp` -> `ieee` and `ppc` modules
+//! * `llvm/unittests/ADT/APFloatTest.cpp` -> `tests` directory
+//!
+//! The port contains no unsafe code, global state, or side-effects in general,
+//! and the only allocations are in the conversion to/from decimal strings.
+//!
+//! Most of the API and the testcases are intact in some form or another,
+//! with some ergonomic changes, such as idiomatic short names, returning
+//! new values instead of mutating the receiver, and having separate method
+//! variants that take a non-default rounding mode (with the suffix `_r`).
+//! Comments have been preserved where possible, only slightly adapted.
+//!
+//! Instead of keeping a pointer to a configuration struct and inspecting it
+//! dynamically on every operation, types (e.g. `ieee::Double`), traits
+//! (e.g. `ieee::Semantics`) and associated constants are employed for
+//! increased type safety and performance.
+//!
+//! On-heap bigints are replaced everywhere (except in decimal conversion),
+//! with short arrays of `type Limb = u128` elements (instead of `u64`).
+//! This allows fitting the largest supported significands in one integer
+//! (`ieee::Quad` and `ppc::Fallback` use slightly less than 128 bits).
+//! All of the functions in the `ieee::sig` module operate on slices.
+//!
+//! # Note
+//!
+//! This API is completely unstable and subject to change.
+
+#![no_std]
+#![deny(warnings)]
+#![forbid(unsafe_code)]
+
+#[macro_use]
+extern crate bitflags;
+
+extern crate alloc;
+
+use core::cmp::Ordering;
+use core::fmt;
+use core::ops::{Add, Div, Mul, Neg, Rem, Sub};
+use core::ops::{AddAssign, DivAssign, MulAssign, RemAssign, SubAssign};
+use core::str::FromStr;
+
+bitflags! {
+ /// IEEE-754R 7: Default exception handling.
+ ///
+ /// UNDERFLOW or OVERFLOW are always returned or-ed with INEXACT.
+ ///
+ /// APFloat models this behavior specified by IEEE-754:
+ /// "For operations producing results in floating-point format, the default
+ /// result of an operation that signals the invalid operation exception
+ /// shall be a quiet NaN."
+ #[must_use]
+ pub struct Status: u8 {
+ const OK = 0x00;
+ const INVALID_OP = 0x01;
+ const DIV_BY_ZERO = 0x02;
+ const OVERFLOW = 0x04;
+ const UNDERFLOW = 0x08;
+ const INEXACT = 0x10;
+ }
+}
+
+#[must_use]
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
+pub struct StatusAnd<T> {
+ pub status: Status,
+ pub value: T,
+}
+
+impl Status {
+ pub fn and<T>(self, value: T) -> StatusAnd<T> {
+ StatusAnd { status: self, value }
+ }
+}
+
+impl<T> StatusAnd<T> {
+ pub fn map<F: FnOnce(T) -> U, U>(self, f: F) -> StatusAnd<U> {
+ StatusAnd {
+ status: self.status,
+ value: f(self.value),
+ }
+ }
+}
+
+#[macro_export]
+macro_rules! unpack {
+ ($status:ident|=, $e:expr) => {
+ match $e {
+ $crate::StatusAnd { status, value } => {
+ $status |= status;
+ value
+ }
+ }
+ };
+ ($status:ident=, $e:expr) => {
+ match $e {
+ $crate::StatusAnd { status, value } => {
+ $status = status;
+ value
+ }
+ }
+ };
+}
+
+/// Category of internally-represented number.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Category {
+ Infinity,
+ NaN,
+ Normal,
+ Zero,
+}
+
+/// IEEE-754R 4.3: Rounding-direction attributes.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Round {
+ NearestTiesToEven,
+ TowardPositive,
+ TowardNegative,
+ TowardZero,
+ NearestTiesToAway,
+}
+
+impl Neg for Round {
+ type Output = Round;
+ #[inline]
+ fn neg(self) -> Round {
+ match self {
+ Round::TowardPositive => Round::TowardNegative,
+ Round::TowardNegative => Round::TowardPositive,
+ Round::NearestTiesToEven | Round::TowardZero | Round::NearestTiesToAway => self,
+ }
+ }
+}
+
+/// A signed type to represent a floating point number's unbiased exponent.
+pub type ExpInt = i32;
+
+// `ilogb` error results.
+pub const IEK_INF: ExpInt = ExpInt::max_value();
+pub const IEK_NAN: ExpInt = ExpInt::min_value();
+pub const IEK_ZERO: ExpInt = ExpInt::min_value() + 1;
+
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct ParseError(pub &'static str);
+
+/// A self-contained host- and target-independent arbitrary-precision
+/// floating-point software implementation.
+///
+/// `apfloat` uses significand bignum integer arithmetic as provided by functions
+/// in the `ieee::sig` module.
+///
+/// Written for clarity rather than speed, in particular with a view to use in
+/// the front-end of a cross compiler so that target arithmetic can be correctly
+/// performed on the host. Performance should nonetheless be reasonable,
+/// particularly for its intended use. It may be useful as a base
+/// implementation for a run-time library during development of a faster
+/// target-specific one.
+///
+/// All 5 rounding modes in the IEEE-754R draft are handled correctly for all
+/// implemented operations. Currently implemented operations are add, subtract,
+/// multiply, divide, fused-multiply-add, conversion-to-float,
+/// conversion-to-integer and conversion-from-integer. New rounding modes
+/// (e.g. away from zero) can be added with three or four lines of code.
+///
+/// Four formats are built-in: IEEE single precision, double precision,
+/// quadruple precision, and x87 80-bit extended double (when operating with
+/// full extended precision). Adding a new format that obeys IEEE semantics
+/// only requires adding two lines of code: a declaration and definition of the
+/// format.
+///
+/// All operations return the status of that operation as an exception bit-mask,
+/// so multiple operations can be done consecutively with their results or-ed
+/// together. The returned status can be useful for compiler diagnostics; e.g.,
+/// inexact, underflow and overflow can be easily diagnosed on constant folding,
+/// and compiler optimizers can determine what exceptions would be raised by
+/// folding operations and optimize, or perhaps not optimize, accordingly.
+///
+/// At present, underflow tininess is detected after rounding; it should be
+/// straightforward to add support for the before-rounding case too.
+///
+/// The library reads hexadecimal floating point numbers as per C99, and
+/// correctly rounds if necessary according to the specified rounding mode.
+/// Syntax is required to have been validated by the caller.
+///
+/// It also reads decimal floating point numbers and correctly rounds according
+/// to the specified rounding mode.
+///
+/// Non-zero finite numbers are represented internally as a sign bit, a signed
+/// exponent (`ExpInt`), and the significand as an array of integer limbs. After
+/// normalization of a number of precision P the exponent is within the range of
+/// the format, and if the number is not denormal the P-th bit of the
+/// significand is set as an explicit integer bit. For denormals the most
+/// significant bit is shifted right so that the exponent is maintained at the
+/// format's minimum, so that the smallest denormal has just the least
+/// significant bit of the significand set. The sign of zeros and infinities
+/// is significant; the exponent and significand of such numbers is not stored,
+/// but has a known implicit (deterministic) value: 0 for the significands, 0
+/// for zero exponent, all 1 bits for infinity exponent. For NaNs the sign and
+/// significand are deterministic, although not really meaningful, and preserved
+/// in non-conversion operations. The exponent is implicitly all 1 bits.
+///
+/// `apfloat` does not provide any exception handling beyond default exception
+/// handling. We represent Signaling NaNs following the "should" clause of
+/// IEEE-754R 2008 6.2.1: a Signaling NaN has the first bit of its trailing
+/// significand set to 0.
+///
+/// Future work
+/// ===========
+///
+/// Some features that may or may not be worth adding:
+///
+/// Optional ability to detect underflow tininess before rounding.
+///
+/// New formats: x87 in single and double precision mode (IEEE apart from
+/// extended exponent range) (hard).
+///
+/// New operations: sqrt, nexttoward.
+///
+pub trait Float:
+ Copy
+ + Default
+ + FromStr<Err = ParseError>
+ + PartialOrd
+ + fmt::Display
+ + Neg<Output = Self>
+ + AddAssign
+ + SubAssign
+ + MulAssign
+ + DivAssign
+ + RemAssign
+ + Add<Output = StatusAnd<Self>>
+ + Sub<Output = StatusAnd<Self>>
+ + Mul<Output = StatusAnd<Self>>
+ + Div<Output = StatusAnd<Self>>
+ + Rem<Output = StatusAnd<Self>>
+{
+ /// Total number of bits in the in-memory format.
+ const BITS: usize;
+
+ /// Number of bits in the significand. This includes the integer bit.
+ const PRECISION: usize;
+
+ /// The largest E such that 2^E is representable; this matches the
+ /// definition of IEEE 754.
+ const MAX_EXP: ExpInt;
+
+ /// The smallest E such that 2^E is a normalized number; this
+ /// matches the definition of IEEE 754.
+ const MIN_EXP: ExpInt;
+
+ /// Positive Zero.
+ const ZERO: Self;
+
+ /// Positive Infinity.
+ const INFINITY: Self;
+
+ /// NaN (Not a Number).
+ // FIXME(eddyb) provide a default when qnan becomes const fn.
+ const NAN: Self;
+
+ /// Factory for QNaN values.
+ // FIXME(eddyb) should be const fn.
+ fn qnan(payload: Option<u128>) -> Self;
+
+ /// Factory for SNaN values.
+ // FIXME(eddyb) should be const fn.
+ fn snan(payload: Option<u128>) -> Self;
+
+ /// Largest finite number.
+ // FIXME(eddyb) should be const (but FloatPair::largest is nontrivial).
+ fn largest() -> Self;
+
+ /// Smallest (by magnitude) finite number.
+ /// Might be denormalized, which implies a relative loss of precision.
+ const SMALLEST: Self;
+
+ /// Smallest (by magnitude) normalized finite number.
+ // FIXME(eddyb) should be const (but FloatPair::smallest_normalized is nontrivial).
+ fn smallest_normalized() -> Self;
+
+ // Arithmetic
+
+ fn add_r(self, rhs: Self, round: Round) -> StatusAnd<Self>;
+ fn sub_r(self, rhs: Self, round: Round) -> StatusAnd<Self> {
+ self.add_r(-rhs, round)
+ }
+ fn mul_r(self, rhs: Self, round: Round) -> StatusAnd<Self>;
+ fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self>;
+ fn mul_add(self, multiplicand: Self, addend: Self) -> StatusAnd<Self> {
+ self.mul_add_r(multiplicand, addend, Round::NearestTiesToEven)
+ }
+ fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self>;
+ /// IEEE remainder.
+ fn ieee_rem(self, rhs: Self) -> StatusAnd<Self>;
+ /// C fmod, or llvm frem.
+ fn c_fmod(self, rhs: Self) -> StatusAnd<Self>;
+ fn round_to_integral(self, round: Round) -> StatusAnd<Self>;
+
+ /// IEEE-754R 2008 5.3.1: nextUp.
+ fn next_up(self) -> StatusAnd<Self>;
+
+ /// IEEE-754R 2008 5.3.1: nextDown.
+ ///
+ /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
+ /// appropriate sign switching before/after the computation.
+ fn next_down(self) -> StatusAnd<Self> {
+ (-self).next_up().map(|r| -r)
+ }
+
+ fn abs(self) -> Self {
+ if self.is_negative() {
+ -self
+ } else {
+ self
+ }
+ }
+ fn copy_sign(self, rhs: Self) -> Self {
+ if self.is_negative() != rhs.is_negative() {
+ -self
+ } else {
+ self
+ }
+ }
+
+ // Conversions
+ fn from_bits(input: u128) -> Self;
+ fn from_i128_r(input: i128, round: Round) -> StatusAnd<Self> {
+ if input < 0 {
+ Self::from_u128_r(input.wrapping_neg() as u128, -round).map(|r| -r)
+ } else {
+ Self::from_u128_r(input as u128, round)
+ }
+ }
+ fn from_i128(input: i128) -> StatusAnd<Self> {
+ Self::from_i128_r(input, Round::NearestTiesToEven)
+ }
+ fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self>;
+ fn from_u128(input: u128) -> StatusAnd<Self> {
+ Self::from_u128_r(input, Round::NearestTiesToEven)
+ }
+ fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError>;
+ fn to_bits(self) -> u128;
+
+ /// Convert a floating point number to an integer according to the
+ /// rounding mode. In case of an invalid operation exception,
+ /// deterministic values are returned, namely zero for NaNs and the
+ /// minimal or maximal value respectively for underflow or overflow.
+ /// If the rounded value is in range but the floating point number is
+ /// not the exact integer, the C standard doesn't require an inexact
+ /// exception to be raised. IEEE-854 does require it so we do that.
+ ///
+ /// Note that for conversions to integer type the C standard requires
+ /// round-to-zero to always be used.
+ ///
+ /// The `is_exact` output tells whether the result is exact, in the sense
+ /// that converting it back to the original floating point type produces
+ /// the original value. This is almost equivalent to result==Status::OK,
+ /// except for negative zeroes.
+ fn to_i128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<i128> {
+ let status;
+ if self.is_negative() {
+ if self.is_zero() {
+ // Negative zero can't be represented as an int.
+ *is_exact = false;
+ }
+ let r = unpack!(status=, (-self).to_u128_r(width, -round, is_exact));
+
+ // Check for values that don't fit in the signed integer.
+ if r > (1 << (width - 1)) {
+ // Return the most negative integer for the given width.
+ *is_exact = false;
+ Status::INVALID_OP.and(-1 << (width - 1))
+ } else {
+ status.and(r.wrapping_neg() as i128)
+ }
+ } else {
+ // Positive case is simpler, can pretend it's a smaller unsigned
+ // integer, and `to_u128` will take care of all the edge cases.
+ self.to_u128_r(width - 1, round, is_exact).map(|r| r as i128)
+ }
+ }
+ fn to_i128(self, width: usize) -> StatusAnd<i128> {
+ self.to_i128_r(width, Round::TowardZero, &mut true)
+ }
+ fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128>;
+ fn to_u128(self, width: usize) -> StatusAnd<u128> {
+ self.to_u128_r(width, Round::TowardZero, &mut true)
+ }
+
+ fn cmp_abs_normal(self, rhs: Self) -> Ordering;
+
+ /// Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
+ fn bitwise_eq(self, rhs: Self) -> bool;
+
+ // IEEE-754R 5.7.2 General operations.
+
+ /// Implements IEEE minNum semantics. Returns the smaller of the 2 arguments if
+ /// both are not NaN. If either argument is a NaN, returns the other argument.
+ fn min(self, other: Self) -> Self {
+ if self.is_nan() {
+ other
+ } else if other.is_nan() {
+ self
+ } else if other < self {
+ other
+ } else {
+ self
+ }
+ }
+
+ /// Implements IEEE maxNum semantics. Returns the larger of the 2 arguments if
+ /// both are not NaN. If either argument is a NaN, returns the other argument.
+ fn max(self, other: Self) -> Self {
+ if self.is_nan() {
+ other
+ } else if other.is_nan() {
+ self
+ } else if self < other {
+ other
+ } else {
+ self
+ }
+ }
+
+ /// Implements IEEE 754-2018 minimum semantics. Returns the smaller of 2
+ /// arguments, propagating NaNs and treating -0 as less than +0.
+ fn minimum(self, other: Self) -> Self {
+ if self.is_nan() {
+ self
+ } else if other.is_nan() {
+ other
+ } else if self.is_zero() && other.is_zero() && self.is_negative() != other.is_negative() {
+ if self.is_negative() {
+ self
+ } else {
+ other
+ }
+ } else if other < self {
+ other
+ } else {
+ self
+ }
+ }
+
+ /// Implements IEEE 754-2018 maximum semantics. Returns the larger of 2
+ /// arguments, propagating NaNs and treating -0 as less than +0.
+ fn maximum(self, other: Self) -> Self {
+ if self.is_nan() {
+ self
+ } else if other.is_nan() {
+ other
+ } else if self.is_zero() && other.is_zero() && self.is_negative() != other.is_negative() {
+ if self.is_negative() {
+ other
+ } else {
+ self
+ }
+ } else if self < other {
+ other
+ } else {
+ self
+ }
+ }
+
+ /// IEEE-754R isSignMinus: Returns true if and only if the current value is
+ /// negative.
+ ///
+ /// This applies to zeros and NaNs as well.
+ fn is_negative(self) -> bool;
+
+ /// IEEE-754R isNormal: Returns true if and only if the current value is normal.
+ ///
+ /// This implies that the current value of the float is not zero, subnormal,
+ /// infinite, or NaN following the definition of normality from IEEE-754R.
+ fn is_normal(self) -> bool {
+ !self.is_denormal() && self.is_finite_non_zero()
+ }
+
+ /// Returns true if and only if the current value is zero, subnormal, or
+ /// normal.
+ ///
+ /// This means that the value is not infinite or NaN.
+ fn is_finite(self) -> bool {
+ !self.is_nan() && !self.is_infinite()
+ }
+
+ /// Returns true if and only if the float is plus or minus zero.
+ fn is_zero(self) -> bool {
+ self.category() == Category::Zero
+ }
+
+ /// IEEE-754R isSubnormal(): Returns true if and only if the float is a
+ /// denormal.
+ fn is_denormal(self) -> bool;
+
+ /// IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
+ fn is_infinite(self) -> bool {
+ self.category() == Category::Infinity
+ }
+
+ /// Returns true if and only if the float is a quiet or signaling NaN.
+ fn is_nan(self) -> bool {
+ self.category() == Category::NaN
+ }
+
+ /// Returns true if and only if the float is a signaling NaN.
+ fn is_signaling(self) -> bool;
+
+ // Simple Queries
+
+ fn category(self) -> Category;
+ fn is_non_zero(self) -> bool {
+ !self.is_zero()
+ }
+ fn is_finite_non_zero(self) -> bool {
+ self.is_finite() && !self.is_zero()
+ }
+ fn is_pos_zero(self) -> bool {
+ self.is_zero() && !self.is_negative()
+ }
+ fn is_neg_zero(self) -> bool {
+ self.is_zero() && self.is_negative()
+ }
+ fn is_pos_infinity(self) -> bool {
+ self.is_infinite() && !self.is_negative()
+ }
+ fn is_neg_infinity(self) -> bool {
+ self.is_infinite() && self.is_negative()
+ }
+
+ /// Returns true if and only if the number has the smallest possible non-zero
+ /// magnitude in the current semantics.
+ fn is_smallest(self) -> bool {
+ Self::SMALLEST.copy_sign(self).bitwise_eq(self)
+ }
+
+ /// Returns true if this is the smallest (by magnitude) normalized finite
+ /// number in the given semantics.
+ fn is_smallest_normalized(self) -> bool {
+ Self::smallest_normalized().copy_sign(self).bitwise_eq(self)
+ }
+
+ /// Returns true if and only if the number has the largest possible finite
+ /// magnitude in the current semantics.
+ fn is_largest(self) -> bool {
+ Self::largest().copy_sign(self).bitwise_eq(self)
+ }
+
+ /// Returns true if and only if the number is an exact integer.
+ fn is_integer(self) -> bool {
+ // This could be made more efficient; I'm going for obviously correct.
+ if !self.is_finite() {
+ return false;
+ }
+ self.round_to_integral(Round::TowardZero).value.bitwise_eq(self)
+ }
+
+ /// If this value has an exact multiplicative inverse, return it.
+ fn get_exact_inverse(self) -> Option<Self>;
+
+ /// Returns the exponent of the internal representation of the Float.
+ ///
+ /// Because the radix of Float is 2, this is equivalent to floor(log2(x)).
+ /// For special Float values, this returns special error codes:
+ ///
+ /// NaN -> `IEK_NAN`
+ /// 0 -> `IEK_ZERO`
+ /// Inf -> `IEK_INF`
+ ///
+ fn ilogb(self) -> ExpInt;
+
+ /// Returns: self * 2^exp for integral exponents.
+ fn scalbn_r(self, exp: ExpInt, round: Round) -> Self;
+ fn scalbn(self, exp: ExpInt) -> Self {
+ self.scalbn_r(exp, Round::NearestTiesToEven)
+ }
+
+ /// Equivalent of C standard library function.
+ ///
+ /// While the C standard says exp is an unspecified value for infinity and nan,
+ /// this returns INT_MAX for infinities, and INT_MIN for NaNs (see `ilogb`).
+ fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self;
+ fn frexp(self, exp: &mut ExpInt) -> Self {
+ self.frexp_r(exp, Round::NearestTiesToEven)
+ }
+}
+
+pub trait FloatConvert<T: Float>: Float {
+ /// Convert a value of one floating point type to another.
+ /// The return value corresponds to the IEEE754 exceptions. `loses_info`
+ /// records whether the transformation lost information, i.e. whether
+ /// converting the result back to the original type will produce the
+ /// original value (this is almost the same as return value==Status::OK,
+ /// but there are edge cases where this is not so).
+ fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<T>;
+ fn convert(self, loses_info: &mut bool) -> StatusAnd<T> {
+ self.convert_r(Round::NearestTiesToEven, loses_info)
+ }
+}
+
+macro_rules! float_common_impls {
+ ($ty:ident<$t:tt>) => {
+ impl<$t> Default for $ty<$t>
+ where
+ Self: Float,
+ {
+ #[inline]
+ fn default() -> Self {
+ Self::ZERO
+ }
+ }
+
+ impl<$t> ::core::str::FromStr for $ty<$t>
+ where
+ Self: Float,
+ {
+ type Err = ParseError;
+ #[inline]
+ fn from_str(s: &str) -> Result<Self, ParseError> {
+ Self::from_str_r(s, Round::NearestTiesToEven).map(|x| x.value)
+ }
+ }
+
+ // Rounding ties to the nearest even, by default.
+
+ impl<$t> ::core::ops::Add for $ty<$t>
+ where
+ Self: Float,
+ {
+ type Output = StatusAnd<Self>;
+ #[inline]
+ fn add(self, rhs: Self) -> StatusAnd<Self> {
+ self.add_r(rhs, Round::NearestTiesToEven)
+ }
+ }
+
+ impl<$t> ::core::ops::Sub for $ty<$t>
+ where
+ Self: Float,
+ {
+ type Output = StatusAnd<Self>;
+ #[inline]
+ fn sub(self, rhs: Self) -> StatusAnd<Self> {
+ self.sub_r(rhs, Round::NearestTiesToEven)
+ }
+ }
+
+ impl<$t> ::core::ops::Mul for $ty<$t>
+ where
+ Self: Float,
+ {
+ type Output = StatusAnd<Self>;
+ #[inline]
+ fn mul(self, rhs: Self) -> StatusAnd<Self> {
+ self.mul_r(rhs, Round::NearestTiesToEven)
+ }
+ }
+
+ impl<$t> ::core::ops::Div for $ty<$t>
+ where
+ Self: Float,
+ {
+ type Output = StatusAnd<Self>;
+ #[inline]
+ fn div(self, rhs: Self) -> StatusAnd<Self> {
+ self.div_r(rhs, Round::NearestTiesToEven)
+ }
+ }
+
+ impl<$t> ::core::ops::Rem for $ty<$t>
+ where
+ Self: Float,
+ {
+ type Output = StatusAnd<Self>;
+ #[inline]
+ fn rem(self, rhs: Self) -> StatusAnd<Self> {
+ self.c_fmod(rhs)
+ }
+ }
+
+ impl<$t> ::core::ops::AddAssign for $ty<$t>
+ where
+ Self: Float,
+ {
+ #[inline]
+ fn add_assign(&mut self, rhs: Self) {
+ *self = (*self + rhs).value;
+ }
+ }
+
+ impl<$t> ::core::ops::SubAssign for $ty<$t>
+ where
+ Self: Float,
+ {
+ #[inline]
+ fn sub_assign(&mut self, rhs: Self) {
+ *self = (*self - rhs).value;
+ }
+ }
+
+ impl<$t> ::core::ops::MulAssign for $ty<$t>
+ where
+ Self: Float,
+ {
+ #[inline]
+ fn mul_assign(&mut self, rhs: Self) {
+ *self = (*self * rhs).value;
+ }
+ }
+
+ impl<$t> ::core::ops::DivAssign for $ty<$t>
+ where
+ Self: Float,
+ {
+ #[inline]
+ fn div_assign(&mut self, rhs: Self) {
+ *self = (*self / rhs).value;
+ }
+ }
+
+ impl<$t> ::core::ops::RemAssign for $ty<$t>
+ where
+ Self: Float,
+ {
+ #[inline]
+ fn rem_assign(&mut self, rhs: Self) {
+ *self = (*self % rhs).value;
+ }
+ }
+ };
+}
+
+pub mod ieee;
+pub mod ppc;
diff --git a/vendor/rustc_apfloat/src/ppc.rs b/vendor/rustc_apfloat/src/ppc.rs
new file mode 100644
index 000000000..b03efff3e
--- /dev/null
+++ b/vendor/rustc_apfloat/src/ppc.rs
@@ -0,0 +1,453 @@
+use crate::ieee;
+use crate::{Category, ExpInt, Float, FloatConvert, ParseError, Round, Status, StatusAnd};
+
+use core::cmp::Ordering;
+use core::fmt;
+use core::ops::Neg;
+
+/// A floating-point value stored as a pair of `F` components.
+///
+/// The code in this file treats the first field as the leading part (it alone
+/// decides category and sign) and the second field as a low-order term that is
+/// added to it when converting to the wider fallback format.
+#[must_use]
+#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
+pub struct DoubleFloat<F>(F, F);
+/// `DoubleFloat` whose two components are `ieee::Double`.
+pub type DoubleDouble = DoubleFloat<ieee::Double>;
+
+// These are legacy semantics for the Fallback, inaccurate implementation of
+// IBM double-double, if the accurate DoubleDouble doesn't handle the
+// operation. It's equivalent to having an IEEE number with consecutive 106
+// bits of mantissa and 11 bits of exponent.
+//
+// It's not equivalent to IBM double-double. For example, a legit IBM
+// double-double, 1 + epsilon:
+//
+// 1 + epsilon = 1 + (1 >> 1076)
+//
+// is not representable by a consecutive 106 bits of mantissa.
+//
+// Currently, these semantics are used in the following way:
+//
+// DoubleDouble -> (Double, Double) ->
+// DoubleDouble's Fallback -> IEEE operations
+//
+// FIXME: Implement all operations in DoubleDouble, and delete these
+// semantics.
+// FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds.
+/// Marker type carrying the fallback semantics for a component type `F`.
+pub struct FallbackS<F>(F);
+/// IEEE-style float parameterised by the `FallbackS<F>` semantics.
+type Fallback<F> = ieee::IeeeFloat<FallbackS<F>>;
+impl<F: Float> ieee::Semantics for FallbackS<F> {
+ // Forbid any conversion to/from bits.
+ const BITS: usize = 0;
+ const EXP_BITS: usize = 0;
+
+ // Twice the precision of a single `F` component.
+ const PRECISION: usize = F::PRECISION * 2;
+ // Same maximum exponent as `F`.
+ const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt;
+ // Minimum exponent of `F`, raised by `F::PRECISION`.
+ const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt;
+}
+
+// Convert number to F. To avoid spurious underflows, we re-
+// normalize against the F exponent range first, and only *then*
+// truncate the mantissa. The result of that second conversion
+// may be inexact, but should never underflow.
+// FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds.
+/// Marker type for the intermediate format used when narrowing a `Fallback<F>`
+/// back to `F`: the precision of `Fallback<F>` with the exponent range of `F`.
+pub struct FallbackExtendedS<F>(F);
+/// IEEE-style float parameterised by the `FallbackExtendedS<F>` semantics.
+type FallbackExtended<F> = ieee::IeeeFloat<FallbackExtendedS<F>>;
+impl<F: Float> ieee::Semantics for FallbackExtendedS<F> {
+ // Forbid any conversion to/from bits.
+ const BITS: usize = 0;
+ const EXP_BITS: usize = 0;
+
+ // Same precision as the fallback format.
+ const PRECISION: usize = Fallback::<F>::PRECISION;
+ // Exponent range taken unchanged from `F`.
+ const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt;
+ const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt;
+}
+
+impl<F: Float> From<Fallback<F>> for DoubleFloat<F>
+where
+ F: FloatConvert<FallbackExtended<F>>,
+ FallbackExtended<F>: FloatConvert<F>,
+{
+ /// Splits a fallback-format value into a pair of `F` components.
+ ///
+ /// The value is first moved into `FallbackExtended<F>` (asserted exact),
+ /// then rounded to `F` to obtain the first component. When that rounding
+ /// lost information on a finite non-zero value, the second component is
+ /// the difference between the extended value and the first component;
+ /// otherwise it is `F::ZERO`.
+ ///
+ /// Panics (via `assert_eq!`) if any step that is expected to be exact
+ /// reports a status other than `Status::OK` or a loss of information.
+ fn from(x: Fallback<F>) -> Self {
+ let mut status;
+ let mut loses_info = false;
+
+ // Step 1: same precision, exponent range of `F`; must be exact.
+ let extended: FallbackExtended<F> = unpack!(status=, x.convert(&mut loses_info));
+ assert_eq!((status, loses_info), (Status::OK, false));
+
+ // Step 2: round to `F`; only `INEXACT` is tolerated here.
+ let a = unpack!(status=, extended.convert(&mut loses_info));
+ assert_eq!(status - Status::INEXACT, Status::OK);
+
+ // If conversion was exact or resulted in a special case, we're done;
+ // just set the second double to zero. Otherwise, re-convert back to
+ // the extended format and compute the difference. This now should
+ // convert exactly to double.
+ let b = if a.is_finite_non_zero() && loses_info {
+ let u: FallbackExtended<F> = unpack!(status=, a.convert(&mut loses_info));
+ assert_eq!((status, loses_info), (Status::OK, false));
+ let v = unpack!(status=, extended - u);
+ assert_eq!(status, Status::OK);
+ let v = unpack!(status=, v.convert(&mut loses_info));
+ assert_eq!((status, loses_info), (Status::OK, false));
+ v
+ } else {
+ F::ZERO
+ };
+
+ DoubleFloat(a, b)
+ }
+}
+
+impl<F: FloatConvert<Self>> From<DoubleFloat<F>> for Fallback<F> {
+    /// Widens a double-float pair into the fallback format.
+    ///
+    /// Each component conversion is asserted to be exact. The second
+    /// component is only added when the converted first component is finite
+    /// and non-zero; otherwise the converted first component is returned as is.
+    fn from(DoubleFloat(a, b): DoubleFloat<F>) -> Self {
+        let mut status;
+        let mut loses_info = false;
+
+        // Widen the leading component.
+        let hi = unpack!(status=, a.convert(&mut loses_info));
+        assert_eq!((status, loses_info), (Status::OK, false));
+
+        // Special cases carry no second term.
+        if !hi.is_finite_non_zero() {
+            return hi;
+        }
+
+        // Widen the trailing component and fold it in.
+        let lo = unpack!(status=, b.convert(&mut loses_info));
+        assert_eq!((status, loses_info), (Status::OK, false));
+
+        (hi + lo).value
+    }
+}
+
+float_common_impls!(DoubleFloat<F>);
+
+impl<F: Float> Neg for DoubleFloat<F> {
+    type Output = Self;
+
+    /// Negates the value: the first component is always negated, the second
+    /// only when it is finite and non-zero.
+    fn neg(self) -> Self {
+        let DoubleFloat(hi, lo) = self;
+        let lo = if lo.is_finite_non_zero() { -lo } else { lo };
+        DoubleFloat(-hi, lo)
+    }
+}
+
+impl<F: FloatConvert<Fallback<F>>> fmt::Display for DoubleFloat<F> {
+    /// Formats the value by converting it to the fallback format and using
+    /// that type's `Display` implementation.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let wide: Fallback<F> = Fallback::from(*self);
+        fmt::Display::fmt(&wide, f)
+    }
+}
+
+impl<F: FloatConvert<Fallback<F>>> Float for DoubleFloat<F>
+where
+ Self: From<Fallback<F>>,
+{
+ // Two `F` bit patterns stored side by side.
+ const BITS: usize = F::BITS * 2;
+ // Precision and exponent range are those of the fallback format.
+ const PRECISION: usize = Fallback::<F>::PRECISION;
+ const MAX_EXP: ExpInt = Fallback::<F>::MAX_EXP;
+ const MIN_EXP: ExpInt = Fallback::<F>::MIN_EXP;
+
+ // Special values live in the first component; the second is `F::ZERO`.
+ const ZERO: Self = DoubleFloat(F::ZERO, F::ZERO);
+
+ const INFINITY: Self = DoubleFloat(F::INFINITY, F::ZERO);
+
+ // FIXME(eddyb) remove when qnan becomes const fn.
+ const NAN: Self = DoubleFloat(F::NAN, F::ZERO);
+
+    /// Builds a quiet NaN: `F::qnan(payload)` paired with a zero second component.
+    fn qnan(payload: Option<u128>) -> Self {
+        let hi = F::qnan(payload);
+        DoubleFloat(hi, F::ZERO)
+    }
+
+    /// Builds a signaling NaN: `F::snan(payload)` paired with a zero second component.
+    fn snan(payload: Option<u128>) -> Self {
+        let hi = F::snan(payload);
+        DoubleFloat(hi, F::ZERO)
+    }
+
+    /// Returns the pair `(F::largest(), lo)`, where `lo` is `F::largest()`
+    /// scaled down by `F::PRECISION + 1` binary places and then stepped down
+    /// once with `next_down` (asserted to report `Status::OK`).
+    fn largest() -> Self {
+        let status;
+        let hi = F::largest();
+        let shift = -(F::PRECISION as ExpInt + 1);
+        let lo = unpack!(status=, F::largest().scalbn(shift).next_down());
+        assert_eq!(status, Status::OK);
+        DoubleFloat(hi, lo)
+    }
+
+ // `F::SMALLEST` in the first component, zero in the second.
+ const SMALLEST: Self = DoubleFloat(F::SMALLEST, F::ZERO);
+
+    /// Returns `F::smallest_normalized()` scaled up by `F::PRECISION` binary
+    /// places, paired with a zero second component.
+    fn smallest_normalized() -> Self {
+        let shift = F::PRECISION as ExpInt;
+        let hi = F::smallest_normalized().scalbn(shift);
+        DoubleFloat(hi, F::ZERO)
+    }
+
+ // Implement addition, subtraction, multiplication and division based on:
+ // "Software for Doubled-Precision Floating-Point Computations",
+ // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
+
+ /// Adds `rhs` to `self` using rounding mode `round`.
+ ///
+ /// Operand pairs that are not both `Category::Normal` are resolved from
+ /// their categories alone. Two normal operands are summed component by
+ /// component; the returned status accumulates the statuses of the
+ /// individual `F` operations.
+ fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
+ match (self.category(), rhs.category()) {
+ (Category::Infinity, Category::Infinity) => {
+ // Infinities of opposite sign: invalid operation, NaN result.
+ if self.is_negative() != rhs.is_negative() {
+ Status::INVALID_OP.and(Self::NAN.copy_sign(self))
+ } else {
+ Status::OK.and(self)
+ }
+ }
+
+ // In these combinations the left operand is the result...
+ (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => Status::OK.and(self),
+
+ // ...and in these the right operand is.
+ (Category::Zero, _) | (_, Category::NaN) | (_, Category::Infinity) => Status::OK.and(rhs),
+
+ (Category::Normal, Category::Normal) => {
+ let mut status = Status::OK;
+ // (a, aa) are the components of `self`, (c, cc) those of `rhs`.
+ let (a, aa, c, cc) = (self.0, self.1, rhs.0, rhs.1);
+ let mut z = a;
+ z = unpack!(status|=, z.add_r(c, round));
+ if !z.is_finite() {
+ // Non-finite but not infinite: return it with a zero second component.
+ if !z.is_infinite() {
+ return status.and(DoubleFloat(z, F::ZERO));
+ }
+ // `a + c` came out infinite: drop that status and redo the sum
+ // starting from the second components.
+ status = Status::OK;
+ let a_cmp_c = a.cmp_abs_normal(c);
+ z = cc;
+ z = unpack!(status|=, z.add_r(aa, round));
+ if a_cmp_c == Ordering::Greater {
+ // z = cc + aa + c + a;
+ z = unpack!(status|=, z.add_r(c, round));
+ z = unpack!(status|=, z.add_r(a, round));
+ } else {
+ // z = cc + aa + a + c;
+ z = unpack!(status|=, z.add_r(a, round));
+ z = unpack!(status|=, z.add_r(c, round));
+ }
+ if !z.is_finite() {
+ return status.and(DoubleFloat(z, F::ZERO));
+ }
+ self.0 = z;
+ let mut zz = aa;
+ zz = unpack!(status|=, zz.add_r(cc, round));
+ if a_cmp_c == Ordering::Greater {
+ // self.1 = a - z + c + zz;
+ self.1 = a;
+ self.1 = unpack!(status|=, self.1.sub_r(z, round));
+ self.1 = unpack!(status|=, self.1.add_r(c, round));
+ self.1 = unpack!(status|=, self.1.add_r(zz, round));
+ } else {
+ // self.1 = c - z + a + zz;
+ self.1 = c;
+ self.1 = unpack!(status|=, self.1.sub_r(z, round));
+ self.1 = unpack!(status|=, self.1.add_r(a, round));
+ self.1 = unpack!(status|=, self.1.add_r(zz, round));
+ }
+ } else {
+ // q = a - z;
+ let mut q = a;
+ q = unpack!(status|=, q.sub_r(z, round));
+
+ // zz = q + c + (a - (q + z)) + aa + cc;
+ // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
+ let mut zz = q;
+ zz = unpack!(status|=, zz.add_r(c, round));
+ q = unpack!(status|=, q.add_r(z, round));
+ q = unpack!(status|=, q.sub_r(a, round));
+ q = -q;
+ zz = unpack!(status|=, zz.add_r(q, round));
+ zz = unpack!(status|=, zz.add_r(aa, round));
+ zz = unpack!(status|=, zz.add_r(cc, round));
+ // A positive-zero correction: `z` alone is the result, reported as `OK`.
+ if zz.is_zero() && !zz.is_negative() {
+ return Status::OK.and(DoubleFloat(z, F::ZERO));
+ }
+ self.0 = z;
+ self.0 = unpack!(status|=, self.0.add_r(zz, round));
+ if !self.0.is_finite() {
+ self.1 = F::ZERO;
+ return status.and(self);
+ }
+ // self.1 = z - self.0 + zz;
+ self.1 = z;
+ self.1 = unpack!(status|=, self.1.sub_r(self.0, round));
+ self.1 = unpack!(status|=, self.1.add_r(zz, round));
+ }
+ status.and(self)
+ }
+ }
+ }
+
+ /// Multiplies `self` by `rhs` using rounding mode `round`.
+ ///
+ /// Operand pairs that are not both `Category::Normal` are resolved from
+ /// their categories alone (always with `Status::OK`). Two normal operands
+ /// are multiplied component by component; the returned status accumulates
+ /// the statuses of the individual `F` operations.
+ fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
+ // Interesting observation: For special categories, finding the lowest
+ // common ancestor of the following layered graph gives the correct
+ // return category:
+ //
+ //        NaN
+ //       /   \
+ //     Zero  Inf
+ //       \   /
+ //       Normal
+ //
+ // e.g. NaN * NaN = NaN
+ //      Zero * Inf = NaN
+ //      Normal * Zero = Zero
+ //      Normal * Inf = Inf
+ match (self.category(), rhs.category()) {
+ (Category::NaN, _) => Status::OK.and(self),
+
+ (_, Category::NaN) => Status::OK.and(rhs),
+
+ (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => Status::OK.and(Self::NAN),
+
+ (Category::Zero, _) | (Category::Infinity, _) => Status::OK.and(self),
+
+ (_, Category::Zero) | (_, Category::Infinity) => Status::OK.and(rhs),
+
+ (Category::Normal, Category::Normal) => {
+ let mut status = Status::OK;
+ // (a, b) are the components of `self`, (c, d) those of `rhs`.
+ let (a, b, c, d) = (self.0, self.1, rhs.0, rhs.1);
+ // t = a * c
+ let mut t = a;
+ t = unpack!(status|=, t.mul_r(c, round));
+ // Zero or non-finite leading product: return it with a zero second component.
+ if !t.is_finite_non_zero() {
+ return status.and(DoubleFloat(t, F::ZERO));
+ }
+
+ // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
+ let mut tau = a;
+ tau = unpack!(status|=, tau.mul_add_r(c, -t, round));
+ // v = a * d
+ let mut v = a;
+ v = unpack!(status|=, v.mul_r(d, round));
+ // w = b * c
+ let mut w = b;
+ w = unpack!(status|=, w.mul_r(c, round));
+ // v += w
+ v = unpack!(status|=, v.add_r(w, round));
+ // tau += v (`v` already holds a * d + b * c at this point)
+ tau = unpack!(status|=, tau.add_r(v, round));
+ // u = t + tau
+ let mut u = t;
+ u = unpack!(status|=, u.add_r(tau, round));
+
+ self.0 = u;
+ if !u.is_finite() {
+ self.1 = F::ZERO;
+ } else {
+ // self.1 = (t - u) + tau
+ t = unpack!(status|=, t.sub_r(u, round));
+ t = unpack!(status|=, t.add_r(tau, round));
+ self.1 = t;
+ }
+ status.and(self)
+ }
+ }
+ }
+
+    /// Fused multiply-add, delegated to the fallback format: all three
+    /// operands are widened, combined there, and the result converted back.
+    fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
+        let lhs = Fallback::<F>::from(self);
+        let factor = Fallback::<F>::from(multiplicand);
+        let term = Fallback::<F>::from(addend);
+        lhs.mul_add_r(factor, term, round).map(Self::from)
+    }
+
+    /// Division, delegated to the fallback format and converted back.
+    fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self> {
+        let dividend = Fallback::<F>::from(self);
+        let divisor = Fallback::<F>::from(rhs);
+        dividend.div_r(divisor, round).map(Self::from)
+    }
+
+    /// `ieee_rem`, delegated to the fallback format and converted back.
+    fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> {
+        let dividend = Fallback::<F>::from(self);
+        let divisor = Fallback::<F>::from(rhs);
+        dividend.ieee_rem(divisor).map(Self::from)
+    }
+
+    /// `c_fmod`, delegated to the fallback format and converted back.
+    fn c_fmod(self, rhs: Self) -> StatusAnd<Self> {
+        let dividend = Fallback::<F>::from(self);
+        let divisor = Fallback::<F>::from(rhs);
+        dividend.c_fmod(divisor).map(Self::from)
+    }
+
+    /// Rounds to an integral value in the fallback format and converts back.
+    fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
+        let wide = Fallback::<F>::from(self);
+        wide.round_to_integral(round).map(Self::from)
+    }
+
+    /// `next_up`, computed in the fallback format and converted back.
+    fn next_up(self) -> StatusAnd<Self> {
+        let wide = Fallback::<F>::from(self);
+        wide.next_up().map(Self::from)
+    }
+
+    /// Decodes a pair from `input`: the low `F::BITS` bits become the first
+    /// component, the next `F::BITS` bits the second.
+    fn from_bits(input: u128) -> Self {
+        let upper = input >> F::BITS;
+        let mask: u128 = (1 << F::BITS) - 1;
+        let first = F::from_bits(input & mask);
+        let second = F::from_bits(upper & mask);
+        DoubleFloat(first, second)
+    }
+
+    /// Converts an unsigned integer via the fallback format.
+    fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
+        let wide = Fallback::<F>::from_u128_r(input, round);
+        wide.map(Self::from)
+    }
+
+    /// Parses `s` in the fallback format and converts a successful result
+    /// back; parse errors are passed through unchanged.
+    fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
+        match Fallback::<F>::from_str_r(s, round) {
+            Ok(parsed) => Ok(parsed.map(Self::from)),
+            Err(err) => Err(err),
+        }
+    }
+
+    /// Encodes the pair: first component in the low bits, second component
+    /// shifted left by `F::BITS`.
+    fn to_bits(self) -> u128 {
+        let low = self.0.to_bits();
+        let high = self.1.to_bits() << F::BITS;
+        low | high
+    }
+
+    /// Converts to an unsigned integer via the fallback format.
+    fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
+        let wide = Fallback::<F>::from(self);
+        wide.to_u128_r(width, round, is_exact)
+    }
+
+    /// Compares magnitudes. The first components decide unless they tie; on a
+    /// tie the second components are compared, taking into account whether
+    /// each second component's sign differs from its first component's.
+    fn cmp_abs_normal(self, rhs: Self) -> Ordering {
+        let hi_order = self.0.cmp_abs_normal(rhs.0);
+        if hi_order != Ordering::Equal {
+            return hi_order;
+        }
+
+        let lo_order = self.1.cmp_abs_normal(rhs.1);
+        if lo_order == Ordering::Equal {
+            return lo_order;
+        }
+
+        // "against": the second component has the opposite sign of the first.
+        let against = self.0.is_negative() ^ self.1.is_negative();
+        let rhs_against = rhs.0.is_negative() ^ rhs.1.is_negative();
+        match (!against).cmp(&!rhs_against) {
+            Ordering::Equal if against => lo_order.reverse(),
+            Ordering::Equal => lo_order,
+            decided => decided,
+        }
+    }
+
+    /// True when both components are bitwise equal to those of `rhs`.
+    fn bitwise_eq(self, rhs: Self) -> bool {
+        if !self.0.bitwise_eq(rhs.0) {
+            return false;
+        }
+        self.1.bitwise_eq(rhs.1)
+    }
+
+    /// The sign is that of the first component.
+    fn is_negative(self) -> bool {
+        let DoubleFloat(hi, _) = self;
+        hi.is_negative()
+    }
+
+    /// Reports whether this value is denormal.
+    ///
+    /// Only `Category::Normal` values qualify. Such a value is denormal when
+    /// either component is itself denormal, or when rounding `Hi + Lo` to `F`
+    /// does not give back `Hi`.
+    fn is_denormal(self) -> bool {
+        // Both components must be inspected; checking the first one twice
+        // would miss a denormal second component.
+        self.category() == Category::Normal
+            && (self.0.is_denormal() || self.1.is_denormal() ||
+                // (double)(Hi + Lo) == Hi defines a normal number.
+                self.0 != (self.0 + self.1).value)
+    }
+
+    /// Signaling-ness is that of the first component.
+    fn is_signaling(self) -> bool {
+        let DoubleFloat(hi, _) = self;
+        hi.is_signaling()
+    }
+
+    /// The category is that of the first component.
+    fn category(self) -> Category {
+        let DoubleFloat(hi, _) = self;
+        hi.category()
+    }
+
+    /// True only when both components are integers.
+    fn is_integer(self) -> bool {
+        let DoubleFloat(hi, lo) = self;
+        hi.is_integer() && lo.is_integer()
+    }
+
+    /// Looks for an exact inverse in the fallback format and converts it back.
+    fn get_exact_inverse(self) -> Option<Self> {
+        let wide = Fallback::<F>::from(self);
+        wide.get_exact_inverse().map(Self::from)
+    }
+
+    /// The exponent reported is that of the first component.
+    fn ilogb(self) -> ExpInt {
+        let DoubleFloat(hi, _) = self;
+        hi.ilogb()
+    }
+
+    /// Scales both components by the same exponent with the same rounding mode.
+    fn scalbn_r(self, exp: ExpInt, round: Round) -> Self {
+        let DoubleFloat(hi, lo) = self;
+        DoubleFloat(hi.scalbn_r(exp, round), lo.scalbn_r(exp, round))
+    }
+
+    /// Applies `frexp_r` to the first component (which writes `*exp`). For a
+    /// `Category::Normal` value the second component is then scaled by
+    /// `-*exp`; otherwise it is kept as is.
+    fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self {
+        let hi = self.0.frexp_r(exp, round);
+        let lo = if self.category() == Category::Normal {
+            self.1.scalbn_r(-*exp, round)
+        } else {
+            self.1
+        };
+        DoubleFloat(hi, lo)
+    }
+}
+
+// HACK(eddyb) this is here instead of in `tests/ppc.rs` because `DoubleFloat`
+// has private fields, and it's not worth it to make them public just for testing.
+#[test]
+fn is_integer() {
+    // Reinterprets a native `f64` bit pattern as an `ieee::Double`.
+    fn double_from_f64(f: f64) -> ieee::Double {
+        ieee::Double::from_bits(f.to_bits().into())
+    }
+
+    let neg_zero = double_from_f64(-0.0);
+    let fractional = double_from_f64(3.14159);
+
+    assert!(DoubleFloat(neg_zero, neg_zero).is_integer());
+    assert!(!DoubleFloat(fractional, neg_zero).is_integer());
+    assert!(!DoubleFloat(neg_zero, fractional).is_integer());
+}
diff --git a/vendor/rustc_apfloat/tests/downstream.rs b/vendor/rustc_apfloat/tests/downstream.rs
new file mode 100644
index 000000000..fc60a7c05
--- /dev/null
+++ b/vendor/rustc_apfloat/tests/downstream.rs
@@ -0,0 +1,410 @@
+//! Tests added to `rustc_apfloat`, that were not ported from the C++ code.
+
+use rustc_apfloat::ieee::{Double, Single, X87DoubleExtended};
+use rustc_apfloat::Float;
+
+// `f32 -> i128 -> f32` previously-crashing bit-patterns (found by fuzzing).
+pub const FUZZ_IEEE32_ROUNDTRIP_THROUGH_I128_CASES: &[u32] = &[
+ 0xff000000, // -1.7014118e+38
+ 0xff00e203, // -1.713147e+38
+ 0xff00e900, // -1.7135099e+38
+ 0xff7fffff, // -3.4028235e+38
+ 0xff800000, // -inf
+];
+
+// `f64 -> i128 -> f64` previously-crashing bit-patterns (found by fuzzing).
+pub const FUZZ_IEEE64_ROUNDTRIP_THROUGH_I128_CASES: &[u64] = &[
+ 0xc7e5d58020ffedff, // -2.3217876724230413e+38
+ 0xc7e7fffefefeff00, // -2.5521161229511617e+38
+ 0xc7e8030000653636, // -2.5533639056589687e+38
+ 0xea3501e2e8950007, // -4.116505897277026e+203
+ 0xf3ff0620ca000600, // -5.553072340247723e+250
+ 0xffc909842600d4ff, // -3.516340112093497e+307
+ 0xfff0000000000000, // -inf
+];
+
+#[test]
+fn fuzz_roundtrip_through_i128() {
+    // Each case must match what the native float -> i128 -> float casts produce.
+    for &bits in FUZZ_IEEE32_ROUNDTRIP_THROUGH_I128_CASES {
+        let as_int = Single::from_bits(bits.into()).to_i128(128).value;
+        let actual = Single::from_i128(as_int).value.to_bits();
+        let expected: u128 = (f32::from_bits(bits) as i128 as f32).to_bits().into();
+        assert_eq!(actual, expected);
+    }
+    for &bits in FUZZ_IEEE64_ROUNDTRIP_THROUGH_I128_CASES {
+        let as_int = Double::from_bits(bits.into()).to_i128(128).value;
+        let actual = Double::from_i128(as_int).value.to_bits();
+        let expected: u128 = (f64::from_bits(bits) as i128 as f64).to_bits().into();
+        assert_eq!(actual, expected);
+    }
+}
+
+// `f32` FMA bit-patterns which used to produce the wrong output (found by fuzzing).
+pub const FUZZ_IEEE32_FMA_CASES_WITH_EXPECTED_OUTPUTS: &[((u32, u32, u32), u32)] = &[
+ ((0x00001000 /* 5.74e-42 */, 0x0000001a /* 3.6e-44 */, 0xffff1a00 /* NaN */), 0xffff1a00 /* NaN */),
+ ((0x000080aa /* 4.6156e-41 */, 0xaaff0000 /* -4.52971e-13 */, 0xff9e007f /* NaN */), 0xffde007f /* NaN */),
+ ((0x0000843f /* 4.7441e-41 */, 0x0084ff80 /* 1.2213942e-38 */, 0xffff8000 /* NaN */), 0xffff8000 /* NaN */),
+ ((0x00009eaa /* 5.6918e-41 */, 0x201d7f1e /* 1.3340477e-19 */, 0xffff0001 /* NaN */), 0xffff0001 /* NaN */),
+ ((0x020400ff /* 9.698114e-38 */, 0x7f7f2200 /* 3.3912968e+38 */, 0xffffffff /* NaN */), 0xffffffff /* NaN */),
+ ((0x02060320 /* 9.845662e-38 */, 0x20002521 /* 1.0854307e-19 */, 0x7f800000 /* inf */), 0x7f800000 /* inf */),
+ (
+ (0x04000080 /* 1.5046557e-36 */, 0xff7fff00 /* -3.4027717e+38 */, 0xff800000 /* -inf */),
+ 0xff800000, /* -inf */
+ ),
+ (
+ (0x04007faa /* 1.5104948e-36 */, 0xff200000 /* -2.1267648e+38 */, 0xffff0000 /* NaN */),
+ 0xffff0000, /* NaN */
+ ),
+ ((0x1e0603ff /* 7.094727e-21 */, 0x00100000 /* 1.469368e-39 */, 0xffffff4f /* NaN */), 0xffffff4f /* NaN */),
+ ((0x200004aa /* 1.0843565e-19 */, 0x00202020 /* 2.95026e-39 */, 0x7fff00ff /* NaN */), 0x7fff00ff /* NaN */),
+ (
+ (0x20005eaa /* 1.0873343e-19 */, 0x9e9e9e3a /* -1.6794342e-20 */, 0xff9e009e /* NaN */),
+ 0xffde009e, /* NaN */
+ ),
+ ((0x20007faa /* 1.0884262e-19 */, 0x9e00611e /* -6.796347e-21 */, 0x7faa0600 /* NaN */), 0x7fea0600 /* NaN */),
+ (
+ (0x20007faa /* 1.0884262e-19 */, 0xaa069e1e /* -1.1956449e-13 */, 0xffffecff /* NaN */),
+ 0xffffecff, /* NaN */
+ ),
+ ((0x20025eaa /* 1.104275e-19 */, 0x9e01033a /* -6.82987e-21 */, 0xff9e009e /* NaN */), 0xffde009e /* NaN */),
+ ((0x3314f400 /* 3.4680852e-8 */, 0x00ff7903 /* 2.3461462e-38 */, 0xffffffdb /* NaN */), 0xffffffdb /* NaN */),
+ ((0x3314f400 /* 3.4680852e-8 */, 0x00ff7903 /* 2.3461462e-38 */, 0xfffffff6 /* NaN */), 0xfffffff6 /* NaN */),
+ ((0x3a218275 /* 0.0006161102 */, 0x3a3a3a3a /* 0.00071040133 */, 0x7f8a063a /* NaN */), 0x7fca063a /* NaN */),
+ ((0x40000001 /* 2.0000002 */, 0xfefffffe /* -1.7014116e+38 */, 0xfffe40ff /* NaN */), 0xfffe40ff /* NaN */),
+ ((0x50007faa /* 8623401000 */, 0x000011fb /* 6.45e-42 */, 0xff800000 /* -inf */), 0xff800000 /* -inf */),
+ ((0x64007f8b /* 9.481495e+21 */, 0xfa9a8702 /* -4.01176e+35 */, 0xff820000 /* NaN */), 0xffc20000 /* NaN */),
+ ((0x6a017faa /* 3.9138577e+25 */, 0x00000070 /* 1.57e-43 */, 0xff80db03 /* NaN */), 0xffc0db03 /* NaN */),
+ ((0x6a017faa /* 3.9138577e+25 */, 0x00000070 /* 1.57e-43 */, 0xff80db23 /* NaN */), 0xffc0db23 /* NaN */),
+ (
+ (0x6e000000 /* 9.9035203e+27 */, 0xdf008000 /* -9259401000000000000 */, 0x7f800000 /* inf */),
+ 0x7f800000, /* inf */
+ ),
+ ((0x7f7fff00 /* 3.4027717e+38 */, 0x02000080 /* 9.404098e-38 */, 0x7fc00000 /* NaN */), 0x7fc00000 /* NaN */),
+ (
+ (0xb3eb00ff /* -1.09432214e-7 */, 0x00ffefe2 /* 2.3504105e-38 */, 0xfffffee9 /* NaN */),
+ 0xfffffee9, /* NaN */
+ ),
+ (
+ (0xdf0603ff /* -9656842000000000000 */, 0x808000ff /* -1.1755301e-38 */, 0xff9b0000 /* NaN */),
+ 0xffdb0000, /* NaN */
+ ),
+ (
+ (
+ 0xf1001101, /* -634154200000000000000000000000 */
+ 0x7f400000, /* 255211780000000000000000000000000000000 */
+ 0x7f800000, /* inf */
+ ),
+ 0x7f800000, /* inf */
+ ),
+ ((0xf5000080 /* -1.6226175e+32 */, 0xc9ffff00 /* -2097120 */, 0xffff7fff /* NaN */), 0xffff7fff /* NaN */),
+ (
+ (0xf5ffffff /* -6.4903707e+32 */, 0xff000b09 /* -1.7019848e+38 */, 0xff800000 /* -inf */),
+ 0xff800000, /* -inf */
+ ),
+ (
+ (0xf70029e8 /* -2.5994686e+33 */, 0xf7ffff7f /* -1.0384514e+34 */, 0xffff7fff /* NaN */),
+ 0xffff7fff, /* NaN */
+ ),
+ (
+ (0xff007faa /* -1.7080405e+38 */, 0xd3fface5 /* -2196234700000 */, 0xffff7f00 /* NaN */),
+ 0xffff7f00, /* NaN */
+ ),
+ (
+ (0xff200000 /* -2.1267648e+38 */, 0xe380ffff /* -4.7592594e+21 */, 0xff800000 /* -inf */),
+ 0xff800000, /* -inf */
+ ),
+ ((0xff6d0000 /* -3.1502704e+38 */, 0xc12005ff /* -10.001464 */, 0xff800000 /* -inf */), 0xff800000 /* -inf */),
+];
+
+// `f64` FMA bit-patterns which used to produce the wrong output (found by fuzzing).
+pub const FUZZ_IEEE64_FMA_CASES_WITH_EXPECTED_OUTPUTS: &[((u64, u64, u64), u64)] = &[
+ (
+ (
+ 0x000000000000001e, /* 1.5e-322 */
+ 0x00000000ffdf0000, /* 2.120927281e-314 */
+ 0xffffff8000000000, /* NaN */
+ ),
+ 0xffffff8000000000, /* NaN */
+ ),
+ (
+ (
+ 0x000000007fffffff, /* 1.060997895e-314 */
+ 0xff00000000200000, /* -5.486124071348364e+303 */
+ 0xfffd0000000000e9, /* NaN */
+ ),
+ 0xfffd0000000000e9, /* NaN */
+ ),
+ (
+ (
+ 0x0000020000e30000, /* 1.086469195027e-311 */
+ 0xff00000011000000, /* -5.48612441622957e+303 */
+ 0xfffd00000000e0e9, /* NaN */
+ ),
+ 0xfffd00000000e0e9, /* NaN */
+ ),
+ (
+ (
+ 0x0000040000006400, /* 2.1729237025965e-311 */
+ 0x000000e5ff000000, /* 4.88050742876e-312 */
+ 0xffffffe300000000, /* NaN */
+ ),
+ 0xffffffe300000000, /* NaN */
+ ),
+ (
+ (
+ 0x00006a0000000000, /* 5.75824777836336e-310 */
+ 0x005015000018f9f1, /* 3.5783707339010265e-307 */
+ 0x7fffffde00000000, /* NaN */
+ ),
+ 0x7fffffde00000000, /* NaN */
+ ),
+ (
+ (
+ 0x00007ffa01000373, /* 6.95208343930866e-310 */
+ 0x0005000000ff107f, /* 6.95335589042254e-309 */
+ 0xffffffff00000005, /* NaN */
+ ),
+ 0xffffffff00000005, /* NaN */
+ ),
+ (
+ (
+ 0x0000ff8000000000, /* 1.387955006954565e-309 */
+ 0x0000000001000000, /* 8.289046e-317 */
+ 0xfff0000000000000, /* -inf */
+ ),
+ 0xfff0000000000000, /* -inf */
+ ),
+ (
+ (
+ 0x0002a000f6290000, /* 3.650532203442106e-309 */
+ 0x400013fffd000000, /* 2.009765602648258 */
+ 0xfffdfe0000ff9aff, /* NaN */
+ ),
+ 0xfffdfe0000ff9aff, /* NaN */
+ ),
+ (
+ (
+ 0x0006000000001700, /* 8.344026969431096e-309 */
+ 0xd9000000da080000, /* -5.164503950933907e+120 */
+ 0xfffffee5000000fd, /* NaN */
+ ),
+ 0xfffffee5000000fd, /* NaN */
+ ),
+ (
+ (
+ 0x0006000040000013, /* 8.344032274391576e-309 */
+ 0xfafe036500061100, /* -2.7893890583525793e+284 */
+ 0xffff7fff00001011, /* NaN */
+ ),
+ 0xffff7fff00001011, /* NaN */
+ ),
+ (
+ (
+ 0x00f1000000640000, /* 3.873408578194326e-304 */
+ 0xffe6005e00000000, /* -1.2359946076651026e+308 */
+ 0xfffd007000000000, /* NaN */
+ ),
+ 0xfffd007000000000, /* NaN */
+ ),
+ (
+ (
+ 0x05203a0080ff0513, /* 5.456081264530354e-284 */
+ 0xf90000000000f7ff, /* -6.924462078599005e+274 */
+ 0xfff0000000000000, /* -inf */
+ ),
+ 0xfff0000000000000, /* -inf */
+ ),
+ (
+ (
+ 0x0540400001000513, /* 2.1855837639726535e-283 */
+ 0xee05130640000100, /* -9.522265158052987e+221 */
+ 0x7fff00001004fa01, /* NaN */
+ ),
+ 0x7fff00001004fa01, /* NaN */
+ ),
+ (
+ (
+ 0x0540400001000513, /* 2.1855837639726535e-283 */
+ 0xffd8000000000000, /* -6.741349255733685e+307 */
+ 0xfff0001000000000, /* NaN */
+ ),
+ 0xfff8001000000000, /* NaN */
+ ),
+ (
+ (
+ 0x054040000100e213, /* 2.1855837639996873e-283 */
+ 0xfbd8000000000000, /* -3.6544927542749997e+288 */
+ 0xfff0ff1000000000, /* NaN */
+ ),
+ 0xfff8ff1000000000, /* NaN */
+ ),
+ (
+ (
+ 0x060000000000ff04, /* 8.814425663530262e-280 */
+ 0x00000020ffff0606, /* 7.00258294846e-313 */
+ 0xffffffde00001300, /* NaN */
+ ),
+ 0xffffffde00001300, /* NaN */
+ ),
+ (
+ (
+ 0x1306400001000513, /* 5.042468007014986e-217 */
+ 0x00001004fa03ee05, /* 8.7022551317144e-311 */
+ 0xfffc80f7ffff7fff, /* NaN */
+ ),
+ 0xfffc80f7ffff7fff, /* NaN */
+ ),
+ (
+ (
+ 0x1306400001000513, /* 5.042468007014986e-217 */
+ 0xa5001004fa01ee05, /* -1.810368898568446e-130 */
+ 0xfffa80f7ff1b7fff, /* NaN */
+ ),
+ 0xfffa80f7ff1b7fff, /* NaN */
+ ),
+ (
+ (
+ 0x4006400005130100, /* 2.7812500378059895 */
+ 0x0000ff4000000000, /* 1.38659692964835e-309 */
+ 0x7fffffec4200044b, /* NaN */
+ ),
+ 0x7fffffec4200044b, /* NaN */
+ ),
+ (
+ (
+ 0x4100000001000000, /* 131072.00048828125 */
+ 0x0000fffffff00000, /* 1.390671156386347e-309 */
+ 0xfffffe00000040ff, /* NaN */
+ ),
+ 0xfffffe00000040ff, /* NaN */
+ ),
+ (
+ (
+ 0x7a7a7a7a7a7a0000, /* 9.61276249042562e+281 */
+ 0xff7a7a7a7a7a7a7a, /* -1.1621116772547446e+306 */
+ 0xfffd007000ef0000, /* NaN */
+ ),
+ 0xfffd007000ef0000, /* NaN */
+ ),
+ (
+ (
+ 0x7f000012007ff010, /* 5.4862182545686e+303 */
+ 0x7f0000120091f010, /* 5.486218256005604e+303 */
+ 0xfff0000000000000, /* -inf */
+ ),
+ 0xfff0000000000000, /* -inf */
+ ),
+ (
+ (
+ 0x7f0022000a8000f6, /* 5.531663399192155e+303 */
+ 0xff00ebfef0800300, /* -5.802213559159178e+303 */
+ 0x7ff0000000000000, /* inf */
+ ),
+ 0x7ff0000000000000, /* inf */
+ ),
+ (
+ (
+ 0x7f06400001000513, /* 7.62914130360521e+303 */
+ 0xff001004fb88f7ff, /* -5.507580309563204e+303 */
+ 0xfffa01ee0513ffff, /* NaN */
+ ),
+ 0xfffa01ee0513ffff, /* NaN */
+ ),
+ (
+ (
+ 0xbbbbbb7f01000513, /* -5.872565540268704e-21 */
+ 0x0100bbbbbbbbbbbb, /* 7.625298445452731e-304 */
+ 0xffffff4000004000, /* NaN */
+ ),
+ 0xffffff4000004000, /* NaN */
+ ),
+ (
+ (
+ 0xbc00000000400000, /* -1.0842021734952464e-19 */
+ 0x00bc000000004000, /* 3.987332354453194e-305 */
+ 0xfff0000000e20000, /* NaN */
+ ),
+ 0xfff8000000e20000, /* NaN */
+ ),
+ (
+ (
+ 0xddff000004000000, /* -6.048387862754913e+144 */
+ 0xff00000000000000, /* -5.486124068793689e+303 */
+ 0xffffff0000000000, /* NaN */
+ ),
+ 0xffffff0000000000, /* NaN */
+ ),
+ (
+ (
+ 0xe100051b060c0513, /* -1.759578741202065e+159 */
+ 0xfbfeee0513064110, /* -1.8838766970066999e+289 */
+ 0xffff7fdf00001004, /* NaN */
+ ),
+ 0xffff7fdf00001004, /* NaN */
+ ),
+ (
+ (
+ 0xf0000000007ff010, /* -3.1050361903821855e+231 */
+ 0x7f06010800180000, /* 7.54480183807128e+303 */
+ 0x7ff0000000000000, /* inf */
+ ),
+ 0x7ff0000000000000, /* inf */
+ ),
+ (
+ (
+ 0xf4ffff05021d7d12, /* -3.753309156386366e+255 */
+ 0xfd100000e8030000, /* -2.5546778042386733e+294 */
+ 0xfff0000000000000, /* -inf */
+ ),
+ 0xfff0000000000000, /* -inf */
+ ),
+ (
+ (
+ 0xff0000fff05f0001, /* -5.48746313513839e+303 */
+ 0xff0000fff0800000, /* -5.487463137772898e+303 */
+ 0xfff0000000000000, /* -inf */
+ ),
+ 0xfff0000000000000, /* -inf */
+ ),
+];
+
+#[test]
+fn fuzz_fma_with_expected_outputs() {
+    // Single-precision cases.
+    for &((a_bits, b_bits, c_bits), expected_bits) in FUZZ_IEEE32_FMA_CASES_WITH_EXPECTED_OUTPUTS {
+        let a = Single::from_bits(a_bits.into());
+        let b = Single::from_bits(b_bits.into());
+        let c = Single::from_bits(c_bits.into());
+        let actual = a.mul_add(b, c).value.to_bits();
+        let expected: u128 = expected_bits.into();
+        assert_eq!(actual, expected);
+    }
+    // Double-precision cases.
+    for &((a_bits, b_bits, c_bits), expected_bits) in FUZZ_IEEE64_FMA_CASES_WITH_EXPECTED_OUTPUTS {
+        let a = Double::from_bits(a_bits.into());
+        let b = Double::from_bits(b_bits.into());
+        let c = Double::from_bits(c_bits.into());
+        let actual = a.mul_add(b, c).value.to_bits();
+        let expected: u128 = expected_bits.into();
+        assert_eq!(actual, expected);
+    }
+}
+
+// x87 80-bit "extended precision"/`long double` bit-patterns which used to
+// produce the wrong output when negated (found by fuzzing - though fuzzing also
+// found many examples in all ops, as the root issue was the handling of the
+// bit-level encoding itself, but negation was the easiest op to test here).
+pub const FUZZ_X87_F80_NEG_CASES_WITH_EXPECTED_OUTPUTS: &[(u128, u128)] = &[
+ (0x01010101010100000000 /* NaN */, 0xffff0101010100000000 /* NaN */),
+ (
+ 0x0000ff7f2300ff000000, /* 6.71098449692300485303E-4932 */
+ 0x8001ff7f2300ff000000, /* -6.71098449692300485303E-4932 */
+ ),
+ (
+ 0x00008000000000000000, /* 3.36210314311209350626E-4932 */
+ 0x80018000000000000000, /* -3.36210314311209350626E-4932 */
+ ),
+];
+
+#[test]
+fn fuzz_x87_f80_neg_with_expected_outputs() {
+    // Negating each input bit pattern must give the listed output bit pattern.
+    for &(bits, expected_bits) in FUZZ_X87_F80_NEG_CASES_WITH_EXPECTED_OUTPUTS {
+        let negated = -X87DoubleExtended::from_bits(bits);
+        assert_eq!(negated.to_bits(), expected_bits);
+    }
+}
diff --git a/vendor/rustc_apfloat/tests/ieee.rs b/vendor/rustc_apfloat/tests/ieee.rs
new file mode 100644
index 000000000..8af64dc5a
--- /dev/null
+++ b/vendor/rustc_apfloat/tests/ieee.rs
@@ -0,0 +1,4927 @@
+#[macro_use]
+extern crate rustc_apfloat;
+
+use core::cmp::Ordering;
+use rustc_apfloat::ieee::{BFloat, Double, Float8E4M3FN, Float8E5M2, Half, Quad, Single, X87DoubleExtended};
+use rustc_apfloat::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
+use rustc_apfloat::{Float, FloatConvert, Round, Status};
+
+// FIXME(eddyb) maybe include this in `rustc_apfloat` itself?
+//
+// Given a comma-separated list of float types, defines a second macro,
+// `for_each_float_type!`, which evaluates an expression once per listed type.
+// Each evaluation happens in its own block, in which the identifier chosen by
+// the caller (`$ty_var`) is a `type` alias for the current float type.
+macro_rules! define_for_each_float_type {
+ ($($ty:ty),+ $(,)?) => {
+ macro_rules! for_each_float_type {
+ // FIXME(eddyb) use generic closures if they're ever added to Rust.
+ (for<$ty_var:ident: Float> $e:expr) => {{
+ $({
+ type $ty_var = $ty;
+ $e;
+ })+
+ }}
+ }
+ }
+}
+// The list of float types that `for_each_float_type!` iterates over.
+define_for_each_float_type! {
+ Half,
+ Single,
+ Double,
+ Quad,
+
+ BFloat,
+ Float8E5M2,
+ Float8E4M3FN,
+ X87DoubleExtended,
+
+ // NOTE(eddyb) tests for this are usually in `ppc.rs` but this works too.
+ rustc_apfloat::ppc::DoubleDouble,
+}
+
+// Test-only helpers for moving values between a native `f32` and a float type
+// from this crate (implemented below for `Single`).
+trait SingleExt {
+ // Builds `Self` from a native `f32`.
+ fn from_f32(input: f32) -> Self;
+ // Converts `self` back into a native `f32`.
+ fn to_f32(self) -> f32;
+}
+
+impl SingleExt for Single {
+    // Reinterpret the raw bits of `input` as a `Single` (no rounding involved).
+    fn from_f32(input: f32) -> Self {
+        let raw: u32 = input.to_bits();
+        Self::from_bits(u128::from(raw))
+    }
+
+    // Reinterpret the low 32 bits of `self`'s encoding as a native `f32`.
+    fn to_f32(self) -> f32 {
+        let raw = self.to_bits() as u32;
+        f32::from_bits(raw)
+    }
+}
+
+// Test-only helpers for moving values between a native `f64` and a float type
+// from this crate (implemented below for `Double`).
+trait DoubleExt {
+ // Builds `Self` from a native `f64`.
+ fn from_f64(input: f64) -> Self;
+ // Converts `self` back into a native `f64`.
+ fn to_f64(self) -> f64;
+}
+
+impl DoubleExt for Double {
+    // Reinterpret the raw bits of `input` as a `Double` (no rounding involved).
+    fn from_f64(input: f64) -> Self {
+        let raw: u64 = input.to_bits();
+        Self::from_bits(u128::from(raw))
+    }
+
+    // Reinterpret the low 64 bits of `self`'s encoding as a native `f64`.
+    fn to_f64(self) -> f64 {
+        let raw = self.to_bits() as u64;
+        f64::from_bits(raw)
+    }
+}
+
+// NOTE(eddyb) these match the C++ `convertToFloat`/`convertToDouble` methods,
+// after their generalization to allow an optional lossless conversion to their
+// expected semantics (from e.g. `IEEEhalf`/`BFloat`, for `convertToSingle`).
+// FIXME(eddyb) should the methods have e.g. `_lossless_via_convert` in their names?
+//
+// Converts `src` to the float type `T` and returns the converted value,
+// panicking with "Unexpected imprecision" if the conversion reported `INEXACT`
+// or flagged a loss of information.
+fn assert_lossless_conversion<S: FloatConvert<T>, T: Float>(src: S) -> T {
+    let mut loses_info = false;
+    let converted = src.convert(&mut loses_info);
+    let exact = !converted.status.intersects(Status::INEXACT);
+    assert!(exact && !loses_info, "Unexpected imprecision");
+    converted.value
+}
+
+// Adds `to_f32` to float types that convert to `Single`; the conversion is
+// asserted to be lossless before the `Single` is turned into an `f32`.
+trait ToF32LosslessViaConvertToSingle: FloatConvert<Single> {
+    fn to_f32(self) -> f32 {
+        let as_single: Single = assert_lossless_conversion(self);
+        as_single.to_f32()
+    }
+}
+impl ToF32LosslessViaConvertToSingle for Half {}
+impl ToF32LosslessViaConvertToSingle for BFloat {}
+impl ToF32LosslessViaConvertToSingle for Float8E5M2 {}
+impl ToF32LosslessViaConvertToSingle for Float8E4M3FN {}
+
+// Adds `to_f64` to float types that convert to `Double`; the conversion is
+// asserted to be lossless before the `Double` is turned into an `f64`.
+trait ToF64LosslessViaConvertToDouble: FloatConvert<Double> {
+    fn to_f64(self) -> f64 {
+        let as_double: Double = assert_lossless_conversion(self);
+        as_double.to_f64()
+    }
+}
+impl ToF64LosslessViaConvertToDouble for Single {}
+// HACK(eddyb) take advantage of the transitivity of "are conversions lossless".
+impl<T> ToF64LosslessViaConvertToDouble for T where T: ToF32LosslessViaConvertToSingle + FloatConvert<Double> {}
+
+#[test]
+fn is_signaling() {
+    // Exercise qNaN, -qNaN, +sNaN and -sNaN, each with and without a payload.
+    for payload in [None, Some(4)] {
+        let quiet = Single::qnan(payload);
+        assert!(!quiet.is_signaling());
+        assert!(!(-quiet).is_signaling());
+
+        let signaling = Single::snan(payload);
+        assert!(signaling.is_signaling());
+        assert!((-signaling).is_signaling());
+    }
+}
+
+#[test]
+fn next() {
+ // Exercises `next_up`/`next_down` on `Quad`: every case checks the returned
+ // status and compares the result bit-for-bit with the expected value.
+
+ // 1. Test Special Cases Values.
+ //
+ // Test all special values for nextUp and nextDown prescribed by IEEE-754R
+ // 2008. These are:
+ // 1. +inf
+ // 2. -inf
+ // 3. largest
+ // 4. -largest
+ // 5. smallest
+ // 6. -smallest
+ // 7. qNaN
+ // 8. sNaN
+ // 9. +0
+ // 10. -0
+
+ let mut status;
+
+ // nextUp(+inf) = +inf.
+ let test = unpack!(status=, Quad::INFINITY.next_up());
+ let expected = Quad::INFINITY;
+ assert_eq!(status, Status::OK);
+ assert!(test.is_infinite());
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(+inf) = -nextUp(-inf) = -(-largest) = largest
+ let test = unpack!(status=, Quad::INFINITY.next_down());
+ let expected = Quad::largest();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(-inf) = -largest
+ let test = unpack!(status=, (-Quad::INFINITY).next_up());
+ let expected = -Quad::largest();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-inf) = -nextUp(+inf) = -(+inf) = -inf.
+ let test = unpack!(status=, (-Quad::INFINITY).next_down());
+ let expected = -Quad::INFINITY;
+ assert_eq!(status, Status::OK);
+ assert!(test.is_infinite() && test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(largest) = +inf
+ let test = unpack!(status=, Quad::largest().next_up());
+ let expected = Quad::INFINITY;
+ assert_eq!(status, Status::OK);
+ assert!(test.is_infinite() && !test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(largest) = -nextUp(-largest)
+ // = -(-largest + inc)
+ // = largest - inc.
+ let test = unpack!(status=, Quad::largest().next_down());
+ let expected = "0x1.fffffffffffffffffffffffffffep+16383".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_infinite() && !test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(-largest) = -largest + inc.
+ let test = unpack!(status=, (-Quad::largest()).next_up());
+ let expected = "-0x1.fffffffffffffffffffffffffffep+16383".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-largest) = -nextUp(largest) = -(inf) = -inf.
+ let test = unpack!(status=, (-Quad::largest()).next_down());
+ let expected = -Quad::INFINITY;
+ assert_eq!(status, Status::OK);
+ assert!(test.is_infinite() && test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(smallest) = smallest + inc.
+ let test = unpack!(status=, "0x0.0000000000000000000000000001p-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "0x0.0000000000000000000000000002p-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(smallest) = -nextUp(-smallest) = -(-0) = +0.
+ let test = unpack!(status=, "0x0.0000000000000000000000000001p-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = Quad::ZERO;
+ assert_eq!(status, Status::OK);
+ assert!(test.is_pos_zero());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(-smallest) = -0.
+ let test = unpack!(status=, "-0x0.0000000000000000000000000001p-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = -Quad::ZERO;
+ assert_eq!(status, Status::OK);
+ assert!(test.is_neg_zero());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-smallest) = -nextUp(smallest) = -smallest - inc.
+ let test = unpack!(status=, "-0x0.0000000000000000000000000001p-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "-0x0.0000000000000000000000000002p-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(qNaN) = qNaN
+ let test = unpack!(status=, Quad::qnan(None).next_up());
+ let expected = Quad::qnan(None);
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(qNaN) = qNaN
+ let test = unpack!(status=, Quad::qnan(None).next_down());
+ let expected = Quad::qnan(None);
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(sNaN) = qNaN
+ let test = unpack!(status=, Quad::snan(None).next_up());
+ let expected = Quad::qnan(None);
+ assert_eq!(status, Status::INVALID_OP);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(sNaN) = qNaN
+ let test = unpack!(status=, Quad::snan(None).next_down());
+ let expected = Quad::qnan(None);
+ assert_eq!(status, Status::INVALID_OP);
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(+0) = +smallest
+ let test = unpack!(status=, Quad::ZERO.next_up());
+ let expected = Quad::SMALLEST;
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(+0) = -nextUp(-0) = -smallest
+ let test = unpack!(status=, Quad::ZERO.next_down());
+ let expected = -Quad::SMALLEST;
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(-0) = +smallest
+ let test = unpack!(status=, (-Quad::ZERO).next_up());
+ let expected = Quad::SMALLEST;
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-0) = -nextUp(0) = -smallest
+ let test = unpack!(status=, (-Quad::ZERO).next_down());
+ let expected = -Quad::SMALLEST;
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // 2. Binade Boundary Tests.
+
+ // 2a. Test denormal <-> normal binade boundaries.
+ // * nextUp(+Largest Denormal) -> +Smallest Normal.
+ // * nextDown(-Largest Denormal) -> -Smallest Normal.
+ // * nextUp(-Smallest Normal) -> -Largest Denormal.
+ // * nextDown(+Smallest Normal) -> +Largest Denormal.
+
+ // nextUp(+Largest Denormal) -> +Smallest Normal.
+ let test = unpack!(status=, "0x0.ffffffffffffffffffffffffffffp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "0x1.0000000000000000000000000000p-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-Largest Denormal) -> -Smallest Normal.
+ let test = unpack!(status=, "-0x0.ffffffffffffffffffffffffffffp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "-0x1.0000000000000000000000000000p-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(-Smallest Normal) -> -Largest Denormal.
+ let test = unpack!(status=, "-0x1.0000000000000000000000000000p-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "-0x0.ffffffffffffffffffffffffffffp-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(+Smallest Normal) -> +Largest Denormal.
+ let test = unpack!(status=, "+0x1.0000000000000000000000000000p-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "+0x0.ffffffffffffffffffffffffffffp-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+
+ // 2b. Test normal <-> normal binade boundaries.
+ // * nextUp(-Normal Binade Boundary) -> -Normal Binade Boundary + 1.
+ // * nextDown(+Normal Binade Boundary) -> +Normal Binade Boundary - 1.
+ // * nextUp(+Normal Binade Boundary - 1) -> +Normal Binade Boundary.
+ // * nextDown(-Normal Binade Boundary + 1) -> -Normal Binade Boundary.
+
+ // nextUp(-Normal Binade Boundary) -> -Normal Binade Boundary + 1.
+ let test = unpack!(status=, "-0x1p+1".parse::<Quad>().unwrap().next_up());
+ let expected = "-0x1.ffffffffffffffffffffffffffffp+0".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(+Normal Binade Boundary) -> +Normal Binade Boundary - 1.
+ let test = unpack!(status=, "0x1p+1".parse::<Quad>().unwrap().next_down());
+ let expected = "0x1.ffffffffffffffffffffffffffffp+0".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(+Normal Binade Boundary - 1) -> +Normal Binade Boundary.
+ let test = unpack!(status=, "0x1.ffffffffffffffffffffffffffffp+0"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "0x1p+1".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-Normal Binade Boundary + 1) -> -Normal Binade Boundary.
+ let test = unpack!(status=, "-0x1.ffffffffffffffffffffffffffffp+0"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "-0x1p+1".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // 2c. Test using next at binade boundaries with a direction away from the
+ // binade boundary. Away from denormal <-> normal boundaries.
+ //
+ // This is to make sure that even though we are at a binade boundary, since
+ // we are rounding away, we do not trigger the binade boundary code. Thus we
+ // test:
+ // * nextUp(-Largest Denormal) -> -Largest Denormal + inc.
+ // * nextDown(+Largest Denormal) -> +Largest Denormal - inc.
+ // * nextUp(+Smallest Normal) -> +Smallest Normal + inc.
+ // * nextDown(-Smallest Normal) -> -Smallest Normal - inc.
+
+ // nextUp(-Largest Denormal) -> -Largest Denormal + inc.
+ let test = unpack!(status=, "-0x0.ffffffffffffffffffffffffffffp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "-0x0.fffffffffffffffffffffffffffep-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_denormal());
+ assert!(test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(+Largest Denormal) -> +Largest Denormal - inc.
+ let test = unpack!(status=, "0x0.ffffffffffffffffffffffffffffp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "0x0.fffffffffffffffffffffffffffep-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_denormal());
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(+Smallest Normal) -> +Smallest Normal + inc.
+ let test = unpack!(status=, "0x1.0000000000000000000000000000p-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "0x1.0000000000000000000000000001p-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_denormal());
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-Smallest Normal) -> -Smallest Normal - inc.
+ let test = unpack!(status=, "-0x1.0000000000000000000000000000p-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "-0x1.0000000000000000000000000001p-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_denormal());
+ assert!(test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // 2d. Test values which cause our exponent to go to min exponent. This
+ // is to ensure that guards in the code to check for min exponent
+ // trigger properly.
+ // * nextUp(-0x1p-16381) -> -0x1.ffffffffffffffffffffffffffffp-16382
+ // * nextDown(-0x1.ffffffffffffffffffffffffffffp-16382) ->
+ // -0x1p-16381
+ // * nextUp(0x1.ffffffffffffffffffffffffffffp-16382) -> 0x1p-16381
+ // * nextDown(0x1p-16381) -> 0x1.ffffffffffffffffffffffffffffp-16382
+
+ // nextUp(-0x1p-16381) -> -0x1.ffffffffffffffffffffffffffffp-16382
+ let test = unpack!(status=, "-0x1p-16381".parse::<Quad>().unwrap().next_up());
+ let expected = "-0x1.ffffffffffffffffffffffffffffp-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-0x1.ffffffffffffffffffffffffffffp-16382) ->
+ // -0x1p-16381
+ let test = unpack!(status=, "-0x1.ffffffffffffffffffffffffffffp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "-0x1p-16381".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(0x1.ffffffffffffffffffffffffffffp-16382) -> 0x1p-16381
+ let test = unpack!(status=, "0x1.ffffffffffffffffffffffffffffp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "0x1p-16381".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(0x1p-16381) -> 0x1.ffffffffffffffffffffffffffffp-16382
+ let test = unpack!(status=, "0x1p-16381".parse::<Quad>().unwrap().next_down());
+ let expected = "0x1.ffffffffffffffffffffffffffffp-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.bitwise_eq(expected));
+
+ // 3. Now we test both denormal/normal computation which will not cause us
+ // to go across binade boundaries. Specifically we test:
+ // * nextUp(+Denormal) -> +Denormal.
+ // * nextDown(+Denormal) -> +Denormal.
+ // * nextUp(-Denormal) -> -Denormal.
+ // * nextDown(-Denormal) -> -Denormal.
+ // * nextUp(+Normal) -> +Normal.
+ // * nextDown(+Normal) -> +Normal.
+ // * nextUp(-Normal) -> -Normal.
+ // * nextDown(-Normal) -> -Normal.
+
+ // nextUp(+Denormal) -> +Denormal.
+ let test = unpack!(status=, "0x0.ffffffffffffffffffffffff000cp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "0x0.ffffffffffffffffffffffff000dp-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_denormal());
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(+Denormal) -> +Denormal.
+ let test = unpack!(status=, "0x0.ffffffffffffffffffffffff000cp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "0x0.ffffffffffffffffffffffff000bp-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_denormal());
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(-Denormal) -> -Denormal.
+ let test = unpack!(status=, "-0x0.ffffffffffffffffffffffff000cp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "-0x0.ffffffffffffffffffffffff000bp-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_denormal());
+ assert!(test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-Denormal) -> -Denormal
+ let test = unpack!(status=, "-0x0.ffffffffffffffffffffffff000cp-16382"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "-0x0.ffffffffffffffffffffffff000dp-16382".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(test.is_denormal());
+ assert!(test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(+Normal) -> +Normal.
+ let test = unpack!(status=, "0x1.ffffffffffffffffffffffff000cp-16000"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "0x1.ffffffffffffffffffffffff000dp-16000".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_denormal());
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(+Normal) -> +Normal.
+ let test = unpack!(status=, "0x1.ffffffffffffffffffffffff000cp-16000"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "0x1.ffffffffffffffffffffffff000bp-16000".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_denormal());
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextUp(-Normal) -> -Normal.
+ let test = unpack!(status=, "-0x1.ffffffffffffffffffffffff000cp-16000"
+ .parse::<Quad>()
+ .unwrap()
+ .next_up());
+ let expected = "-0x1.ffffffffffffffffffffffff000bp-16000".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_denormal());
+ assert!(test.is_negative());
+ assert!(test.bitwise_eq(expected));
+
+ // nextDown(-Normal) -> -Normal.
+ let test = unpack!(status=, "-0x1.ffffffffffffffffffffffff000cp-16000"
+ .parse::<Quad>()
+ .unwrap()
+ .next_down());
+ let expected = "-0x1.ffffffffffffffffffffffff000dp-16000".parse::<Quad>().unwrap();
+ assert_eq!(status, Status::OK);
+ assert!(!test.is_denormal());
+ assert!(test.is_negative());
+ assert!(test.bitwise_eq(expected));
+}
+
+#[test]
+fn fma() {
+ // Basic fused multiply-add: 14.5 * -14.5 + 225.0 == 14.75.
+ {
+ let mut f1 = Single::from_f32(14.5);
+ let f2 = Single::from_f32(-14.5);
+ let f3 = Single::from_f32(225.0);
+ f1 = f1.mul_add(f2, f3).value;
+ assert_eq!(14.75, f1.to_f32());
+ }
+
+ // The product of the two halved inputs is expected to vanish next to the
+ // addend, leaving exactly 12.0.
+ {
+ let val2 = Single::from_f32(2.0);
+ let mut f1 = Single::from_f32(1.17549435e-38);
+ let mut f2 = Single::from_f32(1.17549435e-38);
+ f1 /= val2;
+ f2 /= val2;
+ let f3 = Single::from_f32(12.0);
+ f1 = f1.mul_add(f2, f3).value;
+ assert_eq!(12.0, f1.to_f32());
+ }
+
+ // Test for correct zero sign when answer is exactly zero.
+ // fma(1.0, -1.0, 1.0) -> +ve 0.
+ {
+ let mut f1 = Double::from_f64(1.0);
+ let f2 = Double::from_f64(-1.0);
+ let f3 = Double::from_f64(1.0);
+ f1 = f1.mul_add(f2, f3).value;
+ assert!(!f1.is_negative() && f1.is_zero());
+ }
+
+ // Test for correct zero sign when answer is exactly zero and rounding towards
+ // negative.
+ // fma(1.0, -1.0, 1.0) -> -ve 0.
+ {
+ let mut f1 = Double::from_f64(1.0);
+ let f2 = Double::from_f64(-1.0);
+ let f3 = Double::from_f64(1.0);
+ f1 = f1.mul_add_r(f2, f3, Round::TowardNegative).value;
+ assert!(f1.is_negative() && f1.is_zero());
+ }
+
+ // Test for correct (in this case -ve) sign when adding like signed zeros.
+ // Test fma(0.0, -0.0, -0.0) -> -ve 0.
+ {
+ let mut f1 = Double::from_f64(0.0);
+ let f2 = Double::from_f64(-0.0);
+ let f3 = Double::from_f64(-0.0);
+ f1 = f1.mul_add(f2, f3).value;
+ assert!(f1.is_negative() && f1.is_zero());
+ }
+
+ // Test -ve sign preservation when small negative results underflow.
+ {
+ let mut f1 = "-0x1p-1074".parse::<Double>().unwrap();
+ let f2 = "+0x1p-1074".parse::<Double>().unwrap();
+ let f3 = Double::from_f64(0.0);
+ f1 = f1.mul_add(f2, f3).value;
+ assert!(f1.is_negative() && f1.is_zero());
+ }
+
+ // Test x87 extended precision case from http://llvm.org/PR20728.
+ {
+ let mut m1 = X87DoubleExtended::from_u128(1).value;
+ let m2 = X87DoubleExtended::from_u128(1).value;
+ let a = X87DoubleExtended::from_u128(3).value;
+
+ let mut loses_info = false;
+ m1 = m1.mul_add(m2, a).value;
+ let r: Single = m1.convert(&mut loses_info).value;
+ assert!(!loses_info);
+ assert_eq!(4.0, r.to_f32());
+ }
+
+ // Regression test that failed an assertion.
+ {
+ let mut f1 = Single::from_f32(-8.85242279E-41);
+ let f2 = Single::from_f32(2.0);
+ let f3 = Single::from_f32(8.85242279E-41);
+ f1 = f1.mul_add(f2, f3).value;
+ assert_eq!(-8.85242279E-41, f1.to_f32());
+ }
+
+ // Test using only a single instance of APFloat.
+ {
+ let mut f = Double::from_f64(1.5);
+
+ f = f.mul_add(f, f).value;
+ assert_eq!(3.75, f.to_f64());
+ }
+}
+
+#[test]
+fn min_num() {
+    let one = Double::from_f64(1.0);
+    let two = Double::from_f64(2.0);
+    let nan = Double::NAN;
+
+    // Every pairing must yield 1.0: it is the smaller of the two numbers, and
+    // it is also the expected result when the other operand is NaN.
+    for (lhs, rhs) in [(one, two), (two, one), (one, nan), (nan, one)] {
+        assert_eq!(1.0, lhs.min(rhs).to_f64());
+    }
+}
+
+#[test]
+fn max_num() {
+    let one = Double::from_f64(1.0);
+    let two = Double::from_f64(2.0);
+    let nan = Double::NAN;
+
+    // `(lhs, rhs, expected)`: the larger number wins, and a NaN operand is
+    // expected to be dropped in favour of the other operand.
+    let cases = [(one, two, 2.0), (two, one, 2.0), (one, nan, 1.0), (nan, one, 1.0)];
+    for (lhs, rhs, expected) in cases {
+        assert_eq!(expected, lhs.max(rhs).to_f64());
+    }
+}
+
+#[test]
+fn minimum() {
+    let f1 = Double::from_f64(1.0);
+    let f2 = Double::from_f64(2.0);
+    let zp = Double::from_f64(0.0);
+    let zn = Double::from_f64(-0.0);
+    let nan = Double::NAN;
+
+    // Ordinary operands: the smaller value wins regardless of argument order.
+    assert_eq!(1.0, f1.minimum(f2).to_f64());
+    assert_eq!(1.0, f2.minimum(f1).to_f64());
+
+    // Zeros of opposite sign: the negative zero must be returned. `-0.0 == 0.0`
+    // under `f64` equality, so comparing converted values could never detect a
+    // wrong sign; inspect the result itself instead.
+    assert!(zp.minimum(zn).is_neg_zero());
+    assert!(zn.minimum(zp).is_neg_zero());
+
+    // Unlike `min`, a NaN operand must propagate to the result.
+    assert!(f1.minimum(nan).to_f64().is_nan());
+    assert!(nan.minimum(f1).to_f64().is_nan());
+}
+
+#[test]
+fn maximum() {
+    let f1 = Double::from_f64(1.0);
+    let f2 = Double::from_f64(2.0);
+    let zp = Double::from_f64(0.0);
+    let zn = Double::from_f64(-0.0);
+    let nan = Double::NAN;
+
+    // Ordinary operands: the larger value wins regardless of argument order.
+    assert_eq!(2.0, f1.maximum(f2).to_f64());
+    assert_eq!(2.0, f2.maximum(f1).to_f64());
+
+    // Zeros of opposite sign: the positive zero must be returned. `0.0 == -0.0`
+    // under `f64` equality, so comparing converted values could never detect a
+    // wrong sign; inspect the result itself instead.
+    assert!(zp.maximum(zn).is_pos_zero());
+    assert!(zn.maximum(zp).is_pos_zero());
+
+    // Unlike `max`, a NaN operand must propagate to the result.
+    assert!(f1.maximum(nan).to_f64().is_nan());
+    assert!(nan.maximum(f1).to_f64().is_nan());
+}
+
+#[test]
+fn denormal() {
+    // For one float type: zero is not denormal, the parsed literal is not
+    // denormal, and dividing that literal by two produces a denormal.
+    macro_rules! check_halving_becomes_denormal {
+        ($ty:ty, $literal:expr) => {{
+            assert!(!<$ty>::from_u128(0).value.is_denormal());
+
+            let mut value = $literal.parse::<$ty>().unwrap();
+            assert!(!value.is_denormal());
+
+            value /= <$ty>::from_u128(2).value;
+            assert!(value.is_denormal());
+        }};
+    }
+
+    // Test single precision
+    check_halving_becomes_denormal!(Single, "1.17549435082228750797e-38");
+
+    // Test double precision
+    check_halving_becomes_denormal!(Double, "2.22507385850720138309e-308");
+
+    // Test Intel double-ext
+    check_halving_becomes_denormal!(X87DoubleExtended, "3.36210314311209350626e-4932");
+
+    // Test quadruple precision
+    check_halving_becomes_denormal!(Quad, "3.36210314311209350626267781732175260e-4932");
+}
+
+#[test]
+fn is_smallest_normalized() {
+    // Run the generic check below once for every float type in the list.
+    for_each_float_type!(for<F: Float> test::<F>());
+    fn test<F: Float>() {
+        // None of the special or extreme values is the smallest normalized one.
+        assert!(!F::ZERO.is_smallest_normalized());
+        assert!(!(-F::ZERO).is_smallest_normalized());
+
+        assert!(!F::INFINITY.is_smallest_normalized());
+        assert!(!(-F::INFINITY).is_smallest_normalized());
+
+        assert!(!F::qnan(None).is_smallest_normalized());
+        assert!(!F::snan(None).is_smallest_normalized());
+
+        assert!(!F::largest().is_smallest_normalized());
+        assert!(!(-F::largest()).is_smallest_normalized());
+
+        assert!(!F::SMALLEST.is_smallest_normalized());
+        assert!(!(-F::SMALLEST).is_smallest_normalized());
+
+        // All-ones bit-pattern of exactly `F::BITS` bits.
+        assert!(!F::from_bits(!0u128 >> (128 - F::BITS)).is_smallest_normalized());
+
+        let pos_smallest_normalized = F::smallest_normalized();
+        let neg_smallest_normalized = -F::smallest_normalized();
+        assert!(pos_smallest_normalized.is_smallest_normalized());
+        assert!(neg_smallest_normalized.is_smallest_normalized());
+
+        for mut val in [pos_smallest_normalized, neg_smallest_normalized] {
+            let old_sign = val.is_negative();
+
+            let mut status;
+
+            // Step down, make sure it's still not smallest normalized.
+            val = unpack!(status=, val.next_down());
+            assert_eq!(Status::OK, status);
+            assert!(!val.is_smallest_normalized());
+            assert_eq!(old_sign, val.is_negative());
+
+            // Step back up should restore it to being smallest normalized.
+            val = unpack!(status=, val.next_up());
+            assert_eq!(Status::OK, status);
+            assert!(val.is_smallest_normalized());
+            assert_eq!(old_sign, val.is_negative());
+
+            // Stepping beyond it, the value should no longer be smallest normalized.
+            val = unpack!(status=, val.next_up());
+            assert_eq!(Status::OK, status);
+            assert!(!val.is_smallest_normalized());
+            assert_eq!(old_sign, val.is_negative());
+        }
+    }
+}
+
+#[test]
+fn decimal_strings_without_null_terminators() {
+    // Make sure that we can parse strings without null terminators.
+    // rdar://14323230.
+    //
+    // `(text, len, expected)`: only the first `len` bytes of `text` are parsed.
+    let cases = [
+        ("0.00", 3, 0.0),
+        ("0.01", 3, 0.0),
+        ("0.09", 3, 0.0),
+        ("0.095", 4, 0.09),
+        ("0.00e+3", 7, 0.00),
+        ("0e+3", 4, 0.00),
+    ];
+    for (text, len, expected) in cases {
+        let parsed = text[..len].parse::<Double>().unwrap();
+        assert_eq!(parsed.to_f64(), expected);
+    }
+}
+
+#[test]
+fn from_zero_decimal_string() {
+    // Parses `text` as a `Double` and requires the exact bit-pattern of
+    // `expected`. Comparing bits (rather than `f64` values) matters here:
+    // `-0.0 == 0.0`, so a value comparison would accept a zero of either sign.
+    fn assert_parses_to(expected: f64, text: &str) {
+        let parsed = text.parse::<Double>().unwrap().to_f64();
+        assert_eq!(expected.to_bits(), parsed.to_bits(), "parsing {:?}", text);
+    }
+
+    // Each spelling of zero is tried unsigned, with `+`, and with `-`.
+    for digits in ["0", "0.", ".0", "0.0", "00000.", ".00000", "0000.00000"] {
+        assert_parses_to(0.0, digits);
+        assert_parses_to(0.0, &format!("+{}", digits));
+        assert_parses_to(-0.0, &format!("-{}", digits));
+    }
+}
+
+#[test]
+fn from_zero_decimal_single_exponent_string() {
+    // Parses `text` as a `Double` and requires the exact bit-pattern of
+    // `expected`. Comparing bits (rather than `f64` values) matters here:
+    // `-0.0 == 0.0`, so a value comparison would accept a zero of either sign.
+    fn assert_parses_to(expected: f64, text: &str) {
+        let parsed = text.parse::<Double>().unwrap().to_f64();
+        assert_eq!(expected.to_bits(), parsed.to_bits(), "parsing {:?}", text);
+    }
+
+    // Every zero mantissa is combined with every single-digit exponent form,
+    // and each combination is tried unsigned, with `+`, and with `-`.
+    for mantissa in ["0", "0.", ".0", "0.0"] {
+        for exponent in ["e1", "e+1", "e-1"] {
+            assert_parses_to(0.0, &format!("{}{}", mantissa, exponent));
+            assert_parses_to(0.0, &format!("+{}{}", mantissa, exponent));
+            assert_parses_to(-0.0, &format!("-{}{}", mantissa, exponent));
+        }
+    }
+
+    // Zeros with redundant leading and trailing digits.
+    assert_parses_to(0.0, "000.0000e1");
+    assert_parses_to(0.0, "+000.0000e+1");
+    assert_parses_to(-0.0, "-000.0000e+1");
+}
+
+#[test]
+fn from_zero_decimal_large_exponent_string() {
+    // Parses `text` as a `Double` and requires the exact bit-pattern of
+    // `expected`. Comparing bits (rather than `f64` values) matters here:
+    // `-0.0 == 0.0`, so a value comparison would accept a zero of either sign.
+    fn assert_parses_to(expected: f64, text: &str) {
+        let parsed = text.parse::<Double>().unwrap().to_f64();
+        assert_eq!(expected.to_bits(), parsed.to_bits(), "parsing {:?}", text);
+    }
+
+    // A zero mantissa stays zero (keeping its sign) however large the decimal
+    // exponent is; each form is tried unsigned, with `+`, and with `-`.
+    for exponent in ["e1234", "e+1234", "e-1234"] {
+        assert_parses_to(0.0, &format!("0{}", exponent));
+        assert_parses_to(0.0, &format!("+0{}", exponent));
+        assert_parses_to(-0.0, &format!("-0{}", exponent));
+    }
+
+    // Zeros with redundant leading and trailing digits.
+    assert_parses_to(0.0, "000.0000e1234");
+    assert_parses_to(0.0, "000.0000e-1234");
+}
+
+// Hexadecimal-float spellings of zero must parse to zero whatever the
+// mantissa layout, exponent spelling or sign prefix.
+#[test]
+fn from_zero_hexadecimal_string() {
+    let parse = |text: &str| text.parse::<Double>().unwrap().to_f64();
+
+    // Sign prefix paired with the zero it is expected to produce. `==` on
+    // floats treats `0.0` and `-0.0` as equal, so the sign here is descriptive.
+    let signs: [(&str, f64); 3] = [("", 0.0), ("+", 0.0), ("-", -0.0)];
+
+    // Full grid of "{sign}0x{mantissa}{exponent}" inputs.
+    for mantissa in ["0", "0.", ".0", "0.0"] {
+        for exponent in ["p1", "p+1", "p-1"] {
+            for (sign, expected) in signs {
+                let text = format!("{sign}0x{mantissa}{exponent}");
+                assert_eq!(expected, parse(&text));
+            }
+        }
+    }
+
+    // Padded mantissas and a large exponent.
+    let extra_cases: &[(f64, &str)] = &[
+        (0.0, "0x00000.p1"),
+        (0.0, "0x0000.00000p1"),
+        (0.0, "0x.00000p1"),
+        (0.0, "0x0.p1"),
+        (0.0, "0x0p1234"),
+        (-0.0, "-0x0p1234"),
+        (0.0, "0x00000.p1234"),
+        (0.0, "0x0000.00000p1234"),
+        (0.0, "0x.00000p1234"),
+        (0.0, "0x0.p1234"),
+    ];
+    for &(expected, text) in extra_cases {
+        assert_eq!(expected, parse(text));
+    }
+}
+
+// Parsing of ordinary decimal strings into `Double`, including inputs whose
+// exponent marker is present but carries no digits.
+#[test]
+fn from_decimal_string() {
+    // (expected value, input), checked in order.
+    let cases: &[(f64, &str)] = &[
+        (1.0, "1"),
+        (2.0, "2."),
+        (0.5, ".5"),
+        (1.0, "1.0"),
+        (-2.0, "-2"),
+        (-4.0, "-4."),
+        (-0.5, "-.5"),
+        (-1.5, "-1.5"),
+        (1.25e12, "1.25e12"),
+        (1.25e+12, "1.25e+12"),
+        (1.25e-12, "1.25e-12"),
+        (1024.0, "1024."),
+        (1024.05, "1024.05000"),
+        (0.05, ".05000"),
+        (2.0, "2."),
+        (2.0e2, "2.e2"),
+        (2.0e+2, "2.e+2"),
+        (2.0e-2, "2.e-2"),
+        (2.05e2, "002.05000e2"),
+        (2.05e+2, "002.05000e+2"),
+        (2.05e-2, "002.05000e-2"),
+        (2.05e12, "002.05000e12"),
+        (2.05e+12, "002.05000e+12"),
+        (2.05e-12, "002.05000e-12"),
+        // Exponent marker (optionally signed) without any exponent digits.
+        (1.0, "1e"),
+        (1.0, "+1e"),
+        (-1.0, "-1e"),
+        (1.0, "1.e"),
+        (1.0, "+1.e"),
+        (-1.0, "-1.e"),
+        (0.1, ".1e"),
+        (0.1, "+.1e"),
+        (-0.1, "-.1e"),
+        (1.1, "1.1e"),
+        (1.1, "+1.1e"),
+        (-1.1, "-1.1e"),
+        (1.0, "1e+"),
+        (1.0, "1e-"),
+        (0.1, ".1e"),
+        (0.1, ".1e+"),
+        (0.1, ".1e-"),
+        (1.0, "1.0e"),
+        (1.0, "1.0e+"),
+        (1.0, "1.0e-"),
+    ];
+    for &(expected, text) in cases {
+        let parsed: Double = text.parse().unwrap();
+        assert_eq!(expected, parsed.to_f64());
+    }
+
+    // These are "carefully selected" to overflow the fast log-base
+    // calculations in the implementation.
+    let huge: Double = "99e99999".parse().unwrap();
+    assert!(huge.is_infinite());
+    let neg_huge: Double = "-99e99999".parse().unwrap();
+    assert!(neg_huge.is_infinite());
+    let tiny: Double = "1e-99999".parse().unwrap();
+    assert!(tiny.is_pos_zero());
+    let neg_tiny: Double = "-1e-99999".parse().unwrap();
+    assert!(neg_tiny.is_neg_zero());
+
+    let e_approx: Double = "2.71828".parse().unwrap();
+    assert_eq!(2.71828, e_approx.to_f64());
+}
+
+// Parsing of NaN spellings (quiet/signaling, signed, with payloads written
+// in decimal or hexadecimal, bare or parenthesised) and of infinity spellings.
+#[test]
+fn from_string_specials() {
+    let precision = 53;
+    let payload_bits = precision - 2;
+    let payload_mask: u128 = (1 << payload_bits) - 1;
+
+    // Payloads exactly as they are written into the input strings.
+    let requested_payloads = [
+        0,
+        1,
+        123,
+        0xDEADBEEF,
+        -2i32 as u128,
+        1 << payload_bits,       // overflow bit
+        1 << (payload_bits - 1), // signaling bit
+        1 << (payload_bits - 2), // highest possible bit
+    ];
+
+    // Decimal spelling of each requested payload.
+    let payload_dec_strings: Vec<String> = requested_payloads.iter().map(|payload| payload.to_string()).collect();
+
+    // Hexadecimal (`0x`-prefixed) spelling of each requested payload.
+    let payload_hex_strings: Vec<String> = requested_payloads.iter().map(|payload| format!("{:#x}", payload)).collect();
+
+    // Payloads the parsed value is expected to carry: the requested ones
+    // truncated to the payload field.
+    let masked_payloads = requested_payloads.map(|raw| raw & payload_mask);
+
+    // Signaling NaN must have a non-zero payload. In case a zero payload is
+    // requested, a default arbitrary payload is set instead. Save this payload
+    // for testing.
+    let snan_default_payload = Double::snan(None).to_bits() & payload_mask;
+
+    for nan_str in ["nan", "NaN"] {
+        // "Signaling" prefix (or none - for "Quiet").
+        for type_str in ["", "s", "S"] {
+            let signaling = type_str == "s" || type_str == "S";
+
+            let spellings = masked_payloads.iter().zip(&payload_dec_strings).zip(&payload_hex_strings);
+            for ((&masked, payload_dec), payload_hex) in spellings {
+                let payload = if signaling && masked == 0 { snan_default_payload } else { masked };
+
+                // Negative sign prefix (or none - for positive).
+                for sign_str in ["", "-"] {
+                    let negative = sign_str == "-";
+                    let prefix = format!("{sign_str}{type_str}{nan_str}");
+
+                    let mut test_strings = Vec::with_capacity(5);
+                    // Test without any payload (only when none is expected back).
+                    if payload == 0 {
+                        test_strings.push(prefix.clone());
+                    }
+                    // Test with the payload as a suffix.
+                    test_strings.push(format!("{prefix}{payload_dec}"));
+                    test_strings.push(format!("{prefix}{payload_hex}"));
+                    // Test with the payload inside parentheses.
+                    test_strings.push(format!("{prefix}({payload_dec})"));
+                    test_strings.push(format!("{prefix}({payload_hex})"));
+
+                    for test_str in test_strings {
+                        // Attach the offending input to the panic message on failure.
+                        let f = test_str
+                            .parse::<Double>()
+                            .map_err(|e| format!("{test_str:?}: {e:?}"))
+                            .unwrap();
+                        assert!(f.is_nan());
+                        assert_eq!(signaling, f.is_signaling());
+                        assert_eq!(negative, f.is_negative());
+                        assert_eq!(payload, f.to_bits() & payload_mask);
+                    }
+                }
+            }
+        }
+    }
+
+    for inf_str in ["inf", "INFINITY", "+Inf", "-inf", "-INFINITY", "-Inf"] {
+        let negative = inf_str.starts_with('-');
+
+        let f: Double = inf_str.parse().unwrap();
+        assert!(f.is_infinite());
+        assert_eq!(negative, f.is_negative());
+        // The bits covered by the payload mask are all zero for an infinity.
+        assert_eq!(0, f.to_bits() & payload_mask);
+    }
+}
+
+// Round trip of infinity and NaN spellings through parse + `to_string`:
+// each input must print in the canonical form listed next to it.
+#[test]
+fn from_to_string_specials() {
+    // (canonical output, input)
+    let cases: &[(&str, &str)] = &[
+        ("+Inf", "+Inf"),
+        ("+Inf", "INFINITY"),
+        ("+Inf", "inf"),
+        ("-Inf", "-Inf"),
+        ("-Inf", "-INFINITY"),
+        ("-Inf", "-inf"),
+        ("NaN", "NaN"),
+        ("NaN", "nan"),
+        ("NaN", "-NaN"),
+        ("NaN", "-nan"),
+    ];
+
+    for &(expected, text) in cases {
+        let parsed: Double = text.parse().unwrap();
+        assert_eq!(expected, parsed.to_string());
+    }
+}
+
+// Parsing of non-zero hexadecimal-float strings into `Double`.
+#[test]
+fn from_hexadecimal_string() {
+    let parse = |text: &str| text.parse::<Double>().unwrap().to_f64();
+
+    // (magnitude, unsigned literal). Each literal is checked bare, with a
+    // leading '+', and with a leading '-' (expecting the negated magnitude).
+    let cases: &[(f64, &str)] = &[
+        (1.0, "0x1p0"),
+        (1.0, "0x1p+0"),
+        (1.0, "0x1p-0"),
+        (2.0, "0x1p1"),
+        (2.0, "0x1p+1"),
+        (0.5, "0x1p-1"),
+        (3.0, "0x1.8p1"),
+        (3.0, "0x1.8p+1"),
+        (0.75, "0x1.8p-1"),
+        (8192.0, "0x1000.000p1"),
+        (8192.0, "0x1000.000p+1"),
+        (2048.0, "0x1000.000p-1"),
+        (8192.0, "0x1000p1"),
+        (8192.0, "0x1000p+1"),
+        (2048.0, "0x1000p-1"),
+        (16384.0, "0x10p10"),
+        (16384.0, "0x10p+10"),
+        (0.015625, "0x10p-10"),
+    ];
+    for &(magnitude, literal) in cases {
+        assert_eq!(magnitude, parse(literal));
+        assert_eq!(magnitude, parse(&format!("+{literal}")));
+        assert_eq!(-magnitude, parse(&format!("-{literal}")));
+    }
+
+    assert_eq!(1.0625, parse("0x1.1p0"));
+    assert_eq!(1.0, parse("0x1p0"));
+
+    // Mantissas with more hex digits than fit in a `Double` significand.
+    let reference = parse("0x1p-150");
+    let long_mantissa = parse("+0x800000000000000001.p-221");
+    assert_eq!(reference, long_mantissa);
+    assert_eq!(2251799813685248.5, parse("0x80000000000004000000.010p-28"));
+}
+
+// Formatting of `Double` through `Display`, in the plain form and in the
+// alternate (`#`) form, for various precision/width combinations.
+#[test]
+fn to_string() {
+    // Formats `d` with the given width; a precision of 0 means "do not pass
+    // a precision at all". `alternate` selects the `#` flag.
+    let render = |alternate: bool, d: f64, precision: usize, width: usize| {
+        let x = Double::from_f64(d);
+        match (alternate, precision) {
+            (false, 0) => format!("{:1$}", x, width),
+            (false, _) => format!("{:2$.1$}", x, precision, width),
+            (true, 0) => format!("{:#1$}", x, width),
+            (true, _) => format!("{:#2$.1$}", x, precision, width),
+        }
+    };
+
+    // (expected, value, precision, width) for the plain form.
+    let plain_cases: &[(&str, f64, usize, usize)] = &[
+        ("10", 10.0, 6, 3),
+        ("1.0E+1", 10.0, 6, 0),
+        ("10100", 1.01E+4, 5, 2),
+        ("1.01E+4", 1.01E+4, 4, 2),
+        ("1.01E+4", 1.01E+4, 5, 1),
+        ("0.0101", 1.01E-2, 5, 2),
+        ("0.0101", 1.01E-2, 4, 2),
+        ("1.01E-2", 1.01E-2, 5, 1),
+        ("0.78539816339744828", 0.78539816339744830961, 0, 3),
+        ("4.9406564584124654E-324", 4.9406564584124654e-324, 0, 3),
+        ("873.18340000000001", 873.1834, 0, 1),
+        ("8.7318340000000001E+2", 873.1834, 0, 0),
+        ("1.7976931348623157E+308", 1.7976931348623157E+308, 0, 0),
+    ];
+    for &(expected, value, precision, width) in plain_cases {
+        assert_eq!(expected, render(false, value, precision, width));
+    }
+
+    // (expected, value, precision, width) for the alternate (`#`) form.
+    let alternate_cases: &[(&str, f64, usize, usize)] = &[
+        ("10", 10.0, 6, 3),
+        ("1.000000e+01", 10.0, 6, 0),
+        ("10100", 1.01E+4, 5, 2),
+        ("1.0100e+04", 1.01E+4, 4, 2),
+        ("1.01000e+04", 1.01E+4, 5, 1),
+        ("0.0101", 1.01E-2, 5, 2),
+        ("0.0101", 1.01E-2, 4, 2),
+        ("1.01000e-02", 1.01E-2, 5, 1),
+        ("0.78539816339744828", 0.78539816339744830961, 0, 3),
+        ("4.94065645841246540e-324", 4.9406564584124654e-324, 0, 3),
+        ("873.18340000000001", 873.1834, 0, 1),
+        ("8.73183400000000010e+02", 873.1834, 0, 0),
+        ("1.79769313486231570e+308", 1.7976931348623157E+308, 0, 0),
+    ];
+    for &(expected, value, precision, width) in alternate_cases {
+        assert_eq!(expected, render(true, value, precision, width));
+    }
+
+    // An x87 extended value built from the raw bit pattern `1 << 64` prints as NaN.
+    let odd_x87 = X87DoubleExtended::from_bits(1 << 64);
+    assert_eq!("NaN", odd_x87.to_string());
+}
+
+// Conversion of `Double` to 5-bit-wide unsigned and signed integers with
+// round-toward-zero, checking the returned status, the returned value and
+// the `is_exact` out-flag.
+#[test]
+fn to_integer() {
+    // Shared out-flag, overwritten by every conversion below.
+    let mut is_exact = false;
+
+    // (expected status, expected value, input, expected is_exact) for `to_u128_r`.
+    let unsigned_cases = [
+        (Status::OK, 10u128, "10", true),
+        (Status::INVALID_OP, 0, "-10", false),
+        (Status::INVALID_OP, 31, "32", false),
+        (Status::INEXACT, 7, "7.9", false),
+    ];
+    for (status, value, text, exact) in unsigned_cases {
+        let parsed = text.parse::<Double>().unwrap();
+        let converted = parsed.to_u128_r(5, Round::TowardZero, &mut is_exact);
+        assert_eq!(status.and(value), converted);
+        assert_eq!(exact, is_exact);
+    }
+
+    // (expected status, expected value, input, expected is_exact) for `to_i128_r`.
+    let signed_cases = [
+        (Status::OK, -10i128, "-10", true),
+        (Status::INVALID_OP, -16, "-17", false),
+        (Status::INVALID_OP, 15, "16", false),
+    ];
+    for (status, value, text, exact) in signed_cases {
+        let parsed = text.parse::<Double>().unwrap();
+        let converted = parsed.to_i128_r(5, Round::TowardZero, &mut is_exact);
+        assert_eq!(status.and(value), converted);
+        assert_eq!(exact, is_exact);
+    }
+}
+
+// Bit patterns produced by `qnan` / `snan` for `Single` and `Double`, for
+// different payloads and signs.
+#[test]
+fn nan() {
+    // Builds a quiet or signaling NaN of format `F` with the given payload,
+    // negates it when requested, and returns its raw bits.
+    fn nanbits_from_u128<F: Float>(signaling: bool, negative: bool, payload: u128) -> u128 {
+        let value = match signaling {
+            true => F::snan(Some(payload)),
+            false => F::qnan(Some(payload)),
+        };
+        if negative {
+            (-value).to_bits()
+        } else {
+            value.to_bits()
+        }
+    }
+
+    // Checks every (expected bits, signaling, negative, payload) row for format `F`.
+    fn check_table<F: Float>(table: &[(u128, bool, bool, u128)]) {
+        for &(expected, signaling, negative, payload) in table {
+            assert_eq!(expected, nanbits_from_u128::<F>(signaling, negative, payload));
+        }
+    }
+
+    check_table::<Single>(&[
+        // expected SNaN Neg payload
+        (0x7fc00000, false, false, 0x00000000),
+        (0xffc00000, false, true, 0x00000000),
+        (0x7fc0ae72, false, false, 0x0000ae72),
+        (0x7fffae72, false, false, 0xffffae72),
+        (0x7fdaae72, false, false, 0x00daae72),
+        (0x7fa00000, true, false, 0x00000000),
+        (0xffa00000, true, true, 0x00000000),
+        (0x7f80ae72, true, false, 0x0000ae72),
+        (0x7fbfae72, true, false, 0xffffae72),
+        (0x7f9aae72, true, false, 0x001aae72),
+    ]);
+    check_table::<Double>(&[
+        // expected SNaN Neg payload
+        (0x7ff8000000000000, false, false, 0x0000000000000000),
+        (0xfff8000000000000, false, true, 0x0000000000000000),
+        (0x7ff800000000ae72, false, false, 0x000000000000ae72),
+        (0x7fffffffffffae72, false, false, 0xffffffffffffae72),
+        (0x7ffdaaaaaaaaae72, false, false, 0x000daaaaaaaaae72),
+        (0x7ff4000000000000, true, false, 0x0000000000000000),
+        (0xfff4000000000000, true, true, 0x0000000000000000),
+        (0x7ff000000000ae72, true, false, 0x000000000000ae72),
+        (0x7ff7ffffffffae72, true, false, 0xffffffffffffae72),
+        (0x7ff1aaaaaaaaae72, true, false, 0x0001aaaaaaaaae72),
+    ]);
+}
+
+#[test]
+fn string_decimal_error() {
+ assert_eq!("Invalid string length", "".parse::<Double>().unwrap_err().0);
+ assert_eq!("String has no digits", "+".parse::<Double>().unwrap_err().0);
+ assert_eq!("String has no digits", "-".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("Invalid character in significand", "\0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in significand", "1\0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in significand", "1\02".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in significand", "1\02e1".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in exponent", "1e\0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in exponent", "1e1\0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in exponent", "1e1\02".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("Invalid character in significand", "1.0f".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("String contains multiple dots", "..".parse::<Double>().unwrap_err().0);
+ assert_eq!("String contains multiple dots", "..0".parse::<Double>().unwrap_err().0);
+ assert_eq!("String contains multiple dots", "1.0.0".parse::<Double>().unwrap_err().0);
+}
+
+// Decimal strings that contain a dot and/or exponent marker but no
+// significand digit must be rejected with "Significand has no digits".
+#[test]
+fn string_decimal_significand_error() {
+    // Inputs are "{sign}{body}" for every body and every sign prefix.
+    for body in [".", "e", "e1", ".e1", ".e"] {
+        for sign in ["", "+", "-"] {
+            let text = format!("{sign}{body}");
+            assert_eq!("Significand has no digits", text.parse::<Double>().unwrap_err().0);
+        }
+    }
+}
+
+#[test]
+fn string_hexadecimal_error() {
+ assert_eq!("Invalid string", "0x".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid string", "+0x".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid string", "-0x".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("Hex strings require an exponent", "0x0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Hex strings require an exponent", "+0x0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Hex strings require an exponent", "-0x0".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("Hex strings require an exponent", "0x0.".parse::<Double>().unwrap_err().0);
+ assert_eq!("Hex strings require an exponent", "+0x0.".parse::<Double>().unwrap_err().0);
+ assert_eq!("Hex strings require an exponent", "-0x0.".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("Hex strings require an exponent", "0x.0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Hex strings require an exponent", "+0x.0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Hex strings require an exponent", "-0x.0".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("Hex strings require an exponent", "0x0.0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Hex strings require an exponent", "+0x0.0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Hex strings require an exponent", "-0x0.0".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("Invalid character in significand", "0x\0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in significand", "0x1\0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in significand", "0x1\02".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in significand", "0x1\02p1".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in exponent", "0x1p\0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in exponent", "0x1p1\0".parse::<Double>().unwrap_err().0);
+ assert_eq!("Invalid character in exponent", "0x1p1\02".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("Invalid character in exponent", "0x1p0f".parse::<Double>().unwrap_err().0);
+
+ assert_eq!("String contains multiple dots", "0x..p1".parse::<Double>().unwrap_err().0);
+ assert_eq!("String contains multiple dots", "0x..0p1".parse::<Double>().unwrap_err().0);
+ assert_eq!("String contains multiple dots", "0x1.0.0p1".parse::<Double>().unwrap_err().0);
+}
+
+// Hexadecimal strings whose significand holds no hex digit must be rejected
+// with "Significand has no digits".
+#[test]
+fn string_hexadecimal_significand_error() {
+    // Inputs are "{sign}0x{rest}" for every remainder and every sign prefix.
+    for rest in [".", "p", "p+", "p-", ".p", ".p+", ".p-"] {
+        for sign in ["", "+", "-"] {
+            let text = format!("{sign}0x{rest}");
+            assert_eq!("Significand has no digits", text.parse::<Double>().unwrap_err().0);
+        }
+    }
+}
+
+// Hexadecimal strings with a valid significand but an exponent marker that
+// carries no digits must be rejected with "Exponent has no digits".
+#[test]
+fn string_hexadecimal_exponent_error() {
+    // Inputs are "{sign}0x{mantissa}{marker}" for every combination below.
+    for mantissa in ["1", "1.", ".1", "1.1"] {
+        for marker in ["p", "p+", "p-"] {
+            for sign in ["", "+", "-"] {
+                let text = format!("{sign}0x{mantissa}{marker}");
+                assert_eq!("Exponent has no digits", text.parse::<Double>().unwrap_err().0);
+            }
+        }
+    }
+}
+
+// `get_exact_inverse` returns the reciprocal when the test expects it to be
+// exact, and `None` for the inputs listed at the end.
+#[test]
+fn exact_inverse() {
+    // Trivial operation: the inverse of 2.0 is 0.5 in each format checked here.
+    let inverse = Double::from_f64(2.0).get_exact_inverse().unwrap();
+    assert!(inverse.bitwise_eq(Double::from_f64(0.5)));
+
+    let inverse = Single::from_f32(2.0).get_exact_inverse().unwrap();
+    assert!(inverse.bitwise_eq(Single::from_f32(0.5)));
+
+    let two: Quad = "2.0".parse().unwrap();
+    let inverse = two.get_exact_inverse().unwrap();
+    assert!(inverse.bitwise_eq("0.5".parse::<Quad>().unwrap()));
+
+    let two: X87DoubleExtended = "2.0".parse().unwrap();
+    let inverse = two.get_exact_inverse().unwrap();
+    assert!(inverse.bitwise_eq("0.5".parse::<X87DoubleExtended>().unwrap()));
+
+    // FLT_MIN
+    let inverse = Single::from_f32(1.17549435e-38).get_exact_inverse().unwrap();
+    assert!(inverse.bitwise_eq(Single::from_f32(8.5070592e+37)));
+
+    // Large float, inverse is a denormal.
+    let large = Single::from_f32(1.7014118e38);
+    assert!(large.get_exact_inverse().is_none());
+    // Zero
+    let zero = Double::from_f64(0.0);
+    assert!(zero.get_exact_inverse().is_none());
+    // Denormalized float
+    let denormal = Single::from_f32(1.40129846e-45);
+    assert!(denormal.get_exact_inverse().is_none());
+}
+
+#[test]
+fn round_to_integral() {
+ let t = Double::from_f64(-0.5);
+ assert_eq!(-0.0, t.round_to_integral(Round::TowardZero).value.to_f64());
+ assert_eq!(-1.0, t.round_to_integral(Round::TowardNegative).value.to_f64());
+ assert_eq!(-0.0, t.round_to_integral(Round::TowardPositive).value.to_f64());
+ assert_eq!(-0.0, t.round_to_integral(Round::NearestTiesToEven).value.to_f64());
+
+ let s = Double::from_f64(3.14);
+ assert_eq!(3.0, s.round_to_integral(Round::TowardZero).value.to_f64());
+ assert_eq!(3.0, s.round_to_integral(Round::TowardNegative).value.to_f64());
+ assert_eq!(4.0, s.round_to_integral(Round::TowardPositive).value.to_f64());
+ assert_eq!(3.0, s.round_to_integral(Round::NearestTiesToEven).value.to_f64());
+
+ let r = Double::largest();
+ assert_eq!(r.to_f64(), r.round_to_integral(Round::TowardZero).value.to_f64());
+ assert_eq!(r.to_f64(), r.round_to_integral(Round::TowardNegative).value.to_f64());
+ assert_eq!(r.to_f64(), r.round_to_integral(Round::TowardPositive).value.to_f64());
+ assert_eq!(r.to_f64(), r.round_to_integral(Round::NearestTiesToEven).value.to_f64());
+
+ let p = Double::ZERO.round_to_integral(Round::TowardZero).value;
+ assert_eq!(0.0, p.to_f64());
+ let p = (-Double::ZERO).round_to_integral(Round::TowardZero).value;
+ assert_eq!(-0.0, p.to_f64());
+ let p = Double::NAN.round_to_integral(Round::TowardZero).value;
+ assert!(p.to_f64().is_nan());
+ let p = Double::INFINITY.round_to_integral(Round::TowardZero).value;
+ assert!(p.to_f64().is_infinite() && p.to_f64() > 0.0);
+ let p = (-Double::INFINITY).round_to_integral(Round::TowardZero).value;
+ assert!(p.to_f64().is_infinite() && p.to_f64() < 0.0);
+
+ let mut status;
+
+ let p = unpack!(status=, Double::NAN.round_to_integral(Round::TowardZero));
+ assert!(p.is_nan());
+ assert!(!p.is_negative());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, (-Double::NAN).round_to_integral(Round::TowardZero));
+ assert!(p.is_nan());
+ assert!(p.is_negative());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, Double::snan(None).round_to_integral(Round::TowardZero));
+ assert!(p.is_nan());
+ assert!(!p.is_signaling());
+ assert!(!p.is_negative());
+ assert_eq!(Status::INVALID_OP, status);
+
+ let p = unpack!(status=, (-Double::snan(None)).round_to_integral(Round::TowardZero));
+ assert!(p.is_nan());
+ assert!(!p.is_signaling());
+ assert!(p.is_negative());
+ assert_eq!(Status::INVALID_OP, status);
+
+ let p = unpack!(status=, Double::INFINITY.round_to_integral(Round::TowardZero));
+ assert!(p.is_infinite());
+ assert!(!p.is_negative());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, (-Double::INFINITY).round_to_integral(Round::TowardZero));
+ assert!(p.is_infinite());
+ assert!(p.is_negative());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, Double::ZERO.round_to_integral(Round::TowardZero));
+ assert!(p.is_zero());
+ assert!(!p.is_negative());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, Double::ZERO.round_to_integral(Round::TowardNegative));
+ assert!(p.is_zero());
+ assert!(!p.is_negative());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, (-Double::ZERO).round_to_integral(Round::TowardZero));
+ assert!(p.is_zero());
+ assert!(p.is_negative());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, (-Double::ZERO).round_to_integral(Round::TowardNegative));
+ assert!(p.is_zero());
+ assert!(p.is_negative());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, Double::from_f64(1E-100).round_to_integral(Round::TowardNegative));
+ assert!(p.is_zero());
+ assert!(!p.is_negative());
+ assert_eq!(Status::INEXACT, status);
+
+ let p = unpack!(status=, Double::from_f64(1E-100).round_to_integral(Round::TowardPositive));
+ assert_eq!(1.0, p.to_f64());
+ assert!(!p.is_negative());
+ assert_eq!(Status::INEXACT, status);
+
+ let p = unpack!(status=, Double::from_f64(-1E-100).round_to_integral(Round::TowardNegative));
+ assert!(p.is_negative());
+ assert_eq!(-1.0, p.to_f64());
+ assert_eq!(Status::INEXACT, status);
+
+ let p = unpack!(status=, Double::from_f64(-1E-100).round_to_integral(Round::TowardPositive));
+ assert!(p.is_zero());
+ assert!(p.is_negative());
+ assert_eq!(Status::INEXACT, status);
+
+ let p = unpack!(status=, Double::from_f64(10.0).round_to_integral(Round::TowardZero));
+ assert_eq!(10.0, p.to_f64());
+ assert_eq!(Status::OK, status);
+
+ let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::TowardZero));
+ assert_eq!(10.0, p.to_f64());
+ assert_eq!(Status::INEXACT, status);
+
+ let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::TowardPositive));
+ assert_eq!(11.0, p.to_f64());
+ assert_eq!(Status::INEXACT, status);
+
+ let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::TowardNegative));
+ assert_eq!(10.0, p.to_f64());
+ assert_eq!(Status::INEXACT, status);
+
+ let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::NearestTiesToAway));
+ assert_eq!(11.0, p.to_f64());
+ assert_eq!(Status::INEXACT, status);
+
+ let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::NearestTiesToEven));
+ assert_eq!(10.0, p.to_f64());
+ assert_eq!(Status::INEXACT, status);
+}
+
+#[test]
+fn is_integer() { // Checks `is_integer` on negative zero, a fraction, NaN, both infinities and `Double::largest()`.
+ let t = Double::from_f64(-0.0);
+ assert!(t.is_integer()); // negative zero is expected to count as an integer
+ let t = Double::from_f64(3.14159);
+ assert!(!t.is_integer());
+ let t = Double::NAN;
+ assert!(!t.is_integer()); // NaN is expected not to be an integer
+ let t = Double::INFINITY;
+ assert!(!t.is_integer());
+ let t = -Double::INFINITY;
+ assert!(!t.is_integer()); // neither infinity is expected to be an integer
+ let t = Double::largest();
+ assert!(t.is_integer()); // the largest value is expected to be an integer
+}
+
+#[test]
+fn largest() { // Compares `largest()` of three formats against literal expected values.
+ assert_eq!(3.402823466e+38, Single::largest().to_f32());
+ assert_eq!(1.7976931348623158e+308, Double::largest().to_f64());
+ assert_eq!(448.0, Float8E4M3FN::largest().to_f64()); // 448.0 is the expected maximum for Float8E4M3FN
+}
+
+#[test]
+fn smallest() { // Checks sign, classification and bit pattern of +/- `SMALLEST` for Single and Quad.
+ let test = Single::SMALLEST;
+ let expected = "0x0.000002p-126".parse::<Single>().unwrap(); // reference value built by parsing a hex float literal
+ assert!(!test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(test.is_denormal()); // the smallest value is expected to be a denormal
+ assert!(test.bitwise_eq(expected));
+
+ let test = -Single::SMALLEST; // same checks for the negated value
+ let expected = "-0x0.000002p-126".parse::<Single>().unwrap();
+ assert!(test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+
+ let test = Quad::SMALLEST; // repeat both sign cases for Quad
+ let expected = "0x0.0000000000000000000000000001p-16382".parse::<Quad>().unwrap();
+ assert!(!test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+
+ let test = -Quad::SMALLEST;
+ let expected = "-0x0.0000000000000000000000000001p-16382".parse::<Quad>().unwrap();
+ assert!(test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+}
+
+#[test]
+fn smallest_normalized() { // Checks +/- `smallest_normalized()` for Single, Double and Quad against parsed hex literals.
+ let test = Single::smallest_normalized();
+ let expected = "0x1p-126".parse::<Single>().unwrap();
+ assert!(!test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(!test.is_denormal()); // unlike `SMALLEST`, this value is expected not to be a denormal
+ assert!(test.bitwise_eq(expected));
+ assert!(test.is_smallest_normalized());
+
+ let test = -Single::smallest_normalized(); // the negated value is also expected to satisfy `is_smallest_normalized`
+ let expected = "-0x1p-126".parse::<Single>().unwrap();
+ assert!(test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(!test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+ assert!(test.is_smallest_normalized());
+
+ let test = Double::smallest_normalized(); // same pair of checks for Double
+ let expected = "0x1p-1022".parse::<Double>().unwrap();
+ assert!(!test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(!test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+ assert!(test.is_smallest_normalized());
+
+ let test = -Double::smallest_normalized();
+ let expected = "-0x1p-1022".parse::<Double>().unwrap();
+ assert!(test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(!test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+ assert!(test.is_smallest_normalized());
+
+ let test = Quad::smallest_normalized(); // same pair of checks for Quad
+ let expected = "0x1p-16382".parse::<Quad>().unwrap();
+ assert!(!test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(!test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+ assert!(test.is_smallest_normalized());
+
+ let test = -Quad::smallest_normalized();
+ let expected = "-0x1p-16382".parse::<Quad>().unwrap();
+ assert!(test.is_negative());
+ assert!(test.is_finite_non_zero());
+ assert!(!test.is_denormal());
+ assert!(test.bitwise_eq(expected));
+ assert!(test.is_smallest_normalized());
+}
+
+#[test]
+fn zero() { // Checks round-tripping of +/- zero through f32/f64 and the raw bit pattern of zero in each format.
+ assert_eq!(0.0, Single::from_f32(0.0).to_f32());
+ assert_eq!(-0.0, Single::from_f32(-0.0).to_f32()); // float `==` treats 0.0 and -0.0 as equal, so the sign is checked separately below
+ assert!(Single::from_f32(-0.0).is_negative());
+
+ assert_eq!(0.0, Double::from_f64(0.0).to_f64());
+ assert_eq!(-0.0, Double::from_f64(-0.0).to_f64());
+ assert!(Double::from_f64(-0.0).is_negative());
+
+ fn test<F: Float>(sign: bool, bits: u128) { // Generic check: +/- `F::ZERO` matches the parsed hex zero and the given raw bits.
+ let test = if sign { -F::ZERO } else { F::ZERO };
+ let pattern = if sign { "-0x0p+0" } else { "0x0p+0" };
+ let expected = pattern.parse::<F>().unwrap();
+ assert!(test.is_zero());
+ assert_eq!(sign, test.is_negative());
+ assert!(test.bitwise_eq(expected));
+ assert_eq!(bits, test.to_bits());
+ }
+ test::<Half>(false, 0); // for each format: positive zero is all-zero bits, negative zero is a single high bit
+ test::<Half>(true, 0x8000);
+ test::<Single>(false, 0);
+ test::<Single>(true, 0x80000000);
+ test::<Double>(false, 0);
+ test::<Double>(true, 0x8000000000000000);
+ test::<Quad>(false, 0);
+ test::<Quad>(true, 0x8000000000000000_0000000000000000);
+ test::<X87DoubleExtended>(false, 0);
+ test::<X87DoubleExtended>(true, 0x8000_0000000000000000);
+ test::<Float8E5M2>(false, 0);
+ test::<Float8E5M2>(true, 0x80);
+ test::<Float8E4M3FN>(false, 0);
+ test::<Float8E4M3FN>(true, 0x80);
+}
+
+#[test]
+fn copy_sign() { // Checks `copy_sign` for all four sign combinations of 42.0 and a +/-1.0 sign source.
+ assert!(Double::from_f64(-42.0).bitwise_eq(Double::from_f64(42.0).copy_sign(Double::from_f64(-1.0),),)); // + takes - sign
+ assert!(Double::from_f64(42.0).bitwise_eq(Double::from_f64(-42.0).copy_sign(Double::from_f64(1.0),),)); // - takes + sign
+ assert!(Double::from_f64(-42.0).bitwise_eq(Double::from_f64(-42.0).copy_sign(Double::from_f64(-1.0),),)); // - stays -
+ assert!(Double::from_f64(42.0).bitwise_eq(Double::from_f64(42.0).copy_sign(Double::from_f64(1.0),),)); // + stays +
+}
+
+#[test]
+fn convert() { // Checks value, `loses_info` flag and (where asserted) status of conversions between float formats.
+ let mut loses_info = false; // passed by mutable reference to every `convert` call and checked afterwards
+ let mut status;
+
+ let test = "1.0".parse::<Double>().unwrap();
+ let test: Single = test.convert(&mut loses_info).value;
+ assert_eq!(1.0, test.to_f32());
+ assert!(!loses_info); // 1.0 is expected to convert without losing information
+
+ let mut test = "0x1p-53".parse::<X87DoubleExtended>().unwrap();
+ let one = "1.0".parse::<X87DoubleExtended>().unwrap();
+ test += one; // 1 + 2^-53, which is expected to become exactly 1.0 in Double
+ let test: Double = test.convert(&mut loses_info).value;
+ assert_eq!(1.0, test.to_f64());
+ assert!(loses_info);
+
+ let mut test = "0x1p-53".parse::<Quad>().unwrap(); // same scenario starting from Quad
+ let one = "1.0".parse::<Quad>().unwrap();
+ test += one;
+ let test: Double = test.convert(&mut loses_info).value;
+ assert_eq!(1.0, test.to_f64());
+ assert!(loses_info);
+
+ let test = "0xf.fffffffp+28".parse::<X87DoubleExtended>().unwrap();
+ let test: Double = test.convert(&mut loses_info).value;
+ assert_eq!(4294967295.0, test.to_f64());
+ assert!(!loses_info); // this value is expected to convert to Double without loss
+
+ let test = Single::snan(None);
+ let test: X87DoubleExtended = unpack!(status=, test.convert(&mut loses_info));
+ // Conversion quiets the SNAN, so now 2 bits of the 64-bit significand should be set.
+ assert!(test.bitwise_eq(X87DoubleExtended::qnan(Some(0x6000000000000000))));
+ assert!(!loses_info);
+ assert_eq!(status, Status::INVALID_OP); // converting a signaling NaN is expected to report INVALID_OP
+
+ let test = Single::qnan(None);
+ let x87_qnan = X87DoubleExtended::qnan(None); // reused further down for the Double -> X87DoubleExtended case
+ let test: X87DoubleExtended = test.convert(&mut loses_info).value;
+ assert!(test.bitwise_eq(x87_qnan));
+ assert!(!loses_info);
+
+ // NOTE(eddyb) these were mistakenly noops upstream, here they're already
+ // fixed (by instead converting from `Double` to `X87DoubleExtended`),
+ // see also upstream issue https://github.com/llvm/llvm-project/issues/63842.
+ let test = Double::snan(None);
+ let test: X87DoubleExtended = test.convert(&mut loses_info).value; // status is not inspected for this case
+ // Conversion quiets the SNAN, so now 2 bits of the 64-bit significand should be set.
+ assert!(test.bitwise_eq(X87DoubleExtended::qnan(Some(0x6000000000000000))));
+ assert!(!loses_info);
+
+ let test = Double::qnan(None);
+ let test: X87DoubleExtended = test.convert(&mut loses_info).value;
+ assert!(test.bitwise_eq(x87_qnan));
+ assert!(!loses_info);
+
+ // The payload is lost in truncation, but we retain NaN by setting the quiet bit.
+ let test = Double::snan(Some(1));
+ let test: Single = unpack!(status=, test.convert(&mut loses_info));
+ assert_eq!(0x7fc00000, test.to_bits());
+ assert!(loses_info);
+ assert_eq!(status, Status::INVALID_OP);
+
+ // The payload is lost in truncation. QNaN remains QNaN.
+ let test = Double::qnan(Some(1));
+ let test: Single = unpack!(status=, test.convert(&mut loses_info));
+ assert_eq!(0x7fc00000, test.to_bits());
+ assert!(loses_info);
+ assert_eq!(status, Status::OK); // a quiet NaN is expected to convert with OK status even though the payload is dropped
+
+ // Test that subnormals are handled correctly in double to float conversion
+ let test = "0x0.0000010000000p-1022".parse::<Double>().unwrap();
+ let test: Single = unpack!(status=, test.convert(&mut loses_info)); // `status` is assigned but not asserted in the cases below
+ assert_eq!(0.0, test.to_f32());
+ assert!(loses_info);
+
+ let test = "0x0.0000010000001p-1022".parse::<Double>().unwrap();
+ let test: Single = unpack!(status=, test.convert(&mut loses_info));
+ assert_eq!(0.0, test.to_f32());
+ assert!(loses_info);
+
+ let test = "-0x0.0000010000001p-1022".parse::<Double>().unwrap();
+ let test: Single = unpack!(status=, test.convert(&mut loses_info));
+ assert_eq!(0.0, test.to_f32()); // float `==` does not distinguish 0.0 from -0.0, so the result's sign is not checked here
+ assert!(loses_info);
+
+ let test = "0x0.0000020000000p-1022".parse::<Double>().unwrap();
+ let test: Single = unpack!(status=, test.convert(&mut loses_info));
+ assert_eq!(0.0, test.to_f32());
+ assert!(loses_info);
+
+ let test = "0x0.0000020000001p-1022".parse::<Double>().unwrap();
+ let test: Single = unpack!(status=, test.convert(&mut loses_info));
+ assert_eq!(0.0, test.to_f32());
+ assert!(loses_info);
+
+ // Test subnormal conversion to bfloat
+ let test = "0x0.01p-126".parse::<Single>().unwrap();
+ let test: BFloat = unpack!(status=, test.convert(&mut loses_info));
+ assert_eq!(0.0, test.to_f32()); // with the default `convert` rounding this input is expected to become zero
+ assert!(loses_info);
+
+ let test = "0x0.02p-126".parse::<Single>().unwrap();
+ let test: BFloat = unpack!(status=, test.convert(&mut loses_info));
+ assert_eq!(0x01, test.to_bits()); // expected to map to the BFloat bit pattern 0x01 without loss
+ assert!(!loses_info);
+
+ let test = "0x0.01p-126".parse::<Single>().unwrap();
+ let test: BFloat = unpack!(status=, test.convert_r(Round::NearestTiesToAway, &mut loses_info)); // same input as above, explicit rounding mode
+ assert_eq!(0x01, test.to_bits()); // ties-to-away is expected to round up to bit pattern 0x01 instead of zero
+ assert!(loses_info);
+}
+
+#[test]
+fn is_negative() { // Checks `is_negative` on +/- normal values, infinities, zeros, quiet NaNs and signaling NaNs.
+ let t = "0x1p+0".parse::<Single>().unwrap();
+ assert!(!t.is_negative());
+ let t = "-0x1p+0".parse::<Single>().unwrap();
+ assert!(t.is_negative());
+
+ assert!(!Single::INFINITY.is_negative());
+ assert!((-Single::INFINITY).is_negative());
+
+ assert!(!Single::ZERO.is_negative());
+ assert!((-Single::ZERO).is_negative()); // negated zero is expected to report as negative
+
+ assert!(!Single::NAN.is_negative());
+ assert!((-Single::NAN).is_negative()); // negating a NaN is expected to flip its sign too
+
+ assert!(!Single::snan(None).is_negative());
+ assert!((-Single::snan(None)).is_negative());
+}
+
+#[test]
+fn is_normal() { // Checks that only the ordinary value 1.0 is classified as normal among the sampled values.
+ let t = "0x1p+0".parse::<Single>().unwrap();
+ assert!(t.is_normal());
+
+ assert!(!Single::INFINITY.is_normal());
+ assert!(!Single::ZERO.is_normal());
+ assert!(!Single::NAN.is_normal());
+ assert!(!Single::snan(None).is_normal());
+ assert!(!"0x1p-149".parse::<Single>().unwrap().is_normal()); // 0x1p-149 is expected not to be normal
+}
+
+#[test]
+fn is_finite() { // Checks that 1.0, zero and 0x1p-149 are finite while infinity and both NaN kinds are not.
+ let t = "0x1p+0".parse::<Single>().unwrap();
+ assert!(t.is_finite());
+ assert!(!Single::INFINITY.is_finite());
+ assert!(Single::ZERO.is_finite()); // zero is expected to count as finite
+ assert!(!Single::NAN.is_finite());
+ assert!(!Single::snan(None).is_finite());
+ assert!("0x1p-149".parse::<Single>().unwrap().is_finite());
+}
+
+#[test]
+fn is_infinite() { // Checks `is_infinite`, `is_pos_infinity` and `is_neg_infinity` on infinities and on non-infinite values.
+ let t = "0x1p+0".parse::<Single>().unwrap();
+ assert!(!t.is_infinite());
+
+ let pos_inf = Single::INFINITY;
+ let neg_inf = -Single::INFINITY;
+
+ assert!(pos_inf.is_infinite());
+ assert!(pos_inf.is_pos_infinity());
+ assert!(!pos_inf.is_neg_infinity()); // the signed predicates are expected to be mutually exclusive
+ assert!(neg_inf.is_infinite());
+ assert!(!neg_inf.is_pos_infinity());
+ assert!(neg_inf.is_neg_infinity());
+
+ assert!(!Single::ZERO.is_infinite());
+ assert!(!Single::NAN.is_infinite());
+ assert!(!Single::snan(None).is_infinite());
+ assert!(!"0x1p-149".parse::<Single>().unwrap().is_infinite());
+}
+
+#[test]
+fn is_nan() { // Checks that only the quiet and signaling NaN values report `is_nan`.
+ let t = "0x1p+0".parse::<Single>().unwrap();
+ assert!(!t.is_nan());
+ assert!(!Single::INFINITY.is_nan());
+ assert!(!Single::ZERO.is_nan());
+ assert!(Single::NAN.is_nan());
+ assert!(Single::snan(None).is_nan()); // a signaling NaN is expected to count as NaN as well
+ assert!(!"0x1p-149".parse::<Single>().unwrap().is_nan());
+}
+
+#[test]
+fn is_finite_non_zero() { // Checks `is_finite_non_zero` for both signs of normal, denormal, infinite, zero and NaN values.
+ // Test positive/negative normal value.
+ assert!("0x1p+0".parse::<Single>().unwrap().is_finite_non_zero());
+ assert!("-0x1p+0".parse::<Single>().unwrap().is_finite_non_zero());
+
+ // Test positive/negative denormal value.
+ assert!("0x1p-149".parse::<Single>().unwrap().is_finite_non_zero());
+ assert!("-0x1p-149".parse::<Single>().unwrap().is_finite_non_zero());
+
+ // Test +/- Infinity.
+ assert!(!Single::INFINITY.is_finite_non_zero());
+ assert!(!(-Single::INFINITY).is_finite_non_zero());
+
+ // Test +/- Zero.
+ assert!(!Single::ZERO.is_finite_non_zero());
+ assert!(!(-Single::ZERO).is_finite_non_zero());
+
+ // Test +/- qNaN. +/- don't mean anything with qNaN but paranoia can't hurt in
+ // this instance.
+ assert!(!Single::NAN.is_finite_non_zero());
+ assert!(!(-Single::NAN).is_finite_non_zero());
+
+ // Test +/- sNaN. +/- don't mean anything with sNaN but paranoia can't hurt in
+ // this instance.
+ assert!(!Single::snan(None).is_finite_non_zero());
+ assert!(!(-Single::snan(None)).is_finite_non_zero());
+}
+
+#[test]
+fn add() { // Adds every ordered pair of the 14 special/boundary Single values below and checks result, status and category.
+ // Test Special Cases against each other and normal values.
+
+ let p_inf = Single::INFINITY;
+ let m_inf = -Single::INFINITY;
+ let p_zero = Single::ZERO;
+ let m_zero = -Single::ZERO;
+ let qnan = Single::NAN;
+ let snan = "snan123".parse::<Single>().unwrap(); // signaling NaN parsed from text with payload 123
+ let p_normal_value = "0x1p+0".parse::<Single>().unwrap();
+ let m_normal_value = "-0x1p+0".parse::<Single>().unwrap();
+ let p_largest_value = Single::largest();
+ let m_largest_value = -Single::largest();
+ let p_smallest_value = Single::SMALLEST;
+ let m_smallest_value = -Single::SMALLEST;
+ let p_smallest_normalized = Single::smallest_normalized();
+ let m_smallest_normalized = -Single::smallest_normalized();
+
+ let overflow_status = Status::OVERFLOW | Status::INEXACT; // overflow rows expect both flags set together
+
+ let special_cases = [ // rows: (lhs, rhs, expected result as parseable text, expected status, expected category)
+ (p_inf, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_zero, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_zero, "inf", Status::OK, Category::Infinity),
+ (p_inf, qnan, "nan", Status::OK, Category::NaN),
+ (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_normal_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_normal_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, p_largest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_largest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, p_smallest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_smallest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, p_smallest_normalized, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_smallest_normalized, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_inf, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_zero, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_zero, "-inf", Status::OK, Category::Infinity),
+ (m_inf, qnan, "nan", Status::OK, Category::NaN),
+ (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_normal_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_normal_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_largest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_largest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_smallest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_smallest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_smallest_normalized, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_smallest_normalized, "-inf", Status::OK, Category::Infinity),
+ (p_zero, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_zero, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_zero, p_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, qnan, "nan", Status::OK, Category::NaN),
+ (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_zero, p_normal_value, "0x1p+0", Status::OK, Category::Normal),
+ (p_zero, m_normal_value, "-0x1p+0", Status::OK, Category::Normal),
+ (p_zero, p_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_zero, m_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_zero, p_smallest_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_zero, m_smallest_value, "-0x1p-149", Status::OK, Category::Normal),
+ (p_zero, p_smallest_normalized, "0x1p-126", Status::OK, Category::Normal),
+ (p_zero, m_smallest_normalized, "-0x1p-126", Status::OK, Category::Normal),
+ (m_zero, p_inf, "inf", Status::OK, Category::Infinity),
+ (m_zero, m_inf, "-inf", Status::OK, Category::Infinity),
+ (m_zero, p_zero, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, qnan, "nan", Status::OK, Category::NaN),
+ (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_zero, p_normal_value, "0x1p+0", Status::OK, Category::Normal),
+ (m_zero, m_normal_value, "-0x1p+0", Status::OK, Category::Normal),
+ (m_zero, p_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_zero, m_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_zero, p_smallest_value, "0x1p-149", Status::OK, Category::Normal),
+ (m_zero, m_smallest_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_zero, p_smallest_normalized, "0x1p-126", Status::OK, Category::Normal),
+ (m_zero, m_smallest_normalized, "-0x1p-126", Status::OK, Category::Normal),
+ (qnan, p_inf, "nan", Status::OK, Category::NaN),
+ (qnan, m_inf, "nan", Status::OK, Category::NaN),
+ (qnan, p_zero, "nan", Status::OK, Category::NaN),
+ (qnan, m_zero, "nan", Status::OK, Category::NaN),
+ (qnan, qnan, "nan", Status::OK, Category::NaN),
+ (qnan, snan, "nan", Status::INVALID_OP, Category::NaN),
+ (qnan, p_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_normal_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_normal_value, p_zero, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, m_zero, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_normal_value, "0x1p+1", Status::OK, Category::Normal),
+ (p_normal_value, m_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_normal_value, m_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_normal_value, p_smallest_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_normal_value, m_smallest_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_normal_value, p_smallest_normalized, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_normal_value, m_smallest_normalized, "0x1p+0", Status::INEXACT, Category::Normal),
+ (m_normal_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (m_normal_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (m_normal_value, p_zero, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, m_zero, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_normal_value, p_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, m_normal_value, "-0x1p+1", Status::OK, Category::Normal),
+ (m_normal_value, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_normal_value, m_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_normal_value, p_smallest_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_normal_value, m_smallest_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_normal_value, p_smallest_normalized, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_normal_value, m_smallest_normalized, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (p_largest_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_largest_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_largest_value, p_zero, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, m_zero, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_largest_value, p_normal_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, m_normal_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, p_largest_value, "inf", overflow_status, Category::Infinity),
+ (p_largest_value, m_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, p_smallest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, m_smallest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, p_smallest_normalized, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, m_smallest_normalized, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (m_largest_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (m_largest_value, p_zero, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, m_zero, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_largest_value, p_normal_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, m_normal_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, p_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, m_largest_value, "-inf", overflow_status, Category::Infinity),
+ (m_largest_value, p_smallest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, m_smallest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, p_smallest_normalized, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, m_smallest_normalized, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_smallest_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_smallest_value, p_zero, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, m_zero, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_value, p_normal_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_smallest_value, m_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (p_smallest_value, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_value, m_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_value, p_smallest_value, "0x1p-148", Status::OK, Category::Normal),
+ (p_smallest_value, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, p_smallest_normalized, "0x1.000002p-126", Status::OK, Category::Normal),
+ (p_smallest_value, m_smallest_normalized, "-0x1.fffffcp-127", Status::OK, Category::Normal),
+ (m_smallest_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (m_smallest_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (m_smallest_value, p_zero, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, m_zero, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_value, p_normal_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (m_smallest_value, m_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_smallest_value, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_smallest_value, m_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_smallest_value, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_value, m_smallest_value, "-0x1p-148", Status::OK, Category::Normal),
+ (m_smallest_value, p_smallest_normalized, "0x1.fffffcp-127", Status::OK, Category::Normal),
+ (m_smallest_value, m_smallest_normalized, "-0x1.000002p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_smallest_normalized, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_smallest_normalized, p_zero, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_zero, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_normalized, p_normal_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_smallest_normalized, m_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (p_smallest_normalized, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_normalized, m_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_normalized, p_smallest_value, "0x1.000002p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_smallest_value, "0x1.fffffcp-127", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_smallest_normalized, "0x1p-125", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, p_inf, "inf", Status::OK, Category::Infinity),
+ (m_smallest_normalized, m_inf, "-inf", Status::OK, Category::Infinity),
+ (m_smallest_normalized, p_zero, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_zero, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_normalized, p_normal_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (m_smallest_normalized, m_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_smallest_normalized, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_smallest_normalized, m_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_smallest_normalized, p_smallest_value, "-0x1.fffffcp-127", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_smallest_value, "-0x1.000002p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, m_smallest_normalized, "-0x1p-125", Status::OK, Category::Normal),
+ ];
+
+ for case @ &(x, y, e_result, e_status, e_category) in &special_cases[..] { // `case` keeps the whole row for the failure message
+ let status;
+ let result = unpack!(status=, x + y); // presumably `unpack!` stores the operation's status and yields its value — macro is defined outside this view
+ assert_eq!(e_status, status);
+ assert_eq!(e_category, result.category());
+ assert!(result.bitwise_eq(e_result.parse::<Single>().unwrap()), "result = {result:?}, case = {case:?}"); // expected text is parsed to Single and compared bit-for-bit
+ }
+}
+
+#[test]
+fn subtract() {
+ // Checks `x - y` on `Single` for all 14 x 14 ordered pairs of special and representative operands (196 rows).
+ // Operands: +/-inf, +/-0, `Single::NAN`, the NaN parsed from "snan123", +/-1.0, +/-largest, +/-SMALLEST, +/-smallest_normalized.
+ let p_inf = Single::INFINITY;
+ let m_inf = -Single::INFINITY;
+ let p_zero = Single::ZERO;
+ let m_zero = -Single::ZERO;
+ let qnan = Single::NAN;
+ let snan = "snan123".parse::<Single>().unwrap();
+ let p_normal_value = "0x1p+0".parse::<Single>().unwrap();
+ let m_normal_value = "-0x1p+0".parse::<Single>().unwrap();
+ let p_largest_value = Single::largest();
+ let m_largest_value = -Single::largest();
+ let p_smallest_value = Single::SMALLEST;
+ let m_smallest_value = -Single::SMALLEST;
+ let p_smallest_normalized = Single::smallest_normalized();
+ let m_smallest_normalized = -Single::smallest_normalized();
+ // Expected status of the two rows where finite operands produce +/-inf: largest - (-largest) and its mirror.
+ let overflow_status = Status::OVERFLOW | Status::INEXACT;
+ // Row layout: (x, y, expected result as text, expected status, expected category). Every row with `snan` expects INVALID_OP.
+ let special_cases = [
+ (p_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, m_inf, "inf", Status::OK, Category::Infinity),
+ (p_inf, p_zero, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_zero, "inf", Status::OK, Category::Infinity),
+ (p_inf, qnan, "nan", Status::OK, Category::NaN),
+ (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_normal_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_normal_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, p_largest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_largest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, p_smallest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_smallest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, p_smallest_normalized, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_smallest_normalized, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_zero, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_zero, "-inf", Status::OK, Category::Infinity),
+ (m_inf, qnan, "nan", Status::OK, Category::NaN),
+ (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_normal_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_normal_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_largest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_largest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_smallest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_smallest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_smallest_normalized, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_smallest_normalized, "-inf", Status::OK, Category::Infinity),
+ (p_zero, p_inf, "-inf", Status::OK, Category::Infinity),
+ (p_zero, m_inf, "inf", Status::OK, Category::Infinity),
+ (p_zero, p_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, qnan, "nan", Status::OK, Category::NaN),
+ (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_zero, p_normal_value, "-0x1p+0", Status::OK, Category::Normal),
+ (p_zero, m_normal_value, "0x1p+0", Status::OK, Category::Normal),
+ (p_zero, p_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_zero, m_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_zero, p_smallest_value, "-0x1p-149", Status::OK, Category::Normal),
+ (p_zero, m_smallest_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_zero, p_smallest_normalized, "-0x1p-126", Status::OK, Category::Normal),
+ (p_zero, m_smallest_normalized, "0x1p-126", Status::OK, Category::Normal),
+ (m_zero, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_zero, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_zero, p_zero, "-0x0p+0", Status::OK, Category::Zero), // the only zero-minus-zero row that expects a negative zero
+ (m_zero, m_zero, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, qnan, "nan", Status::OK, Category::NaN),
+ (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_zero, p_normal_value, "-0x1p+0", Status::OK, Category::Normal),
+ (m_zero, m_normal_value, "0x1p+0", Status::OK, Category::Normal),
+ (m_zero, p_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_zero, m_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_zero, p_smallest_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_zero, m_smallest_value, "0x1p-149", Status::OK, Category::Normal),
+ (m_zero, p_smallest_normalized, "-0x1p-126", Status::OK, Category::Normal),
+ (m_zero, m_smallest_normalized, "0x1p-126", Status::OK, Category::Normal),
+ (qnan, p_inf, "nan", Status::OK, Category::NaN),
+ (qnan, m_inf, "nan", Status::OK, Category::NaN),
+ (qnan, p_zero, "nan", Status::OK, Category::NaN),
+ (qnan, m_zero, "nan", Status::OK, Category::NaN),
+ (qnan, qnan, "nan", Status::OK, Category::NaN),
+ (qnan, snan, "nan", Status::INVALID_OP, Category::NaN), // both operands NaN: expected text follows the left operand (cf. (snan, qnan) -> "nan123")
+ (qnan, p_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (p_normal_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (p_normal_value, p_zero, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, m_zero, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, m_normal_value, "0x1p+1", Status::OK, Category::Normal),
+ (p_normal_value, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_normal_value, m_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_normal_value, p_smallest_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_normal_value, m_smallest_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_normal_value, p_smallest_normalized, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_normal_value, m_smallest_normalized, "0x1p+0", Status::INEXACT, Category::Normal),
+ (m_normal_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_normal_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_normal_value, p_zero, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, m_zero, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_normal_value, p_normal_value, "-0x1p+1", Status::OK, Category::Normal),
+ (m_normal_value, m_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_normal_value, m_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_normal_value, p_smallest_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_normal_value, m_smallest_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_normal_value, p_smallest_normalized, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_normal_value, m_smallest_normalized, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (p_largest_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (p_largest_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (p_largest_value, p_zero, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, m_zero, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_largest_value, p_normal_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, m_normal_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, p_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, m_largest_value, "inf", overflow_status, Category::Infinity),
+ (p_largest_value, p_smallest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, m_smallest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, p_smallest_normalized, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_largest_value, m_smallest_normalized, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_largest_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_largest_value, p_zero, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, m_zero, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_largest_value, p_normal_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, m_normal_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, p_largest_value, "-inf", overflow_status, Category::Infinity),
+ (m_largest_value, m_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, p_smallest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, m_smallest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, p_smallest_normalized, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_largest_value, m_smallest_normalized, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (p_smallest_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (p_smallest_value, p_zero, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, m_zero, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_value, p_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (p_smallest_value, m_normal_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_smallest_value, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_value, m_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_value, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, m_smallest_value, "0x1p-148", Status::OK, Category::Normal),
+ (p_smallest_value, p_smallest_normalized, "-0x1.fffffcp-127", Status::OK, Category::Normal),
+ (p_smallest_value, m_smallest_normalized, "0x1.000002p-126", Status::OK, Category::Normal),
+ (m_smallest_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_smallest_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_smallest_value, p_zero, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, m_zero, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_value, p_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_smallest_value, m_normal_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (m_smallest_value, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_smallest_value, m_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_smallest_value, p_smallest_value, "-0x1p-148", Status::OK, Category::Normal),
+ (m_smallest_value, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_value, p_smallest_normalized, "-0x1.000002p-126", Status::OK, Category::Normal),
+ (m_smallest_value, m_smallest_normalized, "0x1.fffffcp-127", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_inf, "-inf", Status::OK, Category::Infinity),
+ (p_smallest_normalized, m_inf, "inf", Status::OK, Category::Infinity),
+ (p_smallest_normalized, p_zero, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_zero, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_normalized, p_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (p_smallest_normalized, m_normal_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (p_smallest_normalized, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_normalized, m_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (p_smallest_normalized, p_smallest_value, "0x1.fffffcp-127", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_smallest_value, "0x1.000002p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_normalized, m_smallest_normalized, "0x1p-125", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_smallest_normalized, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_smallest_normalized, p_zero, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_zero, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_normalized, p_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal),
+ (m_smallest_normalized, m_normal_value, "0x1p+0", Status::INEXACT, Category::Normal),
+ (m_smallest_normalized, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_smallest_normalized, m_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal),
+ (m_smallest_normalized, p_smallest_value, "-0x1.000002p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_smallest_value, "-0x1.fffffcp-127", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_smallest_normalized, "-0x1p-125", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ ];
+ // Run every row: status and category must equal the expectation, and `bitwise_eq` against the parsed expected text must hold.
+ for case @ &(x, y, e_result, e_status, e_category) in &special_cases[..] { // `case` keeps the whole row for the failure message
+ let status;
+ let result = unpack!(status=, x - y); // NOTE(review): `unpack!` is defined outside this block; it appears to yield the value and store the status in `status` - confirm
+ assert_eq!(e_status, status);
+ assert_eq!(e_category, result.category());
+ assert!(result.bitwise_eq(e_result.parse::<Single>().unwrap()), "result = {result:?}, case = {case:?}");
+ }
+}
+
+#[test]
+fn multiply() {
+ // Checks `x * y` on `Single` for all 14 x 14 ordered pairs of special and representative operands (196 rows),
+ // then checks `Quad` products of extreme operands under each of the five `Round` variants listed in the second table.
+ let p_inf = Single::INFINITY;
+ let m_inf = -Single::INFINITY;
+ let p_zero = Single::ZERO;
+ let m_zero = -Single::ZERO;
+ let qnan = Single::NAN;
+ let snan = "snan123".parse::<Single>().unwrap();
+ let p_normal_value = "0x1p+0".parse::<Single>().unwrap();
+ let m_normal_value = "-0x1p+0".parse::<Single>().unwrap();
+ let p_largest_value = Single::largest();
+ let m_largest_value = -Single::largest();
+ let p_smallest_value = Single::SMALLEST;
+ let m_smallest_value = -Single::SMALLEST;
+ let p_smallest_normalized = Single::smallest_normalized();
+ let m_smallest_normalized = -Single::smallest_normalized();
+ // Quad operands for the rounding-mode table; judging by the literals, presumably the largest finite and smallest nonzero magnitudes - confirm.
+ let max_quad = "0x1.ffffffffffffffffffffffffffffp+16383".parse::<Quad>().unwrap();
+ let min_quad = "0x0.0000000000000000000000000001p-16382".parse::<Quad>().unwrap();
+ let n_min_quad = "-0x0.0000000000000000000000000001p-16382".parse::<Quad>().unwrap();
+ // Status combinations expected by the rows whose product cannot be represented exactly at either extreme (used by both tables).
+ let overflow_status = Status::OVERFLOW | Status::INEXACT;
+ let underflow_status = Status::UNDERFLOW | Status::INEXACT;
+ // Row layout: (x, y, expected result as text, expected status, expected category). Every row with `snan` expects INVALID_OP.
+ let single_special_cases = [
+ (p_inf, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_inf, p_zero, "nan", Status::INVALID_OP, Category::NaN), // infinity times zero: every such row expects NaN with INVALID_OP
+ (p_inf, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, qnan, "nan", Status::OK, Category::NaN),
+ (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_normal_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_normal_value, "-inf", Status::OK, Category::Infinity),
+ (p_inf, p_largest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_largest_value, "-inf", Status::OK, Category::Infinity),
+ (p_inf, p_smallest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_smallest_value, "-inf", Status::OK, Category::Infinity),
+ (p_inf, p_smallest_normalized, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_smallest_normalized, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, qnan, "nan", Status::OK, Category::NaN),
+ (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_normal_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_normal_value, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_largest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_largest_value, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_smallest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_smallest_value, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_smallest_normalized, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_smallest_normalized, "inf", Status::OK, Category::Infinity),
+ (p_zero, p_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (p_zero, m_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (p_zero, p_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (p_zero, qnan, "nan", Status::OK, Category::NaN),
+ (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_zero, p_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_largest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (m_zero, m_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (m_zero, p_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_zero, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, qnan, "nan", Status::OK, Category::NaN),
+ (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_zero, p_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_largest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (qnan, p_inf, "nan", Status::OK, Category::NaN),
+ (qnan, m_inf, "nan", Status::OK, Category::NaN),
+ (qnan, p_zero, "nan", Status::OK, Category::NaN),
+ (qnan, m_zero, "nan", Status::OK, Category::NaN),
+ (qnan, qnan, "nan", Status::OK, Category::NaN),
+ (qnan, snan, "nan", Status::INVALID_OP, Category::NaN), // both operands NaN: expected text follows the left operand (cf. (snan, qnan) -> "nan123")
+ (qnan, p_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_normal_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_normal_value, p_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, m_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_normal_value, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, m_normal_value, "-0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, p_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_normal_value, m_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_normal_value, p_smallest_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_normal_value, m_smallest_value, "-0x1p-149", Status::OK, Category::Normal),
+ (p_normal_value, p_smallest_normalized, "0x1p-126", Status::OK, Category::Normal),
+ (p_normal_value, m_smallest_normalized, "-0x1p-126", Status::OK, Category::Normal),
+ (m_normal_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_normal_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_normal_value, p_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, m_zero, "0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_normal_value, p_normal_value, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, m_normal_value, "0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, p_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_normal_value, m_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_normal_value, p_smallest_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_normal_value, m_smallest_value, "0x1p-149", Status::OK, Category::Normal),
+ (m_normal_value, p_smallest_normalized, "-0x1p-126", Status::OK, Category::Normal),
+ (m_normal_value, m_smallest_normalized, "0x1p-126", Status::OK, Category::Normal),
+ (p_largest_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_largest_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_largest_value, p_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, m_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_largest_value, p_normal_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, m_normal_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, p_largest_value, "inf", overflow_status, Category::Infinity),
+ (p_largest_value, m_largest_value, "-inf", overflow_status, Category::Infinity),
+ (p_largest_value, p_smallest_value, "0x1.fffffep-22", Status::OK, Category::Normal),
+ (p_largest_value, m_smallest_value, "-0x1.fffffep-22", Status::OK, Category::Normal),
+ (p_largest_value, p_smallest_normalized, "0x1.fffffep+1", Status::OK, Category::Normal),
+ (p_largest_value, m_smallest_normalized, "-0x1.fffffep+1", Status::OK, Category::Normal),
+ (m_largest_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_largest_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_largest_value, p_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, m_zero, "0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_largest_value, p_normal_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, m_normal_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, p_largest_value, "-inf", overflow_status, Category::Infinity),
+ (m_largest_value, m_largest_value, "inf", overflow_status, Category::Infinity),
+ (m_largest_value, p_smallest_value, "-0x1.fffffep-22", Status::OK, Category::Normal),
+ (m_largest_value, m_smallest_value, "0x1.fffffep-22", Status::OK, Category::Normal),
+ (m_largest_value, p_smallest_normalized, "-0x1.fffffep+1", Status::OK, Category::Normal),
+ (m_largest_value, m_smallest_normalized, "0x1.fffffep+1", Status::OK, Category::Normal),
+ (p_smallest_value, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_smallest_value, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_smallest_value, p_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, m_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_value, p_normal_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, m_normal_value, "-0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, p_largest_value, "0x1.fffffep-22", Status::OK, Category::Normal),
+ (p_smallest_value, m_largest_value, "-0x1.fffffep-22", Status::OK, Category::Normal),
+ (p_smallest_value, p_smallest_value, "0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_value, m_smallest_value, "-0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_value, p_smallest_normalized, "0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_value, m_smallest_normalized, "-0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_value, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_smallest_value, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_smallest_value, p_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_value, m_zero, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_value, p_normal_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, m_normal_value, "0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, p_largest_value, "-0x1.fffffep-22", Status::OK, Category::Normal),
+ (m_smallest_value, m_largest_value, "0x1.fffffep-22", Status::OK, Category::Normal),
+ (m_smallest_value, p_smallest_value, "-0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_value, m_smallest_value, "0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_value, p_smallest_normalized, "-0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_value, m_smallest_normalized, "0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_normalized, p_inf, "inf", Status::OK, Category::Infinity),
+ (p_smallest_normalized, m_inf, "-inf", Status::OK, Category::Infinity),
+ (p_smallest_normalized, p_zero, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_normalized, m_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_normalized, p_normal_value, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_normal_value, "-0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_largest_value, "0x1.fffffep+1", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_largest_value, "-0x1.fffffep+1", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_smallest_value, "0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_normalized, m_smallest_value, "-0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_normalized, p_smallest_normalized, "0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_normalized, m_smallest_normalized, "-0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_normalized, p_inf, "-inf", Status::OK, Category::Infinity),
+ (m_smallest_normalized, m_inf, "inf", Status::OK, Category::Infinity),
+ (m_smallest_normalized, p_zero, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, m_zero, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_normalized, p_normal_value, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_normal_value, "0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_largest_value, "-0x1.fffffep+1", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_largest_value, "0x1.fffffep+1", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_smallest_value, "-0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_normalized, m_smallest_value, "0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_normalized, p_smallest_normalized, "-0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_normalized, m_smallest_normalized, "0x0p+0", underflow_status, Category::Zero),
+ ];
+ let quad_special_cases = [ // rows carry a sixth field: the rounding mode handed to `mul_r` in the second loop
+ (
+ max_quad,
+ min_quad,
+ "0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::NearestTiesToEven,
+ ),
+ (
+ max_quad,
+ min_quad,
+ "0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::TowardPositive,
+ ),
+ (
+ max_quad,
+ min_quad,
+ "0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::TowardNegative,
+ ),
+ (max_quad, min_quad, "0x1.ffffffffffffffffffffffffffffp-111", Status::OK, Category::Normal, Round::TowardZero),
+ (
+ max_quad,
+ min_quad,
+ "0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::NearestTiesToAway,
+ ),
+ (
+ max_quad,
+ n_min_quad,
+ "-0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::NearestTiesToEven,
+ ),
+ (
+ max_quad,
+ n_min_quad,
+ "-0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::TowardPositive,
+ ),
+ (
+ max_quad,
+ n_min_quad,
+ "-0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::TowardNegative,
+ ),
+ (
+ max_quad,
+ n_min_quad,
+ "-0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::TowardZero,
+ ),
+ (
+ max_quad,
+ n_min_quad,
+ "-0x1.ffffffffffffffffffffffffffffp-111",
+ Status::OK,
+ Category::Normal,
+ Round::NearestTiesToAway,
+ ),
+ (max_quad, max_quad, "inf", overflow_status, Category::Infinity, Round::NearestTiesToEven),
+ (max_quad, max_quad, "inf", overflow_status, Category::Infinity, Round::TowardPositive),
+ (
+ max_quad,
+ max_quad,
+ "0x1.ffffffffffffffffffffffffffffp+16383", // TowardNegative / TowardZero rows expect `max_quad` back with INEXACT only, not infinity
+ Status::INEXACT,
+ Category::Normal,
+ Round::TowardNegative,
+ ),
+ (
+ max_quad,
+ max_quad,
+ "0x1.ffffffffffffffffffffffffffffp+16383",
+ Status::INEXACT,
+ Category::Normal,
+ Round::TowardZero,
+ ),
+ (max_quad, max_quad, "inf", overflow_status, Category::Infinity, Round::NearestTiesToAway),
+ (min_quad, min_quad, "0", underflow_status, Category::Zero, Round::NearestTiesToEven),
+ (
+ min_quad,
+ min_quad,
+ "0x0.0000000000000000000000000001p-16382", // TowardPositive is the only mode here that expects `min_quad` instead of zero
+ underflow_status,
+ Category::Normal,
+ Round::TowardPositive,
+ ),
+ (min_quad, min_quad, "0", underflow_status, Category::Zero, Round::TowardNegative),
+ (min_quad, min_quad, "0", underflow_status, Category::Zero, Round::TowardZero),
+ (min_quad, min_quad, "0", underflow_status, Category::Zero, Round::NearestTiesToAway),
+ (min_quad, n_min_quad, "-0", underflow_status, Category::Zero, Round::NearestTiesToEven),
+ (min_quad, n_min_quad, "-0", underflow_status, Category::Zero, Round::TowardPositive),
+ (
+ min_quad,
+ n_min_quad,
+ "-0x0.0000000000000000000000000001p-16382", // mirror case: TowardNegative expects `n_min_quad` instead of negative zero
+ underflow_status,
+ Category::Normal,
+ Round::TowardNegative,
+ ),
+ (min_quad, n_min_quad, "-0", underflow_status, Category::Zero, Round::TowardZero),
+ (min_quad, n_min_quad, "-0", underflow_status, Category::Zero, Round::NearestTiesToAway),
+ ];
+ // Single rows use the `*` operator; Quad rows call `mul_r` with the row's rounding mode. Both check status, category and `bitwise_eq`.
+ for case @ &(x, y, e_result, e_status, e_category) in &single_special_cases { // `case` keeps the whole row for the failure message
+ let status;
+ let result = unpack!(status=, x * y); // NOTE(review): `unpack!` is defined outside this block; it appears to yield the value and store the status in `status` - confirm
+ assert_eq!(e_status, status);
+ assert_eq!(e_category, result.category());
+ assert!(result.bitwise_eq(e_result.parse::<Single>().unwrap()), "result = {result:?}, case = {case:?}");
+ }
+ for case @ &(x, y, e_result, e_status, e_category, round) in &quad_special_cases {
+ let status;
+ let result = unpack!(status=, x.mul_r(y, round));
+ assert_eq!(e_status, status);
+ assert_eq!(e_category, result.category());
+ assert!(result.bitwise_eq(e_result.parse::<Quad>().unwrap()), "result = {result:?}, case = {case:?}");
+ }
+}
+
+#[test]
+fn divide() {
+ // Table-driven test of division: `/` on `Single` special values (infinities, zeros, NaNs, boundary magnitudes) against each other and normal values, then `div_r` on extreme `Quad` values under the rounding modes listed in the second table.
+
+ let p_inf = Single::INFINITY;
+ let m_inf = -Single::INFINITY;
+ let p_zero = Single::ZERO;
+ let m_zero = -Single::ZERO;
+ let qnan = Single::NAN;
+ let snan = "snan123".parse::<Single>().unwrap(); // rows with this as the left operand expect "nan123" plus INVALID_OP
+ let p_normal_value = "0x1p+0".parse::<Single>().unwrap();
+ let m_normal_value = "-0x1p+0".parse::<Single>().unwrap();
+ let p_largest_value = Single::largest();
+ let m_largest_value = -Single::largest();
+ let p_smallest_value = Single::SMALLEST;
+ let m_smallest_value = -Single::SMALLEST;
+ let p_smallest_normalized = Single::smallest_normalized();
+ let m_smallest_normalized = -Single::smallest_normalized();
+
+ let max_quad = "0x1.ffffffffffffffffffffffffffffp+16383".parse::<Quad>().unwrap();
+ let min_quad = "0x0.0000000000000000000000000001p-16382".parse::<Quad>().unwrap();
+ let n_min_quad = "-0x0.0000000000000000000000000001p-16382".parse::<Quad>().unwrap();
+
+ let overflow_status = Status::OVERFLOW | Status::INEXACT; // flag set expected on rows whose quotient overflows
+ let underflow_status = Status::UNDERFLOW | Status::INEXACT; // flag set expected on rows whose quotient underflows
+
+ let single_special_cases = [ // row layout: (lhs, rhs, expected result as text, expected status, expected category)
+ (p_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_zero, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_zero, "-inf", Status::OK, Category::Infinity),
+ (p_inf, qnan, "nan", Status::OK, Category::NaN),
+ (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_normal_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_normal_value, "-inf", Status::OK, Category::Infinity),
+ (p_inf, p_largest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_largest_value, "-inf", Status::OK, Category::Infinity),
+ (p_inf, p_smallest_value, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_smallest_value, "-inf", Status::OK, Category::Infinity),
+ (p_inf, p_smallest_normalized, "inf", Status::OK, Category::Infinity),
+ (p_inf, m_smallest_normalized, "-inf", Status::OK, Category::Infinity),
+ (m_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_zero, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_zero, "inf", Status::OK, Category::Infinity),
+ (m_inf, qnan, "nan", Status::OK, Category::NaN),
+ (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_normal_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_normal_value, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_largest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_largest_value, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_smallest_value, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_smallest_value, "inf", Status::OK, Category::Infinity),
+ (m_inf, p_smallest_normalized, "-inf", Status::OK, Category::Infinity),
+ (m_inf, m_smallest_normalized, "inf", Status::OK, Category::Infinity),
+ (p_zero, p_inf, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_zero, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_zero, qnan, "nan", Status::OK, Category::NaN),
+ (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_zero, p_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_largest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_inf, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_zero, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_zero, qnan, "nan", Status::OK, Category::NaN),
+ (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_zero, p_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_largest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (qnan, p_inf, "nan", Status::OK, Category::NaN),
+ (qnan, m_inf, "nan", Status::OK, Category::NaN),
+ (qnan, p_zero, "nan", Status::OK, Category::NaN),
+ (qnan, m_zero, "nan", Status::OK, Category::NaN),
+ (qnan, qnan, "nan", Status::OK, Category::NaN),
+ (qnan, snan, "nan", Status::INVALID_OP, Category::NaN),
+ (qnan, p_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_inf, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, m_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, p_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (p_normal_value, m_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (p_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_normal_value, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, m_normal_value, "-0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, p_largest_value, "0x1p-128", underflow_status, Category::Normal),
+ (p_normal_value, m_largest_value, "-0x1p-128", underflow_status, Category::Normal),
+ (p_normal_value, p_smallest_value, "inf", overflow_status, Category::Infinity),
+ (p_normal_value, m_smallest_value, "-inf", overflow_status, Category::Infinity),
+ (p_normal_value, p_smallest_normalized, "0x1p+126", Status::OK, Category::Normal),
+ (p_normal_value, m_smallest_normalized, "-0x1p+126", Status::OK, Category::Normal),
+ (m_normal_value, p_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, m_inf, "0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, p_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (m_normal_value, m_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (m_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_normal_value, p_normal_value, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, m_normal_value, "0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, p_largest_value, "-0x1p-128", underflow_status, Category::Normal),
+ (m_normal_value, m_largest_value, "0x1p-128", underflow_status, Category::Normal),
+ (m_normal_value, p_smallest_value, "-inf", overflow_status, Category::Infinity),
+ (m_normal_value, m_smallest_value, "inf", overflow_status, Category::Infinity),
+ (m_normal_value, p_smallest_normalized, "-0x1p+126", Status::OK, Category::Normal),
+ (m_normal_value, m_smallest_normalized, "0x1p+126", Status::OK, Category::Normal),
+ (p_largest_value, p_inf, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, m_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, p_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (p_largest_value, m_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (p_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_largest_value, p_normal_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, m_normal_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, p_largest_value, "0x1p+0", Status::OK, Category::Normal),
+ (p_largest_value, m_largest_value, "-0x1p+0", Status::OK, Category::Normal),
+ (p_largest_value, p_smallest_value, "inf", overflow_status, Category::Infinity),
+ (p_largest_value, m_smallest_value, "-inf", overflow_status, Category::Infinity),
+ (p_largest_value, p_smallest_normalized, "inf", overflow_status, Category::Infinity),
+ (p_largest_value, m_smallest_normalized, "-inf", overflow_status, Category::Infinity),
+ (m_largest_value, p_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, m_inf, "0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, p_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (m_largest_value, m_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (m_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_largest_value, p_normal_value, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, m_normal_value, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, p_largest_value, "-0x1p+0", Status::OK, Category::Normal),
+ (m_largest_value, m_largest_value, "0x1p+0", Status::OK, Category::Normal),
+ (m_largest_value, p_smallest_value, "-inf", overflow_status, Category::Infinity),
+ (m_largest_value, m_smallest_value, "inf", overflow_status, Category::Infinity),
+ (m_largest_value, p_smallest_normalized, "-inf", overflow_status, Category::Infinity),
+ (m_largest_value, m_smallest_normalized, "inf", overflow_status, Category::Infinity),
+ (p_smallest_value, p_inf, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, m_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, p_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (p_smallest_value, m_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (p_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_value, p_normal_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, m_normal_value, "-0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, p_largest_value, "0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_value, m_largest_value, "-0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_value, p_smallest_value, "0x1p+0", Status::OK, Category::Normal),
+ (p_smallest_value, m_smallest_value, "-0x1p+0", Status::OK, Category::Normal),
+ (p_smallest_value, p_smallest_normalized, "0x1p-23", Status::OK, Category::Normal),
+ (p_smallest_value, m_smallest_normalized, "-0x1p-23", Status::OK, Category::Normal),
+ (m_smallest_value, p_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_value, m_inf, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_value, p_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (m_smallest_value, m_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (m_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_value, p_normal_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, m_normal_value, "0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, p_largest_value, "-0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_value, m_largest_value, "0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_value, p_smallest_value, "-0x1p+0", Status::OK, Category::Normal),
+ (m_smallest_value, m_smallest_value, "0x1p+0", Status::OK, Category::Normal),
+ (m_smallest_value, p_smallest_normalized, "-0x1p-23", Status::OK, Category::Normal),
+ (m_smallest_value, m_smallest_normalized, "0x1p-23", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_inf, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_normalized, m_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_normalized, p_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (p_smallest_normalized, m_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_normalized, p_normal_value, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_normal_value, "-0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_largest_value, "0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_normalized, m_largest_value, "-0x0p+0", underflow_status, Category::Zero),
+ (p_smallest_normalized, p_smallest_value, "0x1p+23", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_smallest_value, "-0x1p+23", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_smallest_normalized, "0x1p+0", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_smallest_normalized, "-0x1p+0", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, m_inf, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, p_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (m_smallest_normalized, m_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity),
+ (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_normalized, p_normal_value, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_normal_value, "0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_largest_value, "-0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_normalized, m_largest_value, "0x0p+0", underflow_status, Category::Zero),
+ (m_smallest_normalized, p_smallest_value, "-0x1p+23", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_smallest_value, "0x1p+23", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_smallest_normalized, "-0x1p+0", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_smallest_normalized, "0x1p+0", Status::OK, Category::Normal),
+ ];
+ let quad_special_cases = [ // row layout: (lhs, rhs, expected result as text, expected status, expected category, rounding mode passed to `div_r`)
+ (max_quad, n_min_quad, "-inf", overflow_status, Category::Infinity, Round::NearestTiesToEven),
+ (
+ max_quad,
+ n_min_quad,
+ "-0x1.ffffffffffffffffffffffffffffp+16383",
+ Status::INEXACT,
+ Category::Normal,
+ Round::TowardPositive,
+ ),
+ (max_quad, n_min_quad, "-inf", overflow_status, Category::Infinity, Round::TowardNegative),
+ (
+ max_quad,
+ n_min_quad,
+ "-0x1.ffffffffffffffffffffffffffffp+16383",
+ Status::INEXACT,
+ Category::Normal,
+ Round::TowardZero,
+ ),
+ (max_quad, n_min_quad, "-inf", overflow_status, Category::Infinity, Round::NearestTiesToAway),
+ (min_quad, max_quad, "0", underflow_status, Category::Zero, Round::NearestTiesToEven),
+ (
+ min_quad,
+ max_quad,
+ "0x0.0000000000000000000000000001p-16382",
+ underflow_status,
+ Category::Normal,
+ Round::TowardPositive,
+ ),
+ (min_quad, max_quad, "0", underflow_status, Category::Zero, Round::TowardNegative),
+ (min_quad, max_quad, "0", underflow_status, Category::Zero, Round::TowardZero),
+ (min_quad, max_quad, "0", underflow_status, Category::Zero, Round::NearestTiesToAway),
+ (n_min_quad, max_quad, "-0", underflow_status, Category::Zero, Round::NearestTiesToEven),
+ (n_min_quad, max_quad, "-0", underflow_status, Category::Zero, Round::TowardPositive),
+ (
+ n_min_quad,
+ max_quad,
+ "-0x0.0000000000000000000000000001p-16382",
+ underflow_status,
+ Category::Normal,
+ Round::TowardNegative,
+ ),
+ (n_min_quad, max_quad, "-0", underflow_status, Category::Zero, Round::TowardZero),
+ (n_min_quad, max_quad, "-0", underflow_status, Category::Zero, Round::NearestTiesToAway),
+ ];
+
+ for case @ &(x, y, e_result, e_status, e_category) in &single_special_cases { // `case` keeps the whole row for the failure message
+ let status;
+ let result = unpack!(status=, x / y); // NOTE(review): `unpack!` is defined outside this view; it appears to store the operation's status in `status` and yield the value
+ assert_eq!(e_status, status);
+ assert_eq!(e_category, result.category());
+ assert!(result.bitwise_eq(e_result.parse::<Single>().unwrap()), "result = {result:?}, case = {case:?}"); // expectation is parsed from its text form, then compared with `bitwise_eq`
+ }
+ for case @ &(x, y, e_result, e_status, e_category, round) in &quad_special_cases { // same checks as above, but dividing with an explicit rounding mode
+ let status;
+ let result = unpack!(status=, x.div_r(y, round));
+ assert_eq!(e_status, status);
+ assert_eq!(e_category, result.category());
+ assert!(result.bitwise_eq(e_result.parse::<Quad>().unwrap()), "result = {result:?}, case = {case:?}");
+ }
+}
+
+#[test]
+fn operator_overloads() {
+    // Mostly a compile-time check that `+`, `-`, `*` and `/` exist for `Single`;
+    // the results are still compared against the expected values.
+    let parse = |text: &str| text.parse::<Single>().unwrap();
+    let unit = parse("0x1p+0");
+    let pair = parse("0x2p+0");
+
+    let sum = (unit + unit).value;
+    assert!(pair.bitwise_eq(sum));
+
+    let difference = (pair - unit).value;
+    assert!(unit.bitwise_eq(difference));
+
+    let product = (unit * pair).value;
+    assert!(pair.bitwise_eq(product));
+
+    let quotient = (pair / pair).value;
+    assert!(unit.bitwise_eq(quotient));
+}
+
+#[test]
+fn comparisons() {
+    // Operands in ascending order, bracketed by a negative and a positive NaN.
+    // `relations[i][j]` below is the expected result of comparing `vals[i]` with `vals[j]`.
+    let vals = [
+        /* MNan */ -Single::NAN,
+        /* MInf */ -Single::INFINITY,
+        /* MBig */ -Single::largest(),
+        /* MOne */ "-0x1p+0".parse::<Single>().unwrap(),
+        /* MZer */ -Single::ZERO,
+        /* PZer */ Single::ZERO,
+        /* POne */ "0x1p+0".parse::<Single>().unwrap(),
+        /* PBig */ Single::largest(),
+        /* PInf */ Single::INFINITY,
+        /* PNan */ Single::NAN,
+    ];
+
+    const LT: Option<Ordering> = Some(Ordering::Less);
+    const EQ: Option<Ordering> = Some(Ordering::Equal);
+    const GT: Option<Ordering> = Some(Ordering::Greater);
+    const UN: Option<Ordering> = None;
+
+    // HACK(eddyb) rustfmt is skipped because it formats the first row (MNan) differently.
+    #[rustfmt::skip]
+    let relations = [
+        //          -N  -I  -B  -1  -0  +0  +1  +B  +I  +N
+        /* MNan */ [UN, UN, UN, UN, UN, UN, UN, UN, UN, UN],
+        /* MInf */ [UN, EQ, LT, LT, LT, LT, LT, LT, LT, UN],
+        /* MBig */ [UN, GT, EQ, LT, LT, LT, LT, LT, LT, UN],
+        /* MOne */ [UN, GT, GT, EQ, LT, LT, LT, LT, LT, UN],
+        /* MZer */ [UN, GT, GT, GT, EQ, EQ, LT, LT, LT, UN],
+        /* PZer */ [UN, GT, GT, GT, EQ, EQ, LT, LT, LT, UN],
+        /* POne */ [UN, GT, GT, GT, GT, GT, EQ, LT, LT, UN],
+        /* PBig */ [UN, GT, GT, GT, GT, GT, GT, EQ, LT, UN],
+        /* PInf */ [UN, GT, GT, GT, GT, GT, GT, GT, EQ, UN],
+        /* PNan */ [UN, UN, UN, UN, UN, UN, UN, UN, UN, UN],
+    ];
+
+    for (&lhs, row) in vals.iter().zip(&relations) {
+        for (&rhs, &expected) in vals.iter().zip(row) {
+            let relation = lhs.partial_cmp(&rhs);
+            assert_eq!(relation, expected);
+
+            // NOTE(eddyb) the operator checks below were carried over from the C++
+            // code, which had no equivalent of `Option<Ordering>`; in Rust they
+            // should be fully implied by the `assert_eq!` above.
+            //
+            // Expected outcome of [==, !=, <, >, <=, >=] for each possible relation.
+            let operator_truth = match relation {
+                LT => [false, true, true, false, true, false],
+                EQ => [true, false, false, false, true, true],
+                GT => [false, true, false, true, false, true],
+                UN => [false, true, false, false, false, false],
+            };
+            let observed = [lhs == rhs, lhs != rhs, lhs < rhs, lhs > rhs, lhs <= rhs, lhs >= rhs];
+            assert_eq!(observed, operator_truth);
+        }
+    }
+}
+
+#[test]
+fn abs() {
+    // Each entry pairs a non-negative `Single` with its negation; `abs` must map
+    // both members of a pair onto the non-negative one, bit for bit.
+    let sign_pairs = [
+        (Single::INFINITY, -Single::INFINITY),
+        (Single::ZERO, -Single::ZERO),
+        (Single::NAN, -Single::NAN),
+        (Single::snan(None), -Single::snan(None)),
+        ("0x1p+0".parse::<Single>().unwrap(), "-0x1p+0".parse::<Single>().unwrap()),
+        (Single::largest(), -Single::largest()),
+        (Single::SMALLEST, -Single::SMALLEST),
+        (Single::smallest_normalized(), -Single::smallest_normalized()),
+    ];
+
+    for &(positive, negative) in &sign_pairs {
+        assert!(positive.bitwise_eq(positive.abs()));
+        assert!(positive.bitwise_eq(negative.abs()));
+    }
+}
+
+#[test]
+fn neg() {
+    // Unary minus must flip the sign bit and nothing else. Each entry pairs a
+    // value with its expected negation, and both directions are checked.
+    let sign_pairs = [
+        ("1.0".parse::<Single>().unwrap(), "-1.0".parse::<Single>().unwrap()),
+        (Single::ZERO, -Single::ZERO),
+        (Single::INFINITY, -Single::INFINITY),
+        (Single::NAN, -Single::NAN),
+    ];
+
+    for &(positive, negative) in &sign_pairs {
+        assert!(negative.bitwise_eq(-positive), "-({positive:?}) should be {negative:?}");
+        assert!(positive.bitwise_eq(-negative), "-({negative:?}) should be {positive:?}");
+    }
+}
+
+#[test]
+fn ilogb() {
+    // Checks `ilogb` on `Double` and `Single` values, including the reserved
+    // `IEK_*` results returned for infinities, zeros and NaNs.
+    let double = |text: &str| text.parse::<Double>().unwrap();
+    let single = |text: &str| text.parse::<Single>().unwrap();
+
+    assert_eq!(-1074, Double::SMALLEST.ilogb());
+    assert_eq!(-1074, (-Double::SMALLEST).ilogb());
+    for &(expected, text) in &[
+        (-1023, "0x1.ffffffffffffep-1024"),
+        (-1023, "0x1.ffffffffffffep-1023"),
+        (-1023, "-0x1.ffffffffffffep-1023"),
+        (-51, "0x1p-51"),
+        (-1023, "0x1.c60f120d9f87cp-1023"),
+        (-2, "0x0.ffffp-1"),
+        (-1023, "0x1.fffep-1023"),
+    ] {
+        assert_eq!(expected, double(text).ilogb());
+    }
+    assert_eq!(1023, Double::largest().ilogb());
+    assert_eq!(1023, (-Double::largest()).ilogb());
+
+    for &(expected, text) in &[(0, "0x1p+0"), (0, "-0x1p+0"), (42, "0x1p+42"), (-42, "0x1p-42")] {
+        assert_eq!(expected, single(text).ilogb());
+    }
+
+    // Non-finite and zero inputs yield the dedicated marker constants.
+    assert_eq!(IEK_INF, Single::INFINITY.ilogb());
+    assert_eq!(IEK_INF, (-Single::INFINITY).ilogb());
+    assert_eq!(IEK_ZERO, Single::ZERO.ilogb());
+    assert_eq!(IEK_ZERO, (-Single::ZERO).ilogb());
+    assert_eq!(IEK_NAN, Single::NAN.ilogb());
+    assert_eq!(IEK_NAN, Single::snan(None).ilogb());
+
+    // Extremes of the `Single` range, both signs.
+    assert_eq!(127, Single::largest().ilogb());
+    assert_eq!(127, (-Single::largest()).ilogb());
+
+    assert_eq!(-149, Single::SMALLEST.ilogb());
+    assert_eq!(-149, (-Single::SMALLEST).ilogb());
+    assert_eq!(-126, Single::smallest_normalized().ilogb());
+    assert_eq!(-126, (-Single::smallest_normalized()).ilogb());
+}
+
+#[test]
+fn scalbn() {
+    // Checks `scalbn` (adjusting a value's binary exponent) on `Single` and
+    // `Double`: ordinary values, special values, NaN quieting, and saturation to
+    // infinity / zero at both ends of the exponent range.
+    let single = |text: &str| text.parse::<Single>().unwrap();
+    let double = |text: &str| text.parse::<Double>().unwrap();
+
+    assert!(single("0x1p+0").bitwise_eq(single("0x1p+0").scalbn(0)));
+    assert!(single("0x1p+42").bitwise_eq(single("0x1p+0").scalbn(42)));
+    assert!(single("0x1p-42").bitwise_eq(single("0x1p+0").scalbn(-42)));
+
+    let p_inf = Single::INFINITY;
+    let m_inf = -Single::INFINITY;
+    let p_zero = Single::ZERO;
+    let m_zero = -Single::ZERO;
+    let p_qnan = Single::NAN;
+    let m_qnan = -Single::NAN;
+    let snan = Single::snan(None);
+
+    // Scaling by zero leaves infinities, zeros and quiet NaNs bit-identical.
+    assert!(p_inf.bitwise_eq(p_inf.scalbn(0)));
+    assert!(m_inf.bitwise_eq(m_inf.scalbn(0)));
+    assert!(p_zero.bitwise_eq(p_zero.scalbn(0)));
+    assert!(m_zero.bitwise_eq(m_zero.scalbn(0)));
+    assert!(p_qnan.bitwise_eq(p_qnan.scalbn(0)));
+    assert!(m_qnan.bitwise_eq(m_qnan.scalbn(0)));
+    // A signaling NaN comes back quiet, even for a zero shift.
+    assert!(!snan.scalbn(0).is_signaling());
+
+    let scalbn_snan = snan.scalbn(1);
+    assert!(scalbn_snan.is_nan() && !scalbn_snan.is_signaling());
+
+    // Make sure highest bit of payload is preserved.
+    let payload = (1 << 50) | (1 << 49) | (1234 << 32) | 1;
+
+    let snan_with_payload = Double::snan(Some(payload));
+    let quiet_payload = snan_with_payload.scalbn(1);
+    assert!(quiet_payload.is_nan() && !quiet_payload.is_signaling());
+    assert_eq!(payload, quiet_payload.to_bits() & ((1 << 51) - 1));
+
+    // `Single` overflow to infinity and underflow to (signed) zero.
+    assert!(p_inf.bitwise_eq(single("0x1p+0").scalbn(128)));
+    assert!(m_inf.bitwise_eq(single("-0x1p+0").scalbn(128)));
+    assert!(p_inf.bitwise_eq(single("0x1p+127").scalbn(1)));
+    assert!(p_zero.bitwise_eq(single("0x1p-127").scalbn(-127)));
+    assert!(m_zero.bitwise_eq(single("-0x1p-127").scalbn(-127)));
+    assert!(single("-0x1p-149").bitwise_eq(single("-0x1p-127").scalbn(-22)));
+    assert!(p_zero.bitwise_eq(single("0x1p-126").scalbn(-24)));
+
+    let smallest_f64 = Double::SMALLEST;
+    let neg_smallest_f64 = -Double::SMALLEST;
+
+    let largest_f64 = Double::largest();
+    let neg_largest_f64 = -Double::largest();
+
+    let largest_denormal_f64 = double("0x1.ffffffffffffep-1023");
+    let neg_largest_denormal_f64 = double("-0x1.ffffffffffffep-1023");
+
+    assert!(smallest_f64.bitwise_eq(double("0x1p-1074").scalbn(0)));
+    assert!(neg_smallest_f64.bitwise_eq(double("-0x1p-1074").scalbn(0)));
+
+    assert!(smallest_f64.scalbn(-2097).is_pos_zero());
+    assert!(smallest_f64.scalbn(-2098).is_pos_zero());
+    assert!(smallest_f64.scalbn(-2099).is_pos_zero());
+    assert!(double("0x1p+1022").bitwise_eq(smallest_f64.scalbn(2096)));
+    assert!(double("0x1p+1023").bitwise_eq(smallest_f64.scalbn(2097)));
+    assert!(smallest_f64.scalbn(2098).is_infinite());
+    assert!(smallest_f64.scalbn(2099).is_infinite());
+
+    // Test for integer overflows when adding to exponent.
+    assert!(smallest_f64.scalbn(-ExpInt::MAX).is_pos_zero());
+    assert!(largest_f64.scalbn(ExpInt::MAX).is_infinite());
+
+    assert!(largest_denormal_f64.bitwise_eq(largest_denormal_f64.scalbn(0)));
+    assert!(neg_largest_denormal_f64.bitwise_eq(neg_largest_denormal_f64.scalbn(0)));
+
+    assert!(double("0x1.ffffffffffffep-1022").bitwise_eq(largest_denormal_f64.scalbn(1)));
+    assert!(double("-0x1.ffffffffffffep-1021").bitwise_eq(neg_largest_denormal_f64.scalbn(2)));
+
+    assert!(double("0x1.ffffffffffffep+1").bitwise_eq(largest_denormal_f64.scalbn(1024)));
+    assert!(largest_denormal_f64.scalbn(-1023).is_pos_zero());
+    assert!(largest_denormal_f64.scalbn(-1024).is_pos_zero());
+    assert!(largest_denormal_f64.scalbn(-2048).is_pos_zero());
+    assert!(largest_denormal_f64.scalbn(2047).is_infinite());
+    assert!(largest_denormal_f64.scalbn(2098).is_infinite());
+    assert!(largest_denormal_f64.scalbn(2099).is_infinite());
+
+    assert!(double("0x1.ffffffffffffep-2").bitwise_eq(largest_denormal_f64.scalbn(1021)));
+    assert!(double("0x1.ffffffffffffep-1").bitwise_eq(largest_denormal_f64.scalbn(1022)));
+    assert!(double("0x1.ffffffffffffep+0").bitwise_eq(largest_denormal_f64.scalbn(1023)));
+    assert!(double("0x1.ffffffffffffep+1023").bitwise_eq(largest_denormal_f64.scalbn(2046)));
+    assert!(double("0x1p+974").bitwise_eq(smallest_f64.scalbn(2048)));
+
+    let random_denormal_f64 = double("0x1.c60f120d9f87cp+51");
+    assert!(double("0x1.c60f120d9f87cp-972").bitwise_eq(random_denormal_f64.scalbn(-1023)));
+    assert!(double("0x1.c60f120d9f87cp-1").bitwise_eq(random_denormal_f64.scalbn(-52)));
+    assert!(double("0x1.c60f120d9f87cp-2").bitwise_eq(random_denormal_f64.scalbn(-53)));
+    assert!(double("0x1.c60f120d9f87cp+0").bitwise_eq(random_denormal_f64.scalbn(-51)));
+
+    assert!(random_denormal_f64.scalbn(-2097).is_pos_zero());
+    assert!(random_denormal_f64.scalbn(-2090).is_pos_zero());
+
+    // Scaling the largest finite values down towards the bottom of the range.
+    assert!(double("-0x1p-1073").bitwise_eq(neg_largest_f64.scalbn(-2097)));
+
+    assert!(double("-0x1p-1024").bitwise_eq(neg_largest_f64.scalbn(-2048)));
+
+    assert!(double("0x1p-1073").bitwise_eq(largest_f64.scalbn(-2097)));
+
+    assert!(double("0x1p-1074").bitwise_eq(largest_f64.scalbn(-2098)));
+    assert!(double("-0x1p-1074").bitwise_eq(neg_largest_f64.scalbn(-2098)));
+    assert!(neg_largest_f64.scalbn(-2099).is_neg_zero());
+    assert!(largest_f64.scalbn(1).is_infinite());
+
+    assert!(double("0x1p+0").bitwise_eq(double("0x1p+52").scalbn(-52)));
+
+    assert!(double("0x1p-103").bitwise_eq(double("0x1p-51").scalbn(-52)));
+}
+
+#[test]
+fn frexp() {
+    // Checks, for zeros, finite values, infinities and NaNs, both the exponent that
+    // `frexp` writes through its out-parameter and the fraction it returns.
+    let dbl = |text: &str| text.parse::<Double>().unwrap();
+
+    // One out-parameter shared by every call below.
+    let mut exponent = 0;
+
+    // Signed zeros: exponent 0, fraction is a zero of the same sign.
+    let fraction = Double::ZERO.frexp(&mut exponent);
+    assert_eq!(0, exponent);
+    assert!(fraction.is_pos_zero());
+
+    let fraction = (-Double::ZERO).frexp(&mut exponent);
+    assert_eq!(0, exponent);
+    assert!(fraction.is_neg_zero());
+
+    // +/-1.0 is expected back as +/-0x1p-1 with exponent 1.
+    let fraction = Double::from_f64(1.0).frexp(&mut exponent);
+    assert_eq!(1, exponent);
+    assert!(dbl("0x1p-1").bitwise_eq(fraction));
+
+    let fraction = Double::from_f64(-1.0).frexp(&mut exponent);
+    assert_eq!(1, exponent);
+    assert!(dbl("-0x1p-1").bitwise_eq(fraction));
+
+    // Largest denormal, both signs.
+    let fraction = dbl("0x1.ffffffffffffep-1023").frexp(&mut exponent);
+    assert_eq!(-1022, exponent);
+    assert!(dbl("0x1.ffffffffffffep-1").bitwise_eq(fraction));
+
+    let fraction = dbl("-0x1.ffffffffffffep-1023").frexp(&mut exponent);
+    assert_eq!(-1022, exponent);
+    assert!(dbl("-0x1.ffffffffffffep-1").bitwise_eq(fraction));
+
+    // Smallest representable magnitude, both signs.
+    let fraction = Double::SMALLEST.frexp(&mut exponent);
+    assert_eq!(-1073, exponent);
+    assert!(dbl("0x1p-1").bitwise_eq(fraction));
+
+    let fraction = (-Double::SMALLEST).frexp(&mut exponent);
+    assert_eq!(-1073, exponent);
+    assert!(dbl("-0x1p-1").bitwise_eq(fraction));
+
+    // Largest finite magnitude, both signs.
+    let fraction = Double::largest().frexp(&mut exponent);
+    assert_eq!(1024, exponent);
+    assert!(dbl("0x1.fffffffffffffp-1").bitwise_eq(fraction));
+
+    let fraction = (-Double::largest()).frexp(&mut exponent);
+    assert_eq!(1024, exponent);
+    assert!(dbl("-0x1.fffffffffffffp-1").bitwise_eq(fraction));
+
+    // Infinities report IEK_INF and come back as infinities of the same sign.
+    let fraction = Double::INFINITY.frexp(&mut exponent);
+    assert_eq!(IEK_INF, exponent);
+    assert!(fraction.is_infinite() && !fraction.is_negative());
+
+    let fraction = (-Double::INFINITY).frexp(&mut exponent);
+    assert_eq!(IEK_INF, exponent);
+    assert!(fraction.is_infinite() && fraction.is_negative());
+
+    // Quiet NaNs report IEK_NAN and stay NaN.
+    let fraction = Double::NAN.frexp(&mut exponent);
+    assert_eq!(IEK_NAN, exponent);
+    assert!(fraction.is_nan());
+
+    let fraction = (-Double::NAN).frexp(&mut exponent);
+    assert_eq!(IEK_NAN, exponent);
+    assert!(fraction.is_nan());
+
+    // A signaling NaN must come back as a non-signaling NaN.
+    let fraction = Double::snan(None).frexp(&mut exponent);
+    assert_eq!(IEK_NAN, exponent);
+    assert!(fraction.is_nan() && !fraction.is_signaling());
+
+    // Make sure highest bit of payload is preserved.
+    let nan_payload = (1 << 50) | (1 << 49) | (1234 << 32) | 1;
+    let fraction = Double::snan(Some(nan_payload)).frexp(&mut exponent);
+    assert_eq!(IEK_NAN, exponent);
+    assert!(fraction.is_nan() && !fraction.is_signaling());
+    assert_eq!(nan_payload, fraction.to_bits() & ((1 << 51) - 1));
+
+    // Inputs written with a leading-zero significand or a non-trivial mantissa.
+    let fraction = dbl("0x0.ffffp-1").frexp(&mut exponent);
+    assert_eq!(-1, exponent);
+    assert!(dbl("0x1.fffep-1").bitwise_eq(fraction));
+
+    let fraction = dbl("0x1p-51").frexp(&mut exponent);
+    assert_eq!(-50, exponent);
+    assert!(dbl("0x1p-1").bitwise_eq(fraction));
+
+    let fraction = dbl("0x1.c60f120d9f87cp+51").frexp(&mut exponent);
+    assert_eq!(52, exponent);
+    assert!(dbl("0x1.c60f120d9f87cp-1").bitwise_eq(fraction));
+}
+
+#[test]
+fn modulo() {
+    // Every scenario evaluates `lhs % rhs` once and then inspects both the value and the
+    // status carried by the result.
+    let dbl = |text: &str| text.parse::<Double>().unwrap();
+
+    // 1.5 % 1.0
+    let (lhs, rhs) = (dbl("1.5"), dbl("1.0"));
+    let want = dbl("0.5");
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+
+    // 0.5 % 1.0
+    let (lhs, rhs) = (dbl("0.5"), dbl("1.0"));
+    let want = dbl("0.5");
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+
+    // 0.3 % 0.01
+    let lhs = dbl("0x1.3333333333333p-2"); // 0.3
+    let rhs = dbl("0x1.47ae147ae147bp-7"); // 0.01
+    let want = dbl("0x1.47ae147ae1471p-7"); // 0.009999999999999983
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+
+    // 2^64 % 1.5
+    let lhs = dbl("0x1p64"); // 1.8446744073709552e19
+    let rhs = dbl("1.5");
+    let want = dbl("1.0");
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+
+    // 2^1000 % 2^-1000
+    let (lhs, rhs) = (dbl("0x1p1000"), dbl("0x1p-1000"));
+    let want = dbl("0.0");
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+
+    // 0.0 % 1.0
+    let (lhs, rhs) = (dbl("0.0"), dbl("1.0"));
+    let want = dbl("0.0");
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+
+    // A zero divisor is expected to give NaN with INVALID_OP.
+    let (lhs, rhs) = (dbl("1.0"), dbl("0.0"));
+    let got = lhs % rhs;
+    assert!(got.value.is_nan());
+    assert_eq!(got.status, Status::INVALID_OP);
+
+    let (lhs, rhs) = (dbl("0.0"), dbl("0.0"));
+    let got = lhs % rhs;
+    assert!(got.value.is_nan());
+    assert_eq!(got.status, Status::INVALID_OP);
+
+    // An infinite dividend is expected to give NaN with INVALID_OP.
+    let (lhs, rhs) = (Double::INFINITY, dbl("1.0"));
+    let got = lhs % rhs;
+    assert!(got.value.is_nan());
+    assert_eq!(got.status, Status::INVALID_OP);
+
+    // Exact division of a negative dividend: the expected zero is negative for either
+    // divisor sign.
+    let (lhs, rhs) = (dbl("-4.0"), dbl("-2.0"));
+    let want = dbl("-0.0");
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+
+    let (lhs, rhs) = (dbl("-4.0"), dbl("2.0"));
+    let want = dbl("-0.0");
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+
+    // Test E4M3FN mod where the LHS exponent is maxExponent (8) and the RHS is
+    // the max value whose exponent is minExponent (-6). This requires special
+    // logic in the mod implementation to prevent overflow to NaN.
+    let lhs = "0x1p8".parse::<Float8E4M3FN>().unwrap(); // 256
+    let rhs = "0x1.ep-6".parse::<Float8E4M3FN>().unwrap(); // 0.029296875
+    let want = "0x1p-8".parse::<Float8E4M3FN>().unwrap(); // 0.00390625
+    let got = lhs % rhs;
+    assert!(got.value.bitwise_eq(want));
+    assert_eq!(got.status, Status::OK);
+}
+
+#[test]
+fn remainder() {
+ // Test special cases (infinities, zeros, NaNs, extreme magnitudes) against each other and against normal values using `ieee_rem`.
+
+ let p_inf = Single::INFINITY;
+ let m_inf = -Single::INFINITY;
+ let p_zero = Single::ZERO;
+ let m_zero = -Single::ZERO;
+ let qnan = Single::NAN;
+ let snan = "snan123".parse::<Single>().unwrap();
+ let p_normal_value = "0x1p+0".parse::<Single>().unwrap();
+ let m_normal_value = "-0x1p+0".parse::<Single>().unwrap();
+ let p_largest_value = Single::largest();
+ let m_largest_value = -Single::largest();
+ let p_smallest_value = Single::SMALLEST;
+ let m_smallest_value = -Single::SMALLEST;
+ let p_smallest_normalized = Single::smallest_normalized();
+ let m_smallest_normalized = -Single::smallest_normalized();
+ // Additional finite operands (p_val1..p_val6 and their negations) used by the second half of the table.
+ let p_val1 = "0x1.fffffep+126".parse::<Single>().unwrap();
+ let m_val1 = "-0x1.fffffep+126".parse::<Single>().unwrap();
+ let p_val2 = "0x1.fffffep-126".parse::<Single>().unwrap();
+ let m_val2 = "-0x1.fffffep-126".parse::<Single>().unwrap();
+ let p_val3 = "0x1p-125".parse::<Single>().unwrap();
+ let m_val3 = "-0x1p-125".parse::<Single>().unwrap();
+ let p_val4 = "0x1p+127".parse::<Single>().unwrap();
+ let m_val4 = "-0x1p+127".parse::<Single>().unwrap();
+ let p_val5 = "1.5".parse::<Single>().unwrap();
+ let m_val5 = "-1.5".parse::<Single>().unwrap();
+ let p_val6 = "1".parse::<Single>().unwrap();
+ let m_val6 = "-1".parse::<Single>().unwrap();
+ // Each row is (lhs, rhs, expected result as text, expected status, expected category); rows are grouped by lhs in declaration order.
+ let special_cases = [
+ (p_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, qnan, "nan", Status::OK, Category::NaN),
+ (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_normal_value, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, m_normal_value, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_largest_value, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, m_largest_value, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_smallest_value, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, m_smallest_value, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, p_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN),
+ (p_inf, m_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, qnan, "nan", Status::OK, Category::NaN),
+ (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_normal_value, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_normal_value, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_largest_value, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_largest_value, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_smallest_value, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_smallest_value, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, p_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN),
+ (m_inf, m_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN),
+ (p_zero, p_inf, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_inf, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_zero, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_zero, qnan, "nan", Status::OK, Category::NaN),
+ (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_zero, p_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (p_zero, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_inf, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_zero, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_zero, qnan, "nan", Status::OK, Category::NaN),
+ (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_zero, p_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_largest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_largest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (m_zero, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (qnan, p_inf, "nan", Status::OK, Category::NaN),
+ (qnan, m_inf, "nan", Status::OK, Category::NaN),
+ (qnan, p_zero, "nan", Status::OK, Category::NaN),
+ (qnan, m_zero, "nan", Status::OK, Category::NaN),
+ (qnan, qnan, "nan", Status::OK, Category::NaN),
+ (qnan, snan, "nan", Status::INVALID_OP, Category::NaN),
+ (qnan, p_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_normal_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_largest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_value, "nan", Status::OK, Category::NaN),
+ (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN),
+ (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_inf, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, m_inf, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_normal_value, p_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, m_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, p_largest_value, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, m_largest_value, "0x1p+0", Status::OK, Category::Normal),
+ (p_normal_value, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (p_normal_value, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, p_inf, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, m_inf, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_normal_value, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_normal_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_normal_value, p_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, m_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, p_largest_value, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, m_largest_value, "-0x1p+0", Status::OK, Category::Normal),
+ (m_normal_value, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (m_normal_value, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, p_inf, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, m_inf, "0x1.fffffep+127", Status::OK, Category::Normal),
+ (p_largest_value, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_largest_value, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_largest_value, p_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, m_normal_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, p_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, m_largest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (p_largest_value, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, p_inf, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, m_inf, "-0x1.fffffep+127", Status::OK, Category::Normal),
+ (m_largest_value, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_largest_value, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_largest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_largest_value, p_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, m_normal_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, p_largest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, m_largest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (m_largest_value, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, p_inf, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, m_inf, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_smallest_value, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_value, p_normal_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, m_normal_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, p_largest_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, m_largest_value, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_value, p_smallest_normalized, "0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_value, m_smallest_normalized, "0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, p_inf, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, m_inf, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_smallest_value, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_smallest_value, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_value, p_normal_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, m_normal_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, p_largest_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, m_largest_value, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_value, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_value, p_smallest_normalized, "-0x1p-149", Status::OK, Category::Normal),
+ (m_smallest_value, m_smallest_normalized, "-0x1p-149", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_inf, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_inf, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_smallest_normalized, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (p_smallest_normalized, p_normal_value, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_normal_value, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_largest_value, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, m_largest_value, "0x1p-126", Status::OK, Category::Normal),
+ (p_smallest_normalized, p_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_normalized, m_smallest_value, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_normalized, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (p_smallest_normalized, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, p_inf, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_inf, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_smallest_normalized, m_zero, "nan", Status::INVALID_OP, Category::NaN),
+ (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN),
+ (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN),
+ (m_smallest_normalized, p_normal_value, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_normal_value, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_largest_value, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, m_largest_value, "-0x1p-126", Status::OK, Category::Normal),
+ (m_smallest_normalized, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (m_smallest_normalized, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero),
+ (p_val1, p_val1, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, m_val1, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, p_val2, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, m_val2, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, p_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, m_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, p_val4, "-0x1p+103", Status::OK, Category::Normal),
+ (p_val1, m_val4, "-0x1p+103", Status::OK, Category::Normal),
+ (p_val1, p_val5, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, m_val5, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, p_val6, "0x0p+0", Status::OK, Category::Zero),
+ (p_val1, m_val6, "0x0p+0", Status::OK, Category::Zero),
+ (m_val1, p_val1, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, m_val1, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, p_val2, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, m_val2, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, p_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, m_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, p_val4, "0x1p+103", Status::OK, Category::Normal),
+ (m_val1, m_val4, "0x1p+103", Status::OK, Category::Normal),
+ (m_val1, p_val5, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, m_val5, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, p_val6, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val1, m_val6, "-0x0p+0", Status::OK, Category::Zero),
+ (p_val2, p_val1, "0x1.fffffep-126", Status::OK, Category::Normal),
+ (p_val2, m_val1, "0x1.fffffep-126", Status::OK, Category::Normal),
+ (p_val2, p_val2, "0x0p+0", Status::OK, Category::Zero),
+ (p_val2, m_val2, "0x0p+0", Status::OK, Category::Zero),
+ (p_val2, p_val3, "-0x0.000002p-126", Status::OK, Category::Normal),
+ (p_val2, m_val3, "-0x0.000002p-126", Status::OK, Category::Normal),
+ (p_val2, p_val4, "0x1.fffffep-126", Status::OK, Category::Normal),
+ (p_val2, m_val4, "0x1.fffffep-126", Status::OK, Category::Normal),
+ (p_val2, p_val5, "0x1.fffffep-126", Status::OK, Category::Normal),
+ (p_val2, m_val5, "0x1.fffffep-126", Status::OK, Category::Normal),
+ (p_val2, p_val6, "0x1.fffffep-126", Status::OK, Category::Normal),
+ (p_val2, m_val6, "0x1.fffffep-126", Status::OK, Category::Normal),
+ (m_val2, p_val1, "-0x1.fffffep-126", Status::OK, Category::Normal),
+ (m_val2, m_val1, "-0x1.fffffep-126", Status::OK, Category::Normal),
+ (m_val2, p_val2, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val2, m_val2, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val2, p_val3, "0x0.000002p-126", Status::OK, Category::Normal),
+ (m_val2, m_val3, "0x0.000002p-126", Status::OK, Category::Normal),
+ (m_val2, p_val4, "-0x1.fffffep-126", Status::OK, Category::Normal),
+ (m_val2, m_val4, "-0x1.fffffep-126", Status::OK, Category::Normal),
+ (m_val2, p_val5, "-0x1.fffffep-126", Status::OK, Category::Normal),
+ (m_val2, m_val5, "-0x1.fffffep-126", Status::OK, Category::Normal),
+ (m_val2, p_val6, "-0x1.fffffep-126", Status::OK, Category::Normal),
+ (m_val2, m_val6, "-0x1.fffffep-126", Status::OK, Category::Normal),
+ (p_val3, p_val1, "0x1p-125", Status::OK, Category::Normal),
+ (p_val3, m_val1, "0x1p-125", Status::OK, Category::Normal),
+ (p_val3, p_val2, "0x0.000002p-126", Status::OK, Category::Normal),
+ (p_val3, m_val2, "0x0.000002p-126", Status::OK, Category::Normal),
+ (p_val3, p_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val3, m_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val3, p_val4, "0x1p-125", Status::OK, Category::Normal),
+ (p_val3, m_val4, "0x1p-125", Status::OK, Category::Normal),
+ (p_val3, p_val5, "0x1p-125", Status::OK, Category::Normal),
+ (p_val3, m_val5, "0x1p-125", Status::OK, Category::Normal),
+ (p_val3, p_val6, "0x1p-125", Status::OK, Category::Normal),
+ (p_val3, m_val6, "0x1p-125", Status::OK, Category::Normal),
+ (m_val3, p_val1, "-0x1p-125", Status::OK, Category::Normal),
+ (m_val3, m_val1, "-0x1p-125", Status::OK, Category::Normal),
+ (m_val3, p_val2, "-0x0.000002p-126", Status::OK, Category::Normal),
+ (m_val3, m_val2, "-0x0.000002p-126", Status::OK, Category::Normal),
+ (m_val3, p_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val3, m_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val3, p_val4, "-0x1p-125", Status::OK, Category::Normal),
+ (m_val3, m_val4, "-0x1p-125", Status::OK, Category::Normal),
+ (m_val3, p_val5, "-0x1p-125", Status::OK, Category::Normal),
+ (m_val3, m_val5, "-0x1p-125", Status::OK, Category::Normal),
+ (m_val3, p_val6, "-0x1p-125", Status::OK, Category::Normal),
+ (m_val3, m_val6, "-0x1p-125", Status::OK, Category::Normal),
+ (p_val4, p_val1, "0x1p+103", Status::OK, Category::Normal),
+ (p_val4, m_val1, "0x1p+103", Status::OK, Category::Normal),
+ (p_val4, p_val2, "0x0.002p-126", Status::OK, Category::Normal),
+ (p_val4, m_val2, "0x0.002p-126", Status::OK, Category::Normal),
+ (p_val4, p_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val4, m_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val4, p_val4, "0x0p+0", Status::OK, Category::Zero),
+ (p_val4, m_val4, "0x0p+0", Status::OK, Category::Zero),
+ (p_val4, p_val5, "0.5", Status::OK, Category::Normal),
+ (p_val4, m_val5, "0.5", Status::OK, Category::Normal),
+ (p_val4, p_val6, "0x0p+0", Status::OK, Category::Zero),
+ (p_val4, m_val6, "0x0p+0", Status::OK, Category::Zero),
+ (m_val4, p_val1, "-0x1p+103", Status::OK, Category::Normal),
+ (m_val4, m_val1, "-0x1p+103", Status::OK, Category::Normal),
+ (m_val4, p_val2, "-0x0.002p-126", Status::OK, Category::Normal),
+ (m_val4, m_val2, "-0x0.002p-126", Status::OK, Category::Normal),
+ (m_val4, p_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val4, m_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val4, p_val4, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val4, m_val4, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val4, p_val5, "-0.5", Status::OK, Category::Normal),
+ (m_val4, m_val5, "-0.5", Status::OK, Category::Normal),
+ (m_val4, p_val6, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val4, m_val6, "-0x0p+0", Status::OK, Category::Zero),
+ (p_val5, p_val1, "1.5", Status::OK, Category::Normal),
+ (p_val5, m_val1, "1.5", Status::OK, Category::Normal),
+ (p_val5, p_val2, "0x0.00006p-126", Status::OK, Category::Normal),
+ (p_val5, m_val2, "0x0.00006p-126", Status::OK, Category::Normal),
+ (p_val5, p_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val5, m_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val5, p_val4, "1.5", Status::OK, Category::Normal),
+ (p_val5, m_val4, "1.5", Status::OK, Category::Normal),
+ (p_val5, p_val5, "0x0p+0", Status::OK, Category::Zero),
+ (p_val5, m_val5, "0x0p+0", Status::OK, Category::Zero),
+ (p_val5, p_val6, "-0.5", Status::OK, Category::Normal),
+ (p_val5, m_val6, "-0.5", Status::OK, Category::Normal),
+ (m_val5, p_val1, "-1.5", Status::OK, Category::Normal),
+ (m_val5, m_val1, "-1.5", Status::OK, Category::Normal),
+ (m_val5, p_val2, "-0x0.00006p-126", Status::OK, Category::Normal),
+ (m_val5, m_val2, "-0x0.00006p-126", Status::OK, Category::Normal),
+ (m_val5, p_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val5, m_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val5, p_val4, "-1.5", Status::OK, Category::Normal),
+ (m_val5, m_val4, "-1.5", Status::OK, Category::Normal),
+ (m_val5, p_val5, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val5, m_val5, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val5, p_val6, "0.5", Status::OK, Category::Normal),
+ (m_val5, m_val6, "0.5", Status::OK, Category::Normal),
+ (p_val6, p_val1, "0x1p+0", Status::OK, Category::Normal),
+ (p_val6, m_val1, "0x1p+0", Status::OK, Category::Normal),
+ (p_val6, p_val2, "0x0.00004p-126", Status::OK, Category::Normal),
+ (p_val6, m_val2, "0x0.00004p-126", Status::OK, Category::Normal),
+ (p_val6, p_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val6, m_val3, "0x0p+0", Status::OK, Category::Zero),
+ (p_val6, p_val4, "0x1p+0", Status::OK, Category::Normal),
+ (p_val6, m_val4, "0x1p+0", Status::OK, Category::Normal),
+ (p_val6, p_val5, "-0.5", Status::OK, Category::Normal),
+ (p_val6, m_val5, "-0.5", Status::OK, Category::Normal),
+ (p_val6, p_val6, "0x0p+0", Status::OK, Category::Zero),
+ (p_val6, m_val6, "0x0p+0", Status::OK, Category::Zero),
+ (m_val6, p_val1, "-0x1p+0", Status::OK, Category::Normal),
+ (m_val6, m_val1, "-0x1p+0", Status::OK, Category::Normal),
+ (m_val6, p_val2, "-0x0.00004p-126", Status::OK, Category::Normal),
+ (m_val6, m_val2, "-0x0.00004p-126", Status::OK, Category::Normal),
+ (m_val6, p_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val6, m_val3, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val6, p_val4, "-0x1p+0", Status::OK, Category::Normal),
+ (m_val6, m_val4, "-0x1p+0", Status::OK, Category::Normal),
+ (m_val6, p_val5, "0.5", Status::OK, Category::Normal),
+ (m_val6, m_val5, "0.5", Status::OK, Category::Normal),
+ (m_val6, p_val6, "-0x0p+0", Status::OK, Category::Zero),
+ (m_val6, m_val6, "-0x0p+0", Status::OK, Category::Zero),
+ ];
+ // Evaluate `x.ieee_rem(y)` for every row and compare status, category and exact bits against the row's expectation.
+ for case @ &(x, y, e_result, e_status, e_category) in &special_cases {
+ let status;
+ let result = unpack!(status=, x.ieee_rem(y));
+ assert_eq!(e_status, status);
+ assert_eq!(e_category, result.category());
+ assert!(result.bitwise_eq(e_result.parse::<Single>().unwrap()), "result = {result:?}, case = {case:?}");
+ }
+ // Individual `ieee_rem` checks on Double; note that two of the expected remainders are negative although both operands are positive.
+ let mut status;
+ {
+ let f1 = "0x1.3333333333333p-2".parse::<Double>().unwrap(); // 0.3
+ let f2 = "0x1.47ae147ae147bp-7".parse::<Double>().unwrap(); // 0.01
+ let expected = "-0x1.4p-56".parse::<Double>().unwrap();
+ assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected));
+ assert_eq!(status, Status::OK);
+ }
+ {
+ let f1 = "0x1p64".parse::<Double>().unwrap(); // 1.8446744073709552e19
+ let f2 = "1.5".parse::<Double>().unwrap();
+ let expected = "-0.5".parse::<Double>().unwrap();
+ assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected));
+ assert_eq!(status, Status::OK);
+ }
+ {
+ let f1 = "0x1p1000".parse::<Double>().unwrap();
+ let f2 = "0x1p-1000".parse::<Double>().unwrap();
+ let expected = "0.0".parse::<Double>().unwrap();
+ assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected));
+ assert_eq!(status, Status::OK);
+ }
+ {
+ let f1 = Double::INFINITY;
+ let f2 = "1.0".parse::<Double>().unwrap();
+ assert!(unpack!(status=, f1.ieee_rem(f2)).is_nan());
+ assert_eq!(status, Status::INVALID_OP);
+ }
+ {
+ let f1 = "-4.0".parse::<Double>().unwrap();
+ let f2 = "-2.0".parse::<Double>().unwrap();
+ let expected = "-0.0".parse::<Double>().unwrap();
+ assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected));
+ assert_eq!(status, Status::OK);
+ }
+ {
+ let f1 = "-4.0".parse::<Double>().unwrap();
+ let f2 = "2.0".parse::<Double>().unwrap();
+ let expected = "-0.0".parse::<Double>().unwrap();
+ assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected));
+ assert_eq!(status, Status::OK);
+ }
+}
+
+#[test]
+fn x87_largest() {
+    // Whatever `largest()` builds for the x87 extended format must be
+    // recognised as the largest value by `is_largest()`.
+    let max = X87DoubleExtended::largest();
+    assert!(max.is_largest());
+}
+
+#[test]
+fn x87_next() {
+    // Stepping up from -1.0 must produce a value whose `ilogb` is -1.
+    let minus_one = "-1.0".parse::<X87DoubleExtended>().unwrap();
+    let above = minus_one.next_up().value;
+    assert_eq!(above.ilogb(), -1);
+}
+
+#[test]
+fn convert_e4m3fn_to_e5m2() {
+    // Each row: E4M3FN input literal, expected E5M2 value (read back as `f32`),
+    // whether the conversion must report lost information, expected status.
+    let numeric_cases = [
+        ("1.0", 1.0_f32, false, Status::OK),
+        ("0.0", 0.0, false, Status::OK),
+        // 1.125 is expected to come out as 0x1.0p0.
+        ("0x1.2p0", 1.0, true, Status::INEXACT),
+        // 1.375 is expected to come out as 0x1.8p0.
+        ("0x1.6p0", 1.5, true, Status::INEXACT),
+        // Convert E4M3 denormal to E5M2 normal. Should not be truncated, despite
+        // the destination format having one fewer significand bit
+        // (expected value is 0x1.Cp-7).
+        ("0x1.Cp-7", 0.013671875, false, Status::OK),
+    ];
+
+    // A single flag is reused across all conversions.
+    let mut loses_info = false;
+
+    for &(input, want_value, want_lossy, want_status) in &numeric_cases {
+        let source = input.parse::<Float8E4M3FN>().unwrap();
+        let status;
+        let converted: Float8E5M2 = unpack!(status=, source.convert(&mut loses_info));
+        assert_eq!(want_value, converted.to_f32());
+        assert_eq!(want_lossy, loses_info);
+        assert_eq!(status, want_status);
+    }
+
+    // Test convert from NaN
+    let source = "nan".parse::<Float8E4M3FN>().unwrap();
+    let status;
+    let converted: Float8E5M2 = unpack!(status=, source.convert(&mut loses_info));
+    assert!(converted.to_f32().is_nan());
+    assert!(!loses_info);
+    assert_eq!(status, Status::OK);
+}
+
+#[test]
+fn convert_e5m2_to_e4m3fn() {
+    let underflow = Status::UNDERFLOW | Status::INEXACT;
+
+    // Each row: E5M2 input literal, expected E4M3FN value read back as `f32`
+    // (`None` means "must be NaN"), expected `loses_info`, expected status.
+    let numeric_cases = [
+        ("1.0", Some(1.0_f32), false, Status::OK),
+        ("0.0", Some(0.0), false, Status::OK),
+        // 448 (0x1.Cp8) is expected to convert unchanged.
+        ("0x1.Cp8", Some(448.0), false, Status::OK),
+        // Test overflow: 512 becomes NaN.
+        ("0x1.0p9", None, true, Status::OVERFLOW | Status::INEXACT),
+        // Test underflow
+        ("0x1.0p-10", Some(0.0), true, underflow),
+        // Test rounding up to smallest denormal number (0x1.0p-9)
+        ("0x1.8p-10", Some(0.001953125), true, underflow),
+        // Testing inexact rounding to denormal number (0x1.0p-8)
+        ("0x1.8p-9", Some(0.00390625), true, underflow),
+    ];
+
+    // A single flag is reused across all conversions.
+    let mut loses_info = false;
+
+    for &(input, want_value, want_lossy, want_status) in &numeric_cases {
+        let source = input.parse::<Float8E5M2>().unwrap();
+        let status;
+        let converted: Float8E4M3FN = unpack!(status=, source.convert(&mut loses_info));
+        match want_value {
+            Some(value) => assert_eq!(value, converted.to_f32()),
+            None => assert!(converted.to_f32().is_nan()),
+        }
+        assert_eq!(want_lossy, loses_info);
+        assert_eq!(status, want_status);
+    }
+
+    let nan = "nan".parse::<Float8E4M3FN>().unwrap();
+
+    // Inf, quiet NaN and signaling NaN must all convert to the same NaN bit
+    // pattern as parsing "nan" directly, with `loses_info` set; only the
+    // reported status differs.
+    let non_finite_cases = [
+        // Testing convert from Inf
+        ("inf", Status::INEXACT),
+        // Testing convert from quiet NaN
+        ("nan", Status::OK),
+        // Testing convert from signaling NaN
+        ("snan", Status::INVALID_OP),
+    ];
+
+    for &(input, want_status) in &non_finite_cases {
+        let source = input.parse::<Float8E5M2>().unwrap();
+        let status;
+        let converted: Float8E4M3FN = unpack!(status=, source.convert(&mut loses_info));
+        assert!(converted.to_f32().is_nan());
+        assert!(loses_info);
+        assert_eq!(status, want_status);
+        assert!(converted.bitwise_eq(nan));
+    }
+}
+
+#[test]
+fn float8e4m3fn_infinity() {
+    // The `INFINITY` constant of this format must classify as NaN and must
+    // not classify as infinite.
+    let pseudo_inf = Float8E4M3FN::INFINITY;
+    assert!(pseudo_inf.is_nan());
+    assert!(!pseudo_inf.is_infinite());
+}
+
+#[test]
+fn float8e4m3fn_from_string() {
+    let parse = |text: &str| text.parse::<Float8E4M3FN>().unwrap();
+
+    // "448" is exactly representable; "464" rounds down to the maximum value.
+    for &text in &["448", "464"] {
+        assert_eq!(448.0, parse(text).to_f64());
+    }
+
+    // "465" rounds up, causing overflow to NaN; "480" overflows without
+    // rounding; "inf" is converted to NaN; "nan" stays NaN.
+    for &text in &["465", "480", "inf", "nan"] {
+        assert!(parse(text).is_nan());
+    }
+}
+
+#[test]
+fn float8e4m3fn_add() {
+    let parse = |text: &str| text.parse::<Float8E4M3FN>().unwrap();
+
+    // Shorthands for the values that repeat in the table below.
+    let nearest = Round::NearestTiesToEven;
+    let overflow = Status::OVERFLOW | Status::INEXACT;
+    let max = parse("448");
+
+    // Test addition operations involving NaN, overflow, and the max E4M3
+    // value (448) because E4M3 differs from IEEE-754 types in these regards.
+    //
+    // Columns: lhs, rhs, expected result (as text), expected status,
+    // expected category, rounding mode.
+    let cases = [
+        (max, parse("16"), "448", Status::INEXACT, Category::Normal, nearest),
+        (max, parse("18"), "NaN", overflow, Category::NaN, nearest),
+        (max, parse("32"), "NaN", overflow, Category::NaN, nearest),
+        (parse("-448"), parse("-32"), "-NaN", overflow, Category::NaN, nearest),
+        (Float8E4M3FN::NAN, parse("-448"), "NaN", Status::OK, Category::NaN, nearest),
+        (max, parse("-32"), "416", Status::OK, Category::Normal, nearest),
+        (max, parse("0"), "448", Status::OK, Category::Normal, nearest),
+        (max, parse("32"), "448", Status::INEXACT, Category::Normal, Round::TowardZero),
+        (max, parse("448"), "448", Status::INEXACT, Category::Normal, Round::TowardZero),
+    ];
+
+    for case in &cases {
+        let &(lhs, rhs, want_text, want_status, want_category, rounding) = case;
+        let status;
+        let result = unpack!(status=, lhs.add_r(rhs, rounding));
+        assert_eq!(want_status, status);
+        assert_eq!(want_category, result.category());
+        assert!(result.bitwise_eq(parse(want_text)), "result = {result:?}, case = {case:?}");
+    }
+}
+
+#[test]
+fn float8e4m3fn_divide_by_zero() {
+    // 1 / 0 must produce NaN and report DIV_BY_ZERO.
+    let one = "1".parse::<Float8E4M3FN>().unwrap();
+    let zero = "0".parse::<Float8E4M3FN>().unwrap();
+
+    let status;
+    let quotient = unpack!(status=, one / zero);
+    assert!(quotient.is_nan());
+    assert_eq!(status, Status::DIV_BY_ZERO);
+}
+
+#[test]
+fn float8e4m3fn_next() {
+    // Checks `next_up` / `next_down` against the raw bit patterns of the format:
+    // for same-signed neighbours, stepping away from zero increments the bit
+    // pattern and stepping towards zero decrements it.
+    let mut status;
+
+    // nextUp on positive numbers (bit patterns 0..=126; the last step lands on
+    // pattern 127).
+    for i in 0..127 {
+        let test = Float8E4M3FN::from_bits(i);
+        let expected = Float8E4M3FN::from_bits(i + 1);
+        assert!(unpack!(status=, test.next_up()).bitwise_eq(expected));
+        assert_eq!(status, Status::OK);
+    }
+
+    // nextUp on negative zero
+    let test = -Float8E4M3FN::ZERO;
+    let expected = Float8E4M3FN::SMALLEST;
+    assert!(unpack!(status=, test.next_up()).bitwise_eq(expected));
+    assert_eq!(status, Status::OK);
+
+    // nextUp on negative nonzero numbers
+    for i in 129..255 {
+        let test = Float8E4M3FN::from_bits(i);
+        let expected = Float8E4M3FN::from_bits(i - 1);
+        assert!(unpack!(status=, test.next_up()).bitwise_eq(expected));
+        assert_eq!(status, Status::OK);
+    }
+
+    // nextUp on NaN
+    let test = Float8E4M3FN::qnan(None);
+    let expected = Float8E4M3FN::qnan(None);
+    assert!(unpack!(status=, test.next_up()).bitwise_eq(expected));
+    assert_eq!(status, Status::OK);
+
+    // nextDown on positive nonzero finite numbers
+    for i in 1..127 {
+        let test = Float8E4M3FN::from_bits(i);
+        let expected = Float8E4M3FN::from_bits(i - 1);
+        assert!(unpack!(status=, test.next_down()).bitwise_eq(expected));
+        assert_eq!(status, Status::OK);
+    }
+
+    // nextDown on positive zero.
+    // This previously started from `-ZERO`, which the `128..255` loop below
+    // already covers (bit pattern 128), leaving +0 untested.
+    let test = Float8E4M3FN::ZERO;
+    let expected = -Float8E4M3FN::SMALLEST;
+    assert!(unpack!(status=, test.next_down()).bitwise_eq(expected));
+    assert_eq!(status, Status::OK);
+
+    // nextDown on negative finite numbers (includes negative zero, pattern 128)
+    for i in 128..255 {
+        let test = Float8E4M3FN::from_bits(i);
+        let expected = Float8E4M3FN::from_bits(i + 1);
+        assert!(unpack!(status=, test.next_down()).bitwise_eq(expected));
+        assert_eq!(status, Status::OK);
+    }
+
+    // nextDown on NaN
+    let test = Float8E4M3FN::qnan(None);
+    let expected = Float8E4M3FN::qnan(None);
+    assert!(unpack!(status=, test.next_down()).bitwise_eq(expected));
+    assert_eq!(status, Status::OK);
+}
+
+#[test]
+fn float8e4m3fn_exhaustive() {
+    // Walk every one of the 256 possible bit patterns.
+    for bits in 0..=u8::MAX {
+        let value = Float8E4M3FN::from_bits(u128::from(bits));
+
+        // isLargest: only patterns 126 and 254 qualify, with magnitude 448.
+        let expect_largest = matches!(bits, 126 | 254);
+        assert_eq!(expect_largest, value.is_largest());
+        if expect_largest {
+            assert_eq!(value.abs().to_f64(), 448.);
+        }
+
+        // isSmallest: only patterns 1 and 129 qualify, with magnitude 0x1p-9.
+        let expect_smallest = matches!(bits, 1 | 129);
+        assert_eq!(expect_smallest, value.is_smallest());
+        if expect_smallest {
+            assert_eq!(value.abs().to_f64(), /* 0x1p-9 */ 0.001953125);
+        }
+
+        // convert to BFloat: must always be exact and report no lost information.
+        let status;
+        let mut loses_info = false;
+        let widened: BFloat = unpack!(status=, value.convert(&mut loses_info));
+        assert_eq!(status, Status::OK);
+        assert!(!loses_info);
+        if matches!(bits, 127 | 255) {
+            // The two NaN patterns stay NaN.
+            assert!(widened.is_nan());
+        } else {
+            assert_eq!(value.to_f32(), widened.to_f32());
+        }
+
+        // bitcastToAPInt: `to_bits` returns the pattern we started from.
+        assert_eq!(u128::from(bits), value.to_bits());
+    }
+}
+
+#[test]
+fn float8e4m3fn_exhaustive_pair() {
+    // For every ordered pair of Float8E4M3FN bit patterns, each arithmetic
+    // operation done directly in Float8E4M3FN must agree bit-for-bit with the
+    // same operation done in `Half` and then converted back.
+    for i in 0..=u8::MAX {
+        for j in 0..=u8::MAX {
+            let x = Float8E4M3FN::from_bits(u128::from(i));
+            let y = Float8E4M3FN::from_bits(u128::from(j));
+
+            // Widening the operands to `Half` must not lose information.
+            let mut loses_info = false;
+            let x16: Half = x.convert(&mut loses_info).value;
+            assert!(!loses_info);
+            let y16: Half = y.convert(&mut loses_info).value;
+            assert!(!loses_info);
+
+            // (narrow result, wide result) for each operation, in the order:
+            // add, subtract, multiply, divide, mod, IEEE remainder.
+            let results = [
+                ((x + y).value, (x16 + y16).value),
+                ((x - y).value, (x16 - y16).value),
+                ((x * y).value, (x16 * y16).value),
+                ((x / y).value, (x16 / y16).value),
+                ((x % y).value, (x16 % y16).value),
+                (x.ieee_rem(y).value, x16.ieee_rem(y16).value),
+            ];
+
+            for &(z, z16) in &results {
+                assert!(z.bitwise_eq(z16.convert(&mut loses_info).value), "i={i}, j={j}");
+            }
+        }
+    }
+}
+
+#[test]
+fn f8_to_string() {
+    // For every 8-bit float type: printing a value and parsing the text back
+    // must reproduce the same bits, and every NaN must print as "NaN".
+    fn check_round_trip<F: Float>() {
+        // Types of any other width are ignored.
+        if F::BITS == 8 {
+            // NOTE(eddyb) this was buggy upstream as it didn't test `F` but `Float8E5M2`,
+            // https://github.com/llvm/llvm-project/commit/6109e70c72fc5171d25c4467fc3cfe6eb2029f50
+            // fixed it upstream so we've effectively backported that commit.
+            for bits in 0..=u8::MAX {
+                let value = F::from_bits(u128::from(bits));
+                let text = value.to_string();
+
+                if value.is_nan() {
+                    assert_eq!(text, "NaN");
+                } else {
+                    assert!(value.bitwise_eq(text.parse::<F>().unwrap()));
+                }
+            }
+        }
+    }
+
+    for_each_float_type!(for<F: Float> check_round_trip::<F>());
+}
+
+// HACK(eddyb) equivalents of C's `{FLT,DBL}_TRUE_MIN` / C++'s
+// `std::numeric_limits<T>::denorm_min`, for the tests below, as Rust seems to
+// lack anything like them. Their bit patterns are thankfully trivial, the main
+// caveat being that they can't be `const` (subnormals and NaNs are banned from
+// CTFE `{to,from}_bits`).
+fn f64_smallest_subnormal() -> f64 {
+    // Bit pattern 1: only the lowest bit is set.
+    const LOWEST_BIT: u64 = 1;
+    f64::from_bits(LOWEST_BIT)
+}
+// `f32` value whose bit pattern is 1 (only the lowest bit set).
+fn f32_smallest_subnormal() -> f32 {
+    const LOWEST_BIT: u32 = 1;
+    f32::from_bits(LOWEST_BIT)
+}
+
+#[test]
+fn double_to_f64() {
+    // Signed zeros keep their sign through `to_f64`.
+    assert!(Double::from_f64(Double::from_f64(0.0).to_f64()).is_pos_zero());
+    assert!(Double::from_f64(Double::from_f64(-0.0).to_f64()).is_neg_zero());
+
+    // One, and the extremes of the normal range.
+    assert_eq!(1.0, Double::from_f64(1.0).to_f64());
+    assert_eq!(f64::MAX, Double::largest().to_f64());
+    assert_eq!(-f64::MAX, (-Double::largest()).to_f64());
+    assert_eq!(f64::MIN_POSITIVE, Double::smallest_normalized().to_f64());
+    assert_eq!(-f64::MIN_POSITIVE, (-Double::smallest_normalized()).to_f64());
+
+    // Smallest and largest denormals.
+    assert_eq!(f64_smallest_subnormal(), Double::SMALLEST.to_f64());
+    let largest_denormal = "0x0.FFFFFFFFFFFFFp-1022".parse::<Double>().unwrap();
+    assert_eq!(/*0x0.FFFFFFFFFFFFFp-1022*/ 2.225073858507201e-308, largest_denormal.to_f64());
+
+    // Infinities and NaN.
+    assert_eq!(f64::INFINITY, Double::INFINITY.to_f64());
+    assert_eq!(-f64::INFINITY, (-Double::INFINITY).to_f64());
+    assert!(Double::qnan(None).to_f64().is_nan());
+}
+
+#[test]
+fn single_to_f64() {
+    // Checks `Single::to_f64` on zeros, one, the extremes of the normal and
+    // denormal ranges, infinities and NaN.
+    let f_pos_zero = Single::from_f32(0.0);
+    assert!(Double::from_f64(f_pos_zero.to_f64()).is_pos_zero());
+    let f_neg_zero = Single::from_f32(-0.0);
+    assert!(Double::from_f64(f_neg_zero.to_f64()).is_neg_zero());
+
+    let f_one = Single::from_f32(1.0);
+    assert_eq!(1.0, f_one.to_f64());
+    let f_pos_largest = Single::largest();
+    assert_eq!(f32::MAX as f64, f_pos_largest.to_f64());
+    let f_neg_largest = -Single::largest();
+    assert_eq!(-f32::MAX as f64, f_neg_largest.to_f64());
+    let f_pos_smallest = Single::smallest_normalized();
+    assert_eq!(f32::MIN_POSITIVE as f64, f_pos_smallest.to_f64());
+    let f_neg_smallest = -Single::smallest_normalized();
+    assert_eq!(-f32::MIN_POSITIVE as f64, f_neg_smallest.to_f64());
+
+    let f_smallest_denorm = Single::SMALLEST;
+    assert_eq!(f32_smallest_subnormal() as f64, f_smallest_denorm.to_f64());
+    // Parse as `Single` (this was `Double`), so the check actually exercises
+    // the `Single` -> `f64` path on a denormal.
+    let f_largest_denorm = "0x0.FFFFFEp-126".parse::<Single>().unwrap();
+    assert_eq!(/*0x0.FFFFFEp-126*/ 1.1754942106924411e-38, f_largest_denorm.to_f64());
+
+    let f_pos_inf = Single::INFINITY;
+    assert_eq!(f64::INFINITY, f_pos_inf.to_f64());
+    let f_neg_inf = -Single::INFINITY;
+    assert_eq!(-f64::INFINITY, f_neg_inf.to_f64());
+    let f_qnan = Single::qnan(None);
+    assert!(f_qnan.to_f64().is_nan());
+
+    // `Half` signed zeros are also checked here.
+    let h_pos_zero = Half::ZERO;
+    assert!(Double::from_f64(h_pos_zero.to_f64()).is_pos_zero());
+    let h_neg_zero = -Half::ZERO;
+    assert!(Double::from_f64(h_neg_zero.to_f64()).is_neg_zero());
+}
+
+#[test]
+fn half_to_f64() {
+    // One, and the extremes of the normal range.
+    assert_eq!(1.0, "1.0".parse::<Half>().unwrap().to_f64());
+    assert_eq!(65504.0, Half::largest().to_f64());
+    assert_eq!(-65504.0, (-Half::largest()).to_f64());
+    assert_eq!(/*0x1.p-14*/ 6.103515625e-05, Half::smallest_normalized().to_f64());
+    assert_eq!(/*-0x1.p-14*/ -6.103515625e-05, (-Half::smallest_normalized()).to_f64());
+
+    // Smallest denormal, then 0x1.FFCp-14 (a value just below twice
+    // `smallest_normalized`).
+    assert_eq!(/*0x1.p-24*/ 5.960464477539063e-08, Half::SMALLEST.to_f64());
+    let just_below_p_minus_13 = "0x1.FFCp-14".parse::<Half>().unwrap();
+    assert_eq!(/*0x1.FFCp-14*/ 0.00012201070785522461, just_below_p_minus_13.to_f64());
+
+    // Infinities and NaN.
+    assert_eq!(f64::INFINITY, Half::INFINITY.to_f64());
+    assert_eq!(-f64::INFINITY, (-Half::INFINITY).to_f64());
+    assert!(Half::qnan(None).to_f64().is_nan());
+}
+
+#[test]
+fn bfloat_to_f64() {
+    // Checks `BFloat::to_f64` on zeros, one, the extremes of the normal and
+    // denormal ranges, infinities and NaN.
+
+    // Use `BFloat::ZERO` (this was `Half::ZERO`), so the zero checks exercise
+    // the type under test.
+    let b_pos_zero = BFloat::ZERO;
+    assert!(Double::from_f64(b_pos_zero.to_f64()).is_pos_zero());
+    let b_neg_zero = -BFloat::ZERO;
+    assert!(Double::from_f64(b_neg_zero.to_f64()).is_neg_zero());
+
+    let b_one = "1.0".parse::<BFloat>().unwrap();
+    assert_eq!(1.0, b_one.to_f64());
+    let b_pos_largest = BFloat::largest();
+    assert_eq!(/*0x1.FEp127*/ 3.3895313892515355e+38, b_pos_largest.to_f64());
+    let b_neg_largest = -BFloat::largest();
+    assert_eq!(/*-0x1.FEp127*/ -3.3895313892515355e+38, b_neg_largest.to_f64());
+    let b_pos_smallest = BFloat::smallest_normalized();
+    assert_eq!(/*0x1.p-126*/ 1.1754943508222875e-38, b_pos_smallest.to_f64());
+    let b_neg_smallest = -BFloat::smallest_normalized();
+    assert_eq!(/*-0x1.p-126*/ -1.1754943508222875e-38, b_neg_smallest.to_f64());
+
+    let b_smallest_denorm = BFloat::SMALLEST;
+    assert_eq!(/*0x1.p-133*/ 9.183549615799121e-41, b_smallest_denorm.to_f64());
+    let b_largest_denorm = "0x1.FCp-127".parse::<BFloat>().unwrap();
+    assert_eq!(/*0x1.FCp-127*/ 1.1663108012064884e-38, b_largest_denorm.to_f64());
+
+    let b_pos_inf = BFloat::INFINITY;
+    assert_eq!(f64::INFINITY, b_pos_inf.to_f64());
+    let b_neg_inf = -BFloat::INFINITY;
+    assert_eq!(-f64::INFINITY, b_neg_inf.to_f64());
+    let b_qnan = BFloat::qnan(None);
+    assert!(b_qnan.to_f64().is_nan());
+}
+
+#[test]
+fn float8e5m2_to_f64() {
+    // Small exact values.
+    assert_eq!(1.0, "1.0".parse::<Float8E5M2>().unwrap().to_f64());
+    assert_eq!(2.0, "2.0".parse::<Float8E5M2>().unwrap().to_f64());
+
+    // Extremes of the normal range.
+    assert_eq!(5.734400e+04, Float8E5M2::largest().to_f64());
+    assert_eq!(-5.734400e+04, (-Float8E5M2::largest()).to_f64());
+    assert_eq!(/* 0x1.p-14 */ 6.103515625e-05, Float8E5M2::smallest_normalized().to_f64());
+    assert_eq!(/* -0x1.p-14 */ -6.103515625e-05, (-Float8E5M2::smallest_normalized()).to_f64());
+
+    // `SMALLEST` must be a denormal with value 0x1p-16.
+    let tiny = Float8E5M2::SMALLEST;
+    assert!(tiny.is_denormal());
+    assert_eq!(/* 0x1p-16 */ 0.0000152587890625, tiny.to_f64());
+
+    // Infinities and NaN.
+    assert_eq!(f64::INFINITY, Float8E5M2::INFINITY.to_f64());
+    assert_eq!(-f64::INFINITY, (-Float8E5M2::INFINITY).to_f64());
+    assert!(Float8E5M2::qnan(None).to_f64().is_nan());
+}
+
+#[test]
+fn float8e4m3fn_to_f64() {
+    // Small exact values.
+    assert_eq!(1.0, "1.0".parse::<Float8E4M3FN>().unwrap().to_f64());
+    assert_eq!(2.0, "2.0".parse::<Float8E4M3FN>().unwrap().to_f64());
+
+    // Extremes of the normal range.
+    assert_eq!(448., Float8E4M3FN::largest().to_f64());
+    assert_eq!(-448., (-Float8E4M3FN::largest()).to_f64());
+    assert_eq!(/* 0x1.p-6 */ 0.015625, Float8E4M3FN::smallest_normalized().to_f64());
+    assert_eq!(/* -0x1.p-6 */ -0.015625, (-Float8E4M3FN::smallest_normalized()).to_f64());
+
+    // `SMALLEST` must be a denormal with value 0x1p-9.
+    let tiny = Float8E4M3FN::SMALLEST;
+    assert!(tiny.is_denormal());
+    assert_eq!(/* 0x1p-9 */ 0.001953125, tiny.to_f64());
+
+    // NaN stays NaN.
+    assert!(Float8E4M3FN::qnan(None).to_f64().is_nan());
+}
+
+#[test]
+fn single_to_f32() {
+    // Signed zeros keep their sign through `to_f32`.
+    assert!(Single::from_f32(Single::from_f32(0.0).to_f32()).is_pos_zero());
+    assert!(Single::from_f32(Single::from_f32(-0.0).to_f32()).is_neg_zero());
+
+    // One, and the extremes of the normal range.
+    assert_eq!(1.0, Single::from_f32(1.0).to_f32());
+    assert_eq!(f32::MAX, Single::largest().to_f32());
+    assert_eq!(-f32::MAX, (-Single::largest()).to_f32());
+    assert_eq!(f32::MIN_POSITIVE, Single::smallest_normalized().to_f32());
+    assert_eq!(-f32::MIN_POSITIVE, (-Single::smallest_normalized()).to_f32());
+
+    // Smallest denormal, then the literal 0x1.FFFFFEp-126.
+    assert_eq!(f32_smallest_subnormal(), Single::SMALLEST.to_f32());
+    let just_below_p_minus_125 = "0x1.FFFFFEp-126".parse::<Single>().unwrap();
+    assert_eq!(/*0x1.FFFFFEp-126*/ 2.3509885615147286e-38, just_below_p_minus_125.to_f32());
+
+    // Infinities and NaN.
+    assert_eq!(f32::INFINITY, Single::INFINITY.to_f32());
+    assert_eq!(-f32::INFINITY, (-Single::INFINITY).to_f32());
+    assert!(Single::qnan(None).to_f32().is_nan());
+}
+
+#[test]
+fn half_to_f32() {
+    // Signed zeros keep their sign through `to_f32`.
+    assert!(Single::from_f32(Half::ZERO.to_f32()).is_pos_zero());
+    assert!(Single::from_f32((-Half::ZERO).to_f32()).is_neg_zero());
+
+    // One, and the extremes of the normal range.
+    assert_eq!(1.0, "1.0".parse::<Half>().unwrap().to_f32());
+    assert_eq!(/*0x1.FFCp15*/ 65504.0, Half::largest().to_f32());
+    assert_eq!(/*-0x1.FFCp15*/ -65504.0, (-Half::largest()).to_f32());
+    assert_eq!(/*0x1.p-14*/ 6.103515625e-05, Half::smallest_normalized().to_f32());
+    assert_eq!(/*-0x1.p-14*/ -6.103515625e-05, (-Half::smallest_normalized()).to_f32());
+
+    // Smallest denormal, then the literal 0x1.FFCp-14.
+    assert_eq!(/*0x1.p-24*/ 5.960464477539063e-08, Half::SMALLEST.to_f32());
+    let just_below_p_minus_13 = "0x1.FFCp-14".parse::<Half>().unwrap();
+    assert_eq!(/*0x1.FFCp-14*/ 0.00012201070785522461, just_below_p_minus_13.to_f32());
+
+    // Infinities and NaN.
+    assert_eq!(f32::INFINITY, Half::INFINITY.to_f32());
+    assert_eq!(-f32::INFINITY, (-Half::INFINITY).to_f32());
+    assert!(Half::qnan(None).to_f32().is_nan());
+}
+
+#[test]
+fn bfloat_to_f32() {
+    // Signed zeros keep their sign through `to_f32`.
+    assert!(Single::from_f32(BFloat::ZERO.to_f32()).is_pos_zero());
+    assert!(Single::from_f32((-BFloat::ZERO).to_f32()).is_neg_zero());
+
+    // One, and the extremes of the normal range.
+    assert_eq!(1.0, "1.0".parse::<BFloat>().unwrap().to_f32());
+    assert_eq!(/*0x1.FEp127*/ 3.3895313892515355e+38, BFloat::largest().to_f32());
+    assert_eq!(/*-0x1.FEp127*/ -3.3895313892515355e+38, (-BFloat::largest()).to_f32());
+    assert_eq!(/*0x1.p-126*/ 1.1754943508222875e-38, BFloat::smallest_normalized().to_f32());
+    assert_eq!(/*-0x1.p-126*/ -1.1754943508222875e-38, (-BFloat::smallest_normalized()).to_f32());
+
+    // Smallest denormal, then the literal 0x1.FCp-127 (below `smallest_normalized`).
+    assert_eq!(/*0x1.p-133*/ 9.183549615799121e-41, BFloat::SMALLEST.to_f32());
+    let below_smallest_normal = "0x1.FCp-127".parse::<BFloat>().unwrap();
+    assert_eq!(/*0x1.FCp-127*/ 1.1663108012064884e-38, below_smallest_normal.to_f32());
+
+    // Infinities and NaN.
+    assert_eq!(f32::INFINITY, BFloat::INFINITY.to_f32());
+    assert_eq!(-f32::INFINITY, (-BFloat::INFINITY).to_f32());
+    assert!(BFloat::qnan(None).to_f32().is_nan());
+}
+
+#[test]
+fn float8e5m2_to_f32() {
+    // Signed zeros keep their sign through `to_f32`.
+    assert!(Single::from_f32(Float8E5M2::ZERO.to_f32()).is_pos_zero());
+    assert!(Single::from_f32((-Float8E5M2::ZERO).to_f32()).is_neg_zero());
+
+    // Small exact values.
+    assert_eq!(1.0, "1.0".parse::<Float8E5M2>().unwrap().to_f32());
+    assert_eq!(2.0, "2.0".parse::<Float8E5M2>().unwrap().to_f32());
+
+    // Extremes of the normal range.
+    assert_eq!(5.734400e+04, Float8E5M2::largest().to_f32());
+    assert_eq!(-5.734400e+04, (-Float8E5M2::largest()).to_f32());
+    assert_eq!(/* 0x1.p-14 */ 6.103515625e-05, Float8E5M2::smallest_normalized().to_f32());
+    assert_eq!(/* -0x1.p-14 */ -6.103515625e-05, (-Float8E5M2::smallest_normalized()).to_f32());
+
+    // `SMALLEST` must be a denormal with value 0x1p-16.
+    let tiny = Float8E5M2::SMALLEST;
+    assert!(tiny.is_denormal());
+    assert_eq!(/* 0x1.p-16 */ 0.0000152587890625, tiny.to_f32());
+
+    // Infinities and NaN.
+    assert_eq!(f32::INFINITY, Float8E5M2::INFINITY.to_f32());
+    assert_eq!(-f32::INFINITY, (-Float8E5M2::INFINITY).to_f32());
+    assert!(Float8E5M2::qnan(None).to_f32().is_nan());
+}
+
+#[test]
+fn float8e4m3fn_to_f32() {
+    // Signed zeros keep their sign through `to_f32`.
+    assert!(Single::from_f32(Float8E4M3FN::ZERO.to_f32()).is_pos_zero());
+    assert!(Single::from_f32((-Float8E4M3FN::ZERO).to_f32()).is_neg_zero());
+
+    // Small exact values.
+    assert_eq!(1.0, "1.0".parse::<Float8E4M3FN>().unwrap().to_f32());
+    assert_eq!(2.0, "2.0".parse::<Float8E4M3FN>().unwrap().to_f32());
+
+    // Extremes of the normal range.
+    assert_eq!(448., Float8E4M3FN::largest().to_f32());
+    assert_eq!(-448.0, (-Float8E4M3FN::largest()).to_f32());
+    assert_eq!(/* 0x1.p-6 */ 0.015625, Float8E4M3FN::smallest_normalized().to_f32());
+    assert_eq!(/* -0x1.p-6 */ -0.015625, (-Float8E4M3FN::smallest_normalized()).to_f32());
+
+    // `SMALLEST` must be a denormal with value 0x1p-9.
+    let tiny = Float8E4M3FN::SMALLEST;
+    assert!(tiny.is_denormal());
+    assert_eq!(/* 0x1.p-9 */ 0.001953125, tiny.to_f32());
+
+    // NaN stays NaN.
+    assert!(Float8E4M3FN::qnan(None).to_f32().is_nan());
+}
diff --git a/vendor/rustc_apfloat/tests/ppc.rs b/vendor/rustc_apfloat/tests/ppc.rs
new file mode 100644
index 000000000..7da8f8ac3
--- /dev/null
+++ b/vendor/rustc_apfloat/tests/ppc.rs
@@ -0,0 +1,470 @@
+use rustc_apfloat::ppc::DoubleDouble;
+use rustc_apfloat::{Category, Float, Round};
+
+use std::cmp::Ordering;
+
+#[test]
+fn ppc_double_double() { // Constants and parsed strings must yield the expected to_bits() patterns.
+ let test = DoubleDouble::ZERO;
+ let expected = "0x0p+0".parse::<DoubleDouble>().unwrap();
+ assert!(test.is_zero());
+ assert!(!test.is_negative());
+ assert!(test.bitwise_eq(expected));
+ assert_eq!(0, test.to_bits());
+
+ let test = -DoubleDouble::ZERO;
+ let expected = "-0x0p+0".parse::<DoubleDouble>().unwrap();
+ assert!(test.is_zero());
+ assert!(test.is_negative());
+ assert!(test.bitwise_eq(expected));
+ assert_eq!(0x8000000000000000, test.to_bits()); // only bit 63 (sign of the low 64-bit half) is set
+
+ let test = "1.0".parse::<DoubleDouble>().unwrap();
+ assert_eq!(0x3ff0000000000000, test.to_bits());
+
+ // LDBL_MAX (same bit pattern that ppc_double_double_factories expects from largest())
+ let test = "1.79769313486231580793728971405301e+308"
+ .parse::<DoubleDouble>()
+ .unwrap();
+ assert_eq!(0x7c8ffffffffffffe_7fefffffffffffff, test.to_bits());
+
+ // LDBL_MIN (same bit pattern that ppc_double_double_factories expects from smallest_normalized())
+ let test = "2.00416836000897277799610805135016e-292"
+ .parse::<DoubleDouble>()
+ .unwrap();
+ assert_eq!(0x0000000000000000_0360000000000000, test.to_bits());
+}
+
+#[test]
+fn ppc_double_double_add_special() { // add_r: only the resulting Category is checked, in both operand orders.
+ let data = [
+ // (1 + 0) + (-1 + 0) = Category::Zero
+ (0x3ff0000000000000, 0xbff0000000000000, Category::Zero, Round::NearestTiesToEven),
+ // LDBL_MAX + (0b1.1 * 2^(1023 - 106) + 0) = Category::Infinity
+ (0x7c8ffffffffffffe_7fefffffffffffff, 0x7948000000000000, Category::Infinity, Round::NearestTiesToEven),
+ // FIXME: change the high half of the second operand, 0x75effffffffffffe, to
+ // 0x75efffffffffffff when DoubleDouble's fallback is gone.
+ // LDBL_MAX + (0b1.011111... * 2^(1023 - 106) + (0b1.1111111...0 * 2^(1023 -
+ // 160))) = Category::Normal
+ (
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ 0x75effffffffffffe_7947ffffffffffff,
+ Category::Normal,
+ Round::NearestTiesToEven,
+ ),
+ // LDBL_MAX + LDBL_MAX = Category::Infinity
+ (
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ Category::Infinity,
+ Round::NearestTiesToEven,
+ ),
+ // NaN + (1 + 0) = Category::NaN
+ (0x7ff8000000000000, 0x3ff0000000000000, Category::NaN, Round::NearestTiesToEven),
+ ];
+
+ for &(op1, op2, expected, round) in &data {
+ { // op1 + op2
+ let mut a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ a1 = a1.add_r(a2, round).value;
+
+ assert_eq!(expected, a1.category(), "{:#x} + {:#x}", op1, op2);
+ }
+ { // op2 + op1: the same category is expected with the operands swapped
+ let a1 = DoubleDouble::from_bits(op1);
+ let mut a2 = DoubleDouble::from_bits(op2);
+ a2 = a2.add_r(a1, round).value;
+
+ assert_eq!(expected, a2.category(), "{:#x} + {:#x}", op2, op1);
+ }
+ }
+}
+
+#[test]
+fn ppc_double_double_add() { // add_r: exact result bits are checked, in both operand orders.
+ let data = [
+ // (1 + 0) + (2^-105 + 0) = (1 + 2^-105)
+ (0x3ff0000000000000, 0x3960000000000000, 0x3960000000000000_3ff0000000000000, Round::NearestTiesToEven),
+ // (1 + 0) + (2^-106 + 0) = (1 + 2^-106)
+ (0x3ff0000000000000, 0x3950000000000000, 0x3950000000000000_3ff0000000000000, Round::NearestTiesToEven),
+ // (1 + 2^-106) + (2^-106 + 0) = (1 + 2^-105)
+ (
+ 0x3950000000000000_3ff0000000000000,
+ 0x3950000000000000,
+ 0x3960000000000000_3ff0000000000000,
+ Round::NearestTiesToEven,
+ ),
+ // (1 + 0) + (epsilon + 0) = (1 + epsilon), where epsilon has bit pattern 0x1
+ (0x3ff0000000000000, 0x0000000000000001, 0x0000000000000001_3ff0000000000000, Round::NearestTiesToEven),
+ // FIXME: change 0xf950000000000000 to 0xf940000000000000, when
+ // DoubleDouble's fallback is gone.
+ // (DBL_MAX - 1 << (1023 - 105)) + (1 << (1023 - 53) + 0) = DBL_MAX +
+ // 1.11111... << (1023 - 52)
+ (
+ 0xf950000000000000_7fefffffffffffff,
+ 0x7c90000000000000,
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ Round::NearestTiesToEven,
+ ),
+ // FIXME: change 0xf950000000000000 to 0xf940000000000000, when
+ // DoubleDouble's fallback is gone.
+ // (1 << (1023 - 53) + 0) + (DBL_MAX - 1 << (1023 - 105)) = DBL_MAX +
+ // 1.11111... << (1023 - 52)
+ (
+ 0x7c90000000000000,
+ 0xf950000000000000_7fefffffffffffff,
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ Round::NearestTiesToEven,
+ ),
+ ];
+
+ for &(op1, op2, expected, round) in &data {
+ { // op1 + op2
+ let mut a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ a1 = a1.add_r(a2, round).value;
+
+ assert_eq!(expected, a1.to_bits(), "{:#x} + {:#x}", op1, op2);
+ }
+ { // op2 + op1: the same bits are expected with the operands swapped
+ let a1 = DoubleDouble::from_bits(op1);
+ let mut a2 = DoubleDouble::from_bits(op2);
+ a2 = a2.add_r(a1, round).value;
+
+ assert_eq!(expected, a2.to_bits(), "{:#x} + {:#x}", op2, op1);
+ }
+ }
+}
+
+#[test]
+fn ppc_double_double_subtract() { // sub_r: exact result bits are checked (one operand order only).
+ let data = [
+ // (1 + 0) - (-2^-105 + 0) = (1 + 2^-105)
+ (0x3ff0000000000000, 0xb960000000000000, 0x3960000000000000_3ff0000000000000, Round::NearestTiesToEven),
+ // (1 + 0) - (-2^-106 + 0) = (1 + 2^-106)
+ (0x3ff0000000000000, 0xb950000000000000, 0x3950000000000000_3ff0000000000000, Round::NearestTiesToEven),
+ ];
+
+ for &(op1, op2, expected, round) in &data {
+ let mut a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ a1 = a1.sub_r(a2, round).value;
+
+ assert_eq!(expected, a1.to_bits(), "{:#x} - {:#x}", op1, op2);
+ }
+}
+
+#[test]
+fn ppc_double_double_multiply_special() { // mul_r: only the resulting Category is checked, in both operand orders.
+ let data = [
+ // Category::NaN * Category::NaN = Category::NaN
+ (0x7ff8000000000000, 0x7ff8000000000000, Category::NaN, Round::NearestTiesToEven),
+ // Category::NaN * Category::Zero = Category::NaN
+ (0x7ff8000000000000, 0, Category::NaN, Round::NearestTiesToEven),
+ // Category::NaN * Category::Infinity = Category::NaN
+ (0x7ff8000000000000, 0x7ff0000000000000, Category::NaN, Round::NearestTiesToEven),
+ // Category::NaN * Category::Normal = Category::NaN
+ (0x7ff8000000000000, 0x3ff0000000000000, Category::NaN, Round::NearestTiesToEven),
+ // Category::Infinity * Category::Infinity = Category::Infinity
+ (0x7ff0000000000000, 0x7ff0000000000000, Category::Infinity, Round::NearestTiesToEven),
+ // Category::Infinity * Category::Zero = Category::NaN
+ (0x7ff0000000000000, 0, Category::NaN, Round::NearestTiesToEven),
+ // Category::Infinity * Category::Normal = Category::Infinity
+ (0x7ff0000000000000, 0x3ff0000000000000, Category::Infinity, Round::NearestTiesToEven),
+ // Category::Zero * Category::Zero = Category::Zero
+ (0, 0, Category::Zero, Round::NearestTiesToEven),
+ // Category::Zero * Category::Normal = Category::Zero
+ (0, 0x3ff0000000000000, Category::Zero, Round::NearestTiesToEven),
+ ];
+
+ for &(op1, op2, expected, round) in &data {
+ { // op1 * op2
+ let mut a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ a1 = a1.mul_r(a2, round).value;
+
+ assert_eq!(expected, a1.category(), "{:#x} * {:#x}", op1, op2);
+ }
+ { // op2 * op1: the same category is expected with the operands swapped
+ let a1 = DoubleDouble::from_bits(op1);
+ let mut a2 = DoubleDouble::from_bits(op2);
+ a2 = a2.mul_r(a1, round).value;
+
+ assert_eq!(expected, a2.category(), "{:#x} * {:#x}", op2, op1);
+ }
+ }
+}
+
+#[test]
+fn ppc_double_double_multiply() { // mul_r: exact result bits are checked, in both operand orders.
+ let data = [
+ // 1/3 * 3 = 1.0
+ (0x3c75555555555556_3fd5555555555555, 0x4008000000000000, 0x3ff0000000000000, Round::NearestTiesToEven),
+ // (1 + epsilon) * (1 + 0) = (1 + epsilon)
+ (
+ 0x0000000000000001_3ff0000000000000,
+ 0x3ff0000000000000,
+ 0x0000000000000001_3ff0000000000000,
+ Round::NearestTiesToEven,
+ ),
+ // (1 + epsilon) * (1 + epsilon) = 1 + 2 * epsilon
+ (
+ 0x0000000000000001_3ff0000000000000,
+ 0x0000000000000001_3ff0000000000000,
+ 0x0000000000000002_3ff0000000000000,
+ Round::NearestTiesToEven,
+ ),
+ // (-1 + epsilon) * (1 + epsilon) = -1
+ (
+ 0x0000000000000001_bff0000000000000,
+ 0x0000000000000001_3ff0000000000000,
+ 0xbff0000000000000,
+ Round::NearestTiesToEven,
+ ),
+ // (0.5 + 0) * (1 + 2 * epsilon) = 0.5 + epsilon
+ (
+ 0x3fe0000000000000,
+ 0x0000000000000002_3ff0000000000000,
+ 0x0000000000000001_3fe0000000000000,
+ Round::NearestTiesToEven,
+ ),
+ // (0.5 + 0) * (1 + epsilon) = 0.5
+ (0x3fe0000000000000, 0x0000000000000001_3ff0000000000000, 0x3fe0000000000000, Round::NearestTiesToEven),
+ // __LDBL_MAX__ * (1 + 2^-106) = inf
+ (
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ 0x3950000000000000_3ff0000000000000,
+ 0x7ff0000000000000,
+ Round::NearestTiesToEven,
+ ),
+ // __LDBL_MAX__ * (1 + 2^-107) > __LDBL_MAX__, but still finite (not inf)
+ (
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ 0x3940000000000000_3ff0000000000000,
+ 0x7c8fffffffffffff_7fefffffffffffff,
+ Round::NearestTiesToEven,
+ ),
+ // __LDBL_MAX__ * (1 + 2^-108) = __LDBL_MAX__
+ (
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ 0x3930000000000000_3ff0000000000000,
+ 0x7c8ffffffffffffe_7fefffffffffffff,
+ Round::NearestTiesToEven,
+ ),
+ ];
+
+ for &(op1, op2, expected, round) in &data {
+ { // op1 * op2
+ let mut a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ a1 = a1.mul_r(a2, round).value;
+
+ assert_eq!(expected, a1.to_bits(), "{:#x} * {:#x}", op1, op2);
+ }
+ { // op2 * op1: the same bits are expected with the operands swapped
+ let a1 = DoubleDouble::from_bits(op1);
+ let mut a2 = DoubleDouble::from_bits(op2);
+ a2 = a2.mul_r(a1, round).value;
+
+ assert_eq!(expected, a2.to_bits(), "{:#x} * {:#x}", op2, op1);
+ }
+ }
+}
+
+#[test]
+fn ppc_double_double_divide() { // div_r: exact result bits are checked (one operand order only).
+ // FIXME: Only a sanity check for now. Add more edge cases when the
+ // double-double algorithm is implemented.
+ let data = [
+ // 1 / 3 = 1/3 (the same bit pattern ppc_double_double_multiply uses as its 1/3 operand)
+ (0x3ff0000000000000, 0x4008000000000000, 0x3c75555555555556_3fd5555555555555, Round::NearestTiesToEven),
+ ];
+
+ for &(op1, op2, expected, round) in &data {
+ let mut a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ a1 = a1.div_r(a2, round).value;
+
+ assert_eq!(expected, a1.to_bits(), "{:#x} / {:#x}", op1, op2);
+ }
+}
+
+#[test]
+fn ppc_double_double_remainder() { // ieee_rem: exact result bits are checked; the second case yields a negative remainder.
+ let data = [
+ // ieee_rem(3.0 + 3.0 * 2^-53, 1.25 + 1.25 * 2^-53) = (0.5 + 0.5 * 2^-53)
+ (0x3cb8000000000000_4008000000000000, 0x3ca4000000000000_3ff4000000000000, 0x3c90000000000000_3fe0000000000000),
+ // ieee_rem(3.0 + 3.0 * 2^-53, 1.75 + 1.75 * 2^-53) = (-0.5 - 0.5 * 2^-53)
+ (0x3cb8000000000000_4008000000000000, 0x3cac000000000000_3ffc000000000000, 0xbc90000000000000_bfe0000000000000),
+ ];
+
+ for &(op1, op2, expected) in &data {
+ let a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ let result = a1.ieee_rem(a2).value;
+
+ assert_eq!(expected, result.to_bits(), "ieee_rem({:#x}, {:#x})", op1, op2);
+ }
+}
+
+#[test]
+fn ppc_double_double_mod() { // `%` operator: exact result bits are checked; same operands as the ieee_rem test.
+ let data = [
+ // mod(3.0 + 3.0 * 2^-53, 1.25 + 1.25 * 2^-53) = (0.5 + 0.5 * 2^-53)
+ (0x3cb8000000000000_4008000000000000, 0x3ca4000000000000_3ff4000000000000, 0x3c90000000000000_3fe0000000000000),
+ // mod(3.0 + 3.0 * 2^-53, 1.75 + 1.75 * 2^-53) = (1.25 + 1.25 * 2^-53)
+ // 0xbc98000000000000 doesn't seem right, but it's what we currently have.
+ // FIXME: investigate. NOTE(review): (1.25 + 2^-52) + (-0.75 * 2^-53) == 1.25 + 1.25 * 2^-53, so the pair may just be a different split of the same value; confirm.
+ (0x3cb8000000000000_4008000000000000, 0x3cac000000000000_3ffc000000000000, 0xbc98000000000000_3ff4000000000001),
+ ];
+
+ for &(op1, op2, expected) in &data {
+ let a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ let r = (a1 % a2).value;
+
+ assert_eq!(expected, r.to_bits(), "fmod({:#x}, {:#x})", op1, op2);
+ }
+}
+
+#[test]
+fn ppc_double_double_fma() {
+ // Sanity check for now: mul_add must give 2 * 3 + 4 == 10 (compared with partial_cmp, not bitwise).
+ let mut a = "2".parse::<DoubleDouble>().unwrap();
+ a = a
+ .mul_add("3".parse::<DoubleDouble>().unwrap(), "4".parse::<DoubleDouble>().unwrap())
+ .value;
+ assert_eq!(Some(Ordering::Equal), "10".parse::<DoubleDouble>().unwrap().partial_cmp(&a));
+}
+
+#[test]
+fn ppc_double_double_round_to_integral() { // Both halfway cases below must round to 2 under NearestTiesToEven.
+ { // 1.5 -> 2
+ let a = "1.5".parse::<DoubleDouble>().unwrap();
+ let a = a.round_to_integral(Round::NearestTiesToEven).value;
+ assert_eq!(Some(Ordering::Equal), "2".parse::<DoubleDouble>().unwrap().partial_cmp(&a));
+ }
+ { // 2.5 -> 2 (the tie resolves to the even integer, not to 3)
+ let a = "2.5".parse::<DoubleDouble>().unwrap();
+ let a = a.round_to_integral(Round::NearestTiesToEven).value;
+ assert_eq!(Some(Ordering::Equal), "2".parse::<DoubleDouble>().unwrap().partial_cmp(&a));
+ }
+}
+
+#[test]
+fn ppc_double_double_compare() { // partial_cmp: ordered values give Some(..); any comparison involving NaN gives None.
+ let data = [
+ // (1 + 0) = (1 + 0)
+ (0x3ff0000000000000, 0x3ff0000000000000, Some(Ordering::Equal)),
+ // (1 + 0) < (1.00...1 + 0)
+ (0x3ff0000000000000, 0x3ff0000000000001, Some(Ordering::Less)),
+ // (1.00...1 + 0) > (1 + 0)
+ (0x3ff0000000000001, 0x3ff0000000000000, Some(Ordering::Greater)),
+ // (1 + 0) < (1.00...1 + epsilon)
+ (0x3ff0000000000000, 0x0000000000000001_3ff0000000000001, Some(Ordering::Less)),
+ // NaN != NaN
+ (0x7ff8000000000000, 0x7ff8000000000000, None),
+ // (1 + 0) != NaN
+ (0x3ff0000000000000, 0x7ff8000000000000, None),
+ // Inf = Inf
+ (0x7ff0000000000000, 0x7ff0000000000000, Some(Ordering::Equal)),
+ ];
+
+ for &(op1, op2, expected) in &data {
+ let a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ assert_eq!(expected, a1.partial_cmp(&a2), "compare({:#x}, {:#x})", op1, op2,);
+ }
+}
+
+#[test]
+fn ppc_double_double_bitwise_eq() { // bitwise_eq: unlike partial_cmp, identical NaN bit patterns compare equal here.
+ let data = [
+ // (1 + 0) = (1 + 0)
+ (0x3ff0000000000000, 0x3ff0000000000000, true),
+ // (1 + 0) != (1.00...1 + 0)
+ (0x3ff0000000000000, 0x3ff0000000000001, false),
+ // NaN = NaN
+ (0x7ff8000000000000, 0x7ff8000000000000, true),
+ // NaN != NaN with a different bit pattern (same low 64 bits, non-zero high 64 bits)
+ (0x7ff8000000000000, 0x3ff0000000000000_7ff8000000000000, false),
+ // Inf = Inf
+ (0x7ff0000000000000, 0x7ff0000000000000, true),
+ ];
+
+ for &(op1, op2, expected) in &data {
+ let a1 = DoubleDouble::from_bits(op1);
+ let a2 = DoubleDouble::from_bits(op2);
+ assert_eq!(expected, a1.bitwise_eq(a2), "{:#x} = {:#x}", op1, op2);
+ }
+}
+
+#[test]
+fn ppc_double_double_change_sign() { // copy_sign on a value whose two 64-bit halves have opposite sign bits.
+ let float = DoubleDouble::from_bits(0xbcb0000000000000_400f000000000000);
+ { // copying the sign of +1 leaves the bit pattern unchanged
+ let actual = float.copy_sign("1".parse::<DoubleDouble>().unwrap());
+ assert_eq!(0xbcb0000000000000_400f000000000000, actual.to_bits());
+ }
+ { // copying the sign of -1 flips the sign bit of both 64-bit halves
+ let actual = float.copy_sign("-1".parse::<DoubleDouble>().unwrap());
+ assert_eq!(0x3cb0000000000000_c00f000000000000, actual.to_bits());
+ }
+}
+
+#[test]
+fn ppc_double_double_factories() { // Bit patterns of ZERO / largest / SMALLEST / smallest_normalized and their negations.
+ assert_eq!(0, DoubleDouble::ZERO.to_bits());
+ assert_eq!(0x7c8ffffffffffffe_7fefffffffffffff, DoubleDouble::largest().to_bits());
+ assert_eq!(0x0000000000000001, DoubleDouble::SMALLEST.to_bits());
+ assert_eq!(0x0360000000000000, DoubleDouble::smallest_normalized().to_bits());
+ assert_eq!(0x0000000000000000_8000000000000000, (-DoubleDouble::ZERO).to_bits()); // negation sets bit 63 only
+ assert_eq!(0xfc8ffffffffffffe_ffefffffffffffff, (-DoubleDouble::largest()).to_bits()); // negation flips the sign bit of both halves
+ assert_eq!(0x0000000000000000_8000000000000001, (-DoubleDouble::SMALLEST).to_bits());
+ assert_eq!(0x0000000000000000_8360000000000000, (-DoubleDouble::smallest_normalized()).to_bits());
+ assert!(DoubleDouble::SMALLEST.is_smallest());
+ assert!(DoubleDouble::largest().is_largest());
+}
+
+#[test]
+fn ppc_double_double_is_denormal() { // is_denormal: true for SMALLEST and for a non-normalized pair, false for largest / smallest_normalized.
+ assert!(DoubleDouble::SMALLEST.is_denormal());
+ assert!(!DoubleDouble::largest().is_denormal());
+ assert!(!DoubleDouble::smallest_normalized().is_denormal());
+ {
+ // (4 + 3) is not normalized: the low 64 bits encode 4.0 and the high 64 bits encode 3.0
+ let data = 0x4008000000000000_4010000000000000;
+ assert!(DoubleDouble::from_bits(data).is_denormal());
+ }
+}
+
+#[test]
+fn ppc_double_double_exact_inverse() { // get_exact_inverse of 2.0 must be Some and bitwise-equal to 0.5.
+ assert!("2.0"
+ .parse::<DoubleDouble>()
+ .unwrap()
+ .get_exact_inverse()
+ .unwrap()
+ .bitwise_eq("0.5".parse::<DoubleDouble>().unwrap()));
+}
+
+#[test]
+fn ppc_double_double_scalbn() { // scalbn(1) must double both halves of the value.
+ // 3.0 + 3.0 * 2^-53
+ let input = 0x3cb8000000000000_4008000000000000;
+ let result = DoubleDouble::from_bits(input).scalbn(1);
+ // 6.0 + 6.0 * 2^-53
+ assert_eq!(0x3cc8000000000000_4018000000000000, result.to_bits());
+}
+
+#[test]
+fn ppc_double_double_frexp() { // frexp must split the input into a 0.75-based fraction and exponent 2.
+ // 3.0 + 3.0 * 2^-53
+ let input = 0x3cb8000000000000_4008000000000000;
+ let mut exp = 0;
+ // expected fraction: 0.75 + 0.75 * 2^-53, with exp == 2 (0.75 * 2^2 == 3.0)
+ let result = DoubleDouble::from_bits(input).frexp(&mut exp);
+ assert_eq!(2, exp);
+ assert_eq!(0x3c98000000000000_3fe8000000000000, result.to_bits());
+}