diff options
Diffstat (limited to 'vendor/compiler_builtins')
159 files changed, 20985 insertions, 0 deletions
diff --git a/vendor/compiler_builtins/.cargo-checksum.json b/vendor/compiler_builtins/.cargo-checksum.json new file mode 100644 index 000000000..9324f092f --- /dev/null +++ b/vendor/compiler_builtins/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.lock":"b369bba03aca1dd529671fb54000fd8dfc0d4d5b5cfe1afe71294155c974bc34","Cargo.toml":"1f727cb37da8b846367a12e034c01aca729f745a695383dd596b05b7f86ba970","README.md":"5eb36fbab30693dbbe9f0de54749c95bd06fd6e42013b5b9eff3c062b9fdd34f","build.rs":"991919d9ab1eca85e87a85acafcb86d4880f1afe9fe35d6bd87039dcb1fa9aa7","examples/intrinsics.rs":"a7aa69c17af3aa8f6edff32c214e80827d3cbe3aea386a2be42244444752d253","libm/src/math/acos.rs":"fb066ba84aba1372d706425ec14f35ff8d971756d15eeebd22ecf42a716493bb","libm/src/math/acosf.rs":"a112b82309bba1d35c4e3d6ad4d6c21ef305343d9ab601ddf4bc61d43bc9f1af","libm/src/math/acosh.rs":"56dac8538e4350cd7cf001327c89f087b68abb2e6aaad58edba8a094b09f6b0f","libm/src/math/acoshf.rs":"df5b0c4d8e37e64cf5ff2d8328b28bc35c78e84060ff769e64523ea9ff9065c1","libm/src/math/asin.rs":"095a1e98996daff45df0b154ca0ec35bbf31db964ee9fdda0207308cb20df441","libm/src/math/asinf.rs":"49cccb4db2881982643a4a7d5453f4f8daf527711bbb67313607a3c178856d61","libm/src/math/asinh.rs":"e8fc94031015fddf35e9c26b94da9f6431ee17c81cd7bd37da8ffc98f7e0b32c","libm/src/math/asinhf.rs":"8a0b8933a98a17617a66fef4c7b89eba645fdf05302000babf4a5a5f45328430","libm/src/math/atan.rs":"d4fe46e1c5739dd09997869dcfbc3c85f03c534af52e700d6c6bcf9c3fedda07","libm/src/math/atan2.rs":"2623bc8ca707d13a7092ce49adf68e9cbf4452ad1bf4a861dc40ca858606a747","libm/src/math/atan2f.rs":"dd01943e0e1f1955912e5c3ffc9467529cf64bd02ac0a6ad5ab31dbe6657f05d","libm/src/math/atanf.rs":"e41b41569474a59c970ede3538e00bda4072cf4d90040017101cc79d7dc28caa","libm/src/math/atanh.rs":"5934dbd6b7395ca4f103ace7598da723a9270e1cf6b47e7f786debe4bb3651ff","libm/src/math/atanhf.rs":"8ba4711dda19ef2dc33622be65c1483902868083543198c6bbd040d4026293de","libm/src/math/cbrt.rs":"f2c45612d2eecd93cfcdd9ebf824c754fc8f8dfd6d16862c0b9c4ccea78c2a0f","libm/src/math/cbrtf.rs":"ad0b483854aa9f17a44d36c049bf0e8ebab34c27e90b787c05f45cc230ec7d19","libm/src/math/ceil.rs":"57ba5b6e207a0ccbd34190d1aa544389ca12126be23821dfb5746497f620ce03","libm/src/math/ceilf.rs":"c922a0475a599b9ea5473e615f74700b99707cebd6927f24ea59cb2a3cb3bbc3","libm/src/math/copysign.rs":"d80c880efaf0cdf2ce0a4d4f5a68dd6c36c88d46fa997ec8ac8604bfdb26fa33","libm/src/math/copysignf.rs":"1547116071e68a42b1605eb2fc722db6466a34517dc96b92de1f29a274c3d8e3","libm/src/math/cos.rs":"74babdc13ede78e400c5ca1854c3e22d2e08cbdc5618aefa5bba6f9303ef65b6","libm/src/math/cosf.rs":"09c40f93c445b741e22477ceedf163ca33b6a47f973f7c9876cfba2692edb29c","libm/src/math/cosh.rs":"0d0a7cef18577f321996b8b87561963139f754ad7f2ea0a3b3883811f3f0693a","libm/src/math/coshf.rs":"be8ca8739e4cf1978425b349f941cb4838bba8c10cb559c7940b9fd4fdde21ad","libm/src/math/erf.rs":"9c55fc6756ba816996f0b585e07ccfa4cd87575ad525cd30c4a968b30acffda3","libm/src/math/erff.rs":"cb020e8bada9a54573a11fe3271750d73f14fed3092a881a9ceaf98fe32fd5a6","libm/src/math/exp.rs":"ca7405ad0d1993fffcf9aae96f9256307bed3c4916545aaebd1cf1d2df1807fa","libm/src/math/exp10.rs":"2deb037f88feac87a0e924b69dd496f0dd3b5d35f2a58e09d4c5166b207e517b","libm/src/math/exp10f.rs":"6979464dfe3f4f2da1f9afc909646499c4bfaef15e10a039384750e2f1586fea","libm/src/math/exp2.rs":"94a9304a2ce3bc81f6d2aefd3cde6faa30f13260d46cb13692863cdea1c9a3a1","libm/src/math/exp2f.rs":"785f2630accd35118ec07bf60273e219ed91a215b956b1552eeea5bc2a708cc8","libm/src/math/expf.rs":"ec14c18f891a9e37735ec39e6fc2e9bf674a2c2e083f22e2533b481177359c98","libm/src/math/expm1.rs":"124069f456c8ad331f265c7509d9e223b2a300e461bbfd3d6adfdcdd2ee5b8ac","libm/src/math/expm1f.rs":"18e2116d31ea8410051cc709b9d04b754b0e3ba6758ee1bf0b48749f4999b840","libm/src/math/expo2.rs":"4f4f9fecfccb43f30c2784aa7c0bb656754a52b8ab431f7d1b551c673ab133f1","libm/src/math/fabs.rs":"e6c7db39f98508098cdf64ac0c2f53866c466149a7490afb9fe22b44c4dd81b3","libm/src/math/fabsf.rs":"83a1f5f4d9ca899ba2b701d7332e18b40258b83e111db4c5d8fab2cc1be58aa3","libm/src/math/fdim.rs":"8ec091996005207297c2389ae563e1b18dbc6a9eac951de29a976c5cd7bc32a7","libm/src/math/fdimf.rs":"c7f3f2269834d55be26b6580ddc07c42531577955fa4de35bad1e2a361085614","libm/src/math/fenv.rs":"8730d45aa4c591f91dccdcc1ce533fa23e9c6df0c38defb9c57f749cb25e1cd0","libm/src/math/floor.rs":"5050804cae173af6775c0678d6c1aafb5ca2b744bc8a2f50d9d03b95dcee1fb0","libm/src/math/floorf.rs":"c903e0c57bc60a888c513eb7a873a87a4759ba68fc791b6b931652f8ee74cc03","libm/src/math/fma.rs":"d88e01c2c2d11333dd49b61166caa036ff5f08f3a8c5973b9e3c9574bf2eb675","libm/src/math/fmaf.rs":"3e0f5727e56f31218f674b9b8975d7e67b3a24a097f06a2a3eca9723cd786213","libm/src/math/fmax.rs":"f6c8e96a8b1a170648d2fa3513e7b6b459085d708c839869f82e305fe58fac37","libm/src/math/fmaxf.rs":"dff0025433232e8a5ec7bd54d847ccf596d762ea4e35f5c54fbaac9404d732fd","libm/src/math/fmin.rs":"95b6cb66ca0e0e22276f0bf88dbe8fb69796a69a196a7491bd4802efbcf2e298","libm/src/math/fminf.rs":"304bc839b15ea3d84e68d2af9f40524ec120d30a36a667b22fcb98a6c258f4c7","libm/src/math/fmod.rs":"a1c0550fc7df8164733d914e222ff0966a2ab886d6e75a1098f24fe0283ae227","libm/src/math/fmodf.rs":"ee51ed092c0eeb8195f35735ff725cfd46612e0d689a7c483538bd92fbe61828","libm/src/math/frexp.rs":"28af70026922a8ab979744c7ad4d8faba6079c4743b7eeb6d14c983a982fbbcc","libm/src/math/frexpf.rs":"2e2593ae8002ba420809ebfaf737ef001cdc912354be3d978a8c0cb930350d4d","libm/src/math/hypot.rs":"841131c4a0cea75bc8a86e29f3f6d0815a61fc99731c9984651ce83d3050d218","libm/src/math/hypotf.rs":"5f317323edc2eb699580fe54b074b7e570a7734d51a0a149c0b49b54470a836c","libm/src/math/ilogb.rs":"813413bf6266d4fc40db9c5921af3cef4f892ba93e8f6d9efe62a449d1234532","libm/src/math/ilogbf.rs":"dec462780f46682e16cfaa733238bed3b692729e951f53a44726100b6c73a716","libm/src/math/j0.rs":"9572b6396c489927d332d0e717920e61ec0618e5e9c31f7eeeec70f5e4abab06","libm/src/math/j0f.rs":"802c8254bded9b3afb6eea8b9af240038a5a4a5d811396729f69ca509e3e7d87","libm/src/math/j1.rs":"97b1af1611fa3d110c2b349ee8e4176100132ea1391b619086b47ac063b81803","libm/src/math/j1f.rs":"9c9b128752e8ea2e7d81b637ba84907ab54a545e7602c49167b313743927930b","libm/src/math/jn.rs":"847d122334e5707ad9627146cddccc082a1f2f5bcd3e5ef54399013a7007ce88","libm/src/math/jnf.rs":"4045076f7d1a1b89882ed60d4dd60a4cbbc66b85cfb90491378c8015effcc476","libm/src/math/k_cos.rs":"f34a69e44d6b8901b03b578a75972f438ab20a7b98a0903fc1903d6fde3899be","libm/src/math/k_cosf.rs":"8f7117ff21cebf8e890a5bcfd7ea858a94172f4172b79a66d53824c2cb0888b1","libm/src/math/k_expo2.rs":"eb4ca9e6a525b7ea6da868c3cb136896682cc46f8396ba2a2ebc3ae9e9ba54b0","libm/src/math/k_expo2f.rs":"d51ad5df61cb5d1258bdb90c52bfed4572bb446a9337de9c04411ed9454ae0cb","libm/src/math/k_sin.rs":"14b2aba6ca07150c92768b5a72acaf5cde6a11d6619e14896512a7ba242e289a","libm/src/math/k_sinf.rs":"2775fcc710807164e6f37a4f8da3c8143cd5f16e19ce7c31c5591522151d7a96","libm/src/math/k_tan.rs":"a72beae4ccd9631eeeb61d6365bbeecae81c8411f3120a999c515cca0d5ea5c5","libm/src/math/k_tanf.rs":"6a794be56fa4b2f60452b9bab19af01c388f174560acbf829a351378ea39495d","libm/src/math/ldexp.rs":"b647f0096e80e4d926d8dd18d294c892ee2cb1778effe2c5e1b2664ae5cb1a4e","libm/src/math/ldexpf.rs":"98743fad2cd97a7be496f40ba3157ac1438fce0d0c25d5ab90c3b8c71c3fd0ed","libm/src/math/lgamma.rs":"498552658cc8106d7754f85ae8dbc3306ac2f0a9f7eb5a796be70c5beac92c41","libm/src/math/lgamma_r.rs":"77fb6442aeb5343926d8965e1549dde3e2cc4fd09555de6b56506001d956c344","libm/src/math/lgammaf.rs":"457105f53a4c8717e8f5a117d261dcf94e222e83981337fe23602abe883fe3f7","libm/src/math/lgammaf_r.rs":"44de75babbdd53c4a5879cd6f426e7311db82669def39df5f63914d67d6cc1b1","libm/src/math/log.rs":"b5e0c5f30d9e94351488732801be3107c12b854c3f95ad37e256dd88eeca408f","libm/src/math/log10.rs":"3425ff8be001fd1646ba15e254eb6ef4bdc6ccaf0cbee27ddf1fa84e04178b90","libm/src/math/log10f.rs":"fee4f71879bc4c99259e68c0c641364901629fb29a8ebddfcc0d090102cceddd","libm/src/math/log1p.rs":"9cf400852f165e6be19b97036ae9521fb9ca857d0a9a91c117d9123221622185","libm/src/math/log1pf.rs":"2716e6d2afa271996b7c8f47fd9e4952c88f4c1fd8c07c3e8ce8c62794bf71d8","libm/src/math/log2.rs":"dbbbfbaaa8aa6a4dbefea554ea3983090a9691228b011910c751f6adca912c40","libm/src/math/log2f.rs":"92a90350d8edce21c31c285c3e620fca7c62a2366008921715945c2c73b5b79f","libm/src/math/logf.rs":"845342cffc34d3db1f5ec12d8e5b773cd5a79056e28662fcb9bcd80207596f50","libm/src/math/mod.rs":"9de65aedb36910356bc47d25b1468b40ab63faf7e319e599e478a239681bdf9c","libm/src/math/modf.rs":"d012ed5a708ef52b6d1313c22a46cadaf5764dde1220816e3df2f03a0fcc60ae","libm/src/math/modff.rs":"f8f1e4c27a85d2cdb3c8e74439d59ef64aa543b948f22c23227d02d8388d61c2","libm/src/math/nextafter.rs":"3282e7eef214a32736fb6928d490198ad394b26b402b45495115b104839eebfe","libm/src/math/nextafterf.rs":"0937dc8a8155c19842c12181e741cec1f7df1f7a00cee81fcb2475e2842761b7","libm/src/math/pow.rs":"17c38297c5bf99accd915f292b777f8716ecf328916297c8bb9dfde6fd8ce522","libm/src/math/powf.rs":"2c423a0ea57fdc4e20f3533f744c6e6288c998b4de8f2914fafaa0e78be81b04","libm/src/math/rem_pio2.rs":"3e53234977daf61c89c29c940791714aad2f676a6f38188c7d17543a2aa8806f","libm/src/math/rem_pio2_large.rs":"21762d08d72dc6f2e313123a7311683000974a09b8fcae50994d9c39239721b1","libm/src/math/rem_pio2f.rs":"07fb48f6d5cbadfd32ce4124b2b74af98b8391a2a6f36ce2a7d32e4500cb65ac","libm/src/math/remainder.rs":"63865f4370853c476b45bb27a5c54a4072146aa4a626835ae5263871a4e7e5dc","libm/src/math/remainderf.rs":"dd3fa432dbda8f2135428198be7bd69c57f8d13df3f365b12f52bf6a82352ac4","libm/src/math/remquo.rs":"3cc0bf55069f165c4843f2c358b3a27279c01e8cdd99f9057a3f7f31f45408f2","libm/src/math/remquof.rs":"cc749e18ecb7e766b8b8eeabdbf89ac99087d3d587e71e30f690676a3d2c1f9b","libm/src/math/round.rs":"f10797ef15dd34a74e912ba8621d60bc0200c87b94308c9de3cc88d7aec4feb4","libm/src/math/roundf.rs":"27e37cfcf82373709e7debf9c0c18f7ed00ae0f5d97a214c388041f7a6996d35","libm/src/math/scalbn.rs":"b5c9d6d4177fe393cbfe1c634d75ce14b754f6cbce87c5bf979a9661491748a2","libm/src/math/scalbnf.rs":"4f198d06db1896386256fb9a5ac5b805b16b836226c18780a475cf18d7c1449c","libm/src/math/sin.rs":"bb483a2138ca779e03a191222636f0c60fd75a77a2a12f263bda4b6aa9136317","libm/src/math/sincos.rs":"55b7446984db26bc118eb3d9b01f4fd2594de06558e0f9b6dc936a57888ca9be","libm/src/math/sincosf.rs":"f7b18383b242dad59e6033240d0e4a0fd08a2633e5048eff27532085f1c67f87","libm/src/math/sinf.rs":"dcddac1d56b084cbb8d0e019433c9c5fe2201d9b257a7dcf2f85c9a8f14b79cf","libm/src/math/sinh.rs":"d8ee4c7af883a526f36c1a6da13bb81fba9181b477e2f2538161a2bee97edc35","libm/src/math/sinhf.rs":"d06eb030ba9dbf7094df127262bfe99f149b4db49fa8ab8c15499660f1e46b26","libm/src/math/sqrt.rs":"824570a631c2542ccee68b65e3eb08fe79c037a29bbaaf54da5367e7b236124a","libm/src/math/sqrtf.rs":"4cf418d74f7751d522a642a9a8d6b86ee3472c6aaef44f0eb1bc26f4d8a90985","libm/src/math/tan.rs":"930ecedaadc60f704c2dfa4e15186f59713c1ba7d948529d215223b424827db5","libm/src/math/tanf.rs":"894156a3b107aee08461eb4e7e412fc049aa237d176ae705c6e3e2d7060d94e3","libm/src/math/tanh.rs":"f1f08eb98ed959a17370a7aaf0177be36e3764543424e78feb033ed3f5e8ec98","libm/src/math/tanhf.rs":"74027b0c672a4e64bdef6d7a3069b90caec50e1e7dbb2c12d2828f310502f41e","libm/src/math/tgamma.rs":"a6aabb8365410af6611f19f58694ccb74e82bb9ba9e1cdec7e1af787cfa44815","libm/src/math/tgammaf.rs":"c95bd69957387533853532164f7e2251d2b04f5e775406b9e647226ae2bdd5ad","libm/src/math/trunc.rs":"642264897cc1505e720c8cf313be81aa9fd53aae866644a2e988d01dbc77fd8a","libm/src/math/truncf.rs":"dee3607baf1af0f01deae46e429e097234c50b268eaefebbe716f19f38597900","src/arm.rs":"8ccf47608cc7862d8d834ca292d0d3d97faaab6cc6f20ba59ccf2987ff0f3f77","src/arm_linux.rs":"87136556091817b2bded52c3ece101123aae84ff13ccaf13a196826b2e3fc297","src/float/add.rs":"3ec32ceaf470a89777b54f9cde61832fdadeade0f4894f268a949e968520bc57","src/float/cmp.rs":"79b1fdc8d5f943c4ad5ea4ad32623b18f63e17ac3852fbc64a4942228007e1fc","src/float/conv.rs":"55138604371e00ef14167894159156b68f939039b0e5b2b1f3db61456e3d3870","src/float/div.rs":"bc808a5372d041b96aa603ac814165e1e7dbd690e4ab9b026d9465c1cbc2c583","src/float/extend.rs":"180b2e791c58e0526de0a798845c580ce3222c8a15c8665e6e6a4bf5cf1a34aa","src/float/mod.rs":"48d76632575789a6ecf99213b1ed38c21c86ad5a5c3fa33ccb31f77829271b79","src/float/mul.rs":"0d0c1f0c28c149ecadeafd459d3c4c9327e4cfcae2cba479957bb8010ef51a01","src/float/pow.rs":"2ada190738731eb6f24104f8fb8c4d6f03cfb16451536dbee32f2b33db0c4b19","src/float/sub.rs":"c2a87f4628f51d5d908d0f25b5d51ce0599dc559d5a72b20e131261f484d5848","src/float/trunc.rs":"d21d2a2f9a1918b4bbb594691e397972a7c04b74b2acf04016c55693abf6d24b","src/int/addsub.rs":"7ec45ce1ba15b56a5b7129d3e5722c4db764c6545306d3fa9090983bcabd6f17","src/int/leading_zeros.rs":"ccf5e9d098c80034dcf6e38437c9a2eb670fa8043558bbfb574f2293164729a6","src/int/mod.rs":"bab1b77535ceebdebb89fd4e59e7105f8c45347bb638351a626615b24544a0b1","src/int/mul.rs":"bb48d8fd42d8f9f5fe9271d8d0f7a92dbae320bf4346e19d1071eb2093cb8ed9","src/int/sdiv.rs":"ace4cb0ec388a38834e01cab2c5bc87182d31588dfc0b1ae117c11ed0c4781cf","src/int/shift.rs":"3967c28a8d61279546e91958d64745fec63f15aee9175eb0602cc6353830da6c","src/int/specialized_div_rem/asymmetric.rs":"27f5bf70a35109f9d4e4e1ad1e8003aa17da5a1e436bf3e63a493d7528a3a566","src/int/specialized_div_rem/binary_long.rs":"9f1ced81a394f000a21a329683144d68ee431a954136a3634eb55b1ee2cf6d51","src/int/specialized_div_rem/delegate.rs":"9df141af98e391361e25d71ae38d5e845a91d896edd2c041132fd46af8268e85","src/int/specialized_div_rem/mod.rs":"c32a7c416f2c81d6368720053864c3e7082417bca983c329520a8e7dc8c34736","src/int/specialized_div_rem/norm_shift.rs":"3be7ee0dea545c1f702d9daf67dad2b624bf7b17b075c8b90d3d3f7b53df4c21","src/int/specialized_div_rem/trifecta.rs":"87eef69da255b809fd710b14f2eb3f9f59e3dac625f8564ebc8ba78f9763523b","src/int/udiv.rs":"3732b490a472505411577f008b92f489287745968ce6791665201201377d3475","src/lib.rs":"557cd24974b4be235d3cdc4846eeacd8e536330ff54d037b8d56b21d99f64b3c","src/macros.rs":"4e6c006a47d7437f91a92802ecdceea63c60f3924ec98a429a14506c41c5f5dc","src/math.rs":"48bb0d97a1b56d960c6b5130b9a62a1cb1e8e5d62a522163022b8ea24c6cfcca","src/mem/impls.rs":"4517ae10a8adc2b0527529a872d8a7f98627f166d9a18a7aed6fc774b18dd843","src/mem/mod.rs":"791fa5fab759060e966de994b637e88c3c329cf2c0343efccca1da9dc80fcb66","src/mem/x86_64.rs":"912d4e955440113b97bd7dc836697df01a692524edd48cafaf599783ae277b85","src/probestack.rs":"ef5c07e9b95de7b2b77a937789fcfefd9846274317489ad6d623e377c9888601","src/riscv.rs":"8aa8cde1106a179e9a37724193bc5cea5defe212eb38e3049c23b254578f78d4","src/x86.rs":"117b50d6725ee0af0a7b3d197ea580655561f66a870ebc450d96af22bf7f39f6","src/x86_64.rs":"4f16bc9fad7757d48a6da3a078c715dd3a22154aadb4f1998d4c1b5d91396f9e"},"package":"71b72fde1d7792ca3bd654f7c3ea4508f9e4d0c826e24179eabb7fcc97a90bc3"}
\ No newline at end of file diff --git a/vendor/compiler_builtins/Cargo.lock b/vendor/compiler_builtins/Cargo.lock new file mode 100644 index 000000000..e184f7e20 --- /dev/null +++ b/vendor/compiler_builtins/Cargo.lock @@ -0,0 +1,23 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "cc" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0" + +[[package]] +name = "compiler_builtins" +version = "0.1.73" +dependencies = [ + "cc", + "rustc-std-workspace-core", +] + +[[package]] +name = "rustc-std-workspace-core" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1956f5517128a2b6f23ab2dadf1a976f4f5b27962e7724c2bf3d45e539ec098c" diff --git a/vendor/compiler_builtins/Cargo.toml b/vendor/compiler_builtins/Cargo.toml new file mode 100644 index 000000000..765e99f94 --- /dev/null +++ b/vendor/compiler_builtins/Cargo.toml @@ -0,0 +1,73 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +name = "compiler_builtins" +version = "0.1.73" +authors = ["Jorge Aparicio <japaricious@gmail.com>"] +links = "compiler-rt" +include = [ + "/Cargo.toml", + "/build.rs", + "/src/*", + "/examples/*", + "/LICENSE.txt", + "/README.md", + "/compiler-rt/*", + "/libm/src/math/*", +] +description = """ +Compiler intrinsics used by the Rust compiler. Also available for other targets +if necessary! +""" +homepage = "https://github.com/rust-lang/compiler-builtins" +documentation = "https://docs.rs/compiler_builtins" +readme = "README.md" +license = "MIT/Apache-2.0" +repository = "https://github.com/rust-lang/compiler-builtins" + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" + +[lib] +test = false + +[[example]] +name = "intrinsics" +required-features = ["compiler-builtins"] + +[dependencies.core] +version = "1.0.0" +optional = true +package = "rustc-std-workspace-core" + +[dev-dependencies] + +[build-dependencies.cc] +version = "1.0" +optional = true + +[features] +c = ["cc"] +compiler-builtins = [] +default = ["compiler-builtins"] +mangled-names = [] +mem = [] +no-asm = [] +no-lang-items = [] +public-test-deps = [] +rustc-dep-of-std = [ + "compiler-builtins", + "core", +] diff --git a/vendor/compiler_builtins/README.md b/vendor/compiler_builtins/README.md new file mode 100644 index 000000000..8b25558a8 --- /dev/null +++ b/vendor/compiler_builtins/README.md @@ -0,0 +1,398 @@ +# `compiler-builtins` + +> Porting `compiler-rt` intrinsics to Rust + +See [rust-lang/rust#35437][0]. + +[0]: https://github.com/rust-lang/rust/issues/35437 + +## When and how to use this crate? + +If you are working with a target that doesn't have binary releases of std +available via rustup (this probably means you are building the core crate +yourself) and need compiler-rt intrinsics (i.e. you are probably getting linker +errors when building an executable: `undefined reference to __aeabi_memcpy`), +you can use this crate to get those intrinsics and solve the linker errors. To +do that, add this crate somewhere in the dependency graph of the crate you are +building: + +``` toml +# Cargo.toml +[dependencies] +compiler_builtins = { git = "https://github.com/rust-lang/compiler-builtins" } +``` + +``` rust +extern crate compiler_builtins; + +// ... +``` + +If you still get an "undefined reference to $INTRINSIC" error after that change, +that means that we haven't ported `$INTRINSIC` to Rust yet! Please open [an +issue] with the name of the intrinsic and the LLVM triple (e.g. +thumbv7m-none-eabi) of the target you are using. That way we can prioritize +porting that particular intrinsic. + +If you've got a C compiler available for your target then while we implement +this intrinsic you can temporarily enable a fallback to the actual compiler-rt +implementation as well for unimplemented intrinsics: + +```toml +[dependencies.compiler_builtins] +git = "https://github.com/rust-lang/compiler-builtins" +features = ["c"] +``` + +[an issue]: https://github.com/rust-lang/compiler-builtins/issues + +## Contributing + +1. Pick one or more intrinsics from the [pending list](#progress). +2. Fork this repository. +3. Port the intrinsic(s) and their corresponding [unit tests][1] from their + [C implementation][2] to Rust. +4. Implement a [test generator][3] to compare the behavior of the ported intrinsic(s) + with their implementation on the testing host. Note that randomized compiler-builtin tests + should be run using `cargo test --features gen-tests`. +4. Send a Pull Request (PR). +5. Once the PR passes our extensive [testing infrastructure][4], we'll merge it! +6. Celebrate :tada: + +[1]: https://github.com/rust-lang/compiler-rt/tree/8598065bd965d9713bfafb6c1e766d63a7b17b89/test/builtins/Unit +[2]: https://github.com/rust-lang/compiler-rt/tree/8598065bd965d9713bfafb6c1e766d63a7b17b89/lib/builtins +[3]: https://github.com/rust-lang/compiler-builtins/blob/0ba07e49264a54cb5bbd4856fcea083bb3fbec15/build.rs#L180-L265 +[4]: https://travis-ci.org/rust-lang/compiler-builtins + +### Porting Reminders + +1. [Rust][5a] and [C][5b] have slightly different operator precedence. C evaluates comparisons (`== !=`) before bitwise operations (`& | ^`), while Rust evaluates the other way. +2. C assumes wrapping operations everywhere. Rust panics on overflow when in debug mode. Consider using the [Wrapping][6] type or the explicit [wrapping_*][7] functions where applicable. +3. Note [C implicit casts][8], especially integer promotion. Rust is much more explicit about casting, so be sure that any cast which affects the output is ported to the Rust implementation. +4. Rust has [many functions][9] for integer or floating point manipulation in the standard library. Consider using one of these functions rather than porting a new one. + +[5a]: https://doc.rust-lang.org/reference/expressions.html#expression-precedence +[5b]: http://en.cppreference.com/w/c/language/operator_precedence +[6]: https://doc.rust-lang.org/core/num/struct.Wrapping.html +[7]: https://doc.rust-lang.org/std/primitive.i32.html#method.wrapping_add +[8]: http://en.cppreference.com/w/cpp/language/implicit_conversion +[9]: https://doc.rust-lang.org/std/primitive.i32.html + +## Progress + +- [x] adddf3.c +- [x] addsf3.c +- [x] arm/adddf3vfp.S +- [x] arm/addsf3vfp.S +- [x] arm/aeabi_dcmp.S +- [x] arm/aeabi_fcmp.S +- [x] arm/aeabi_idivmod.S +- [x] arm/aeabi_ldivmod.S +- [x] arm/aeabi_memcpy.S +- [x] arm/aeabi_memmove.S +- [x] arm/aeabi_memset.S +- [x] arm/aeabi_uidivmod.S +- [x] arm/aeabi_uldivmod.S +- [x] arm/divdf3vfp.S +- [ ] arm/divmodsi4.S (generic version is done) +- [x] arm/divsf3vfp.S +- [ ] arm/divsi3.S (generic version is done) +- [x] arm/eqdf2vfp.S +- [x] arm/eqsf2vfp.S +- [x] arm/extendsfdf2vfp.S +- [ ] arm/fixdfsivfp.S +- [ ] arm/fixsfsivfp.S +- [ ] arm/fixunsdfsivfp.S +- [ ] arm/fixunssfsivfp.S +- [ ] arm/floatsidfvfp.S +- [ ] arm/floatsisfvfp.S +- [ ] arm/floatunssidfvfp.S +- [ ] arm/floatunssisfvfp.S +- [x] arm/gedf2vfp.S +- [x] arm/gesf2vfp.S +- [x] arm/gtdf2vfp.S +- [x] arm/gtsf2vfp.S +- [x] arm/ledf2vfp.S +- [x] arm/lesf2vfp.S +- [x] arm/ltdf2vfp.S +- [x] arm/ltsf2vfp.S +- [ ] arm/modsi3.S (generic version is done) +- [x] arm/muldf3vfp.S +- [x] arm/mulsf3vfp.S +- [x] arm/nedf2vfp.S +- [ ] arm/negdf2vfp.S +- [ ] arm/negsf2vfp.S +- [x] arm/nesf2vfp.S +- [x] arm/softfloat-alias.list +- [x] arm/subdf3vfp.S +- [x] arm/subsf3vfp.S +- [x] arm/truncdfsf2vfp.S +- [ ] arm/udivmodsi4.S (generic version is done) +- [ ] arm/udivsi3.S (generic version is done) +- [ ] arm/umodsi3.S (generic version is done) +- [ ] arm/unorddf2vfp.S +- [ ] arm/unordsf2vfp.S +- [x] ashldi3.c +- [x] ashrdi3.c +- [x] comparedf2.c +- [x] comparesf2.c +- [x] divdf3.c +- [x] divdi3.c +- [x] divmoddi4.c +- [x] divmodsi4.c +- [x] divsf3.c +- [x] divsi3.c +- [ ] extendhfsf2.c +- [x] extendsfdf2.c +- [x] fixdfdi.c +- [x] fixdfsi.c +- [x] fixsfdi.c +- [x] fixsfsi.c +- [x] fixunsdfdi.c +- [x] fixunsdfsi.c +- [x] fixunssfdi.c +- [x] fixunssfsi.c +- [x] floatdidf.c +- [x] floatdisf.c +- [x] floatsidf.c +- [x] floatsisf.c +- [x] floatundidf.c +- [x] floatundisf.c +- [x] floatunsidf.c +- [x] floatunsisf.c +- [ ] i386/ashldi3.S +- [ ] i386/ashrdi3.S +- [x] i386/chkstk.S +- [x] i386/chkstk2.S +- [ ] i386/divdi3.S +- [ ] i386/lshrdi3.S +- [ ] i386/moddi3.S +- [ ] i386/muldi3.S +- [ ] i386/udivdi3.S +- [ ] i386/umoddi3.S +- [x] lshrdi3.c +- [x] moddi3.c +- [x] modsi3.c +- [x] muldf3.c +- [x] muldi3.c +- [x] mulodi4.c +- [x] mulosi4.c +- [x] mulsf3.c +- [x] powidf2.c +- [x] powisf2.c +- [x] subdf3.c +- [x] subsf3.c +- [ ] truncdfhf2.c +- [x] truncdfsf2.c +- [ ] truncsfhf2.c +- [x] udivdi3.c +- [x] udivmoddi4.c +- [x] udivmodsi4.c +- [x] udivsi3.c +- [x] umoddi3.c +- [x] umodsi3.c +- [x] x86_64/chkstk.S +- [x] x86_64/chkstk2.S + +These builtins are needed to support 128-bit integers, which are in the process of being added to Rust. + +- [x] ashlti3.c +- [x] ashrti3.c +- [x] divti3.c +- [x] fixdfti.c +- [x] fixsfti.c +- [x] fixunsdfti.c +- [x] fixunssfti.c +- [x] floattidf.c +- [x] floattisf.c +- [x] floatuntidf.c +- [x] floatuntisf.c +- [x] lshrti3.c +- [x] modti3.c +- [x] muloti4.c +- [x] multi3.c +- [x] udivmodti4.c +- [x] udivti3.c +- [x] umodti3.c + +## Unimplemented functions + +These builtins involve floating-point types ("`f128`", "`f80`" and complex numbers) that are not supported by Rust. + +- ~~addtf3.c~~ +- ~~comparetf2.c~~ +- ~~divdc3.c~~ +- ~~divsc3.c~~ +- ~~divtc3.c~~ +- ~~divtf3.c~~ +- ~~divxc3.c~~ +- ~~extenddftf2.c~~ +- ~~extendsftf2.c~~ +- ~~fixtfdi.c~~ +- ~~fixtfsi.c~~ +- ~~fixtfti.c~~ +- ~~fixunstfdi.c~~ +- ~~fixunstfsi.c~~ +- ~~fixunstfti.c~~ +- ~~fixunsxfdi.c~~ +- ~~fixunsxfsi.c~~ +- ~~fixunsxfti.c~~ +- ~~fixxfdi.c~~ +- ~~fixxfti.c~~ +- ~~floatditf.c~~ +- ~~floatdixf.c~~ +- ~~floatsitf.c~~ +- ~~floattixf.c~~ +- ~~floatunditf.c~~ +- ~~floatundixf.c~~ +- ~~floatunsitf.c~~ +- ~~floatuntixf.c~~ +- ~~i386/floatdixf.S~~ +- ~~i386/floatundixf.S~~ +- ~~muldc3.c~~ +- ~~mulsc3.c~~ +- ~~multc3.c~~ +- ~~multf3.c~~ +- ~~mulxc3.c~~ +- ~~powitf2.c~~ +- ~~powixf2.c~~ +- ~~ppc/divtc3.c~~ +- ~~ppc/fixtfdi.c~~ +- ~~ppc/fixunstfdi.c~~ +- ~~ppc/floatditf.c~~ +- ~~ppc/floatunditf.c~~ +- ~~ppc/gcc_qadd.c~~ +- ~~ppc/gcc_qdiv.c~~ +- ~~ppc/gcc_qmul.c~~ +- ~~ppc/gcc_qsub.c~~ +- ~~ppc/multc3.c~~ +- ~~subtf3.c~~ +- ~~trunctfdf2.c~~ +- ~~trunctfsf2.c~~ +- ~~x86_64/floatdixf.c~~ +- ~~x86_64/floatundixf.S~~ + +These builtins are never called by LLVM. + +- ~~absvdi2.c~~ +- ~~absvsi2.c~~ +- ~~absvti2.c~~ +- ~~addvdi3.c~~ +- ~~addvsi3.c~~ +- ~~addvti3.c~~ +- ~~arm/aeabi_cdcmp.S~~ +- ~~arm/aeabi_cdcmpeq_check_nan.c~~ +- ~~arm/aeabi_cfcmp.S~~ +- ~~arm/aeabi_cfcmpeq_check_nan.c~~ +- ~~arm/aeabi_div0.c~~ +- ~~arm/aeabi_drsub.c~~ +- ~~arm/aeabi_frsub.c~~ +- ~~arm/aeabi_memcmp.S~~ +- ~~arm/bswapdi2.S~~ +- ~~arm/bswapsi2.S~~ +- ~~arm/clzdi2.S~~ +- ~~arm/clzsi2.S~~ +- ~~arm/comparesf2.S~~ +- ~~arm/restore_vfp_d8_d15_regs.S~~ +- ~~arm/save_vfp_d8_d15_regs.S~~ +- ~~arm/switch16.S~~ +- ~~arm/switch32.S~~ +- ~~arm/switch8.S~~ +- ~~arm/switchu8.S~~ +- ~~clzdi2.c~~ +- ~~clzsi2.c~~ +- ~~clzti2.c~~ +- ~~cmpdi2.c~~ +- ~~cmpti2.c~~ +- ~~ctzdi2.c~~ +- ~~ctzsi2.c~~ +- ~~ctzti2.c~~ +- ~~ffsdi2.c~~ - this is [called by gcc][jemalloc-fail] though! +- ~~ffsti2.c~~ +- ~~mulvdi3.c~~ +- ~~mulvsi3.c~~ +- ~~mulvti3.c~~ +- ~~negdf2.c~~ +- ~~negdi2.c~~ +- ~~negsf2.c~~ +- ~~negti2.c~~ +- ~~negvdi2.c~~ +- ~~negvsi2.c~~ +- ~~negvti2.c~~ +- ~~paritydi2.c~~ +- ~~paritysi2.c~~ +- ~~parityti2.c~~ +- ~~popcountdi2.c~~ +- ~~popcountsi2.c~~ +- ~~popcountti2.c~~ +- ~~ppc/restFP.S~~ +- ~~ppc/saveFP.S~~ +- ~~subvdi3.c~~ +- ~~subvsi3.c~~ +- ~~subvti3.c~~ +- ~~ucmpdi2.c~~ +- ~~ucmpti2.c~~ +- ~~udivmodti4.c~~ + +[jemalloc-fail]: https://travis-ci.org/rust-lang/rust/jobs/249772758 + +Rust only exposes atomic types on platforms that support them, and therefore does not need to fall back to software implementations. + +- ~~arm/sync_fetch_and_add_4.S~~ +- ~~arm/sync_fetch_and_add_8.S~~ +- ~~arm/sync_fetch_and_and_4.S~~ +- ~~arm/sync_fetch_and_and_8.S~~ +- ~~arm/sync_fetch_and_max_4.S~~ +- ~~arm/sync_fetch_and_max_8.S~~ +- ~~arm/sync_fetch_and_min_4.S~~ +- ~~arm/sync_fetch_and_min_8.S~~ +- ~~arm/sync_fetch_and_nand_4.S~~ +- ~~arm/sync_fetch_and_nand_8.S~~ +- ~~arm/sync_fetch_and_or_4.S~~ +- ~~arm/sync_fetch_and_or_8.S~~ +- ~~arm/sync_fetch_and_sub_4.S~~ +- ~~arm/sync_fetch_and_sub_8.S~~ +- ~~arm/sync_fetch_and_umax_4.S~~ +- ~~arm/sync_fetch_and_umax_8.S~~ +- ~~arm/sync_fetch_and_umin_4.S~~ +- ~~arm/sync_fetch_and_umin_8.S~~ +- ~~arm/sync_fetch_and_xor_4.S~~ +- ~~arm/sync_fetch_and_xor_8.S~~ +- ~~arm/sync_synchronize.S~~ +- ~~atomic.c~~ +- ~~atomic_flag_clear.c~~ +- ~~atomic_flag_clear_explicit.c~~ +- ~~atomic_flag_test_and_set.c~~ +- ~~atomic_flag_test_and_set_explicit.c~~ +- ~~atomic_signal_fence.c~~ +- ~~atomic_thread_fence.c~~ + +Miscellaneous functionality that is not used by Rust. + +- ~~apple_versioning.c~~ +- ~~clear_cache.c~~ +- ~~emutls.c~~ +- ~~enable_execute_stack.c~~ +- ~~eprintf.c~~ +- ~~gcc_personality_v0.c~~ +- ~~trampoline_setup.c~~ + +Floating-point implementations of builtins that are only called from soft-float code. It would be better to simply use the generic soft-float versions in this case. + +- ~~i386/floatdidf.S~~ +- ~~i386/floatdisf.S~~ +- ~~i386/floatundidf.S~~ +- ~~i386/floatundisf.S~~ +- ~~x86_64/floatundidf.S~~ +- ~~x86_64/floatundisf.S~~ +- ~~x86_64/floatdidf.c~~ +- ~~x86_64/floatdisf.c~~ + +## License + +The compiler-builtins crate is dual licensed under both the University of +Illinois "BSD-Like" license and the MIT license. As a user of this code you may +choose to use it under either license. As a contributor, you agree to allow +your code to be used under both. + +Full text of the relevant licenses is in LICENSE.TXT. diff --git a/vendor/compiler_builtins/build.rs b/vendor/compiler_builtins/build.rs new file mode 100644 index 000000000..11deffb9c --- /dev/null +++ b/vendor/compiler_builtins/build.rs @@ -0,0 +1,579 @@ +use std::env; + +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + + let target = env::var("TARGET").unwrap(); + let cwd = env::current_dir().unwrap(); + + println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); + + // Activate libm's unstable features to make full use of Nightly. + println!("cargo:rustc-cfg=feature=\"unstable\""); + + // Emscripten's runtime includes all the builtins + if target.contains("emscripten") { + return; + } + + // OpenBSD provides compiler_rt by default, use it instead of rebuilding it from source + if target.contains("openbsd") { + println!("cargo:rustc-link-search=native=/usr/lib"); + println!("cargo:rustc-link-lib=compiler_rt"); + return; + } + + // Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to + // provide them. + if (target.contains("wasm") && !target.contains("wasi")) + || (target.contains("sgx") && target.contains("fortanix")) + || target.contains("-none") + || target.contains("nvptx") + { + println!("cargo:rustc-cfg=feature=\"mem\""); + } + + // These targets have hardware unaligned access support. + if target.contains("x86_64") + || target.contains("i686") + || target.contains("aarch64") + || target.contains("bpf") + { + println!("cargo:rustc-cfg=feature=\"mem-unaligned\""); + } + + // NOTE we are going to assume that llvm-target, what determines our codegen option, matches the + // target triple. This is usually correct for our built-in targets but can break in presence of + // custom targets, which can have arbitrary names. + let llvm_target = target.split('-').collect::<Vec<_>>(); + + // Build missing intrinsics from compiler-rt C source code. If we're + // mangling names though we assume that we're also in test mode so we don't + // build anything and we rely on the upstream implementation of compiler-rt + // functions + if !cfg!(feature = "mangled-names") && cfg!(feature = "c") { + // Don't use a C compiler for these targets: + // + // * wasm - clang for wasm is somewhat hard to come by and it's + // unlikely that the C is really that much better than our own Rust. + // * nvptx - everything is bitcode, not compatible with mixed C/Rust + // * riscv - the rust-lang/rust distribution container doesn't have a C + // compiler nor is cc-rs ready for compilation to riscv (at this + // time). This can probably be removed in the future + if !target.contains("wasm") && !target.contains("nvptx") && !target.starts_with("riscv") { + #[cfg(feature = "c")] + c::compile(&llvm_target, &target); + } + } + + // To compile intrinsics.rs for thumb targets, where there is no libc + if llvm_target[0].starts_with("thumb") { + println!("cargo:rustc-cfg=thumb") + } + + // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because + // these targets do not have full Thumb-2 support but only original Thumb-1. + // We have to cfg our code accordingly. + if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { + println!("cargo:rustc-cfg=thumb_1") + } + + // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This + // includes the old androideabi. It is deprecated but it is available as a + // rustc target (arm-linux-androideabi). + if llvm_target[0] == "armv4t" + || llvm_target[0] == "armv5te" + || target == "arm-linux-androideabi" + { + println!("cargo:rustc-cfg=kernel_user_helpers") + } +} + +#[cfg(feature = "c")] +mod c { + extern crate cc; + + use std::collections::{BTreeMap, HashSet}; + use std::env; + use std::fs::File; + use std::io::Write; + use std::path::{Path, PathBuf}; + + struct Sources { + // SYMBOL -> PATH TO SOURCE + map: BTreeMap<&'static str, &'static str>, + } + + impl Sources { + fn new() -> Sources { + Sources { + map: BTreeMap::new(), + } + } + + fn extend(&mut self, sources: &[(&'static str, &'static str)]) { + // NOTE Some intrinsics have both a generic implementation (e.g. + // `floatdidf.c`) and an arch optimized implementation + // (`x86_64/floatdidf.c`). In those cases, we keep the arch optimized + // implementation and discard the generic implementation. If we don't + // and keep both implementations, the linker will yell at us about + // duplicate symbols! + for (symbol, src) in sources { + if src.contains("/") { + // Arch-optimized implementation (preferred) + self.map.insert(symbol, src); + } else { + // Generic implementation + if !self.map.contains_key(symbol) { + self.map.insert(symbol, src); + } + } + } + } + + fn remove(&mut self, symbols: &[&str]) { + for symbol in symbols { + self.map.remove(*symbol).unwrap(); + } + } + } + + /// Compile intrinsics from the compiler-rt C source code + pub fn compile(llvm_target: &[&str], target: &String) { + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap(); + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); + let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap(); + let mut consider_float_intrinsics = true; + let cfg = &mut cc::Build::new(); + + // AArch64 GCCs exit with an error condition when they encounter any kind of floating point + // code if the `nofp` and/or `nosimd` compiler flags have been set. + // + // Therefore, evaluate if those flags are present and set a boolean that causes any + // compiler-rt intrinsics that contain floating point source to be excluded for this target. + if target_arch == "aarch64" { + let cflags_key = String::from("CFLAGS_") + &(target.to_owned().replace("-", "_")); + if let Ok(cflags_value) = env::var(cflags_key) { + if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") { + consider_float_intrinsics = false; + } + } + } + + cfg.warnings(false); + + if target_env == "msvc" { + // Don't pull in extra libraries on MSVC + cfg.flag("/Zl"); + + // Emulate C99 and C++11's __func__ for MSVC prior to 2013 CTP + cfg.define("__func__", Some("__FUNCTION__")); + } else { + // Turn off various features of gcc and such, mostly copying + // compiler-rt's build system already + cfg.flag("-fno-builtin"); + cfg.flag("-fvisibility=hidden"); + cfg.flag("-ffreestanding"); + // Avoid the following warning appearing once **per file**: + // clang: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7' [-Wignored-optimization-argument] + // + // Note that compiler-rt's build system also checks + // + // `check_cxx_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG)` + // + // in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19. + cfg.flag_if_supported("-fomit-frame-pointer"); + cfg.define("VISIBILITY_HIDDEN", None); + } + + let mut sources = Sources::new(); + sources.extend(&[ + ("__absvdi2", "absvdi2.c"), + ("__absvsi2", "absvsi2.c"), + ("__addvdi3", "addvdi3.c"), + ("__addvsi3", "addvsi3.c"), + ("__clzdi2", "clzdi2.c"), + ("__clzsi2", "clzsi2.c"), + ("__cmpdi2", "cmpdi2.c"), + ("__ctzdi2", "ctzdi2.c"), + ("__ctzsi2", "ctzsi2.c"), + ("__int_util", "int_util.c"), + ("__mulvdi3", "mulvdi3.c"), + ("__mulvsi3", "mulvsi3.c"), + ("__negdi2", "negdi2.c"), + ("__negvdi2", "negvdi2.c"), + ("__negvsi2", "negvsi2.c"), + ("__paritydi2", "paritydi2.c"), + ("__paritysi2", "paritysi2.c"), + ("__popcountdi2", "popcountdi2.c"), + ("__popcountsi2", "popcountsi2.c"), + ("__subvdi3", "subvdi3.c"), + ("__subvsi3", "subvsi3.c"), + ("__ucmpdi2", "ucmpdi2.c"), + ]); + + if consider_float_intrinsics { + sources.extend(&[ + ("__divdc3", "divdc3.c"), + ("__divsc3", "divsc3.c"), + ("__divxc3", "divxc3.c"), + ("__extendhfsf2", "extendhfsf2.c"), + ("__muldc3", "muldc3.c"), + ("__mulsc3", "mulsc3.c"), + ("__mulxc3", "mulxc3.c"), + ("__negdf2", "negdf2.c"), + ("__negsf2", "negsf2.c"), + ("__powixf2", "powixf2.c"), + ("__truncdfhf2", "truncdfhf2.c"), + ("__truncsfhf2", "truncsfhf2.c"), + ]); + } + + // When compiling in rustbuild (the rust-lang/rust repo) this library + // also needs to satisfy intrinsics that jemalloc or C in general may + // need, so include a few more that aren't typically needed by + // LLVM/Rust. + if cfg!(feature = "rustbuild") { + sources.extend(&[("__ffsdi2", "ffsdi2.c")]); + } + + // On iOS and 32-bit OSX these are all just empty intrinsics, no need to + // include them. + if target_os != "ios" + && target_os != "watchos" + && (target_vendor != "apple" || target_arch != "x86") + { + sources.extend(&[ + ("__absvti2", "absvti2.c"), + ("__addvti3", "addvti3.c"), + ("__clzti2", "clzti2.c"), + ("__cmpti2", "cmpti2.c"), + ("__ctzti2", "ctzti2.c"), + ("__ffsti2", "ffsti2.c"), + ("__mulvti3", "mulvti3.c"), + ("__negti2", "negti2.c"), + ("__parityti2", "parityti2.c"), + ("__popcountti2", "popcountti2.c"), + ("__subvti3", "subvti3.c"), + ("__ucmpti2", "ucmpti2.c"), + ]); + + if consider_float_intrinsics { + sources.extend(&[("__negvti2", "negvti2.c")]); + } + } + + if target_vendor == "apple" { + sources.extend(&[ + ("atomic_flag_clear", "atomic_flag_clear.c"), + ("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"), + ("atomic_flag_test_and_set", "atomic_flag_test_and_set.c"), + ( + "atomic_flag_test_and_set_explicit", + "atomic_flag_test_and_set_explicit.c", + ), + ("atomic_signal_fence", "atomic_signal_fence.c"), + ("atomic_thread_fence", "atomic_thread_fence.c"), + ]); + } + + if target_env == "msvc" { + if target_arch == "x86_64" { + sources.extend(&[("__floatdixf", "x86_64/floatdixf.c")]); + } + } else { + // None of these seem to be used on x86_64 windows, and they've all + // got the wrong ABI anyway, so we want to avoid them. + if target_os != "windows" { + if target_arch == "x86_64" { + sources.extend(&[ + ("__floatdixf", "x86_64/floatdixf.c"), + ("__floatundixf", "x86_64/floatundixf.S"), + ]); + } + } + + if target_arch == "x86" { + sources.extend(&[ + ("__ashldi3", "i386/ashldi3.S"), + ("__ashrdi3", "i386/ashrdi3.S"), + ("__divdi3", "i386/divdi3.S"), + ("__floatdixf", "i386/floatdixf.S"), + ("__floatundixf", "i386/floatundixf.S"), + ("__lshrdi3", "i386/lshrdi3.S"), + ("__moddi3", "i386/moddi3.S"), + ("__muldi3", "i386/muldi3.S"), + ("__udivdi3", "i386/udivdi3.S"), + ("__umoddi3", "i386/umoddi3.S"), + ]); + } + } + + if target_arch == "arm" + && target_os != "ios" + && target_os != "watchos" + && target_env != "msvc" + { + sources.extend(&[ + ("__aeabi_div0", "arm/aeabi_div0.c"), + ("__aeabi_drsub", "arm/aeabi_drsub.c"), + ("__aeabi_frsub", "arm/aeabi_frsub.c"), + ("__bswapdi2", "arm/bswapdi2.S"), + ("__bswapsi2", "arm/bswapsi2.S"), + ("__clzdi2", "arm/clzdi2.S"), + ("__clzsi2", "arm/clzsi2.S"), + ("__divmodsi4", "arm/divmodsi4.S"), + ("__divsi3", "arm/divsi3.S"), + ("__modsi3", "arm/modsi3.S"), + ("__switch16", "arm/switch16.S"), + ("__switch32", "arm/switch32.S"), + ("__switch8", "arm/switch8.S"), + ("__switchu8", "arm/switchu8.S"), + ("__sync_synchronize", "arm/sync_synchronize.S"), + ("__udivmodsi4", "arm/udivmodsi4.S"), + ("__udivsi3", "arm/udivsi3.S"), + ("__umodsi3", "arm/umodsi3.S"), + ]); + + if target_os == "freebsd" { + sources.extend(&[("__clear_cache", "clear_cache.c")]); + } + + // First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM. + // Second are little-endian only, so build fail on big-endian targets. + // Temporally workaround: exclude these files for big-endian targets. + if !llvm_target[0].starts_with("thumbeb") && !llvm_target[0].starts_with("armeb") { + sources.extend(&[ + ("__aeabi_cdcmp", "arm/aeabi_cdcmp.S"), + ("__aeabi_cdcmpeq_check_nan", "arm/aeabi_cdcmpeq_check_nan.c"), + ("__aeabi_cfcmp", "arm/aeabi_cfcmp.S"), + ("__aeabi_cfcmpeq_check_nan", "arm/aeabi_cfcmpeq_check_nan.c"), + ]); + } + } + + if llvm_target[0] == "armv7" { + sources.extend(&[ + ("__sync_fetch_and_add_4", "arm/sync_fetch_and_add_4.S"), + ("__sync_fetch_and_add_8", "arm/sync_fetch_and_add_8.S"), + ("__sync_fetch_and_and_4", "arm/sync_fetch_and_and_4.S"), + ("__sync_fetch_and_and_8", "arm/sync_fetch_and_and_8.S"), + ("__sync_fetch_and_max_4", "arm/sync_fetch_and_max_4.S"), + ("__sync_fetch_and_max_8", "arm/sync_fetch_and_max_8.S"), + ("__sync_fetch_and_min_4", "arm/sync_fetch_and_min_4.S"), + ("__sync_fetch_and_min_8", "arm/sync_fetch_and_min_8.S"), + ("__sync_fetch_and_nand_4", "arm/sync_fetch_and_nand_4.S"), + ("__sync_fetch_and_nand_8", "arm/sync_fetch_and_nand_8.S"), + ("__sync_fetch_and_or_4", "arm/sync_fetch_and_or_4.S"), + ("__sync_fetch_and_or_8", "arm/sync_fetch_and_or_8.S"), + ("__sync_fetch_and_sub_4", "arm/sync_fetch_and_sub_4.S"), + ("__sync_fetch_and_sub_8", "arm/sync_fetch_and_sub_8.S"), + ("__sync_fetch_and_umax_4", "arm/sync_fetch_and_umax_4.S"), + ("__sync_fetch_and_umax_8", "arm/sync_fetch_and_umax_8.S"), + ("__sync_fetch_and_umin_4", "arm/sync_fetch_and_umin_4.S"), + ("__sync_fetch_and_umin_8", "arm/sync_fetch_and_umin_8.S"), + ("__sync_fetch_and_xor_4", "arm/sync_fetch_and_xor_4.S"), + ("__sync_fetch_and_xor_8", "arm/sync_fetch_and_xor_8.S"), + ]); + } + + if llvm_target.last().unwrap().ends_with("eabihf") { + if !llvm_target[0].starts_with("thumbv7em") + && !llvm_target[0].starts_with("thumbv8m.main") + { + // The FPU option chosen for these architectures in cc-rs, ie: + // -mfpu=fpv4-sp-d16 for thumbv7em + // -mfpu=fpv5-sp-d16 for thumbv8m.main + // do not support double precision floating points conversions so the files + // that include such instructions are not included for these targets. + sources.extend(&[ + ("__fixdfsivfp", "arm/fixdfsivfp.S"), + ("__fixunsdfsivfp", "arm/fixunsdfsivfp.S"), + ("__floatsidfvfp", "arm/floatsidfvfp.S"), + ("__floatunssidfvfp", "arm/floatunssidfvfp.S"), + ]); + } + + sources.extend(&[ + ("__fixsfsivfp", "arm/fixsfsivfp.S"), + ("__fixunssfsivfp", "arm/fixunssfsivfp.S"), + ("__floatsisfvfp", "arm/floatsisfvfp.S"), + ("__floatunssisfvfp", "arm/floatunssisfvfp.S"), + ("__floatunssisfvfp", "arm/floatunssisfvfp.S"), + ("__restore_vfp_d8_d15_regs", "arm/restore_vfp_d8_d15_regs.S"), + ("__save_vfp_d8_d15_regs", "arm/save_vfp_d8_d15_regs.S"), + ("__negdf2vfp", "arm/negdf2vfp.S"), + ("__negsf2vfp", "arm/negsf2vfp.S"), + ]); + } + + if target_arch == "aarch64" && consider_float_intrinsics { + sources.extend(&[ + ("__comparetf2", "comparetf2.c"), + ("__extenddftf2", "extenddftf2.c"), + ("__extendsftf2", "extendsftf2.c"), + ("__fixtfdi", "fixtfdi.c"), + ("__fixtfsi", "fixtfsi.c"), + ("__fixtfti", "fixtfti.c"), + ("__fixunstfdi", "fixunstfdi.c"), + ("__fixunstfsi", "fixunstfsi.c"), + ("__fixunstfti", "fixunstfti.c"), + ("__floatditf", "floatditf.c"), + ("__floatsitf", "floatsitf.c"), + ("__floatunditf", "floatunditf.c"), + ("__floatunsitf", "floatunsitf.c"), + ("__trunctfdf2", "trunctfdf2.c"), + ("__trunctfsf2", "trunctfsf2.c"), + ("__addtf3", "addtf3.c"), + ("__multf3", "multf3.c"), + ("__subtf3", "subtf3.c"), + ("__divtf3", "divtf3.c"), + ("__powitf2", "powitf2.c"), + ("__fe_getround", "fp_mode.c"), + ("__fe_raise_inexact", "fp_mode.c"), + ]); + + if target_os != "windows" { + sources.extend(&[("__multc3", "multc3.c")]); + } + } + + if target_arch == "mips" { + sources.extend(&[("__bswapsi2", "bswapsi2.c")]); + } + + if target_arch == "mips64" { + sources.extend(&[ + ("__extenddftf2", "extenddftf2.c"), + ("__netf2", "comparetf2.c"), + ("__addtf3", "addtf3.c"), + ("__multf3", "multf3.c"), + ("__subtf3", "subtf3.c"), + ("__fixtfsi", "fixtfsi.c"), + ("__floatsitf", "floatsitf.c"), + ("__fixunstfsi", "fixunstfsi.c"), + ("__floatunsitf", "floatunsitf.c"), + ("__fe_getround", "fp_mode.c"), + ("__divtf3", "divtf3.c"), + ("__trunctfdf2", "trunctfdf2.c"), + ]); + } + + // Remove the assembly implementations that won't compile for the target + if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { + let mut to_remove = Vec::new(); + for (k, v) in sources.map.iter() { + if v.ends_with(".S") { + to_remove.push(*k); + } + } + sources.remove(&to_remove); + + // But use some generic implementations where possible + sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")]) + } + + if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" { + sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]); + } + + // Android uses emulated TLS so we need a runtime support function. + if target_os == "android" { + sources.extend(&[("__emutls_get_address", "emutls.c")]); + + // Work around a bug in the NDK headers (fixed in + // https://r.android.com/2038949 which will be released in a future + // NDK version) by providing a definition of LONG_BIT. + cfg.define("LONG_BIT", "(8 * sizeof(long))"); + } + + // When compiling the C code we require the user to tell us where the + // source code is, and this is largely done so when we're compiling as + // part of rust-lang/rust we can use the same llvm-project repository as + // rust-lang/rust. + let root = match env::var_os("RUST_COMPILER_RT_ROOT") { + Some(s) => PathBuf::from(s), + None => panic!("RUST_COMPILER_RT_ROOT is not set"), + }; + if !root.exists() { + panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display()); + } + + // Support deterministic builds by remapping the __FILE__ prefix if the + // compiler supports it. This fixes the nondeterminism caused by the + // use of that macro in lib/builtins/int_util.h in compiler-rt. + cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display())); + + // Include out-of-line atomics for aarch64, which are all generated by supplying different + // sets of flags to the same source file. + // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430). + let src_dir = root.join("lib/builtins"); + if target_arch == "aarch64" && target_env != "msvc" { + // See below for why we're building these as separate libraries. + build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); + + // Some run-time CPU feature detection is necessary, as well. + sources.extend(&[("__aarch64_have_lse_atomics", "cpu_model.c")]); + } + + let mut added_sources = HashSet::new(); + for (sym, src) in sources.map.iter() { + let src = src_dir.join(src); + if added_sources.insert(src.clone()) { + cfg.file(&src); + println!("cargo:rerun-if-changed={}", src.display()); + } + println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); + } + + cfg.compile("libcompiler-rt.a"); + } + + fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &mut cc::Build) { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let outlined_atomics_file = builtins_dir.join("aarch64/lse.S"); + println!("cargo:rerun-if-changed={}", outlined_atomics_file.display()); + + cfg.include(&builtins_dir); + + for instruction_type in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] { + for size in &[1, 2, 4, 8, 16] { + if *size == 16 && *instruction_type != "cas" { + continue; + } + + for (model_number, model_name) in + &[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")] + { + // The original compiler-rt build system compiles the same + // source file multiple times with different compiler + // options. Here we do something slightly different: we + // create multiple .S files with the proper #defines and + // then include the original file. + // + // This is needed because the cc crate doesn't allow us to + // override the name of object files and libtool requires + // all objects in an archive to have unique names. + let path = + out_dir.join(format!("lse_{}{}_{}.S", instruction_type, size, model_name)); + let mut file = File::create(&path).unwrap(); + writeln!(file, "#define L_{}", instruction_type).unwrap(); + writeln!(file, "#define SIZE {}", size).unwrap(); + writeln!(file, "#define MODEL {}", model_number).unwrap(); + writeln!( + file, + "#include \"{}\"", + outlined_atomics_file.canonicalize().unwrap().display() + ) + .unwrap(); + drop(file); + cfg.file(path); + + let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name); + println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); + } + } + } + } +} diff --git a/vendor/compiler_builtins/examples/intrinsics.rs b/vendor/compiler_builtins/examples/intrinsics.rs new file mode 100644 index 000000000..0ca30c215 --- /dev/null +++ b/vendor/compiler_builtins/examples/intrinsics.rs @@ -0,0 +1,398 @@ +// By compiling this file we check that all the intrinsics we care about continue to be provided by +// the `compiler_builtins` crate regardless of the changes we make to it. If we, by mistake, stop +// compiling a C implementation and forget to implement that intrinsic in Rust, this file will fail +// to link due to the missing intrinsic (symbol). + +#![allow(unused_features)] +#![cfg_attr(thumb, no_main)] +#![deny(dead_code)] +#![feature(bench_black_box)] +#![feature(lang_items)] +#![feature(start)] +#![feature(allocator_api)] +#![no_std] + +extern crate panic_handler; + +#[cfg(all(not(thumb), not(windows), not(target_arch = "wasm32")))] +#[link(name = "c")] +extern "C" {} + +// Every function in this module maps will be lowered to an intrinsic by LLVM, if the platform +// doesn't have native support for the operation used in the function. ARM has a naming convention +// convention for its intrinsics that's different from other architectures; that's why some function +// have an additional comment: the function name is the ARM name for the intrinsic and the comment +// in the non-ARM name for the intrinsic. +mod intrinsics { + // truncdfsf2 + pub fn aeabi_d2f(x: f64) -> f32 { + x as f32 + } + + // fixdfsi + pub fn aeabi_d2i(x: f64) -> i32 { + x as i32 + } + + // fixdfdi + pub fn aeabi_d2l(x: f64) -> i64 { + x as i64 + } + + // fixunsdfsi + pub fn aeabi_d2uiz(x: f64) -> u32 { + x as u32 + } + + // fixunsdfdi + pub fn aeabi_d2ulz(x: f64) -> u64 { + x as u64 + } + + // adddf3 + pub fn aeabi_dadd(a: f64, b: f64) -> f64 { + a + b + } + + // eqdf2 + pub fn aeabi_dcmpeq(a: f64, b: f64) -> bool { + a == b + } + + // gtdf2 + pub fn aeabi_dcmpgt(a: f64, b: f64) -> bool { + a > b + } + + // ltdf2 + pub fn aeabi_dcmplt(a: f64, b: f64) -> bool { + a < b + } + + // divdf3 + pub fn aeabi_ddiv(a: f64, b: f64) -> f64 { + a / b + } + + // muldf3 + pub fn aeabi_dmul(a: f64, b: f64) -> f64 { + a * b + } + + // subdf3 + pub fn aeabi_dsub(a: f64, b: f64) -> f64 { + a - b + } + + // extendsfdf2 + pub fn aeabi_f2d(x: f32) -> f64 { + x as f64 + } + + // fixsfsi + pub fn aeabi_f2iz(x: f32) -> i32 { + x as i32 + } + + // fixsfdi + pub fn aeabi_f2lz(x: f32) -> i64 { + x as i64 + } + + // fixunssfsi + pub fn aeabi_f2uiz(x: f32) -> u32 { + x as u32 + } + + // fixunssfdi + pub fn aeabi_f2ulz(x: f32) -> u64 { + x as u64 + } + + // addsf3 + pub fn aeabi_fadd(a: f32, b: f32) -> f32 { + a + b + } + + // eqsf2 + pub fn aeabi_fcmpeq(a: f32, b: f32) -> bool { + a == b + } + + // gtsf2 + pub fn aeabi_fcmpgt(a: f32, b: f32) -> bool { + a > b + } + + // ltsf2 + pub fn aeabi_fcmplt(a: f32, b: f32) -> bool { + a < b + } + + // divsf3 + pub fn aeabi_fdiv(a: f32, b: f32) -> f32 { + a / b + } + + // mulsf3 + pub fn aeabi_fmul(a: f32, b: f32) -> f32 { + a * b + } + + // subsf3 + pub fn aeabi_fsub(a: f32, b: f32) -> f32 { + a - b + } + + // floatsidf + pub fn aeabi_i2d(x: i32) -> f64 { + x as f64 + } + + // floatsisf + pub fn aeabi_i2f(x: i32) -> f32 { + x as f32 + } + + pub fn aeabi_idiv(a: i32, b: i32) -> i32 { + a.wrapping_div(b) + } + + pub fn aeabi_idivmod(a: i32, b: i32) -> i32 { + a % b + } + + // floatdidf + pub fn aeabi_l2d(x: i64) -> f64 { + x as f64 + } + + // floatdisf + pub fn aeabi_l2f(x: i64) -> f32 { + x as f32 + } + + // divdi3 + pub fn aeabi_ldivmod(a: i64, b: i64) -> i64 { + a / b + } + + // muldi3 + pub fn aeabi_lmul(a: i64, b: i64) -> i64 { + a.wrapping_mul(b) + } + + // floatunsidf + pub fn aeabi_ui2d(x: u32) -> f64 { + x as f64 + } + + // floatunsisf + pub fn aeabi_ui2f(x: u32) -> f32 { + x as f32 + } + + pub fn aeabi_uidiv(a: u32, b: u32) -> u32 { + a / b + } + + pub fn aeabi_uidivmod(a: u32, b: u32) -> u32 { + a % b + } + + // floatundidf + pub fn aeabi_ul2d(x: u64) -> f64 { + x as f64 + } + + // floatundisf + pub fn aeabi_ul2f(x: u64) -> f32 { + x as f32 + } + + // udivdi3 + pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 { + a * b + } + + pub fn moddi3(a: i64, b: i64) -> i64 { + a % b + } + + pub fn mulodi4(a: i64, b: i64) -> i64 { + a * b + } + + pub fn umoddi3(a: u64, b: u64) -> u64 { + a % b + } + + pub fn muloti4(a: u128, b: u128) -> Option<u128> { + a.checked_mul(b) + } + + pub fn multi3(a: u128, b: u128) -> u128 { + a.wrapping_mul(b) + } + + pub fn ashlti3(a: u128, b: usize) -> u128 { + a >> b + } + + pub fn ashrti3(a: u128, b: usize) -> u128 { + a << b + } + + pub fn lshrti3(a: i128, b: usize) -> i128 { + a >> b + } + + pub fn udivti3(a: u128, b: u128) -> u128 { + a / b + } + + pub fn umodti3(a: u128, b: u128) -> u128 { + a % b + } + + pub fn divti3(a: i128, b: i128) -> i128 { + a / b + } + + pub fn modti3(a: i128, b: i128) -> i128 { + a % b + } + + pub fn udivsi3(a: u32, b: u32) -> u32 { + a / b + } +} + +fn run() { + use core::hint::black_box as bb; + use intrinsics::*; + + bb(aeabi_d2f(bb(2.))); + bb(aeabi_d2i(bb(2.))); + bb(aeabi_d2l(bb(2.))); + bb(aeabi_d2uiz(bb(2.))); + bb(aeabi_d2ulz(bb(2.))); + bb(aeabi_dadd(bb(2.), bb(3.))); + bb(aeabi_dcmpeq(bb(2.), bb(3.))); + bb(aeabi_dcmpgt(bb(2.), bb(3.))); + bb(aeabi_dcmplt(bb(2.), bb(3.))); + bb(aeabi_ddiv(bb(2.), bb(3.))); + bb(aeabi_dmul(bb(2.), bb(3.))); + bb(aeabi_dsub(bb(2.), bb(3.))); + bb(aeabi_f2d(bb(2.))); + bb(aeabi_f2iz(bb(2.))); + bb(aeabi_f2lz(bb(2.))); + bb(aeabi_f2uiz(bb(2.))); + bb(aeabi_f2ulz(bb(2.))); + bb(aeabi_fadd(bb(2.), bb(3.))); + bb(aeabi_fcmpeq(bb(2.), bb(3.))); + bb(aeabi_fcmpgt(bb(2.), bb(3.))); + bb(aeabi_fcmplt(bb(2.), bb(3.))); + bb(aeabi_fdiv(bb(2.), bb(3.))); + bb(aeabi_fmul(bb(2.), bb(3.))); + bb(aeabi_fsub(bb(2.), bb(3.))); + bb(aeabi_i2d(bb(2))); + bb(aeabi_i2f(bb(2))); + bb(aeabi_idiv(bb(2), bb(3))); + bb(aeabi_idivmod(bb(2), bb(3))); + bb(aeabi_l2d(bb(2))); + bb(aeabi_l2f(bb(2))); + bb(aeabi_ldivmod(bb(2), bb(3))); + bb(aeabi_lmul(bb(2), bb(3))); + bb(aeabi_ui2d(bb(2))); + bb(aeabi_ui2f(bb(2))); + bb(aeabi_uidiv(bb(2), bb(3))); + bb(aeabi_uidivmod(bb(2), bb(3))); + bb(aeabi_ul2d(bb(2))); + bb(aeabi_ul2f(bb(2))); + bb(aeabi_uldivmod(bb(2), bb(3))); + bb(moddi3(bb(2), bb(3))); + bb(mulodi4(bb(2), bb(3))); + bb(umoddi3(bb(2), bb(3))); + bb(muloti4(bb(2), bb(2))); + bb(multi3(bb(2), bb(2))); + bb(ashlti3(bb(2), bb(2))); + bb(ashrti3(bb(2), bb(2))); + bb(lshrti3(bb(2), bb(2))); + bb(udivti3(bb(2), bb(2))); + bb(umodti3(bb(2), bb(2))); + bb(divti3(bb(2), bb(2))); + bb(modti3(bb(2), bb(2))); + bb(udivsi3(bb(2), bb(2))); + + something_with_a_dtor(&|| assert_eq!(bb(1), 1)); + + extern "C" { + fn rust_begin_unwind(x: usize); + } + // if bb(false) { + unsafe { + rust_begin_unwind(0); + } + // } +} + +fn something_with_a_dtor(f: &dyn Fn()) { + struct A<'a>(&'a (dyn Fn() + 'a)); + + impl<'a> Drop for A<'a> { + fn drop(&mut self) { + (self.0)(); + } + } + let _a = A(f); + f(); +} + +#[cfg(not(thumb))] +#[start] +fn main(_: isize, _: *const *const u8) -> isize { + run(); + 0 +} + +#[cfg(thumb)] +#[no_mangle] +pub fn _start() -> ! { + run(); + loop {} +} + +#[cfg(windows)] +#[link(name = "kernel32")] +#[link(name = "msvcrt")] +extern "C" {} + +// ARM targets need these symbols +#[no_mangle] +pub fn __aeabi_unwind_cpp_pr0() {} + +#[no_mangle] +pub fn __aeabi_unwind_cpp_pr1() {} + +#[cfg(not(windows))] +#[allow(non_snake_case)] +#[no_mangle] +pub fn _Unwind_Resume() {} + +#[cfg(not(windows))] +#[lang = "eh_personality"] +#[no_mangle] +pub extern "C" fn eh_personality() {} + +#[cfg(all(windows, target_env = "gnu"))] +mod mingw_unwinding { + #[no_mangle] + pub fn rust_eh_personality() {} + #[no_mangle] + pub fn rust_eh_unwind_resume() {} + #[no_mangle] + pub fn rust_eh_register_frames() {} + #[no_mangle] + pub fn rust_eh_unregister_frames() {} +} diff --git a/vendor/compiler_builtins/libm/src/math/acos.rs b/vendor/compiler_builtins/libm/src/math/acos.rs new file mode 100644 index 000000000..23b13251e --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/acos.rs @@ -0,0 +1,112 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* acos(x) + * Method : + * acos(x) = pi/2 - asin(x) + * acos(-x) = pi/2 + asin(x) + * For |x|<=0.5 + * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) + * For x>0.5 + * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) + * = 2asin(sqrt((1-x)/2)) + * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) + * = 2f + (2c + 2s*z*R(z)) + * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term + * for f so that f+c ~ sqrt(z). + * For x<-0.5 + * acos(x) = pi - 2asin(sqrt((1-|x|)/2)) + * = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + * + * Function needed: sqrt + */ + +use super::sqrt; + +const PIO2_HI: f64 = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ +const PIO2_LO: f64 = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ +const PS0: f64 = 1.66666666666666657415e-01; /* 0x3FC55555, 0x55555555 */ +const PS1: f64 = -3.25565818622400915405e-01; /* 0xBFD4D612, 0x03EB6F7D */ +const PS2: f64 = 2.01212532134862925881e-01; /* 0x3FC9C155, 0x0E884455 */ +const PS3: f64 = -4.00555345006794114027e-02; /* 0xBFA48228, 0xB5688F3B */ +const PS4: f64 = 7.91534994289814532176e-04; /* 0x3F49EFE0, 0x7501B288 */ +const PS5: f64 = 3.47933107596021167570e-05; /* 0x3F023DE1, 0x0DFDF709 */ +const QS1: f64 = -2.40339491173441421878e+00; /* 0xC0033A27, 0x1C8A2D4B */ +const QS2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ +const QS3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ +const QS4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ + +fn r(z: f64) -> f64 { + let p: f64 = z * (PS0 + z * (PS1 + z * (PS2 + z * (PS3 + z * (PS4 + z * PS5))))); + let q: f64 = 1.0 + z * (QS1 + z * (QS2 + z * (QS3 + z * QS4))); + p / q +} + +/// Arccosine (f64) +/// +/// Computes the inverse cosine (arc cosine) of the input value. +/// Arguments must be in the range -1 to 1. +/// Returns values in radians, in the range of 0 to pi. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn acos(x: f64) -> f64 { + let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120 + let z: f64; + let w: f64; + let s: f64; + let c: f64; + let df: f64; + let hx: u32; + let ix: u32; + + hx = (x.to_bits() >> 32) as u32; + ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3ff00000 { + let lx: u32 = x.to_bits() as u32; + + if ((ix - 0x3ff00000) | lx) == 0 { + /* acos(1)=0, acos(-1)=pi */ + if (hx >> 31) != 0 { + return 2. * PIO2_HI + x1p_120f; + } + return 0.; + } + return 0. / (x - x); + } + /* |x| < 0.5 */ + if ix < 0x3fe00000 { + if ix <= 0x3c600000 { + /* |x| < 2**-57 */ + return PIO2_HI + x1p_120f; + } + return PIO2_HI - (x - (PIO2_LO - x * r(x * x))); + } + /* x < -0.5 */ + if (hx >> 31) != 0 { + z = (1.0 + x) * 0.5; + s = sqrt(z); + w = r(z) * s - PIO2_LO; + return 2. * (PIO2_HI - (s + w)); + } + /* x > 0.5 */ + z = (1.0 - x) * 0.5; + s = sqrt(z); + // Set the low 4 bytes to zero + df = f64::from_bits(s.to_bits() & 0xff_ff_ff_ff_00_00_00_00); + + c = (z - df * df) / (s + df); + w = r(z) * s + c; + 2. * (df + w) +} diff --git a/vendor/compiler_builtins/libm/src/math/acosf.rs b/vendor/compiler_builtins/libm/src/math/acosf.rs new file mode 100644 index 000000000..1a60479e3 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/acosf.rs @@ -0,0 +1,79 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::sqrtf::sqrtf; + +const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */ +const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */ +const P_S0: f32 = 1.6666586697e-01; +const P_S1: f32 = -4.2743422091e-02; +const P_S2: f32 = -8.6563630030e-03; +const Q_S1: f32 = -7.0662963390e-01; + +fn r(z: f32) -> f32 { + let p = z * (P_S0 + z * (P_S1 + z * P_S2)); + let q = 1. + z * Q_S1; + p / q +} + +/// Arccosine (f32) +/// +/// Computes the inverse cosine (arc cosine) of the input value. +/// Arguments must be in the range -1 to 1. +/// Returns values in radians, in the range of 0 to pi. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn acosf(x: f32) -> f32 { + let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) + + let z: f32; + let w: f32; + let s: f32; + + let mut hx = x.to_bits(); + let ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3f800000 { + if ix == 0x3f800000 { + if (hx >> 31) != 0 { + return 2. * PIO2_HI + x1p_120; + } + return 0.; + } + return 0. / (x - x); + } + /* |x| < 0.5 */ + if ix < 0x3f000000 { + if ix <= 0x32800000 { + /* |x| < 2**-26 */ + return PIO2_HI + x1p_120; + } + return PIO2_HI - (x - (PIO2_LO - x * r(x * x))); + } + /* x < -0.5 */ + if (hx >> 31) != 0 { + z = (1. + x) * 0.5; + s = sqrtf(z); + w = r(z) * s - PIO2_LO; + return 2. * (PIO2_HI - (s + w)); + } + /* x > 0.5 */ + z = (1. - x) * 0.5; + s = sqrtf(z); + hx = s.to_bits(); + let df = f32::from_bits(hx & 0xfffff000); + let c = (z - df * df) / (s + df); + w = r(z) * s + c; + 2. * (df + w) +} diff --git a/vendor/compiler_builtins/libm/src/math/acosh.rs b/vendor/compiler_builtins/libm/src/math/acosh.rs new file mode 100644 index 000000000..ac7a5f1c6 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/acosh.rs @@ -0,0 +1,26 @@ +use super::{log, log1p, sqrt}; + +const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ + +/// Inverse hyperbolic cosine (f64) +/// +/// Calculates the inverse hyperbolic cosine of `x`. +/// Is defined as `log(x + sqrt(x*x-1))`. +/// `x` must be a number greater than or equal to 1. +pub fn acosh(x: f64) -> f64 { + let u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + + /* x < 1 domain error is handled in the called functions */ + + if e < 0x3ff + 1 { + /* |x| < 2, up to 2ulp error in [1,1.125] */ + return log1p(x - 1.0 + sqrt((x - 1.0) * (x - 1.0) + 2.0 * (x - 1.0))); + } + if e < 0x3ff + 26 { + /* |x| < 0x1p26 */ + return log(2.0 * x - 1.0 / (x + sqrt(x * x - 1.0))); + } + /* |x| >= 0x1p26 or nan */ + return log(x) + LN2; +} diff --git a/vendor/compiler_builtins/libm/src/math/acoshf.rs b/vendor/compiler_builtins/libm/src/math/acoshf.rs new file mode 100644 index 000000000..0879e1edb --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/acoshf.rs @@ -0,0 +1,25 @@ +use super::{log1pf, logf, sqrtf}; + +const LN2: f32 = 0.693147180559945309417232121458176568; + +/// Inverse hyperbolic cosine (f32) +/// +/// Calculates the inverse hyperbolic cosine of `x`. +/// Is defined as `log(x + sqrt(x*x-1))`. +/// `x` must be a number greater than or equal to 1. +pub fn acoshf(x: f32) -> f32 { + let u = x.to_bits(); + let a = u & 0x7fffffff; + + if a < 0x3f800000 + (1 << 23) { + /* |x| < 2, invalid if x < 1 or nan */ + /* up to 2ulp error in [1,1.125] */ + return log1pf(x - 1.0 + sqrtf((x - 1.0) * (x - 1.0) + 2.0 * (x - 1.0))); + } + if a < 0x3f800000 + (12 << 23) { + /* |x| < 0x1p12 */ + return logf(2.0 * x - 1.0 / (x + sqrtf(x * x - 1.0))); + } + /* x >= 0x1p12 */ + return logf(x) + LN2; +} diff --git a/vendor/compiler_builtins/libm/src/math/asin.rs b/vendor/compiler_builtins/libm/src/math/asin.rs new file mode 100644 index 000000000..3e4b7c56e --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/asin.rs @@ -0,0 +1,119 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* asin(x) + * Method : + * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... + * we approximate asin(x) on [0,0.5] by + * asin(x) = x + x*x^2*R(x^2) + * where + * R(x^2) is a rational approximation of (asin(x)-x)/x^3 + * and its remez error is bounded by + * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75) + * + * For x in [0.5,1] + * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) + * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2; + * then for x>0.98 + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo) + * For x<=0.98, let pio4_hi = pio2_hi/2, then + * f = hi part of s; + * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z) + * and + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo) + * = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c)) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + * + */ + +use super::{fabs, get_high_word, get_low_word, sqrt, with_set_low_word}; + +const PIO2_HI: f64 = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ +const PIO2_LO: f64 = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ +/* coefficients for R(x^2) */ +const P_S0: f64 = 1.66666666666666657415e-01; /* 0x3FC55555, 0x55555555 */ +const P_S1: f64 = -3.25565818622400915405e-01; /* 0xBFD4D612, 0x03EB6F7D */ +const P_S2: f64 = 2.01212532134862925881e-01; /* 0x3FC9C155, 0x0E884455 */ +const P_S3: f64 = -4.00555345006794114027e-02; /* 0xBFA48228, 0xB5688F3B */ +const P_S4: f64 = 7.91534994289814532176e-04; /* 0x3F49EFE0, 0x7501B288 */ +const P_S5: f64 = 3.47933107596021167570e-05; /* 0x3F023DE1, 0x0DFDF709 */ +const Q_S1: f64 = -2.40339491173441421878e+00; /* 0xC0033A27, 0x1C8A2D4B */ +const Q_S2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ +const Q_S3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ +const Q_S4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ + +fn comp_r(z: f64) -> f64 { + let p = z * (P_S0 + z * (P_S1 + z * (P_S2 + z * (P_S3 + z * (P_S4 + z * P_S5))))); + let q = 1.0 + z * (Q_S1 + z * (Q_S2 + z * (Q_S3 + z * Q_S4))); + p / q +} + +/// Arcsine (f64) +/// +/// Computes the inverse sine (arc sine) of the argument `x`. +/// Arguments to asin must be in the range -1 to 1. +/// Returns values in radians, in the range of -pi/2 to pi/2. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn asin(mut x: f64) -> f64 { + let z: f64; + let r: f64; + let s: f64; + let hx: u32; + let ix: u32; + + hx = get_high_word(x); + ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3ff00000 { + let lx: u32; + lx = get_low_word(x); + if ((ix - 0x3ff00000) | lx) == 0 { + /* asin(1) = +-pi/2 with inexact */ + return x * PIO2_HI + f64::from_bits(0x3870000000000000); + } else { + return 0.0 / (x - x); + } + } + /* |x| < 0.5 */ + if ix < 0x3fe00000 { + /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */ + if ix < 0x3e500000 && ix >= 0x00100000 { + return x; + } else { + return x + x * comp_r(x * x); + } + } + /* 1 > |x| >= 0.5 */ + z = (1.0 - fabs(x)) * 0.5; + s = sqrt(z); + r = comp_r(z); + if ix >= 0x3fef3333 { + /* if |x| > 0.975 */ + x = PIO2_HI - (2. * (s + s * r) - PIO2_LO); + } else { + let f: f64; + let c: f64; + /* f+c = sqrt(z) */ + f = with_set_low_word(s, 0); + c = (z - f * f) / (s + f); + x = 0.5 * PIO2_HI - (2.0 * s * r - (PIO2_LO - 2.0 * c) - (0.5 * PIO2_HI - 2.0 * f)); + } + if hx >> 31 != 0 { + -x + } else { + x + } +} diff --git a/vendor/compiler_builtins/libm/src/math/asinf.rs b/vendor/compiler_builtins/libm/src/math/asinf.rs new file mode 100644 index 000000000..6ec61b629 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/asinf.rs @@ -0,0 +1,72 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::fabsf::fabsf; +use super::sqrt::sqrt; + +const PIO2: f64 = 1.570796326794896558e+00; + +/* coefficients for R(x^2) */ +const P_S0: f32 = 1.6666586697e-01; +const P_S1: f32 = -4.2743422091e-02; +const P_S2: f32 = -8.6563630030e-03; +const Q_S1: f32 = -7.0662963390e-01; + +fn r(z: f32) -> f32 { + let p = z * (P_S0 + z * (P_S1 + z * P_S2)); + let q = 1. + z * Q_S1; + p / q +} + +/// Arcsine (f32) +/// +/// Computes the inverse sine (arc sine) of the argument `x`. +/// Arguments to asin must be in the range -1 to 1. +/// Returns values in radians, in the range of -pi/2 to pi/2. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn asinf(mut x: f32) -> f32 { + let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120) + + let hx = x.to_bits(); + let ix = hx & 0x7fffffff; + + if ix >= 0x3f800000 { + /* |x| >= 1 */ + if ix == 0x3f800000 { + /* |x| == 1 */ + return ((x as f64) * PIO2 + x1p_120) as f32; /* asin(+-1) = +-pi/2 with inexact */ + } + return 0. / (x - x); /* asin(|x|>1) is NaN */ + } + + if ix < 0x3f000000 { + /* |x| < 0.5 */ + /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */ + if (ix < 0x39800000) && (ix >= 0x00800000) { + return x; + } + return x + x * r(x * x); + } + + /* 1 > |x| >= 0.5 */ + let z = (1. - fabsf(x)) * 0.5; + let s = sqrt(z as f64); + x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32; + if (hx >> 31) != 0 { + -x + } else { + x + } +} diff --git a/vendor/compiler_builtins/libm/src/math/asinh.rs b/vendor/compiler_builtins/libm/src/math/asinh.rs new file mode 100644 index 000000000..14295357a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/asinh.rs @@ -0,0 +1,39 @@ +use super::{log, log1p, sqrt}; + +const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ + +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +/// Inverse hyperbolic sine (f64) +/// +/// Calculates the inverse hyperbolic sine of `x`. +/// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. +pub fn asinh(mut x: f64) -> f64 { + let mut u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + let sign = (u >> 63) != 0; + + /* |x| */ + u &= (!0) >> 1; + x = f64::from_bits(u); + + if e >= 0x3ff + 26 { + /* |x| >= 0x1p26 or inf or nan */ + x = log(x) + LN2; + } else if e >= 0x3ff + 1 { + /* |x| >= 2 */ + x = log(2.0 * x + 1.0 / (sqrt(x * x + 1.0) + x)); + } else if e >= 0x3ff - 26 { + /* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */ + x = log1p(x + x * x / (sqrt(x * x + 1.0) + 1.0)); + } else { + /* |x| < 0x1p-26, raise inexact if x != 0 */ + let x1p120 = f64::from_bits(0x4770000000000000); + force_eval!(x + x1p120); + } + + if sign { + -x + } else { + x + } +} diff --git a/vendor/compiler_builtins/libm/src/math/asinhf.rs b/vendor/compiler_builtins/libm/src/math/asinhf.rs new file mode 100644 index 000000000..e22a29132 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/asinhf.rs @@ -0,0 +1,38 @@ +use super::{log1pf, logf, sqrtf}; + +const LN2: f32 = 0.693147180559945309417232121458176568; + +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +/// Inverse hyperbolic sine (f32) +/// +/// Calculates the inverse hyperbolic sine of `x`. +/// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. +pub fn asinhf(mut x: f32) -> f32 { + let u = x.to_bits(); + let i = u & 0x7fffffff; + let sign = (u >> 31) != 0; + + /* |x| */ + x = f32::from_bits(i); + + if i >= 0x3f800000 + (12 << 23) { + /* |x| >= 0x1p12 or inf or nan */ + x = logf(x) + LN2; + } else if i >= 0x3f800000 + (1 << 23) { + /* |x| >= 2 */ + x = logf(2.0 * x + 1.0 / (sqrtf(x * x + 1.0) + x)); + } else if i >= 0x3f800000 - (12 << 23) { + /* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */ + x = log1pf(x + x * x / (sqrtf(x * x + 1.0) + 1.0)); + } else { + /* |x| < 0x1p-12, raise inexact if x!=0 */ + let x1p120 = f32::from_bits(0x7b800000); + force_eval!(x + x1p120); + } + + if sign { + -x + } else { + x + } +} diff --git a/vendor/compiler_builtins/libm/src/math/atan.rs b/vendor/compiler_builtins/libm/src/math/atan.rs new file mode 100644 index 000000000..4259dc71a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/atan.rs @@ -0,0 +1,184 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* atan(x) + * Method + * 1. Reduce x to positive by atan(x) = -atan(-x). + * 2. According to the integer k=4t+0.25 chopped, t=x, the argument + * is further reduced to one of the following intervals and the + * arctangent of t is evaluated by the corresponding formula: + * + * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) + * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) ) + * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) ) + * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) ) + * [39/16,INF] atan(x) = atan(INF) + atan( -1/t ) + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +use super::fabs; +use core::f64; + +const ATANHI: [f64; 4] = [ + 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */ + 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */ + 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */ + 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */ +]; + +const ATANLO: [f64; 4] = [ + 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */ + 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */ + 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */ + 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */ +]; + +const AT: [f64; 11] = [ + 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */ + -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */ + 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */ + -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */ + 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */ + -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */ + 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */ + -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */ + 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */ + -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */ + 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */ +]; + +/// Arctangent (f64) +/// +/// Computes the inverse tangent (arc tangent) of the input value. +/// Returns a value in radians, in the range of -pi/2 to pi/2. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atan(x: f64) -> f64 { + let mut x = x; + let mut ix = (x.to_bits() >> 32) as u32; + let sign = ix >> 31; + ix &= 0x7fff_ffff; + if ix >= 0x4410_0000 { + if x.is_nan() { + return x; + } + + let z = ATANHI[3] + f64::from_bits(0x0380_0000); // 0x1p-120f + return if sign != 0 { -z } else { z }; + } + + let id = if ix < 0x3fdc_0000 { + /* |x| < 0.4375 */ + if ix < 0x3e40_0000 { + /* |x| < 2^-27 */ + if ix < 0x0010_0000 { + /* raise underflow for subnormal x */ + force_eval!(x as f32); + } + + return x; + } + + -1 + } else { + x = fabs(x); + if ix < 0x3ff30000 { + /* |x| < 1.1875 */ + if ix < 0x3fe60000 { + /* 7/16 <= |x| < 11/16 */ + x = (2. * x - 1.) / (2. + x); + 0 + } else { + /* 11/16 <= |x| < 19/16 */ + x = (x - 1.) / (x + 1.); + 1 + } + } else if ix < 0x40038000 { + /* |x| < 2.4375 */ + x = (x - 1.5) / (1. + 1.5 * x); + 2 + } else { + /* 2.4375 <= |x| < 2^66 */ + x = -1. / x; + 3 + } + }; + + let z = x * x; + let w = z * z; + /* break sum from i=0 to 10 AT[i]z**(i+1) into odd and even poly */ + let s1 = z * (AT[0] + w * (AT[2] + w * (AT[4] + w * (AT[6] + w * (AT[8] + w * AT[10]))))); + let s2 = w * (AT[1] + w * (AT[3] + w * (AT[5] + w * (AT[7] + w * AT[9])))); + + if id < 0 { + return x - x * (s1 + s2); + } + + let z = i!(ATANHI, id as usize) - (x * (s1 + s2) - i!(ATANLO, id as usize) - x); + + if sign != 0 { + -z + } else { + z + } +} + +#[cfg(test)] +mod tests { + use super::atan; + use core::f64; + + #[test] + fn sanity_check() { + for (input, answer) in [ + (3.0_f64.sqrt() / 3.0, f64::consts::FRAC_PI_6), + (1.0, f64::consts::FRAC_PI_4), + (3.0_f64.sqrt(), f64::consts::FRAC_PI_3), + (-3.0_f64.sqrt() / 3.0, -f64::consts::FRAC_PI_6), + (-1.0, -f64::consts::FRAC_PI_4), + (-3.0_f64.sqrt(), -f64::consts::FRAC_PI_3), + ] + .iter() + { + assert!( + (atan(*input) - answer) / answer < 1e-5, + "\natan({:.4}/16) = {:.4}, actual: {}", + input * 16.0, + answer, + atan(*input) + ); + } + } + + #[test] + fn zero() { + assert_eq!(atan(0.0), 0.0); + } + + #[test] + fn infinity() { + assert_eq!(atan(f64::INFINITY), f64::consts::FRAC_PI_2); + } + + #[test] + fn minus_infinity() { + assert_eq!(atan(f64::NEG_INFINITY), -f64::consts::FRAC_PI_2); + } + + #[test] + fn nan() { + assert!(atan(f64::NAN).is_nan()); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/atan2.rs b/vendor/compiler_builtins/libm/src/math/atan2.rs new file mode 100644 index 000000000..fb2ea4eda --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/atan2.rs @@ -0,0 +1,126 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* atan2(y,x) + * Method : + * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). + * 2. Reduce x to positive by (if x and y are unexceptional): + * ARG (x+iy) = arctan(y/x) ... if x > 0, + * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, + * + * Special cases: + * + * ATAN2((anything), NaN ) is NaN; + * ATAN2(NAN , (anything) ) is NaN; + * ATAN2(+-0, +(anything but NaN)) is +-0 ; + * ATAN2(+-0, -(anything but NaN)) is +-pi ; + * ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2; + * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ; + * ATAN2(+-(anything but INF and NaN), -INF) is +-pi; + * ATAN2(+-INF,+INF ) is +-pi/4 ; + * ATAN2(+-INF,-INF ) is +-3pi/4; + * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2; + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +use super::atan; +use super::fabs; + +const PI: f64 = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ +const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ + +/// Arctangent of y/x (f64) +/// +/// Computes the inverse tangent (arc tangent) of `y/x`. +/// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). +/// Returns a value in radians, in the range of -pi to pi. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atan2(y: f64, x: f64) -> f64 { + if x.is_nan() || y.is_nan() { + return x + y; + } + let mut ix = (x.to_bits() >> 32) as u32; + let lx = x.to_bits() as u32; + let mut iy = (y.to_bits() >> 32) as u32; + let ly = y.to_bits() as u32; + if ((ix.wrapping_sub(0x3ff00000)) | lx) == 0 { + /* x = 1.0 */ + return atan(y); + } + let m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ + ix &= 0x7fffffff; + iy &= 0x7fffffff; + + /* when y = 0 */ + if (iy | ly) == 0 { + return match m { + 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ + 2 => PI, /* atan(+0,-anything) = PI */ + _ => -PI, /* atan(-0,-anything) =-PI */ + }; + } + /* when x = 0 */ + if (ix | lx) == 0 { + return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; + } + /* when x is INF */ + if ix == 0x7ff00000 { + if iy == 0x7ff00000 { + return match m { + 0 => PI / 4.0, /* atan(+INF,+INF) */ + 1 => -PI / 4.0, /* atan(-INF,+INF) */ + 2 => 3.0 * PI / 4.0, /* atan(+INF,-INF) */ + _ => -3.0 * PI / 4.0, /* atan(-INF,-INF) */ + }; + } else { + return match m { + 0 => 0.0, /* atan(+...,+INF) */ + 1 => -0.0, /* atan(-...,+INF) */ + 2 => PI, /* atan(+...,-INF) */ + _ => -PI, /* atan(-...,-INF) */ + }; + } + } + /* |y/x| > 0x1p64 */ + if ix.wrapping_add(64 << 20) < iy || iy == 0x7ff00000 { + return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; + } + + /* z = atan(|y/x|) without spurious underflow */ + let z = if (m & 2 != 0) && iy.wrapping_add(64 << 20) < ix { + /* |y/x| < 0x1p-64, x<0 */ + 0.0 + } else { + atan(fabs(y / x)) + }; + match m { + 0 => z, /* atan(+,+) */ + 1 => -z, /* atan(-,+) */ + 2 => PI - (z - PI_LO), /* atan(+,-) */ + _ => (z - PI_LO) - PI, /* atan(-,-) */ + } +} + +#[test] +fn sanity_check() { + assert_eq!(atan2(0.0, 1.0), 0.0); + assert_eq!(atan2(0.0, -1.0), PI); + assert_eq!(atan2(-0.0, -1.0), -PI); + assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0)); + assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI); + assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI); +} diff --git a/vendor/compiler_builtins/libm/src/math/atan2f.rs b/vendor/compiler_builtins/libm/src/math/atan2f.rs new file mode 100644 index 000000000..eae3b002d --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/atan2f.rs @@ -0,0 +1,91 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::atanf; +use super::fabsf; + +const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ +const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ + +/// Arctangent of y/x (f32) +/// +/// Computes the inverse tangent (arc tangent) of `y/x`. +/// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). +/// Returns a value in radians, in the range of -pi to pi. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atan2f(y: f32, x: f32) -> f32 { + if x.is_nan() || y.is_nan() { + return x + y; + } + let mut ix = x.to_bits(); + let mut iy = y.to_bits(); + + if ix == 0x3f800000 { + /* x=1.0 */ + return atanf(y); + } + let m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ + ix &= 0x7fffffff; + iy &= 0x7fffffff; + + /* when y = 0 */ + if iy == 0 { + return match m { + 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ + 2 => PI, /* atan(+0,-anything) = pi */ + 3 | _ => -PI, /* atan(-0,-anything) =-pi */ + }; + } + /* when x = 0 */ + if ix == 0 { + return if m & 1 != 0 { -PI / 2. } else { PI / 2. }; + } + /* when x is INF */ + if ix == 0x7f800000 { + return if iy == 0x7f800000 { + match m { + 0 => PI / 4., /* atan(+INF,+INF) */ + 1 => -PI / 4., /* atan(-INF,+INF) */ + 2 => 3. * PI / 4., /* atan(+INF,-INF)*/ + 3 | _ => -3. * PI / 4., /* atan(-INF,-INF)*/ + } + } else { + match m { + 0 => 0., /* atan(+...,+INF) */ + 1 => -0., /* atan(-...,+INF) */ + 2 => PI, /* atan(+...,-INF) */ + 3 | _ => -PI, /* atan(-...,-INF) */ + } + }; + } + /* |y/x| > 0x1p26 */ + if (ix + (26 << 23) < iy) || (iy == 0x7f800000) { + return if m & 1 != 0 { -PI / 2. } else { PI / 2. }; + } + + /* z = atan(|y/x|) with correct underflow */ + let z = if (m & 2 != 0) && (iy + (26 << 23) < ix) { + /*|y/x| < 0x1p-26, x < 0 */ + 0. + } else { + atanf(fabsf(y / x)) + }; + match m { + 0 => z, /* atan(+,+) */ + 1 => -z, /* atan(-,+) */ + 2 => PI - (z - PI_LO), /* atan(+,-) */ + _ => (z - PI_LO) - PI, /* case 3 */ /* atan(-,-) */ + } +} diff --git a/vendor/compiler_builtins/libm/src/math/atanf.rs b/vendor/compiler_builtins/libm/src/math/atanf.rs new file mode 100644 index 000000000..d042b3bc0 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/atanf.rs @@ -0,0 +1,112 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_atanf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::fabsf; + +const ATAN_HI: [f32; 4] = [ + 4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */ + 7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */ + 9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */ + 1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */ +]; + +const ATAN_LO: [f32; 4] = [ + 5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */ + 3.7748947079e-08, /* atan(1.0)lo 0x33222168 */ + 3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */ + 7.5497894159e-08, /* atan(inf)lo 0x33a22168 */ +]; + +const A_T: [f32; 5] = [ + 3.3333328366e-01, + -1.9999158382e-01, + 1.4253635705e-01, + -1.0648017377e-01, + 6.1687607318e-02, +]; + +/// Arctangent (f32) +/// +/// Computes the inverse tangent (arc tangent) of the input value. +/// Returns a value in radians, in the range of -pi/2 to pi/2. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atanf(mut x: f32) -> f32 { + let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) + + let z: f32; + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix >= 0x4c800000 { + /* if |x| >= 2**26 */ + if x.is_nan() { + return x; + } + z = i!(ATAN_HI, 3) + x1p_120; + return if sign { -z } else { z }; + } + let id = if ix < 0x3ee00000 { + /* |x| < 0.4375 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + if ix < 0x00800000 { + /* raise underflow for subnormal x */ + force_eval!(x * x); + } + return x; + } + -1 + } else { + x = fabsf(x); + if ix < 0x3f980000 { + /* |x| < 1.1875 */ + if ix < 0x3f300000 { + /* 7/16 <= |x| < 11/16 */ + x = (2. * x - 1.) / (2. + x); + 0 + } else { + /* 11/16 <= |x| < 19/16 */ + x = (x - 1.) / (x + 1.); + 1 + } + } else if ix < 0x401c0000 { + /* |x| < 2.4375 */ + x = (x - 1.5) / (1. + 1.5 * x); + 2 + } else { + /* 2.4375 <= |x| < 2**26 */ + x = -1. / x; + 3 + } + }; + /* end of argument reduction */ + z = x * x; + let w = z * z; + /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ + let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4))); + let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3)); + if id < 0 { + return x - x * (s1 + s2); + } + let id = id as usize; + let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x); + if sign { + -z + } else { + z + } +} diff --git a/vendor/compiler_builtins/libm/src/math/atanh.rs b/vendor/compiler_builtins/libm/src/math/atanh.rs new file mode 100644 index 000000000..79a989c42 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/atanh.rs @@ -0,0 +1,36 @@ +use super::log1p; + +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +/// Inverse hyperbolic tangent (f64) +/// +/// Calculates the inverse hyperbolic tangent of `x`. +/// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. +pub fn atanh(x: f64) -> f64 { + let u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + let sign = (u >> 63) != 0; + + /* |x| */ + let mut y = f64::from_bits(u & 0x7fff_ffff_ffff_ffff); + + if e < 0x3ff - 1 { + if e < 0x3ff - 32 { + /* handle underflow */ + if e == 0 { + force_eval!(y as f32); + } + } else { + /* |x| < 0.5, up to 1.7ulp error */ + y = 0.5 * log1p(2.0 * y + 2.0 * y * y / (1.0 - y)); + } + } else { + /* avoid overflow */ + y = 0.5 * log1p(2.0 * (y / (1.0 - y))); + } + + if sign { + -y + } else { + y + } +} diff --git a/vendor/compiler_builtins/libm/src/math/atanhf.rs b/vendor/compiler_builtins/libm/src/math/atanhf.rs new file mode 100644 index 000000000..7b2f34d97 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/atanhf.rs @@ -0,0 +1,36 @@ +use super::log1pf; + +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +/// Inverse hyperbolic tangent (f32) +/// +/// Calculates the inverse hyperbolic tangent of `x`. +/// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. +pub fn atanhf(mut x: f32) -> f32 { + let mut u = x.to_bits(); + let sign = (u >> 31) != 0; + + /* |x| */ + u &= 0x7fffffff; + x = f32::from_bits(u); + + if u < 0x3f800000 - (1 << 23) { + if u < 0x3f800000 - (32 << 23) { + /* handle underflow */ + if u < (1 << 23) { + force_eval!((x * x) as f32); + } + } else { + /* |x| < 0.5, up to 1.7ulp error */ + x = 0.5 * log1pf(2.0 * x + 2.0 * x * x / (1.0 - x)); + } + } else { + /* avoid overflow */ + x = 0.5 * log1pf(2.0 * (x / (1.0 - x))); + } + + if sign { + -x + } else { + x + } +} diff --git a/vendor/compiler_builtins/libm/src/math/cbrt.rs b/vendor/compiler_builtins/libm/src/math/cbrt.rs new file mode 100644 index 000000000..b4e77eaa2 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/cbrt.rs @@ -0,0 +1,113 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + * Optimized by Bruce D. Evans. + */ +/* cbrt(x) + * Return cube root of x + */ + +use core::f64; + +const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */ +const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ + +/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */ +const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */ +const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */ +const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */ +const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */ +const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ + +// Cube root (f64) +/// +/// Computes the cube root of the argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cbrt(x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui: u64 = x.to_bits(); + let mut r: f64; + let s: f64; + let mut t: f64; + let w: f64; + let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff; + + if hx >= 0x7ff00000 { + /* cbrt(NaN,INF) is itself */ + return x + x; + } + + /* + * Rough cbrt to 5 bits: + * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) + * where e is integral and >= 0, m is real and in [0, 1), and "/" and + * "%" are integer division and modulus with rounding towards minus + * infinity. The RHS is always >= the LHS and has a maximum relative + * error of about 1 in 16. Adding a bias of -0.03306235651 to the + * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE + * floating point representation, for finite positive normal values, + * ordinary integer divison of the value in bits magically gives + * almost exactly the RHS of the above provided we first subtract the + * exponent bias (1023 for doubles) and later add it back. We do the + * subtraction virtually to keep e >= 0 so that ordinary integer + * division rounds towards minus infinity; this is also efficient. + */ + if hx < 0x00100000 { + /* zero or subnormal? */ + ui = (x * x1p54).to_bits(); + hx = (ui >> 32) as u32 & 0x7fffffff; + if hx == 0 { + return x; /* cbrt(0) is itself */ + } + hx = hx / 3 + B2; + } else { + hx = hx / 3 + B1; + } + ui &= 1 << 63; + ui |= (hx as u64) << 32; + t = f64::from_bits(ui); + + /* + * New cbrt to 23 bits: + * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) + * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) + * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation + * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this + * gives us bounds for r = t**3/x. + * + * Try to optimize for parallel evaluation as in __tanf.c. + */ + r = (t * t) * (t / x); + t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4)); + + /* + * Round t away from zero to 23 bits (sloppily except for ensuring that + * the result is larger in magnitude than cbrt(x) but not much more than + * 2 23-bit ulps larger). With rounding towards zero, the error bound + * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps + * in the rounded t, the infinite-precision error in the Newton + * approximation barely affects third digit in the final error + * 0.667; the error in the rounded t can be up to about 3 23-bit ulps + * before the final error is larger than 0.667 ulps. + */ + ui = t.to_bits(); + ui = (ui + 0x80000000) & 0xffffffffc0000000; + t = f64::from_bits(ui); + + /* one step Newton iteration to 53 bits with error < 0.667 ulps */ + s = t * t; /* t*t is exact */ + r = x / s; /* error <= 0.5 ulps; |r| < |t| */ + w = t + t; /* t+t is exact */ + r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ + t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ + t +} diff --git a/vendor/compiler_builtins/libm/src/math/cbrtf.rs b/vendor/compiler_builtins/libm/src/math/cbrtf.rs new file mode 100644 index 000000000..9d70305c6 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/cbrtf.rs @@ -0,0 +1,75 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* cbrtf(x) + * Return cube root of x + */ + +use core::f32; + +const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ +const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ + +/// Cube root (f32) +/// +/// Computes the cube root of the argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cbrtf(x: f32) -> f32 { + let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 + + let mut r: f64; + let mut t: f64; + let mut ui: u32 = x.to_bits(); + let mut hx: u32 = ui & 0x7fffffff; + + if hx >= 0x7f800000 { + /* cbrt(NaN,INF) is itself */ + return x + x; + } + + /* rough cbrt to 5 bits */ + if hx < 0x00800000 { + /* zero or subnormal? */ + if hx == 0 { + return x; /* cbrt(+-0) is itself */ + } + ui = (x * x1p24).to_bits(); + hx = ui & 0x7fffffff; + hx = hx / 3 + B2; + } else { + hx = hx / 3 + B1; + } + ui &= 0x80000000; + ui |= hx; + + /* + * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In + * double precision so that its terms can be arranged for efficiency + * without causing overflow or underflow. + */ + t = f32::from_bits(ui) as f64; + r = t * t * t; + t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); + + /* + * Second step Newton iteration to 47 bits. In double precision for + * efficiency and accuracy. + */ + r = t * t * t; + t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); + + /* rounding to 24 bits is perfect in round-to-nearest mode */ + t as f32 +} diff --git a/vendor/compiler_builtins/libm/src/math/ceil.rs b/vendor/compiler_builtins/libm/src/math/ceil.rs new file mode 100644 index 000000000..22d892971 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/ceil.rs @@ -0,0 +1,82 @@ +#![allow(unreachable_code)] +use core::f64; + +const TOINT: f64 = 1. / f64::EPSILON; + +/// Ceil (f64) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceil(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.ceil` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::ceilf64(x) } + } + } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + { + //use an alternative implementation on x86, because the + //main implementation fails with the x87 FPU used by + //debian i386, probablly due to excess precision issues. + //basic implementation taken from https://github.com/rust-lang/libm/issues/219 + use super::fabs; + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated < x { + return truncated + 1.0; + } else { + return truncated; + } + } else { + return x; + } + } + let u: u64 = x.to_bits(); + let e: i64 = (u >> 52 & 0x7ff) as i64; + let y: f64; + + if e >= 0x3ff + 52 || x == 0. { + return x; + } + // y = int(x) - x, where int(x) is an integer neighbor of x + y = if (u >> 63) != 0 { + x - TOINT + TOINT - x + } else { + x + TOINT - TOINT - x + }; + // special case because of non-nearest rounding modes + if e < 0x3ff { + force_eval!(y); + return if (u >> 63) != 0 { -0. } else { 1. }; + } + if y < 0. { + x + y + 1. + } else { + x + y + } +} + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::*; + + #[test] + fn sanity_check() { + assert_eq!(ceil(1.1), 2.0); + assert_eq!(ceil(2.9), 3.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil + #[test] + fn spec_tests() { + // Not Asserted: that the current rounding mode has no effect. + assert!(ceil(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(ceil(f), f); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/ceilf.rs b/vendor/compiler_builtins/libm/src/math/ceilf.rs new file mode 100644 index 000000000..7bcc647ca --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/ceilf.rs @@ -0,0 +1,65 @@ +use core::f32; + +/// Ceil (f32) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.ceil` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::ceilf32(x) } + } + } + let mut ui = x.to_bits(); + let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32; + + if e >= 23 { + return x; + } + if e >= 0 { + let m = 0x007fffff >> e; + if (ui & m) == 0 { + return x; + } + force_eval!(x + f32::from_bits(0x7b800000)); + if ui >> 31 == 0 { + ui += m; + } + ui &= !m; + } else { + force_eval!(x + f32::from_bits(0x7b800000)); + if ui >> 31 != 0 { + return -0.0; + } else if ui << 1 != 0 { + return 1.0; + } + } + f32::from_bits(ui) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::*; + use core::f32::*; + + #[test] + fn sanity_check() { + assert_eq!(ceilf(1.1), 2.0); + assert_eq!(ceilf(2.9), 3.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil + #[test] + fn spec_tests() { + // Not Asserted: that the current rounding mode has no effect. + assert!(ceilf(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(ceilf(f), f); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/copysign.rs b/vendor/compiler_builtins/libm/src/math/copysign.rs new file mode 100644 index 000000000..1527fb6ea --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/copysign.rs @@ -0,0 +1,11 @@ +/// Sign of Y, magnitude of X (f64) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +pub fn copysign(x: f64, y: f64) -> f64 { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= (!0) >> 1; + ux |= uy & (1 << 63); + f64::from_bits(ux) +} diff --git a/vendor/compiler_builtins/libm/src/math/copysignf.rs b/vendor/compiler_builtins/libm/src/math/copysignf.rs new file mode 100644 index 000000000..35148561a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/copysignf.rs @@ -0,0 +1,11 @@ +/// Sign of Y, magnitude of X (f32) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +pub fn copysignf(x: f32, y: f32) -> f32 { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= 0x7fffffff; + ux |= uy & 0x80000000; + f32::from_bits(ux) +} diff --git a/vendor/compiler_builtins/libm/src/math/cos.rs b/vendor/compiler_builtins/libm/src/math/cos.rs new file mode 100644 index 000000000..db8bc4989 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/cos.rs @@ -0,0 +1,73 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +use super::{k_cos, k_sin, rem_pio2}; + +// cos(x) +// Return cosine function of x. +// +// kernel function: +// k_sin ... sine function on [-pi/4,pi/4] +// k_cos ... cosine function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. +// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded +// +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cos(x: f64) -> f64 { + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e46a09e { + /* if x < 2**-27 * sqrt(2) */ + /* raise inexact if x != 0 */ + if x as i32 == 0 { + return 1.0; + } + } + return k_cos(x, 0.0); + } + + /* cos(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + match n & 3 { + 0 => k_cos(y0, y1), + 1 => -k_sin(y0, y1, 1), + 2 => -k_cos(y0, y1), + _ => k_sin(y0, y1, 1), + } +} diff --git a/vendor/compiler_builtins/libm/src/math/cosf.rs b/vendor/compiler_builtins/libm/src/math/cosf.rs new file mode 100644 index 000000000..424fa42ed --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/cosf.rs @@ -0,0 +1,83 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{k_cosf, k_sinf, rem_pio2f}; + +use core::f64::consts::FRAC_PI_2; + +/* Small multiples of pi/2 rounded to double precision. */ +const C1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const C2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const C3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cosf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x != 0 */ + force_eval!(x + x1p120); + return 1.; + } + return k_cosf(x64); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix > 0x4016cbe3 { + /* |x| ~> 3*pi/4 */ + return -k_cosf(if sign { x64 + C2_PIO2 } else { x64 - C2_PIO2 }); + } else if sign { + return k_sinf(x64 + C1_PIO2); + } else { + return k_sinf(C1_PIO2 - x64); + } + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix > 0x40afeddf { + /* |x| ~> 7*pi/4 */ + return k_cosf(if sign { x64 + C4_PIO2 } else { x64 - C4_PIO2 }); + } else if sign { + return k_sinf(-x64 - C3_PIO2); + } else { + return k_sinf(x64 - C3_PIO2); + } + } + + /* cos(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + match n & 3 { + 0 => k_cosf(y), + 1 => k_sinf(-y), + 2 => -k_cosf(y), + _ => k_sinf(y), + } +} diff --git a/vendor/compiler_builtins/libm/src/math/cosh.rs b/vendor/compiler_builtins/libm/src/math/cosh.rs new file mode 100644 index 000000000..2fb568ab3 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/cosh.rs @@ -0,0 +1,38 @@ +use super::exp; +use super::expm1; +use super::k_expo2; + +/// Hyperbolic cosine (f64) +/// +/// Computes the hyperbolic cosine of the argument x. +/// Is defined as `(exp(x) + exp(-x))/2` +/// Angles are specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cosh(mut x: f64) -> f64 { + /* |x| */ + let mut ix = x.to_bits(); + ix &= 0x7fffffffffffffff; + x = f64::from_bits(ix); + let w = ix >> 32; + + /* |x| < log(2) */ + if w < 0x3fe62e42 { + if w < 0x3ff00000 - (26 << 20) { + let x1p120 = f64::from_bits(0x4770000000000000); + force_eval!(x + x1p120); + return 1.; + } + let t = expm1(x); // exponential minus 1 + return 1. + t * t / (2. * (1. + t)); + } + + /* |x| < log(DBL_MAX) */ + if w < 0x40862e42 { + let t = exp(x); + /* note: if x>log(0x1p26) then the 1/t is not needed */ + return 0.5 * (t + 1. / t); + } + + /* |x| > log(DBL_MAX) or nan */ + k_expo2(x) +} diff --git a/vendor/compiler_builtins/libm/src/math/coshf.rs b/vendor/compiler_builtins/libm/src/math/coshf.rs new file mode 100644 index 000000000..e7b684587 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/coshf.rs @@ -0,0 +1,38 @@ +use super::expf; +use super::expm1f; +use super::k_expo2f; + +/// Hyperbolic cosine (f64) +/// +/// Computes the hyperbolic cosine of the argument x. +/// Is defined as `(exp(x) + exp(-x))/2` +/// Angles are specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn coshf(mut x: f32) -> f32 { + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + /* |x| */ + let mut ix = x.to_bits(); + ix &= 0x7fffffff; + x = f32::from_bits(ix); + let w = ix; + + /* |x| < log(2) */ + if w < 0x3f317217 { + if w < (0x3f800000 - (12 << 23)) { + force_eval!(x + x1p120); + return 1.; + } + let t = expm1f(x); + return 1. + t * t / (2. * (1. + t)); + } + + /* |x| < log(FLT_MAX) */ + if w < 0x42b17217 { + let t = expf(x); + return 0.5 * (t + 1. / t); + } + + /* |x| > log(FLT_MAX) or nan */ + k_expo2f(x) +} diff --git a/vendor/compiler_builtins/libm/src/math/erf.rs b/vendor/compiler_builtins/libm/src/math/erf.rs new file mode 100644 index 000000000..a2c617d34 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/erf.rs @@ -0,0 +1,317 @@ +use super::{exp, fabs, get_high_word, with_set_low_word}; +/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * Note that + * erf(-x) = -erf(x) + * erfc(-x) = 2 - erfc(x) + * + * Method: + * 1. For |x| in [0, 0.84375] + * erf(x) = x + x*R(x^2) + * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] + * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] + * where R = P/Q where P is an odd poly of degree 8 and + * Q is an odd poly of degree 10. + * -57.90 + * | R - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. The interval is chosen because the fix + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = sign(x) * (c + P1(s)/Q1(s)) + * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 + * 1+(c+P1(s)/Q1(s)) if x < 0 + * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. + * erf(1+s) = erf(1) + s*Poly(s) + * = 0.845.. + P1(s)/Q1(s) + * That is, we use rational approximation to approximate + * erf(1+s) - (c = (single)0.84506291151) + * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] + * where + * P1(s) = degree 6 poly in s + * Q1(s) = degree 6 poly in s + * + * 3. For x in [1.25,1/0.35(~2.857143)], + * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) + * erf(x) = 1 - erfc(x) + * where + * R1(z) = degree 7 poly in z, (z=1/x^2) + * S1(z) = degree 8 poly in z + * + * 4. For x in [1/0.35,28] + * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 + * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0 + * = 2.0 - tiny (if x <= -6) + * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else + * erf(x) = sign(x)*(1.0 - tiny) + * where + * R2(z) = degree 6 poly in z, (z=1/x^2) + * S2(z) = degree 7 poly in z + * + * Note1: + * To compute exp(-x*x-0.5625+R/S), let s be a single + * precision number and s := x; then + * -x*x = -s*s + (s-x)*(s+x) + * exp(-x*x-0.5626+R/S) = + * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S); + * Note2: + * Here 4 and 5 make use of the asymptotic series + * exp(-x*x) + * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) ) + * x*sqrt(pi) + * We use rational approximation to approximate + * g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625 + * Here is the error bound for R1/S1 and R2/S2 + * |R1/S1 - f(x)| < 2**(-62.57) + * |R2/S2 - f(x)| < 2**(-61.52) + * + * 5. For inf > x >= 28 + * erf(x) = sign(x) *(1 - tiny) (raise inexact) + * erfc(x) = tiny*tiny (raise underflow) if x > 0 + * = 2 - tiny if x<0 + * + * 7. Special case: + * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, + * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, + * erfc/erf(NaN) is NaN + */ + +const ERX: f64 = 8.45062911510467529297e-01; /* 0x3FEB0AC1, 0x60000000 */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +const EFX8: f64 = 1.02703333676410069053e+00; /* 0x3FF06EBA, 0x8214DB69 */ +const PP0: f64 = 1.28379167095512558561e-01; /* 0x3FC06EBA, 0x8214DB68 */ +const PP1: f64 = -3.25042107247001499370e-01; /* 0xBFD4CD7D, 0x691CB913 */ +const PP2: f64 = -2.84817495755985104766e-02; /* 0xBF9D2A51, 0xDBD7194F */ +const PP3: f64 = -5.77027029648944159157e-03; /* 0xBF77A291, 0x236668E4 */ +const PP4: f64 = -2.37630166566501626084e-05; /* 0xBEF8EAD6, 0x120016AC */ +const QQ1: f64 = 3.97917223959155352819e-01; /* 0x3FD97779, 0xCDDADC09 */ +const QQ2: f64 = 6.50222499887672944485e-02; /* 0x3FB0A54C, 0x5536CEBA */ +const QQ3: f64 = 5.08130628187576562776e-03; /* 0x3F74D022, 0xC4D36B0F */ +const QQ4: f64 = 1.32494738004321644526e-04; /* 0x3F215DC9, 0x221C1A10 */ +const QQ5: f64 = -3.96022827877536812320e-06; /* 0xBED09C43, 0x42A26120 */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +const PA0: f64 = -2.36211856075265944077e-03; /* 0xBF6359B8, 0xBEF77538 */ +const PA1: f64 = 4.14856118683748331666e-01; /* 0x3FDA8D00, 0xAD92B34D */ +const PA2: f64 = -3.72207876035701323847e-01; /* 0xBFD7D240, 0xFBB8C3F1 */ +const PA3: f64 = 3.18346619901161753674e-01; /* 0x3FD45FCA, 0x805120E4 */ +const PA4: f64 = -1.10894694282396677476e-01; /* 0xBFBC6398, 0x3D3E28EC */ +const PA5: f64 = 3.54783043256182359371e-02; /* 0x3FA22A36, 0x599795EB */ +const PA6: f64 = -2.16637559486879084300e-03; /* 0xBF61BF38, 0x0A96073F */ +const QA1: f64 = 1.06420880400844228286e-01; /* 0x3FBB3E66, 0x18EEE323 */ +const QA2: f64 = 5.40397917702171048937e-01; /* 0x3FE14AF0, 0x92EB6F33 */ +const QA3: f64 = 7.18286544141962662868e-02; /* 0x3FB2635C, 0xD99FE9A7 */ +const QA4: f64 = 1.26171219808761642112e-01; /* 0x3FC02660, 0xE763351F */ +const QA5: f64 = 1.36370839120290507362e-02; /* 0x3F8BEDC2, 0x6B51DD1C */ +const QA6: f64 = 1.19844998467991074170e-02; /* 0x3F888B54, 0x5735151D */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +const RA0: f64 = -9.86494403484714822705e-03; /* 0xBF843412, 0x600D6435 */ +const RA1: f64 = -6.93858572707181764372e-01; /* 0xBFE63416, 0xE4BA7360 */ +const RA2: f64 = -1.05586262253232909814e+01; /* 0xC0251E04, 0x41B0E726 */ +const RA3: f64 = -6.23753324503260060396e+01; /* 0xC04F300A, 0xE4CBA38D */ +const RA4: f64 = -1.62396669462573470355e+02; /* 0xC0644CB1, 0x84282266 */ +const RA5: f64 = -1.84605092906711035994e+02; /* 0xC067135C, 0xEBCCABB2 */ +const RA6: f64 = -8.12874355063065934246e+01; /* 0xC0545265, 0x57E4D2F2 */ +const RA7: f64 = -9.81432934416914548592e+00; /* 0xC023A0EF, 0xC69AC25C */ +const SA1: f64 = 1.96512716674392571292e+01; /* 0x4033A6B9, 0xBD707687 */ +const SA2: f64 = 1.37657754143519042600e+02; /* 0x4061350C, 0x526AE721 */ +const SA3: f64 = 4.34565877475229228821e+02; /* 0x407B290D, 0xD58A1A71 */ +const SA4: f64 = 6.45387271733267880336e+02; /* 0x40842B19, 0x21EC2868 */ +const SA5: f64 = 4.29008140027567833386e+02; /* 0x407AD021, 0x57700314 */ +const SA6: f64 = 1.08635005541779435134e+02; /* 0x405B28A3, 0xEE48AE2C */ +const SA7: f64 = 6.57024977031928170135e+00; /* 0x401A47EF, 0x8E484A93 */ +const SA8: f64 = -6.04244152148580987438e-02; /* 0xBFAEEFF2, 0xEE749A62 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +const RB0: f64 = -9.86494292470009928597e-03; /* 0xBF843412, 0x39E86F4A */ +const RB1: f64 = -7.99283237680523006574e-01; /* 0xBFE993BA, 0x70C285DE */ +const RB2: f64 = -1.77579549177547519889e+01; /* 0xC031C209, 0x555F995A */ +const RB3: f64 = -1.60636384855821916062e+02; /* 0xC064145D, 0x43C5ED98 */ +const RB4: f64 = -6.37566443368389627722e+02; /* 0xC083EC88, 0x1375F228 */ +const RB5: f64 = -1.02509513161107724954e+03; /* 0xC0900461, 0x6A2E5992 */ +const RB6: f64 = -4.83519191608651397019e+02; /* 0xC07E384E, 0x9BDC383F */ +const SB1: f64 = 3.03380607434824582924e+01; /* 0x403E568B, 0x261D5190 */ +const SB2: f64 = 3.25792512996573918826e+02; /* 0x40745CAE, 0x221B9F0A */ +const SB3: f64 = 1.53672958608443695994e+03; /* 0x409802EB, 0x189D5118 */ +const SB4: f64 = 3.19985821950859553908e+03; /* 0x40A8FFB7, 0x688C246A */ +const SB5: f64 = 2.55305040643316442583e+03; /* 0x40A3F219, 0xCEDF3BE6 */ +const SB6: f64 = 4.74528541206955367215e+02; /* 0x407DA874, 0xE79FE763 */ +const SB7: f64 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ + +fn erfc1(x: f64) -> f64 { + let s: f64; + let p: f64; + let q: f64; + + s = fabs(x) - 1.0; + p = PA0 + s * (PA1 + s * (PA2 + s * (PA3 + s * (PA4 + s * (PA5 + s * PA6))))); + q = 1.0 + s * (QA1 + s * (QA2 + s * (QA3 + s * (QA4 + s * (QA5 + s * QA6))))); + + 1.0 - ERX - p / q +} + +fn erfc2(ix: u32, mut x: f64) -> f64 { + let s: f64; + let r: f64; + let big_s: f64; + let z: f64; + + if ix < 0x3ff40000 { + /* |x| < 1.25 */ + return erfc1(x); + } + + x = fabs(x); + s = 1.0 / (x * x); + if ix < 0x4006db6d { + /* |x| < 1/.35 ~ 2.85714 */ + r = RA0 + s * (RA1 + s * (RA2 + s * (RA3 + s * (RA4 + s * (RA5 + s * (RA6 + s * RA7)))))); + big_s = 1.0 + + s * (SA1 + + s * (SA2 + s * (SA3 + s * (SA4 + s * (SA5 + s * (SA6 + s * (SA7 + s * SA8))))))); + } else { + /* |x| > 1/.35 */ + r = RB0 + s * (RB1 + s * (RB2 + s * (RB3 + s * (RB4 + s * (RB5 + s * RB6))))); + big_s = + 1.0 + s * (SB1 + s * (SB2 + s * (SB3 + s * (SB4 + s * (SB5 + s * (SB6 + s * SB7)))))); + } + z = with_set_low_word(x, 0); + + exp(-z * z - 0.5625) * exp((z - x) * (z + x) + r / big_s) / x +} + +/// Error function (f64) +/// +/// Calculates an approximation to the āerror functionā, which estimates +/// the probability that an observation will fall within x standard +/// deviations of the mean (assuming a normal distribution). +pub fn erf(x: f64) -> f64 { + let r: f64; + let s: f64; + let z: f64; + let y: f64; + let mut ix: u32; + let sign: usize; + + ix = get_high_word(x); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + return 1.0 - 2.0 * (sign as f64) + 1.0 / x; + } + if ix < 0x3feb0000 { + /* |x| < 0.84375 */ + if ix < 0x3e300000 { + /* |x| < 2**-28 */ + /* avoid underflow */ + return 0.125 * (8.0 * x + EFX8 * x); + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + return x + x * y; + } + if ix < 0x40180000 { + /* 0.84375 <= |x| < 6 */ + y = 1.0 - erfc2(ix, x); + } else { + let x1p_1022 = f64::from_bits(0x0010000000000000); + y = 1.0 - x1p_1022; + } + + if sign != 0 { + -y + } else { + y + } +} + +/// Error function (f64) +/// +/// Calculates the complementary probability. +/// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid +/// the loss of precision that would result from subtracting +/// large probabilities (on large `x`) from 1. +pub fn erfc(x: f64) -> f64 { + let r: f64; + let s: f64; + let z: f64; + let y: f64; + let mut ix: u32; + let sign: usize; + + ix = get_high_word(x); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return 2.0 * (sign as f64) + 1.0 / x; + } + if ix < 0x3feb0000 { + /* |x| < 0.84375 */ + if ix < 0x3c700000 { + /* |x| < 2**-56 */ + return 1.0 - x; + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + if sign != 0 || ix < 0x3fd00000 { + /* x < 1/4 */ + return 1.0 - (x + x * y); + } + return 0.5 - (x - 0.5 + x * y); + } + if ix < 0x403c0000 { + /* 0.84375 <= |x| < 28 */ + if sign != 0 { + return 2.0 - erfc2(ix, x); + } else { + return erfc2(ix, x); + } + } + + let x1p_1022 = f64::from_bits(0x0010000000000000); + if sign != 0 { + 2.0 - x1p_1022 + } else { + x1p_1022 * x1p_1022 + } +} diff --git a/vendor/compiler_builtins/libm/src/math/erff.rs b/vendor/compiler_builtins/libm/src/math/erff.rs new file mode 100644 index 000000000..384052293 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/erff.rs @@ -0,0 +1,229 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{expf, fabsf}; + +const ERX: f32 = 8.4506291151e-01; /* 0x3f58560b */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +const EFX8: f32 = 1.0270333290e+00; /* 0x3f8375d4 */ +const PP0: f32 = 1.2837916613e-01; /* 0x3e0375d4 */ +const PP1: f32 = -3.2504209876e-01; /* 0xbea66beb */ +const PP2: f32 = -2.8481749818e-02; /* 0xbce9528f */ +const PP3: f32 = -5.7702702470e-03; /* 0xbbbd1489 */ +const PP4: f32 = -2.3763017452e-05; /* 0xb7c756b1 */ +const QQ1: f32 = 3.9791721106e-01; /* 0x3ecbbbce */ +const QQ2: f32 = 6.5022252500e-02; /* 0x3d852a63 */ +const QQ3: f32 = 5.0813062117e-03; /* 0x3ba68116 */ +const QQ4: f32 = 1.3249473704e-04; /* 0x390aee49 */ +const QQ5: f32 = -3.9602282413e-06; /* 0xb684e21a */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +const PA0: f32 = -2.3621185683e-03; /* 0xbb1acdc6 */ +const PA1: f32 = 4.1485610604e-01; /* 0x3ed46805 */ +const PA2: f32 = -3.7220788002e-01; /* 0xbebe9208 */ +const PA3: f32 = 3.1834661961e-01; /* 0x3ea2fe54 */ +const PA4: f32 = -1.1089469492e-01; /* 0xbde31cc2 */ +const PA5: f32 = 3.5478305072e-02; /* 0x3d1151b3 */ +const PA6: f32 = -2.1663755178e-03; /* 0xbb0df9c0 */ +const QA1: f32 = 1.0642088205e-01; /* 0x3dd9f331 */ +const QA2: f32 = 5.4039794207e-01; /* 0x3f0a5785 */ +const QA3: f32 = 7.1828655899e-02; /* 0x3d931ae7 */ +const QA4: f32 = 1.2617121637e-01; /* 0x3e013307 */ +const QA5: f32 = 1.3637083583e-02; /* 0x3c5f6e13 */ +const QA6: f32 = 1.1984500103e-02; /* 0x3c445aa3 */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +const RA0: f32 = -9.8649440333e-03; /* 0xbc21a093 */ +const RA1: f32 = -6.9385856390e-01; /* 0xbf31a0b7 */ +const RA2: f32 = -1.0558626175e+01; /* 0xc128f022 */ +const RA3: f32 = -6.2375331879e+01; /* 0xc2798057 */ +const RA4: f32 = -1.6239666748e+02; /* 0xc322658c */ +const RA5: f32 = -1.8460508728e+02; /* 0xc3389ae7 */ +const RA6: f32 = -8.1287437439e+01; /* 0xc2a2932b */ +const RA7: f32 = -9.8143291473e+00; /* 0xc11d077e */ +const SA1: f32 = 1.9651271820e+01; /* 0x419d35ce */ +const SA2: f32 = 1.3765776062e+02; /* 0x4309a863 */ +const SA3: f32 = 4.3456588745e+02; /* 0x43d9486f */ +const SA4: f32 = 6.4538726807e+02; /* 0x442158c9 */ +const SA5: f32 = 4.2900814819e+02; /* 0x43d6810b */ +const SA6: f32 = 1.0863500214e+02; /* 0x42d9451f */ +const SA7: f32 = 6.5702495575e+00; /* 0x40d23f7c */ +const SA8: f32 = -6.0424413532e-02; /* 0xbd777f97 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +const RB0: f32 = -9.8649431020e-03; /* 0xbc21a092 */ +const RB1: f32 = -7.9928326607e-01; /* 0xbf4c9dd4 */ +const RB2: f32 = -1.7757955551e+01; /* 0xc18e104b */ +const RB3: f32 = -1.6063638306e+02; /* 0xc320a2ea */ +const RB4: f32 = -6.3756646729e+02; /* 0xc41f6441 */ +const RB5: f32 = -1.0250950928e+03; /* 0xc480230b */ +const RB6: f32 = -4.8351919556e+02; /* 0xc3f1c275 */ +const SB1: f32 = 3.0338060379e+01; /* 0x41f2b459 */ +const SB2: f32 = 3.2579251099e+02; /* 0x43a2e571 */ +const SB3: f32 = 1.5367296143e+03; /* 0x44c01759 */ +const SB4: f32 = 3.1998581543e+03; /* 0x4547fdbb */ +const SB5: f32 = 2.5530502930e+03; /* 0x451f90ce */ +const SB6: f32 = 4.7452853394e+02; /* 0x43ed43a7 */ +const SB7: f32 = -2.2440952301e+01; /* 0xc1b38712 */ + +fn erfc1(x: f32) -> f32 { + let s: f32; + let p: f32; + let q: f32; + + s = fabsf(x) - 1.0; + p = PA0 + s * (PA1 + s * (PA2 + s * (PA3 + s * (PA4 + s * (PA5 + s * PA6))))); + q = 1.0 + s * (QA1 + s * (QA2 + s * (QA3 + s * (QA4 + s * (QA5 + s * QA6))))); + return 1.0 - ERX - p / q; +} + +fn erfc2(mut ix: u32, mut x: f32) -> f32 { + let s: f32; + let r: f32; + let big_s: f32; + let z: f32; + + if ix < 0x3fa00000 { + /* |x| < 1.25 */ + return erfc1(x); + } + + x = fabsf(x); + s = 1.0 / (x * x); + if ix < 0x4036db6d { + /* |x| < 1/0.35 */ + r = RA0 + s * (RA1 + s * (RA2 + s * (RA3 + s * (RA4 + s * (RA5 + s * (RA6 + s * RA7)))))); + big_s = 1.0 + + s * (SA1 + + s * (SA2 + s * (SA3 + s * (SA4 + s * (SA5 + s * (SA6 + s * (SA7 + s * SA8))))))); + } else { + /* |x| >= 1/0.35 */ + r = RB0 + s * (RB1 + s * (RB2 + s * (RB3 + s * (RB4 + s * (RB5 + s * RB6))))); + big_s = + 1.0 + s * (SB1 + s * (SB2 + s * (SB3 + s * (SB4 + s * (SB5 + s * (SB6 + s * SB7)))))); + } + ix = x.to_bits(); + z = f32::from_bits(ix & 0xffffe000); + + expf(-z * z - 0.5625) * expf((z - x) * (z + x) + r / big_s) / x +} + +/// Error function (f32) +/// +/// Calculates an approximation to the āerror functionā, which estimates +/// the probability that an observation will fall within x standard +/// deviations of the mean (assuming a normal distribution). +pub fn erff(x: f32) -> f32 { + let r: f32; + let s: f32; + let z: f32; + let y: f32; + let mut ix: u32; + let sign: usize; + + ix = x.to_bits(); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + return 1.0 - 2.0 * (sign as f32) + 1.0 / x; + } + if ix < 0x3f580000 { + /* |x| < 0.84375 */ + if ix < 0x31800000 { + /* |x| < 2**-28 */ + /*avoid underflow */ + return 0.125 * (8.0 * x + EFX8 * x); + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + return x + x * y; + } + if ix < 0x40c00000 { + /* |x| < 6 */ + y = 1.0 - erfc2(ix, x); + } else { + let x1p_120 = f32::from_bits(0x03800000); + y = 1.0 - x1p_120; + } + + if sign != 0 { + -y + } else { + y + } +} + +/// Error function (f32) +/// +/// Calculates the complementary probability. +/// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid +/// the loss of precision that would result from subtracting +/// large probabilities (on large `x`) from 1. +pub fn erfcf(x: f32) -> f32 { + let r: f32; + let s: f32; + let z: f32; + let y: f32; + let mut ix: u32; + let sign: usize; + + ix = x.to_bits(); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return 2.0 * (sign as f32) + 1.0 / x; + } + + if ix < 0x3f580000 { + /* |x| < 0.84375 */ + if ix < 0x23800000 { + /* |x| < 2**-56 */ + return 1.0 - x; + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + if sign != 0 || ix < 0x3e800000 { + /* x < 1/4 */ + return 1.0 - (x + x * y); + } + return 0.5 - (x - 0.5 + x * y); + } + if ix < 0x41e00000 { + /* |x| < 28 */ + if sign != 0 { + return 2.0 - erfc2(ix, x); + } else { + return erfc2(ix, x); + } + } + + let x1p_120 = f32::from_bits(0x03800000); + if sign != 0 { + 2.0 - x1p_120 + } else { + x1p_120 * x1p_120 + } +} diff --git a/vendor/compiler_builtins/libm/src/math/exp.rs b/vendor/compiler_builtins/libm/src/math/exp.rs new file mode 100644 index 000000000..d4994277f --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/exp.rs @@ -0,0 +1,154 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* exp(x) + * Returns the exponential of x. + * + * Method + * 1. Argument reduction: + * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. + * Given x, find r and integer k such that + * + * x = k*ln2 + r, |r| <= 0.5*ln2. + * + * Here r will be represented as r = hi-lo for better + * accuracy. + * + * 2. Approximation of exp(r) by a special rational function on + * the interval [0,0.34658]: + * Write + * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... + * We use a special Remez algorithm on [0,0.34658] to generate + * a polynomial of degree 5 to approximate R. The maximum error + * of this polynomial approximation is bounded by 2**-59. In + * other words, + * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 + * (where z=r*r, and the values of P1 to P5 are listed below) + * and + * | 5 | -59 + * | 2.0+P1*z+...+P5*z - R(z) | <= 2 + * | | + * The computation of exp(r) thus becomes + * 2*r + * exp(r) = 1 + ---------- + * R(r) - r + * r*c(r) + * = 1 + r + ----------- (for better accuracy) + * 2 - c(r) + * where + * 2 4 10 + * c(r) = r - (P1*r + P2*r + ... + P5*r ). + * + * 3. Scale back to obtain exp(x): + * From step 1, we have + * exp(x) = 2^k * exp(r) + * + * Special cases: + * exp(INF) is INF, exp(NaN) is NaN; + * exp(-INF) is 0, and + * for finite argument, only exp(0)=1 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Misc. info. + * For IEEE double + * if x > 709.782712893383973096 then exp(x) overflows + * if x < -745.133219101941108420 then exp(x) underflows + */ + +use super::scalbn; + +const HALF: [f64; 2] = [0.5, -0.5]; +const LN2HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3FC55555, 0x5555553E */ +const P2: f64 = -2.77777777770155933842e-03; /* 0xBF66C16C, 0x16BEBD93 */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3F11566A, 0xAF25DE2C */ +const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ + +/// Exponential, base *e* (f64) +/// +/// Calculate the exponential of `x`, that is, *e* raised to the power `x` +/// (where *e* is the base of the natural system of logarithms, approximately 2.71828). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn exp(mut x: f64) -> f64 { + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 + let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149 + + let hi: f64; + let lo: f64; + let c: f64; + let xx: f64; + let y: f64; + let k: i32; + let sign: i32; + let mut hx: u32; + + hx = (x.to_bits() >> 32) as u32; + sign = (hx >> 31) as i32; + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if hx >= 0x4086232b { + /* if |x| >= 708.39... */ + if x.is_nan() { + return x; + } + if x > 709.782712893383973096 { + /* overflow if x!=inf */ + x *= x1p1023; + return x; + } + if x < -708.39641853226410622 { + /* underflow if x!=-inf */ + force_eval!((-x1p_149 / x) as f32); + if x < -745.13321910194110842 { + return 0.; + } + } + } + + /* argument reduction */ + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + if hx >= 0x3ff0a2b2 { + /* if |x| >= 1.5 ln2 */ + k = (INVLN2 * x + i!(HALF, sign as usize)) as i32; + } else { + k = 1 - sign - sign; + } + hi = x - k as f64 * LN2HI; /* k*ln2hi is exact here */ + lo = k as f64 * LN2LO; + x = hi - lo; + } else if hx > 0x3e300000 { + /* if |x| > 2**-28 */ + k = 0; + hi = x; + lo = 0.; + } else { + /* inexact if x!=0 */ + force_eval!(x1p1023 + x); + return 1. + x; + } + + /* x is now in primary range */ + xx = x * x; + c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5)))); + y = 1. + (x * c / (2. - c) - lo + hi); + if k == 0 { + y + } else { + scalbn(y, k) + } +} diff --git a/vendor/compiler_builtins/libm/src/math/exp10.rs b/vendor/compiler_builtins/libm/src/math/exp10.rs new file mode 100644 index 000000000..9537f76f1 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/exp10.rs @@ -0,0 +1,21 @@ +use super::{exp2, modf, pow}; + +const LN10: f64 = 3.32192809488736234787031942948939; +const P10: &[f64] = &[ + 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, +]; + +pub fn exp10(x: f64) -> f64 { + let (mut y, n) = modf(x); + let u: u64 = n.to_bits(); + /* fabs(n) < 16 without raising invalid on nan */ + if (u >> 52 & 0x7ff) < 0x3ff + 4 { + if y == 0.0 { + return P10[((n as isize) + 15) as usize]; + } + y = exp2(LN10 * y); + return y * P10[((n as isize) + 15) as usize]; + } + return pow(10.0, x); +} diff --git a/vendor/compiler_builtins/libm/src/math/exp10f.rs b/vendor/compiler_builtins/libm/src/math/exp10f.rs new file mode 100644 index 000000000..d45fff36e --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/exp10f.rs @@ -0,0 +1,21 @@ +use super::{exp2, exp2f, modff}; + +const LN10_F32: f32 = 3.32192809488736234787031942948939; +const LN10_F64: f64 = 3.32192809488736234787031942948939; +const P10: &[f32] = &[ + 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, +]; + +pub fn exp10f(x: f32) -> f32 { + let (mut y, n) = modff(x); + let u = n.to_bits(); + /* fabsf(n) < 8 without raising invalid on nan */ + if (u >> 23 & 0xff) < 0x7f + 3 { + if y == 0.0 { + return P10[((n as isize) + 7) as usize]; + } + y = exp2f(LN10_F32 * y); + return y * P10[((n as isize) + 7) as usize]; + } + return exp2(LN10_F64 * (x as f64)) as f32; +} diff --git a/vendor/compiler_builtins/libm/src/math/exp2.rs b/vendor/compiler_builtins/libm/src/math/exp2.rs new file mode 100644 index 000000000..e0e385df2 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/exp2.rs @@ -0,0 +1,394 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_exp2.c */ +//- +// Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +use super::scalbn; + +const TBLSIZE: usize = 256; + +#[cfg_attr(rustfmt, rustfmt_skip)] +static TBL: [u64; TBLSIZE * 2] = [ + // exp2(z + eps) eps + 0x3fe6a09e667f3d5d, 0x3d39880000000000, + 0x3fe6b052fa751744, 0x3cd8000000000000, + 0x3fe6c012750bd9fe, 0xbd28780000000000, + 0x3fe6cfdcddd476bf, 0x3d1ec00000000000, + 0x3fe6dfb23c651a29, 0xbcd8000000000000, + 0x3fe6ef9298593ae3, 0xbcbc000000000000, + 0x3fe6ff7df9519386, 0xbd2fd80000000000, + 0x3fe70f7466f42da3, 0xbd2c880000000000, + 0x3fe71f75e8ec5fc3, 0x3d13c00000000000, + 0x3fe72f8286eacf05, 0xbd38300000000000, + 0x3fe73f9a48a58152, 0xbd00c00000000000, + 0x3fe74fbd35d7ccfc, 0x3d2f880000000000, + 0x3fe75feb564267f1, 0x3d03e00000000000, + 0x3fe77024b1ab6d48, 0xbd27d00000000000, + 0x3fe780694fde5d38, 0xbcdd000000000000, + 0x3fe790b938ac1d00, 0x3ce3000000000000, + 0x3fe7a11473eb0178, 0xbced000000000000, + 0x3fe7b17b0976d060, 0x3d20400000000000, + 0x3fe7c1ed0130c133, 0x3ca0000000000000, + 0x3fe7d26a62ff8636, 0xbd26900000000000, + 0x3fe7e2f336cf4e3b, 0xbd02e00000000000, + 0x3fe7f3878491c3e8, 0xbd24580000000000, + 0x3fe80427543e1b4e, 0x3d33000000000000, + 0x3fe814d2add1071a, 0x3d0f000000000000, + 0x3fe82589994ccd7e, 0xbd21c00000000000, + 0x3fe8364c1eb942d0, 0x3d29d00000000000, + 0x3fe8471a4623cab5, 0x3d47100000000000, + 0x3fe857f4179f5bbc, 0x3d22600000000000, + 0x3fe868d99b4491af, 0xbd32c40000000000, + 0x3fe879cad931a395, 0xbd23000000000000, + 0x3fe88ac7d98a65b8, 0xbd2a800000000000, + 0x3fe89bd0a4785800, 0xbced000000000000, + 0x3fe8ace5422aa223, 0x3d33280000000000, + 0x3fe8be05bad619fa, 0x3d42b40000000000, + 0x3fe8cf3216b54383, 0xbd2ed00000000000, + 0x3fe8e06a5e08664c, 0xbd20500000000000, + 0x3fe8f1ae99157807, 0x3d28280000000000, + 0x3fe902fed0282c0e, 0xbd1cb00000000000, + 0x3fe9145b0b91ff96, 0xbd05e00000000000, + 0x3fe925c353aa2ff9, 0x3cf5400000000000, + 0x3fe93737b0cdc64a, 0x3d17200000000000, + 0x3fe948b82b5f98ae, 0xbd09000000000000, + 0x3fe95a44cbc852cb, 0x3d25680000000000, + 0x3fe96bdd9a766f21, 0xbd36d00000000000, + 0x3fe97d829fde4e2a, 0xbd01000000000000, + 0x3fe98f33e47a23a3, 0x3d2d000000000000, + 0x3fe9a0f170ca0604, 0xbd38a40000000000, + 0x3fe9b2bb4d53ff89, 0x3d355c0000000000, + 0x3fe9c49182a3f15b, 0x3d26b80000000000, + 0x3fe9d674194bb8c5, 0xbcec000000000000, + 0x3fe9e86319e3238e, 0x3d17d00000000000, + 0x3fe9fa5e8d07f302, 0x3d16400000000000, + 0x3fea0c667b5de54d, 0xbcf5000000000000, + 0x3fea1e7aed8eb8f6, 0x3d09e00000000000, + 0x3fea309bec4a2e27, 0x3d2ad80000000000, + 0x3fea42c980460a5d, 0xbd1af00000000000, + 0x3fea5503b23e259b, 0x3d0b600000000000, + 0x3fea674a8af46213, 0x3d38880000000000, + 0x3fea799e1330b3a7, 0x3d11200000000000, + 0x3fea8bfe53c12e8d, 0x3d06c00000000000, + 0x3fea9e6b5579fcd2, 0xbd29b80000000000, + 0x3feab0e521356fb8, 0x3d2b700000000000, + 0x3feac36bbfd3f381, 0x3cd9000000000000, + 0x3fead5ff3a3c2780, 0x3ce4000000000000, + 0x3feae89f995ad2a3, 0xbd2c900000000000, + 0x3feafb4ce622f367, 0x3d16500000000000, + 0x3feb0e07298db790, 0x3d2fd40000000000, + 0x3feb20ce6c9a89a9, 0x3d12700000000000, + 0x3feb33a2b84f1a4b, 0x3d4d470000000000, + 0x3feb468415b747e7, 0xbd38380000000000, + 0x3feb59728de5593a, 0x3c98000000000000, + 0x3feb6c6e29f1c56a, 0x3d0ad00000000000, + 0x3feb7f76f2fb5e50, 0x3cde800000000000, + 0x3feb928cf22749b2, 0xbd04c00000000000, + 0x3feba5b030a10603, 0xbd0d700000000000, + 0x3febb8e0b79a6f66, 0x3d0d900000000000, + 0x3febcc1e904bc1ff, 0x3d02a00000000000, + 0x3febdf69c3f3a16f, 0xbd1f780000000000, + 0x3febf2c25bd71db8, 0xbd10a00000000000, + 0x3fec06286141b2e9, 0xbd11400000000000, + 0x3fec199bdd8552e0, 0x3d0be00000000000, + 0x3fec2d1cd9fa64ee, 0xbd09400000000000, + 0x3fec40ab5fffd02f, 0xbd0ed00000000000, + 0x3fec544778fafd15, 0x3d39660000000000, + 0x3fec67f12e57d0cb, 0xbd1a100000000000, + 0x3fec7ba88988c1b6, 0xbd58458000000000, + 0x3fec8f6d9406e733, 0xbd1a480000000000, + 0x3feca3405751c4df, 0x3ccb000000000000, + 0x3fecb720dcef9094, 0x3d01400000000000, + 0x3feccb0f2e6d1689, 0x3cf0200000000000, + 0x3fecdf0b555dc412, 0x3cf3600000000000, + 0x3fecf3155b5bab3b, 0xbd06900000000000, + 0x3fed072d4a0789bc, 0x3d09a00000000000, + 0x3fed1b532b08c8fa, 0xbd15e00000000000, + 0x3fed2f87080d8a85, 0x3d1d280000000000, + 0x3fed43c8eacaa203, 0x3d01a00000000000, + 0x3fed5818dcfba491, 0x3cdf000000000000, + 0x3fed6c76e862e6a1, 0xbd03a00000000000, + 0x3fed80e316c9834e, 0xbd0cd80000000000, + 0x3fed955d71ff6090, 0x3cf4c00000000000, + 0x3feda9e603db32ae, 0x3cff900000000000, + 0x3fedbe7cd63a8325, 0x3ce9800000000000, + 0x3fedd321f301b445, 0xbcf5200000000000, + 0x3fede7d5641c05bf, 0xbd1d700000000000, + 0x3fedfc97337b9aec, 0xbd16140000000000, + 0x3fee11676b197d5e, 0x3d0b480000000000, + 0x3fee264614f5a3e7, 0x3d40ce0000000000, + 0x3fee3b333b16ee5c, 0x3d0c680000000000, + 0x3fee502ee78b3fb4, 0xbd09300000000000, + 0x3fee653924676d68, 0xbce5000000000000, + 0x3fee7a51fbc74c44, 0xbd07f80000000000, + 0x3fee8f7977cdb726, 0xbcf3700000000000, + 0x3feea4afa2a490e8, 0x3ce5d00000000000, + 0x3feeb9f4867ccae4, 0x3d161a0000000000, + 0x3feecf482d8e680d, 0x3cf5500000000000, + 0x3feee4aaa2188514, 0x3cc6400000000000, + 0x3feefa1bee615a13, 0xbcee800000000000, + 0x3fef0f9c1cb64106, 0xbcfa880000000000, + 0x3fef252b376bb963, 0xbd2c900000000000, + 0x3fef3ac948dd7275, 0x3caa000000000000, + 0x3fef50765b6e4524, 0xbcf4f00000000000, + 0x3fef6632798844fd, 0x3cca800000000000, + 0x3fef7bfdad9cbe38, 0x3cfabc0000000000, + 0x3fef91d802243c82, 0xbcd4600000000000, + 0x3fefa7c1819e908e, 0xbd0b0c0000000000, + 0x3fefbdba3692d511, 0xbcc0e00000000000, + 0x3fefd3c22b8f7194, 0xbd10de8000000000, + 0x3fefe9d96b2a23ee, 0x3cee430000000000, + 0x3ff0000000000000, 0x0, + 0x3ff00b1afa5abcbe, 0xbcb3400000000000, + 0x3ff0163da9fb3303, 0xbd12170000000000, + 0x3ff02168143b0282, 0x3cba400000000000, + 0x3ff02c9a3e77806c, 0x3cef980000000000, + 0x3ff037d42e11bbca, 0xbcc7400000000000, + 0x3ff04315e86e7f89, 0x3cd8300000000000, + 0x3ff04e5f72f65467, 0xbd1a3f0000000000, + 0x3ff059b0d315855a, 0xbd02840000000000, + 0x3ff0650a0e3c1f95, 0x3cf1600000000000, + 0x3ff0706b29ddf71a, 0x3d15240000000000, + 0x3ff07bd42b72a82d, 0xbce9a00000000000, + 0x3ff0874518759bd0, 0x3ce6400000000000, + 0x3ff092bdf66607c8, 0xbd00780000000000, + 0x3ff09e3ecac6f383, 0xbc98000000000000, + 0x3ff0a9c79b1f3930, 0x3cffa00000000000, + 0x3ff0b5586cf988fc, 0xbcfac80000000000, + 0x3ff0c0f145e46c8a, 0x3cd9c00000000000, + 0x3ff0cc922b724816, 0x3d05200000000000, + 0x3ff0d83b23395dd8, 0xbcfad00000000000, + 0x3ff0e3ec32d3d1f3, 0x3d1bac0000000000, + 0x3ff0efa55fdfa9a6, 0xbd04e80000000000, + 0x3ff0fb66affed2f0, 0xbd0d300000000000, + 0x3ff1073028d7234b, 0x3cf1500000000000, + 0x3ff11301d0125b5b, 0x3cec000000000000, + 0x3ff11edbab5e2af9, 0x3d16bc0000000000, + 0x3ff12abdc06c31d5, 0x3ce8400000000000, + 0x3ff136a814f2047d, 0xbd0ed00000000000, + 0x3ff1429aaea92de9, 0x3ce8e00000000000, + 0x3ff14e95934f3138, 0x3ceb400000000000, + 0x3ff15a98c8a58e71, 0x3d05300000000000, + 0x3ff166a45471c3df, 0x3d03380000000000, + 0x3ff172b83c7d5211, 0x3d28d40000000000, + 0x3ff17ed48695bb9f, 0xbd05d00000000000, + 0x3ff18af9388c8d93, 0xbd1c880000000000, + 0x3ff1972658375d66, 0x3d11f00000000000, + 0x3ff1a35beb6fcba7, 0x3d10480000000000, + 0x3ff1af99f81387e3, 0xbd47390000000000, + 0x3ff1bbe084045d54, 0x3d24e40000000000, + 0x3ff1c82f95281c43, 0xbd0a200000000000, + 0x3ff1d4873168b9b2, 0x3ce3800000000000, + 0x3ff1e0e75eb44031, 0x3ceac00000000000, + 0x3ff1ed5022fcd938, 0x3d01900000000000, + 0x3ff1f9c18438cdf7, 0xbd1b780000000000, + 0x3ff2063b88628d8f, 0x3d2d940000000000, + 0x3ff212be3578a81e, 0x3cd8000000000000, + 0x3ff21f49917ddd41, 0x3d2b340000000000, + 0x3ff22bdda2791323, 0x3d19f80000000000, + 0x3ff2387a6e7561e7, 0xbd19c80000000000, + 0x3ff2451ffb821427, 0x3d02300000000000, + 0x3ff251ce4fb2a602, 0xbd13480000000000, + 0x3ff25e85711eceb0, 0x3d12700000000000, + 0x3ff26b4565e27d16, 0x3d11d00000000000, + 0x3ff2780e341de00f, 0x3d31ee0000000000, + 0x3ff284dfe1f5633e, 0xbd14c00000000000, + 0x3ff291ba7591bb30, 0xbd13d80000000000, + 0x3ff29e9df51fdf09, 0x3d08b00000000000, + 0x3ff2ab8a66d10e9b, 0xbd227c0000000000, + 0x3ff2b87fd0dada3a, 0x3d2a340000000000, + 0x3ff2c57e39771af9, 0xbd10800000000000, + 0x3ff2d285a6e402d9, 0xbd0ed00000000000, + 0x3ff2df961f641579, 0xbcf4200000000000, + 0x3ff2ecafa93e2ecf, 0xbd24980000000000, + 0x3ff2f9d24abd8822, 0xbd16300000000000, + 0x3ff306fe0a31b625, 0xbd32360000000000, + 0x3ff31432edeea50b, 0xbd70df8000000000, + 0x3ff32170fc4cd7b8, 0xbd22480000000000, + 0x3ff32eb83ba8e9a2, 0xbd25980000000000, + 0x3ff33c08b2641766, 0x3d1ed00000000000, + 0x3ff3496266e3fa27, 0xbcdc000000000000, + 0x3ff356c55f929f0f, 0xbd30d80000000000, + 0x3ff36431a2de88b9, 0x3d22c80000000000, + 0x3ff371a7373aaa39, 0x3d20600000000000, + 0x3ff37f26231e74fe, 0xbd16600000000000, + 0x3ff38cae6d05d838, 0xbd0ae00000000000, + 0x3ff39a401b713ec3, 0xbd44720000000000, + 0x3ff3a7db34e5a020, 0x3d08200000000000, + 0x3ff3b57fbfec6e95, 0x3d3e800000000000, + 0x3ff3c32dc313a8f2, 0x3cef800000000000, + 0x3ff3d0e544ede122, 0xbd17a00000000000, + 0x3ff3dea64c1234bb, 0x3d26300000000000, + 0x3ff3ec70df1c4ecc, 0xbd48a60000000000, + 0x3ff3fa4504ac7e8c, 0xbd3cdc0000000000, + 0x3ff40822c367a0bb, 0x3d25b80000000000, + 0x3ff4160a21f72e95, 0x3d1ec00000000000, + 0x3ff423fb27094646, 0xbd13600000000000, + 0x3ff431f5d950a920, 0x3d23980000000000, + 0x3ff43ffa3f84b9eb, 0x3cfa000000000000, + 0x3ff44e0860618919, 0xbcf6c00000000000, + 0x3ff45c2042a7d201, 0xbd0bc00000000000, + 0x3ff46a41ed1d0016, 0xbd12800000000000, + 0x3ff4786d668b3326, 0x3d30e00000000000, + 0x3ff486a2b5c13c00, 0xbd2d400000000000, + 0x3ff494e1e192af04, 0x3d0c200000000000, + 0x3ff4a32af0d7d372, 0xbd1e500000000000, + 0x3ff4b17dea6db801, 0x3d07800000000000, + 0x3ff4bfdad53629e1, 0xbd13800000000000, + 0x3ff4ce41b817c132, 0x3d00800000000000, + 0x3ff4dcb299fddddb, 0x3d2c700000000000, + 0x3ff4eb2d81d8ab96, 0xbd1ce00000000000, + 0x3ff4f9b2769d2d02, 0x3d19200000000000, + 0x3ff508417f4531c1, 0xbd08c00000000000, + 0x3ff516daa2cf662a, 0xbcfa000000000000, + 0x3ff5257de83f51ea, 0x3d4a080000000000, + 0x3ff5342b569d4eda, 0xbd26d80000000000, + 0x3ff542e2f4f6ac1a, 0xbd32440000000000, + 0x3ff551a4ca5d94db, 0x3d483c0000000000, + 0x3ff56070dde9116b, 0x3d24b00000000000, + 0x3ff56f4736b529de, 0x3d415a0000000000, + 0x3ff57e27dbe2c40e, 0xbd29e00000000000, + 0x3ff58d12d497c76f, 0xbd23080000000000, + 0x3ff59c0827ff0b4c, 0x3d4dec0000000000, + 0x3ff5ab07dd485427, 0xbcc4000000000000, + 0x3ff5ba11fba87af4, 0x3d30080000000000, + 0x3ff5c9268a59460b, 0xbd26c80000000000, + 0x3ff5d84590998e3f, 0x3d469a0000000000, + 0x3ff5e76f15ad20e1, 0xbd1b400000000000, + 0x3ff5f6a320dcebca, 0x3d17700000000000, + 0x3ff605e1b976dcb8, 0x3d26f80000000000, + 0x3ff6152ae6cdf715, 0x3d01000000000000, + 0x3ff6247eb03a5531, 0xbd15d00000000000, + 0x3ff633dd1d1929b5, 0xbd12d00000000000, + 0x3ff6434634ccc313, 0xbcea800000000000, + 0x3ff652b9febc8efa, 0xbd28600000000000, + 0x3ff6623882553397, 0x3d71fe0000000000, + 0x3ff671c1c708328e, 0xbd37200000000000, + 0x3ff68155d44ca97e, 0x3ce6800000000000, + 0x3ff690f4b19e9471, 0xbd29780000000000, +]; + +// exp2(x): compute the base 2 exponential of x +// +// Accuracy: Peak error < 0.503 ulp for normalized results. +// +// Method: (accurate tables) +// +// Reduce x: +// x = k + y, for integer k and |y| <= 1/2. +// Thus we have exp2(x) = 2**k * exp2(y). +// +// Reduce y: +// y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE. +// Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]), +// with |z - eps[i]| <= 2**-9 + 2**-39 for the table used. +// +// We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via +// a degree-5 minimax polynomial with maximum error under 1.3 * 2**-61. +// The values in exp2t[] and eps[] are chosen such that +// exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such +// that exp2t[i] is accurate to 2**-64. +// +// Note that the range of i is +-TBLSIZE/2, so we actually index the tables +// by i0 = i + TBLSIZE/2. For cache efficiency, exp2t[] and eps[] are +// virtual tables, interleaved in the real table tbl[]. +// +// This method is due to Gal, with many details due to Gal and Bachelis: +// +// Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library +// for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). + +/// Exponential, base 2 (f64) +/// +/// Calculate `2^x`, that is, 2 raised to the power `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn exp2(mut x: f64) -> f64 { + let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; + let p1 = f64::from_bits(0x3fe62e42fefa39ef); + let p2 = f64::from_bits(0x3fcebfbdff82c575); + let p3 = f64::from_bits(0x3fac6b08d704a0a6); + let p4 = f64::from_bits(0x3f83b2ab88f70400); + let p5 = f64::from_bits(0x3f55d88003875c74); + + // double_t r, t, z; + // uint32_t ix, i0; + // union {double f; uint64_t i;} u = {x}; + // union {uint32_t u; int32_t i;} k; + let x1p1023 = f64::from_bits(0x7fe0000000000000); + let x1p52 = f64::from_bits(0x4330000000000000); + let _0x1p_149 = f64::from_bits(0xb6a0000000000000); + + /* Filter out exceptional cases. */ + let ui = f64::to_bits(x); + let ix = ui >> 32 & 0x7fffffff; + if ix >= 0x408ff000 { + /* |x| >= 1022 or nan */ + if ix >= 0x40900000 && ui >> 63 == 0 { + /* x >= 1024 or nan */ + /* overflow */ + x *= x1p1023; + return x; + } + if ix >= 0x7ff00000 { + /* -inf or -nan */ + return -1.0 / x; + } + if ui >> 63 != 0 { + /* x <= -1022 */ + /* underflow */ + if x <= -1075.0 || x - x1p52 + x1p52 != x { + force_eval!((_0x1p_149 / x) as f32); + } + if x <= -1075.0 { + return 0.0; + } + } + } else if ix < 0x3c900000 { + /* |x| < 0x1p-54 */ + return 1.0 + x; + } + + /* Reduce x, computing z, i0, and k. */ + let ui = f64::to_bits(x + redux); + let mut i0 = ui as u32; + i0 = i0.wrapping_add(TBLSIZE as u32 / 2); + let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32; + let ki = div!(ku as i32, TBLSIZE as i32); + i0 %= TBLSIZE as u32; + let uf = f64::from_bits(ui) - redux; + let mut z = x - uf; + + /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ + let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */ + z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0] */ + let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5)))); + + scalbn(r, ki) +} + +#[test] +fn i0_wrap_test() { + let x = -3.0 / 256.0; + assert_eq!(exp2(x), f64::from_bits(0x3fefbdba3692d514)); +} diff --git a/vendor/compiler_builtins/libm/src/math/exp2f.rs b/vendor/compiler_builtins/libm/src/math/exp2f.rs new file mode 100644 index 000000000..f4867b80e --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/exp2f.rs @@ -0,0 +1,135 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_exp2f.c +//- +// Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +const TBLSIZE: usize = 16; + +static EXP2FT: [u64; TBLSIZE] = [ + 0x3fe6a09e667f3bcd, + 0x3fe7a11473eb0187, + 0x3fe8ace5422aa0db, + 0x3fe9c49182a3f090, + 0x3feae89f995ad3ad, + 0x3fec199bdd85529c, + 0x3fed5818dcfba487, + 0x3feea4afa2a490da, + 0x3ff0000000000000, + 0x3ff0b5586cf9890f, + 0x3ff172b83c7d517b, + 0x3ff2387a6e756238, + 0x3ff306fe0a31b715, + 0x3ff3dea64c123422, + 0x3ff4bfdad5362a27, + 0x3ff5ab07dd485429, +]; + +// exp2f(x): compute the base 2 exponential of x +// +// Accuracy: Peak error < 0.501 ulp; location of peak: -0.030110927. +// +// Method: (equally-spaced tables) +// +// Reduce x: +// x = k + y, for integer k and |y| <= 1/2. +// Thus we have exp2f(x) = 2**k * exp2(y). +// +// Reduce y: +// y = i/TBLSIZE + z for integer i near y * TBLSIZE. +// Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z), +// with |z| <= 2**-(TBLSIZE+1). +// +// We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a +// degree-4 minimax polynomial with maximum error under 1.4 * 2**-33. +// Using double precision for everything except the reduction makes +// roundoff error insignificant and simplifies the scaling step. +// +// This method is due to Tang, but I do not use his suggested parameters: +// +// Tang, P. Table-driven Implementation of the Exponential Function +// in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989). + +/// Exponential, base 2 (f32) +/// +/// Calculate `2^x`, that is, 2 raised to the power `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn exp2f(mut x: f32) -> f32 { + let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; + let p1 = f32::from_bits(0x3f317218); + let p2 = f32::from_bits(0x3e75fdf0); + let p3 = f32::from_bits(0x3d6359a4); + let p4 = f32::from_bits(0x3c1d964e); + + // double_t t, r, z; + // uint32_t ix, i0, k; + + let x1p127 = f32::from_bits(0x7f000000); + + /* Filter out exceptional cases. */ + let ui = f32::to_bits(x); + let ix = ui & 0x7fffffff; + if ix > 0x42fc0000 { + /* |x| > 126 */ + if ix > 0x7f800000 { + /* NaN */ + return x; + } + if ui >= 0x43000000 && ui < 0x80000000 { + /* x >= 128 */ + x *= x1p127; + return x; + } + if ui >= 0x80000000 { + /* x < -126 */ + if ui >= 0xc3160000 || (ui & 0x0000ffff != 0) { + force_eval!(f32::from_bits(0x80000001) / x); + } + if ui >= 0xc3160000 { + /* x <= -150 */ + return 0.0; + } + } + } else if ix <= 0x33000000 { + /* |x| <= 0x1p-25 */ + return 1.0 + x; + } + + /* Reduce x, computing z, i0, and k. */ + let ui = f32::to_bits(x + redux); + let mut i0 = ui; + i0 += TBLSIZE as u32 / 2; + let k = i0 / TBLSIZE as u32; + let ukf = f64::from_bits(((0x3ff + k) as u64) << 52); + i0 &= TBLSIZE as u32 - 1; + let mut uf = f32::from_bits(ui); + uf -= redux; + let z: f64 = (x - uf) as f64; + /* Compute r = exp2(y) = exp2ft[i0] * p(z). */ + let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize)); + let t: f64 = r as f64 * z; + let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); + + /* Scale by 2**k */ + (r * ukf) as f32 +} diff --git a/vendor/compiler_builtins/libm/src/math/expf.rs b/vendor/compiler_builtins/libm/src/math/expf.rs new file mode 100644 index 000000000..a53aa90a6 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/expf.rs @@ -0,0 +1,101 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::scalbnf; + +const HALF: [f32; 2] = [0.5, -0.5]; +const LN2_HI: f32 = 6.9314575195e-01; /* 0x3f317200 */ +const LN2_LO: f32 = 1.4286067653e-06; /* 0x35bfbe8e */ +const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ +/* + * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]: + * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74 + */ +const P1: f32 = 1.6666625440e-1; /* 0xaaaa8f.0p-26 */ +const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ + +/// Exponential, base *e* (f32) +/// +/// Calculate the exponential of `x`, that is, *e* raised to the power `x` +/// (where *e* is the base of the natural system of logarithms, approximately 2.71828). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn expf(mut x: f32) -> f32 { + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */ + let mut hx = x.to_bits(); + let sign = (hx >> 31) as i32; /* sign bit of x */ + let signb: bool = sign != 0; + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if hx >= 0x42aeac50 { + /* if |x| >= -87.33655f or NaN */ + if hx > 0x7f800000 { + /* NaN */ + return x; + } + if (hx >= 0x42b17218) && (!signb) { + /* x >= 88.722839f */ + /* overflow */ + x *= x1p127; + return x; + } + if signb { + /* underflow */ + force_eval!(-x1p_126 / x); + if hx >= 0x42cff1b5 { + /* x <= -103.972084f */ + return 0.; + } + } + } + + /* argument reduction */ + let k: i32; + let hi: f32; + let lo: f32; + if hx > 0x3eb17218 { + /* if |x| > 0.5 ln2 */ + if hx > 0x3f851592 { + /* if |x| > 1.5 ln2 */ + k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32; + } else { + k = 1 - sign - sign; + } + let kf = k as f32; + hi = x - kf * LN2_HI; /* k*ln2hi is exact here */ + lo = kf * LN2_LO; + x = hi - lo; + } else if hx > 0x39000000 { + /* |x| > 2**-14 */ + k = 0; + hi = x; + lo = 0.; + } else { + /* raise inexact */ + force_eval!(x1p127 + x); + return 1. + x; + } + + /* x is now in primary range */ + let xx = x * x; + let c = x - xx * (P1 + xx * P2); + let y = 1. + (x * c / (2. - c) - lo + hi); + if k == 0 { + y + } else { + scalbnf(y, k) + } +} diff --git a/vendor/compiler_builtins/libm/src/math/expm1.rs b/vendor/compiler_builtins/libm/src/math/expm1.rs new file mode 100644 index 000000000..42608509a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/expm1.rs @@ -0,0 +1,144 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f64; + +const O_THRESHOLD: f64 = 7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */ +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */ +const Q1: f64 = -3.33333333333331316428e-02; /* BFA11111 111110F4 */ +const Q2: f64 = 1.58730158725481460165e-03; /* 3F5A01A0 19FE5585 */ +const Q3: f64 = -7.93650757867487942473e-05; /* BF14CE19 9EAADBB7 */ +const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */ +const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ + +/// Exponential, base *e*, of x-1 (f64) +/// +/// Calculates the exponential of `x` and subtract 1, that is, *e* raised +/// to the power `x` minus 1 (where *e* is the base of the natural +/// system of logarithms, approximately 2.71828). +/// The result is accurate even for small values of `x`, +/// where using `exp(x)-1` would lose many significant digits. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn expm1(mut x: f64) -> f64 { + let hi: f64; + let lo: f64; + let k: i32; + let c: f64; + let mut t: f64; + let mut y: f64; + + let mut ui = x.to_bits(); + let hx = ((ui >> 32) & 0x7fffffff) as u32; + let sign = (ui >> 63) as i32; + + /* filter out huge and non-finite argument */ + if hx >= 0x4043687A { + /* if |x|>=56*ln2 */ + if x.is_nan() { + return x; + } + if sign != 0 { + return -1.0; + } + if x > O_THRESHOLD { + x *= f64::from_bits(0x7fe0000000000000); + return x; + } + } + + /* argument reduction */ + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + if hx < 0x3FF0A2B2 { + /* and |x| < 1.5 ln2 */ + if sign == 0 { + hi = x - LN2_HI; + lo = LN2_LO; + k = 1; + } else { + hi = x + LN2_HI; + lo = -LN2_LO; + k = -1; + } + } else { + k = (INVLN2 * x + if sign != 0 { -0.5 } else { 0.5 }) as i32; + t = k as f64; + hi = x - t * LN2_HI; /* t*ln2_hi is exact here */ + lo = t * LN2_LO; + } + x = hi - lo; + c = (hi - x) - lo; + } else if hx < 0x3c900000 { + /* |x| < 2**-54, return x */ + if hx < 0x00100000 { + force_eval!(x); + } + return x; + } else { + c = 0.0; + k = 0; + } + + /* x is now in primary range */ + let hfx = 0.5 * x; + let hxs = x * hfx; + let r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5)))); + t = 3.0 - r1 * hfx; + let mut e = hxs * ((r1 - t) / (6.0 - x * t)); + if k == 0 { + /* c is 0 */ + return x - (x * e - hxs); + } + e = x * (e - c) - c; + e -= hxs; + /* exp(x) ~ 2^k (x_reduced - e + 1) */ + if k == -1 { + return 0.5 * (x - e) - 0.5; + } + if k == 1 { + if x < -0.25 { + return -2.0 * (e - (x + 0.5)); + } + return 1.0 + 2.0 * (x - e); + } + ui = ((0x3ff + k) as u64) << 52; /* 2^k */ + let twopk = f64::from_bits(ui); + if k < 0 || k > 56 { + /* suffice to return exp(x)-1 */ + y = x - e + 1.0; + if k == 1024 { + y = y * 2.0 * f64::from_bits(0x7fe0000000000000); + } else { + y = y * twopk; + } + return y - 1.0; + } + ui = ((0x3ff - k) as u64) << 52; /* 2^-k */ + let uf = f64::from_bits(ui); + if k < 20 { + y = (x - e + (1.0 - uf)) * twopk; + } else { + y = (x - (e + uf) + 1.0) * twopk; + } + y +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::expm1(1.1), 2.0041660239464334); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/expm1f.rs b/vendor/compiler_builtins/libm/src/math/expm1f.rs new file mode 100644 index 000000000..3fc2a247b --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/expm1f.rs @@ -0,0 +1,134 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +const O_THRESHOLD: f32 = 8.8721679688e+01; /* 0x42b17180 */ +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ +/* + * Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]: + * |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04 + * Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c): + */ +const Q1: f32 = -3.3333212137e-2; /* -0x888868.0p-28 */ +const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ + +/// Exponential, base *e*, of x-1 (f32) +/// +/// Calculates the exponential of `x` and subtract 1, that is, *e* raised +/// to the power `x` minus 1 (where *e* is the base of the natural +/// system of logarithms, approximately 2.71828). +/// The result is accurate even for small values of `x`, +/// where using `exp(x)-1` would lose many significant digits. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn expm1f(mut x: f32) -> f32 { + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + + let mut hx = x.to_bits(); + let sign = (hx >> 31) != 0; + hx &= 0x7fffffff; + + /* filter out huge and non-finite argument */ + if hx >= 0x4195b844 { + /* if |x|>=27*ln2 */ + if hx > 0x7f800000 { + /* NaN */ + return x; + } + if sign { + return -1.; + } + if x > O_THRESHOLD { + x *= x1p127; + return x; + } + } + + let k: i32; + let hi: f32; + let lo: f32; + let mut c = 0f32; + /* argument reduction */ + if hx > 0x3eb17218 { + /* if |x| > 0.5 ln2 */ + if hx < 0x3F851592 { + /* and |x| < 1.5 ln2 */ + if !sign { + hi = x - LN2_HI; + lo = LN2_LO; + k = 1; + } else { + hi = x + LN2_HI; + lo = -LN2_LO; + k = -1; + } + } else { + k = (INV_LN2 * x + (if sign { -0.5 } else { 0.5 })) as i32; + let t = k as f32; + hi = x - t * LN2_HI; /* t*ln2_hi is exact here */ + lo = t * LN2_LO; + } + x = hi - lo; + c = (hi - x) - lo; + } else if hx < 0x33000000 { + /* when |x|<2**-25, return x */ + if hx < 0x00800000 { + force_eval!(x * x); + } + return x; + } else { + k = 0; + } + + /* x is now in primary range */ + let hfx = 0.5 * x; + let hxs = x * hfx; + let r1 = 1. + hxs * (Q1 + hxs * Q2); + let t = 3. - r1 * hfx; + let mut e = hxs * ((r1 - t) / (6. - x * t)); + if k == 0 { + /* c is 0 */ + return x - (x * e - hxs); + } + e = x * (e - c) - c; + e -= hxs; + /* exp(x) ~ 2^k (x_reduced - e + 1) */ + if k == -1 { + return 0.5 * (x - e) - 0.5; + } + if k == 1 { + if x < -0.25 { + return -2. * (e - (x + 0.5)); + } + return 1. + 2. * (x - e); + } + let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */ + if (k < 0) || (k > 56) { + /* suffice to return exp(x)-1 */ + let mut y = x - e + 1.; + if k == 128 { + y = y * 2. * x1p127; + } else { + y = y * twopk; + } + return y - 1.; + } + let uf = f32::from_bits(((0x7f - k) << 23) as u32); /* 2^-k */ + if k < 23 { + (x - e + (1. - uf)) * twopk + } else { + (x - (e + uf) + 1.) * twopk + } +} diff --git a/vendor/compiler_builtins/libm/src/math/expo2.rs b/vendor/compiler_builtins/libm/src/math/expo2.rs new file mode 100644 index 000000000..82e9b360a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/expo2.rs @@ -0,0 +1,14 @@ +use super::{combine_words, exp}; + +/* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn expo2(x: f64) -> f64 { + /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ + const K: i32 = 2043; + let kln2 = f64::from_bits(0x40962066151add8b); + + /* note that k is odd and scale*scale overflows */ + let scale = combine_words(((0x3ff + K / 2) as u32) << 20, 0); + /* exp(x - k ln2) * 2**(k-1) */ + exp(x - kln2) * scale * scale +} diff --git a/vendor/compiler_builtins/libm/src/math/fabs.rs b/vendor/compiler_builtins/libm/src/math/fabs.rs new file mode 100644 index 000000000..b2255ad32 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fabs.rs @@ -0,0 +1,41 @@ +use core::u64; + +/// Absolute value (magnitude) (f64) +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabs(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.abs` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::fabsf64(x) } + } + } + f64::from_bits(x.to_bits() & (u64::MAX / 2)) +} + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::*; + + #[test] + fn sanity_check() { + assert_eq!(fabs(-1.0), 1.0); + assert_eq!(fabs(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabs(NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabs(f), 0.0); + } + for f in [INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(fabs(f), INFINITY); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/fabsf.rs b/vendor/compiler_builtins/libm/src/math/fabsf.rs new file mode 100644 index 000000000..23f3646dc --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fabsf.rs @@ -0,0 +1,41 @@ +/// Absolute value (magnitude) (f32) +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.abs` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::fabsf32(x) } + } + } + f32::from_bits(x.to_bits() & 0x7fffffff) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::*; + use core::f32::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf(-1.0), 1.0); + assert_eq!(fabsf(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf(NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf(f), 0.0); + } + for f in [INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(fabsf(f), INFINITY); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/fdim.rs b/vendor/compiler_builtins/libm/src/math/fdim.rs new file mode 100644 index 000000000..014930097 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fdim.rs @@ -0,0 +1,22 @@ +use core::f64; + +/// Positive difference (f64) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdim(x: f64, y: f64) -> f64 { + if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x > y { + x - y + } else { + 0.0 + } +} diff --git a/vendor/compiler_builtins/libm/src/math/fdimf.rs b/vendor/compiler_builtins/libm/src/math/fdimf.rs new file mode 100644 index 000000000..ea0b592d7 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fdimf.rs @@ -0,0 +1,22 @@ +use core::f32; + +/// Positive difference (f32) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf(x: f32, y: f32) -> f32 { + if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x > y { + x - y + } else { + 0.0 + } +} diff --git a/vendor/compiler_builtins/libm/src/math/fenv.rs b/vendor/compiler_builtins/libm/src/math/fenv.rs new file mode 100644 index 000000000..652e60324 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fenv.rs @@ -0,0 +1,33 @@ +// src: musl/src/fenv/fenv.c +/* Dummy functions for archs lacking fenv implementation */ + +pub(crate) const FE_UNDERFLOW: i32 = 0; +pub(crate) const FE_INEXACT: i32 = 0; + +pub(crate) const FE_TONEAREST: i32 = 0; +pub(crate) const FE_TOWARDZERO: i32 = 0; + +#[inline] +pub(crate) fn feclearexcept(_mask: i32) -> i32 { + 0 +} + +#[inline] +pub(crate) fn feraiseexcept(_mask: i32) -> i32 { + 0 +} + +#[inline] +pub(crate) fn fetestexcept(_mask: i32) -> i32 { + 0 +} + +#[inline] +pub(crate) fn fegetround() -> i32 { + FE_TONEAREST +} + +#[inline] +pub(crate) fn fesetround(_r: i32) -> i32 { + 0 +} diff --git a/vendor/compiler_builtins/libm/src/math/floor.rs b/vendor/compiler_builtins/libm/src/math/floor.rs new file mode 100644 index 000000000..d09f9a1a1 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/floor.rs @@ -0,0 +1,81 @@ +#![allow(unreachable_code)] +use core::f64; + +const TOINT: f64 = 1. / f64::EPSILON; + +/// Floor (f64) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floor(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.floor` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::floorf64(x) } + } + } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + { + //use an alternative implementation on x86, because the + //main implementation fails with the x87 FPU used by + //debian i386, probablly due to excess precision issues. + //basic implementation taken from https://github.com/rust-lang/libm/issues/219 + use super::fabs; + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated > x { + return truncated - 1.0; + } else { + return truncated; + } + } else { + return x; + } + } + let ui = x.to_bits(); + let e = ((ui >> 52) & 0x7ff) as i32; + + if (e >= 0x3ff + 52) || (x == 0.) { + return x; + } + /* y = int(x) - x, where int(x) is an integer neighbor of x */ + let y = if (ui >> 63) != 0 { + x - TOINT + TOINT - x + } else { + x + TOINT - TOINT - x + }; + /* special case because of non-nearest rounding modes */ + if e < 0x3ff { + force_eval!(y); + return if (ui >> 63) != 0 { -1. } else { 0. }; + } + if y > 0. { + x + y - 1. + } else { + x + y + } +} + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::*; + + #[test] + fn sanity_check() { + assert_eq!(floor(1.1), 1.0); + assert_eq!(floor(2.9), 2.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor + #[test] + fn spec_tests() { + // Not Asserted: that the current rounding mode has no effect. + assert!(floor(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(floor(f), f); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/floorf.rs b/vendor/compiler_builtins/libm/src/math/floorf.rs new file mode 100644 index 000000000..dfdab91a0 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/floorf.rs @@ -0,0 +1,66 @@ +use core::f32; + +/// Floor (f32) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.floor` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::floorf32(x) } + } + } + let mut ui = x.to_bits(); + let e = (((ui >> 23) as i32) & 0xff) - 0x7f; + + if e >= 23 { + return x; + } + if e >= 0 { + let m: u32 = 0x007fffff >> e; + if (ui & m) == 0 { + return x; + } + force_eval!(x + f32::from_bits(0x7b800000)); + if ui >> 31 != 0 { + ui += m; + } + ui &= !m; + } else { + force_eval!(x + f32::from_bits(0x7b800000)); + if ui >> 31 == 0 { + ui = 0; + } else if ui << 1 != 0 { + return -1.0; + } + } + f32::from_bits(ui) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::*; + use core::f32::*; + + #[test] + fn sanity_check() { + assert_eq!(floorf(0.5), 0.0); + assert_eq!(floorf(1.1), 1.0); + assert_eq!(floorf(2.9), 2.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor + #[test] + fn spec_tests() { + // Not Asserted: that the current rounding mode has no effect. + assert!(floorf(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(floorf(f), f); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/fma.rs b/vendor/compiler_builtins/libm/src/math/fma.rs new file mode 100644 index 000000000..516f9ad3a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fma.rs @@ -0,0 +1,235 @@ +use core::{f32, f64}; + +use super::scalbn; + +const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; + +struct Num { + m: u64, + e: i32, + sign: i32, +} + +fn normalize(x: f64) -> Num { + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 + + let mut ix: u64 = x.to_bits(); + let mut e: i32 = (ix >> 52) as i32; + let sign: i32 = e & 0x800; + e &= 0x7ff; + if e == 0 { + ix = (x * x1p63).to_bits(); + e = (ix >> 52) as i32 & 0x7ff; + e = if e != 0 { e - 63 } else { 0x800 }; + } + ix &= (1 << 52) - 1; + ix |= 1 << 52; + ix <<= 1; + e -= 0x3ff + 52 + 1; + Num { m: ix, e, sign } +} + +fn mul(x: u64, y: u64) -> (u64, u64) { + let t1: u64; + let t2: u64; + let t3: u64; + let xlo: u64 = x as u32 as u64; + let xhi: u64 = x >> 32; + let ylo: u64 = y as u32 as u64; + let yhi: u64 = y >> 32; + + t1 = xlo * ylo; + t2 = xlo * yhi + xhi * ylo; + t3 = xhi * yhi; + let lo = t1.wrapping_add(t2 << 32); + let hi = t3 + (t2 >> 32) + (t1 > lo) as u64; + (hi, lo) +} + +/// Floating multiply add (f64) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation: +/// Computes the value (as if) to infinite precision and rounds once to the result format, +/// according to the rounding mode characterized by the value of FLT_ROUNDS. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 + let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 + + /* normalize so top 10bits and last bit are 0 */ + let nx = normalize(x); + let ny = normalize(y); + let nz = normalize(z); + + if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { + return x * y + z; + } + if nz.e >= ZEROINFNAN { + if nz.e > ZEROINFNAN { + /* z==0 */ + return x * y + z; + } + return z; + } + + /* mul: r = x*y */ + let zhi: u64; + let zlo: u64; + let (mut rhi, mut rlo) = mul(nx.m, ny.m); + /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ + + /* align exponents */ + let mut e: i32 = nx.e + ny.e; + let mut d: i32 = nz.e - e; + /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ + if d > 0 { + if d < 64 { + zlo = nz.m << d; + zhi = nz.m >> (64 - d); + } else { + zlo = 0; + zhi = nz.m; + e = nz.e - 64; + d -= 64; + if d == 0 { + } else if d < 64 { + rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64; + rhi = rhi >> d; + } else { + rlo = 1; + rhi = 0; + } + } + } else { + zhi = 0; + d = -d; + if d == 0 { + zlo = nz.m; + } else if d < 64 { + zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64; + } else { + zlo = 1; + } + } + + /* add */ + let mut sign: i32 = nx.sign ^ ny.sign; + let samesign: bool = (sign ^ nz.sign) == 0; + let mut nonzero: i32 = 1; + if samesign { + /* r += z */ + rlo = rlo.wrapping_add(zlo); + rhi += zhi + (rlo < zlo) as u64; + } else { + /* r -= z */ + let (res, borrow) = rlo.overflowing_sub(zlo); + rlo = res; + rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64)); + if (rhi >> 63) != 0 { + rlo = (-(rlo as i64)) as u64; + rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64; + sign = (sign == 0) as i32; + } + nonzero = (rhi != 0) as i32; + } + + /* set rhi to top 63bit of the result (last bit is sticky) */ + if nonzero != 0 { + e += 64; + d = rhi.leading_zeros() as i32 - 1; + /* note: d > 0 */ + rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64; + } else if rlo != 0 { + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + rhi = rlo >> 1 | (rlo & 1); + } else { + rhi = rlo << d; + } + } else { + /* exact +-0 */ + return x * y + z; + } + e -= d; + + /* convert to double */ + let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */ + if sign != 0 { + i = -i; + } + let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */ + + if e < -1022 - 62 { + /* result is subnormal before rounding */ + if e == -1022 - 63 { + let mut c: f64 = x1p63; + if sign != 0 { + c = -c; + } + if r == c { + /* min normal after rounding, underflow depends + on arch behaviour which can be imitated by + a double to float conversion */ + let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; + return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; + } + /* one bit is lost when scaled, add another top bit to + only round once at conversion if it is inexact */ + if (rhi << 53) != 0 { + i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64; + if sign != 0 { + i = -i; + } + r = i as f64; + r = 2. * r - c; /* remove top bit */ + + /* raise underflow portably, such that it + cannot be optimized away */ + { + let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; + r += (tiny * tiny) * (r - r); + } + } + } else { + /* only round once when scaled */ + d = 10; + i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64; + if sign != 0 { + i = -i; + } + r = i as f64; + } + } + scalbn(r, e) +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn fma_segfault() { + // These two inputs cause fma to segfault on release due to overflow: + assert_eq!( + fma( + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313 + ), + -0.00000000000000022204460492503126, + ); + + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); + assert_eq!(result, -0.007936000000000007,); + } + + #[test] + fn fma_sbb() { + assert_eq!( + fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), + -3991680619069439e277 + ); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/fmaf.rs b/vendor/compiler_builtins/libm/src/math/fmaf.rs new file mode 100644 index 000000000..03d371c55 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fmaf.rs @@ -0,0 +1,106 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ +/*- + * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +use core::f32; +use core::ptr::read_volatile; + +use super::fenv::{ + feclearexcept, fegetround, feraiseexcept, fesetround, fetestexcept, FE_INEXACT, FE_TONEAREST, + FE_TOWARDZERO, FE_UNDERFLOW, +}; + +/* + * Fused multiply-add: Compute x * y + z with a single rounding error. + * + * A double has more than twice as much precision than a float, so + * direct double-precision arithmetic suffices, except where double + * rounding occurs. + */ + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation: +/// Computes the value (as if) to infinite precision and rounds once to the result format, +/// according to the rounding mode characterized by the value of FLT_ROUNDS. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { + let xy: f64; + let mut result: f64; + let mut ui: u64; + let e: i32; + + xy = x as f64 * y as f64; + result = xy + z as f64; + ui = result.to_bits(); + e = (ui >> 52) as i32 & 0x7ff; + /* Common case: The double precision result is fine. */ + if ( + /* not a halfway case */ + ui & 0x1fffffff) != 0x10000000 || + /* NaN */ + e == 0x7ff || + /* exact */ + (result - xy == z as f64 && result - z as f64 == xy) || + /* not round-to-nearest */ + fegetround() != FE_TONEAREST + { + /* + underflow may not be raised correctly, example: + fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) + */ + if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) != 0 { + feclearexcept(FE_INEXACT); + // prevent `xy + vz` from being CSE'd with `xy + z` above + let vz: f32 = unsafe { read_volatile(&z) }; + result = xy + vz as f64; + if fetestexcept(FE_INEXACT) != 0 { + feraiseexcept(FE_UNDERFLOW); + } else { + feraiseexcept(FE_INEXACT); + } + } + z = result as f32; + return z; + } + + /* + * If result is inexact, and exactly halfway between two float values, + * we need to adjust the low-order bit in the direction of the error. + */ + fesetround(FE_TOWARDZERO); + // prevent `vxy + z` from being CSE'd with `xy + z` above + let vxy: f64 = unsafe { read_volatile(&xy) }; + let mut adjusted_result: f64 = vxy + z as f64; + fesetround(FE_TONEAREST); + if result == adjusted_result { + ui = adjusted_result.to_bits(); + ui += 1; + adjusted_result = f64::from_bits(ui); + } + z = adjusted_result as f32; + z +} diff --git a/vendor/compiler_builtins/libm/src/math/fmax.rs b/vendor/compiler_builtins/libm/src/math/fmax.rs new file mode 100644 index 000000000..93c97bc61 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fmax.rs @@ -0,0 +1,12 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmax(x: f64, y: f64) -> f64 { + // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. + // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if x.is_nan() || x < y { y } else { x }) * 1.0 +} diff --git a/vendor/compiler_builtins/libm/src/math/fmaxf.rs b/vendor/compiler_builtins/libm/src/math/fmaxf.rs new file mode 100644 index 000000000..607746647 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fmaxf.rs @@ -0,0 +1,12 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf(x: f32, y: f32) -> f32 { + // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. + // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if x.is_nan() || x < y { y } else { x }) * 1.0 +} diff --git a/vendor/compiler_builtins/libm/src/math/fmin.rs b/vendor/compiler_builtins/libm/src/math/fmin.rs new file mode 100644 index 000000000..ab1509f34 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fmin.rs @@ -0,0 +1,12 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmin(x: f64, y: f64) -> f64 { + // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. + // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if y.is_nan() || x < y { x } else { y }) * 1.0 +} diff --git a/vendor/compiler_builtins/libm/src/math/fminf.rs b/vendor/compiler_builtins/libm/src/math/fminf.rs new file mode 100644 index 000000000..0049e7117 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fminf.rs @@ -0,0 +1,12 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf(x: f32, y: f32) -> f32 { + // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. + // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if y.is_nan() || x < y { x } else { y }) * 1.0 +} diff --git a/vendor/compiler_builtins/libm/src/math/fmod.rs b/vendor/compiler_builtins/libm/src/math/fmod.rs new file mode 100644 index 000000000..d892ffd8b --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fmod.rs @@ -0,0 +1,80 @@ +use core::u64; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmod(x: f64, y: f64) -> f64 { + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let mut ex = (uxi >> 52 & 0x7ff) as i64; + let mut ey = (uyi >> 52 & 0x7ff) as i64; + let sx = uxi >> 63; + let mut i; + + if uyi << 1 == 0 || y.is_nan() || ex == 0x7ff { + return (x * y) / (x * y); + } + if uxi << 1 <= uyi << 1 { + if uxi << 1 == uyi << 1 { + return 0.0 * x; + } + return x; + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 12; + while i >> 63 == 0 { + ex -= 1; + i <<= 1; + } + uxi <<= -ex + 1; + } else { + uxi &= u64::MAX >> 12; + uxi |= 1 << 52; + } + if ey == 0 { + i = uyi << 12; + while i >> 63 == 0 { + ey -= 1; + i <<= 1; + } + uyi <<= -ey + 1; + } else { + uyi &= u64::MAX >> 12; + uyi |= 1 << 52; + } + + /* x mod y */ + while ex > ey { + i = uxi.wrapping_sub(uyi); + if i >> 63 == 0 { + if i == 0 { + return 0.0 * x; + } + uxi = i; + } + uxi <<= 1; + ex -= 1; + } + i = uxi.wrapping_sub(uyi); + if i >> 63 == 0 { + if i == 0 { + return 0.0 * x; + } + uxi = i; + } + while uxi >> 52 == 0 { + uxi <<= 1; + ex -= 1; + } + + /* scale result */ + if ex > 0 { + uxi -= 1 << 52; + uxi |= (ex as u64) << 52; + } else { + uxi >>= -ex + 1; + } + uxi |= (sx as u64) << 63; + + f64::from_bits(uxi) +} diff --git a/vendor/compiler_builtins/libm/src/math/fmodf.rs b/vendor/compiler_builtins/libm/src/math/fmodf.rs new file mode 100644 index 000000000..c53dc186a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/fmodf.rs @@ -0,0 +1,89 @@ +use core::f32; +use core::u32; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf(x: f32, y: f32) -> f32 { + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let mut ex = (uxi >> 23 & 0xff) as i32; + let mut ey = (uyi >> 23 & 0xff) as i32; + let sx = uxi & 0x80000000; + let mut i; + + if uyi << 1 == 0 || y.is_nan() || ex == 0xff { + return (x * y) / (x * y); + } + + if uxi << 1 <= uyi << 1 { + if uxi << 1 == uyi << 1 { + return 0.0 * x; + } + + return x; + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 9; + while i >> 31 == 0 { + ex -= 1; + i <<= 1; + } + + uxi <<= -ex + 1; + } else { + uxi &= u32::MAX >> 9; + uxi |= 1 << 23; + } + + if ey == 0 { + i = uyi << 9; + while i >> 31 == 0 { + ey -= 1; + i <<= 1; + } + + uyi <<= -ey + 1; + } else { + uyi &= u32::MAX >> 9; + uyi |= 1 << 23; + } + + /* x mod y */ + while ex > ey { + i = uxi.wrapping_sub(uyi); + if i >> 31 == 0 { + if i == 0 { + return 0.0 * x; + } + uxi = i; + } + uxi <<= 1; + + ex -= 1; + } + + i = uxi.wrapping_sub(uyi); + if i >> 31 == 0 { + if i == 0 { + return 0.0 * x; + } + uxi = i; + } + + while uxi >> 23 == 0 { + uxi <<= 1; + ex -= 1; + } + + /* scale result up */ + if ex > 0 { + uxi -= 1 << 23; + uxi |= (ex as u32) << 23; + } else { + uxi >>= -ex + 1; + } + uxi |= sx; + + f32::from_bits(uxi) +} diff --git a/vendor/compiler_builtins/libm/src/math/frexp.rs b/vendor/compiler_builtins/libm/src/math/frexp.rs new file mode 100644 index 000000000..badad786a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/frexp.rs @@ -0,0 +1,20 @@ +pub fn frexp(x: f64) -> (f64, i32) { + let mut y = x.to_bits(); + let ee = ((y >> 52) & 0x7ff) as i32; + + if ee == 0 { + if x != 0.0 { + let x1p64 = f64::from_bits(0x43f0000000000000); + let (x, e) = frexp(x * x1p64); + return (x, e - 64); + } + return (x, 0); + } else if ee == 0x7ff { + return (x, 0); + } + + let e = ee - 0x3fe; + y &= 0x800fffffffffffff; + y |= 0x3fe0000000000000; + return (f64::from_bits(y), e); +} diff --git a/vendor/compiler_builtins/libm/src/math/frexpf.rs b/vendor/compiler_builtins/libm/src/math/frexpf.rs new file mode 100644 index 000000000..2919c0ab0 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/frexpf.rs @@ -0,0 +1,21 @@ +pub fn frexpf(x: f32) -> (f32, i32) { + let mut y = x.to_bits(); + let ee: i32 = ((y >> 23) & 0xff) as i32; + + if ee == 0 { + if x != 0.0 { + let x1p64 = f32::from_bits(0x5f800000); + let (x, e) = frexpf(x * x1p64); + return (x, e - 64); + } else { + return (x, 0); + } + } else if ee == 0xff { + return (x, 0); + } + + let e = ee - 0x7e; + y &= 0x807fffff; + y |= 0x3f000000; + (f32::from_bits(y), e) +} diff --git a/vendor/compiler_builtins/libm/src/math/hypot.rs b/vendor/compiler_builtins/libm/src/math/hypot.rs new file mode 100644 index 000000000..da458ea1d --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/hypot.rs @@ -0,0 +1,74 @@ +use core::f64; + +use super::sqrt; + +const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1 + +fn sq(x: f64) -> (f64, f64) { + let xh: f64; + let xl: f64; + let xc: f64; + + xc = x * SPLIT; + xh = x - xc + xc; + xl = x - xh; + let hi = x * x; + let lo = xh * xh - hi + 2. * xh * xl + xl * xl; + (hi, lo) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn hypot(mut x: f64, mut y: f64) -> f64 { + let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700 + let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700 + + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let uti; + let ex: i64; + let ey: i64; + let mut z: f64; + + /* arrange |x| >= |y| */ + uxi &= -1i64 as u64 >> 1; + uyi &= -1i64 as u64 >> 1; + if uxi < uyi { + uti = uxi; + uxi = uyi; + uyi = uti; + } + + /* special cases */ + ex = (uxi >> 52) as i64; + ey = (uyi >> 52) as i64; + x = f64::from_bits(uxi); + y = f64::from_bits(uyi); + /* note: hypot(inf,nan) == inf */ + if ey == 0x7ff { + return y; + } + if ex == 0x7ff || uyi == 0 { + return x; + } + /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */ + /* 64 difference is enough for ld80 double_t */ + if ex - ey > 64 { + return x + y; + } + + /* precise sqrt argument in nearest rounding mode without overflow */ + /* xh*xh must not overflow and xl*xl must not underflow in sq */ + z = 1.; + if ex > 0x3ff + 510 { + z = x1p700; + x *= x1p_700; + y *= x1p_700; + } else if ey < 0x3ff - 450 { + z = x1p_700; + x *= x1p700; + y *= x1p700; + } + let (hx, lx) = sq(x); + let (hy, ly) = sq(y); + z * sqrt(ly + lx + hy + hx) +} diff --git a/vendor/compiler_builtins/libm/src/math/hypotf.rs b/vendor/compiler_builtins/libm/src/math/hypotf.rs new file mode 100644 index 000000000..576eebb33 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/hypotf.rs @@ -0,0 +1,43 @@ +use core::f32; + +use super::sqrtf; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn hypotf(mut x: f32, mut y: f32) -> f32 { + let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90 + let x1p_90 = f32::from_bits(0x12800000); // 0x1p-90f === 2 ^ -90 + + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let uti; + let mut z: f32; + + uxi &= -1i32 as u32 >> 1; + uyi &= -1i32 as u32 >> 1; + if uxi < uyi { + uti = uxi; + uxi = uyi; + uyi = uti; + } + + x = f32::from_bits(uxi); + y = f32::from_bits(uyi); + if uyi == 0xff << 23 { + return y; + } + if uxi >= 0xff << 23 || uyi == 0 || uxi - uyi >= 25 << 23 { + return x + y; + } + + z = 1.; + if uxi >= (0x7f + 60) << 23 { + z = x1p90; + x *= x1p_90; + y *= x1p_90; + } else if uyi < (0x7f - 60) << 23 { + z = x1p_90; + x *= x1p90; + y *= x1p90; + } + z * sqrtf((x as f64 * x as f64 + y as f64 * y as f64) as f32) +} diff --git a/vendor/compiler_builtins/libm/src/math/ilogb.rs b/vendor/compiler_builtins/libm/src/math/ilogb.rs new file mode 100644 index 000000000..0a380b7ef --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/ilogb.rs @@ -0,0 +1,31 @@ +const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; +const FP_ILOGB0: i32 = FP_ILOGBNAN; + +pub fn ilogb(x: f64) -> i32 { + let mut i: u64 = x.to_bits(); + let e = ((i >> 52) & 0x7ff) as i32; + + if e == 0 { + i <<= 12; + if i == 0 { + force_eval!(0.0 / 0.0); + return FP_ILOGB0; + } + /* subnormal x */ + let mut e = -0x3ff; + while (i >> 63) == 0 { + e -= 1; + i <<= 1; + } + e + } else if e == 0x7ff { + force_eval!(0.0 / 0.0); + if (i << 12) != 0 { + FP_ILOGBNAN + } else { + i32::max_value() + } + } else { + e - 0x3ff + } +} diff --git a/vendor/compiler_builtins/libm/src/math/ilogbf.rs b/vendor/compiler_builtins/libm/src/math/ilogbf.rs new file mode 100644 index 000000000..b384fa4b2 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/ilogbf.rs @@ -0,0 +1,31 @@ +const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; +const FP_ILOGB0: i32 = FP_ILOGBNAN; + +pub fn ilogbf(x: f32) -> i32 { + let mut i = x.to_bits(); + let e = ((i >> 23) & 0xff) as i32; + + if e == 0 { + i <<= 9; + if i == 0 { + force_eval!(0.0 / 0.0); + return FP_ILOGB0; + } + /* subnormal x */ + let mut e = -0x7f; + while (i >> 31) == 0 { + e -= 1; + i <<= 1; + } + e + } else if e == 0xff { + force_eval!(0.0 / 0.0); + if (i << 9) != 0 { + FP_ILOGBNAN + } else { + i32::max_value() + } + } else { + e - 0x7f + } +} diff --git a/vendor/compiler_builtins/libm/src/math/j0.rs b/vendor/compiler_builtins/libm/src/math/j0.rs new file mode 100644 index 000000000..c4258ccca --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/j0.rs @@ -0,0 +1,422 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j0(x), y0(x) + * Bessel function of the first and second kinds of order zero. + * Method -- j0(x): + * 1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ... + * 2. Reduce x to |x| since j0(x)=j0(-x), and + * for x in (0,2) + * j0(x) = 1-z/4+ z^2*R0/S0, where z = x*x; + * (precision: |j0-1+z/4-z^2R0/S0 |<2**-63.67 ) + * for x in (2,inf) + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * as follow: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j0(nan)= nan + * j0(0) = 1 + * j0(inf) = 0 + * + * Method -- y0(x): + * 1. For x<2. + * Since + * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) + * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. + * We use the following function to approximate y0, + * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 + * where + * U(z) = u00 + u01*z + ... + u06*z^6 + * V(z) = 1 + v01*z + ... + v04*z^4 + * with absolute approximation error bounded by 2**-72. + * Note: For tiny x, U/V = u0 and j0(x)~1, hence + * y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27) + * 2. For x>=2. + * y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * by the method mentioned above. + * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. + */ + +use super::{cos, fabs, get_high_word, get_low_word, log, sin, sqrt}; +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + +/* common method when |x|>=2 */ +fn common(ix: u32, x: f64, y0: bool) -> f64 { + let s: f64; + let mut c: f64; + let mut ss: f64; + let mut cc: f64; + let z: f64; + + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x-pi/4)-q0(x)*sin(x-pi/4)) + * y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x-pi/4)+q0(x)*cos(x-pi/4)) + * + * sin(x-pi/4) = (sin(x) - cos(x))/sqrt(2) + * cos(x-pi/4) = (sin(x) + cos(x))/sqrt(2) + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + */ + s = sin(x); + c = cos(x); + if y0 { + c = -c; + } + cc = s + c; + /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */ + if ix < 0x7fe00000 { + ss = s - c; + z = -cos(2.0 * x); + if s * c < 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x48000000 { + if y0 { + ss = -ss; + } + cc = pzero(x) * cc - qzero(x) * ss; + } + } + return INVSQRTPI * cc / sqrt(x); +} + +/* R0/S0 on [0, 2.00] */ +const R02: f64 = 1.56249999999999947958e-02; /* 0x3F8FFFFF, 0xFFFFFFFD */ +const R03: f64 = -1.89979294238854721751e-04; /* 0xBF28E6A5, 0xB61AC6E9 */ +const R04: f64 = 1.82954049532700665670e-06; /* 0x3EBEB1D1, 0x0C503919 */ +const R05: f64 = -4.61832688532103189199e-09; /* 0xBE33D5E7, 0x73D63FCE */ +const S01: f64 = 1.56191029464890010492e-02; /* 0x3F8FFCE8, 0x82C8C2A4 */ +const S02: f64 = 1.16926784663337450260e-04; /* 0x3F1EA6D2, 0xDD57DBF4 */ +const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ +const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ + +pub fn j0(mut x: f64) -> f64 { + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + + /* j0(+-inf)=0, j0(nan)=nan */ + if ix >= 0x7ff00000 { + return 1.0 / (x * x); + } + x = fabs(x); + + if ix >= 0x40000000 { + /* |x| >= 2 */ + /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */ + return common(ix, x, false); + } + + /* 1 - x*x/4 + x*x*R(x^2)/S(x^2) */ + if ix >= 0x3f200000 { + /* |x| >= 2**-13 */ + /* up to 4ulp error close to 2 */ + z = x * x; + r = z * (R02 + z * (R03 + z * (R04 + z * R05))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * S04))); + return (1.0 + x / 2.0) * (1.0 - x / 2.0) + z * (r / s); + } + + /* 1 - x*x/4 */ + /* prevent underflow */ + /* inexact should be raised when x!=0, this is not done correctly */ + if ix >= 0x38000000 { + /* |x| >= 2**-127 */ + x = 0.25 * x * x; + } + return 1.0 - x; +} + +const U00: f64 = -7.38042951086872317523e-02; /* 0xBFB2E4D6, 0x99CBD01F */ +const U01: f64 = 1.76666452509181115538e-01; /* 0x3FC69D01, 0x9DE9E3FC */ +const U02: f64 = -1.38185671945596898896e-02; /* 0xBF8C4CE8, 0xB16CFA97 */ +const U03: f64 = 3.47453432093683650238e-04; /* 0x3F36C54D, 0x20B29B6B */ +const U04: f64 = -3.81407053724364161125e-06; /* 0xBECFFEA7, 0x73D25CAD */ +const U05: f64 = 1.95590137035022920206e-08; /* 0x3E550057, 0x3B4EABD4 */ +const U06: f64 = -3.98205194132103398453e-11; /* 0xBDC5E43D, 0x693FB3C8 */ +const V01: f64 = 1.27304834834123699328e-02; /* 0x3F8A1270, 0x91C9C71A */ +const V02: f64 = 7.60068627350353253702e-05; /* 0x3F13ECBB, 0xF578C6C1 */ +const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ +const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ + +pub fn y0(x: f64) -> f64 { + let z: f64; + let u: f64; + let v: f64; + let ix: u32; + let lx: u32; + + ix = get_high_word(x); + lx = get_low_word(x); + + /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */ + if ((ix << 1) | lx) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7ff00000 { + return 1.0 / x; + } + + if ix >= 0x40000000 { + /* x >= 2 */ + /* large ulp errors near zeros: 3.958, 7.086,.. */ + return common(ix, x, true); + } + + /* U(x^2)/V(x^2) + (2/pi)*j0(x)*log(x) */ + if ix >= 0x3e400000 { + /* x >= 2**-27 */ + /* large ulp error near the first zero, x ~= 0.89 */ + z = x * x; + u = U00 + z * (U01 + z * (U02 + z * (U03 + z * (U04 + z * (U05 + z * U06))))); + v = 1.0 + z * (V01 + z * (V02 + z * (V03 + z * V04))); + return u / v + TPI * (j0(x) * log(x)); + } + return U00 + TPI * log(x); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... + pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +const PR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */ + -8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */ + -2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */ + -2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */ + -5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */ +]; +const PS8: [f64; 5] = [ + 1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */ + 3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */ + 4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */ + 1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */ + 4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */ +]; + +const PR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */ + -7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */ + -4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */ + -6.76747652265167261021e+01, /* 0xC050EB2F, 0x5A7D1783 */ + -3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */ + -3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */ +]; +const PS5: [f64; 5] = [ + 6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */ + 1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */ + 5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */ + 9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */ + 2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */ +]; + +const PR3: [f64; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */ + -7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */ + -2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */ + -2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */ + -5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */ + -3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */ +]; +const PS3: [f64; 5] = [ + 3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */ + 3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */ + 1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */ + 1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */ + 1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */ +]; + +const PR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */ + -7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */ + -1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */ + -7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */ + -1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */ + -3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */ +]; +const PS2: [f64; 5] = [ + 2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */ + 1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */ + 2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */ + 1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */ + 1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */ +]; + +fn pzero(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 5]; + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x40122E8B { + p = &PR5; + q = &PS5; + } else if ix >= 0x4006DB6D { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +const QR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */ + 1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */ + 5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */ + 8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */ + 3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */ +]; +const QS8: [f64; 6] = [ + 1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */ + 8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */ + 1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */ + 8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */ + 8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */ + -3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */ +]; + +const QR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */ + 7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */ + 5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */ + 1.35111577286449829671e+02, /* 0x4060E392, 0x0A8788E9 */ + 1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */ + 1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */ +]; +const QS5: [f64; 6] = [ + 8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */ + 2.07781416421392987104e+03, /* 0x40A03BA0, 0xDA21C0CE */ + 1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */ + 5.67511122894947329769e+04, /* 0x40EBB5E3, 0x97E02372 */ + 3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */ + -5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */ +]; + +const QR3: [f64; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */ + 7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */ + 3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */ + 4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */ + 1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */ + 1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */ +]; +const QS3: [f64; 6] = [ + 4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */ + 7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */ + 3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */ + 6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */ + 2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */ + -1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */ +]; + +const QR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */ + 7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */ + 1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */ + 1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */ + 3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */ + 1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */ +]; +const QS2: [f64; 6] = [ + 3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */ + 2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */ + 8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */ + 8.82935845112488550512e+02, /* 0x408B977C, 0x9C5CC214 */ + 2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */ + -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */ +]; + +fn qzero(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 6]; + let s: f64; + let r: f64; + let z: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x40122E8B { + p = &QR5; + q = &QS5; + } else if ix >= 0x4006DB6D { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (-0.125 + r / s) / x; +} diff --git a/vendor/compiler_builtins/libm/src/math/j0f.rs b/vendor/compiler_builtins/libm/src/math/j0f.rs new file mode 100644 index 000000000..91c03dbbc --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/j0f.rs @@ -0,0 +1,359 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{cosf, fabsf, logf, sinf, sqrtf}; + +const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ +const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ + +fn common(ix: u32, x: f32, y0: bool) -> f32 { + let z: f32; + let s: f32; + let mut c: f32; + let mut ss: f32; + let mut cc: f32; + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + s = sinf(x); + c = cosf(x); + if y0 { + c = -c; + } + cc = s + c; + if ix < 0x7f000000 { + ss = s - c; + z = -cosf(2.0 * x); + if s * c < 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x58800000 { + if y0 { + ss = -ss; + } + cc = pzerof(x) * cc - qzerof(x) * ss; + } + } + return INVSQRTPI * cc / sqrtf(x); +} + +/* R0/S0 on [0, 2.00] */ +const R02: f32 = 1.5625000000e-02; /* 0x3c800000 */ +const R03: f32 = -1.8997929874e-04; /* 0xb947352e */ +const R04: f32 = 1.8295404516e-06; /* 0x35f58e88 */ +const R05: f32 = -4.6183270541e-09; /* 0xb19eaf3c */ +const S01: f32 = 1.5619102865e-02; /* 0x3c7fe744 */ +const S02: f32 = 1.1692678527e-04; /* 0x38f53697 */ +const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ +const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ + +pub fn j0f(mut x: f32) -> f32 { + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + return 1.0 / (x * x); + } + x = fabsf(x); + + if ix >= 0x40000000 { + /* |x| >= 2 */ + /* large ulp error near zeros */ + return common(ix, x, false); + } + if ix >= 0x3a000000 { + /* |x| >= 2**-11 */ + /* up to 4ulp error near 2 */ + z = x * x; + r = z * (R02 + z * (R03 + z * (R04 + z * R05))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * S04))); + return (1.0 + x / 2.0) * (1.0 - x / 2.0) + z * (r / s); + } + if ix >= 0x21800000 { + /* |x| >= 2**-60 */ + x = 0.25 * x * x; + } + return 1.0 - x; +} + +const U00: f32 = -7.3804296553e-02; /* 0xbd9726b5 */ +const U01: f32 = 1.7666645348e-01; /* 0x3e34e80d */ +const U02: f32 = -1.3818567619e-02; /* 0xbc626746 */ +const U03: f32 = 3.4745343146e-04; /* 0x39b62a69 */ +const U04: f32 = -3.8140706238e-06; /* 0xb67ff53c */ +const U05: f32 = 1.9559013964e-08; /* 0x32a802ba */ +const U06: f32 = -3.9820518410e-11; /* 0xae2f21eb */ +const V01: f32 = 1.2730483897e-02; /* 0x3c509385 */ +const V02: f32 = 7.6006865129e-05; /* 0x389f65e0 */ +const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ +const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ + +pub fn y0f(x: f32) -> f32 { + let z: f32; + let u: f32; + let v: f32; + let ix: u32; + + ix = x.to_bits(); + if (ix & 0x7fffffff) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7f800000 { + return 1.0 / x; + } + if ix >= 0x40000000 { + /* |x| >= 2.0 */ + /* large ulp error near zeros */ + return common(ix, x, true); + } + if ix >= 0x39000000 { + /* x >= 2**-13 */ + /* large ulp error at x ~= 0.89 */ + z = x * x; + u = U00 + z * (U01 + z * (U02 + z * (U03 + z * (U04 + z * (U05 + z * U06))))); + v = 1.0 + z * (V01 + z * (V02 + z * (V03 + z * V04))); + return u / v + TPI * (j0f(x) * logf(x)); + } + return U00 + TPI * logf(x); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... + pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +const PR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -7.0312500000e-02, /* 0xbd900000 */ + -8.0816707611e+00, /* 0xc1014e86 */ + -2.5706311035e+02, /* 0xc3808814 */ + -2.4852163086e+03, /* 0xc51b5376 */ + -5.2530439453e+03, /* 0xc5a4285a */ +]; +const PS8: [f32; 5] = [ + 1.1653436279e+02, /* 0x42e91198 */ + 3.8337448730e+03, /* 0x456f9beb */ + 4.0597855469e+04, /* 0x471e95db */ + 1.1675296875e+05, /* 0x47e4087c */ + 4.7627726562e+04, /* 0x473a0bba */ +]; +const PR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.1412546255e-11, /* 0xad48c58a */ + -7.0312492549e-02, /* 0xbd8fffff */ + -4.1596107483e+00, /* 0xc0851b88 */ + -6.7674766541e+01, /* 0xc287597b */ + -3.3123129272e+02, /* 0xc3a59d9b */ + -3.4643338013e+02, /* 0xc3ad3779 */ +]; +const PS5: [f32; 5] = [ + 6.0753936768e+01, /* 0x42730408 */ + 1.0512523193e+03, /* 0x44836813 */ + 5.9789707031e+03, /* 0x45bad7c4 */ + 9.6254453125e+03, /* 0x461665c8 */ + 2.4060581055e+03, /* 0x451660ee */ +]; + +const PR3: [f32; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.5470459075e-09, /* 0xb12f081b */ + -7.0311963558e-02, /* 0xbd8fffb8 */ + -2.4090321064e+00, /* 0xc01a2d95 */ + -2.1965976715e+01, /* 0xc1afba52 */ + -5.8079170227e+01, /* 0xc2685112 */ + -3.1447946548e+01, /* 0xc1fb9565 */ +]; +const PS3: [f32; 5] = [ + 3.5856033325e+01, /* 0x420f6c94 */ + 3.6151397705e+02, /* 0x43b4c1ca */ + 1.1936077881e+03, /* 0x44953373 */ + 1.1279968262e+03, /* 0x448cffe6 */ + 1.7358093262e+02, /* 0x432d94b8 */ +]; + +const PR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.8753431271e-08, /* 0xb3be98b7 */ + -7.0303097367e-02, /* 0xbd8ffb12 */ + -1.4507384300e+00, /* 0xbfb9b1cc */ + -7.6356959343e+00, /* 0xc0f4579f */ + -1.1193166733e+01, /* 0xc1331736 */ + -3.2336456776e+00, /* 0xc04ef40d */ +]; +const PS2: [f32; 5] = [ + 2.2220300674e+01, /* 0x41b1c32d */ + 1.3620678711e+02, /* 0x430834f0 */ + 2.7047027588e+02, /* 0x43873c32 */ + 1.5387539673e+02, /* 0x4319e01a */ + 1.4657617569e+01, /* 0x416a859a */ +]; + +fn pzerof(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 5]; + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x409173eb { + p = &PR5; + q = &PS5; + } else if ix >= 0x4036d917 { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +const QR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 7.3242187500e-02, /* 0x3d960000 */ + 1.1768206596e+01, /* 0x413c4a93 */ + 5.5767340088e+02, /* 0x440b6b19 */ + 8.8591972656e+03, /* 0x460a6cca */ + 3.7014625000e+04, /* 0x471096a0 */ +]; +const QS8: [f32; 6] = [ + 1.6377603149e+02, /* 0x4323c6aa */ + 8.0983447266e+03, /* 0x45fd12c2 */ + 1.4253829688e+05, /* 0x480b3293 */ + 8.0330925000e+05, /* 0x49441ed4 */ + 8.4050156250e+05, /* 0x494d3359 */ + -3.4389928125e+05, /* 0xc8a7eb69 */ +]; + +const QR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.8408595828e-11, /* 0x2da1ec79 */ + 7.3242180049e-02, /* 0x3d95ffff */ + 5.8356351852e+00, /* 0x40babd86 */ + 1.3511157227e+02, /* 0x43071c90 */ + 1.0272437744e+03, /* 0x448067cd */ + 1.9899779053e+03, /* 0x44f8bf4b */ +]; +const QS5: [f32; 6] = [ + 8.2776611328e+01, /* 0x42a58da0 */ + 2.0778142090e+03, /* 0x4501dd07 */ + 1.8847289062e+04, /* 0x46933e94 */ + 5.6751113281e+04, /* 0x475daf1d */ + 3.5976753906e+04, /* 0x470c88c1 */ + -5.3543427734e+03, /* 0xc5a752be */ +]; + +const QR3: [f32; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.3774099900e-09, /* 0x3196681b */ + 7.3241114616e-02, /* 0x3d95ff70 */ + 3.3442313671e+00, /* 0x405607e3 */ + 4.2621845245e+01, /* 0x422a7cc5 */ + 1.7080809021e+02, /* 0x432acedf */ + 1.6673394775e+02, /* 0x4326bbe4 */ +]; +const QS3: [f32; 6] = [ + 4.8758872986e+01, /* 0x42430916 */ + 7.0968920898e+02, /* 0x44316c1c */ + 3.7041481934e+03, /* 0x4567825f */ + 6.4604252930e+03, /* 0x45c9e367 */ + 2.5163337402e+03, /* 0x451d4557 */ + -1.4924745178e+02, /* 0xc3153f59 */ +]; + +const QR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.5044444979e-07, /* 0x342189db */ + 7.3223426938e-02, /* 0x3d95f62a */ + 1.9981917143e+00, /* 0x3fffc4bf */ + 1.4495602608e+01, /* 0x4167edfd */ + 3.1666231155e+01, /* 0x41fd5471 */ + 1.6252708435e+01, /* 0x4182058c */ +]; +const QS2: [f32; 6] = [ + 3.0365585327e+01, /* 0x41f2ecb8 */ + 2.6934811401e+02, /* 0x4386ac8f */ + 8.4478375244e+02, /* 0x44533229 */ + 8.8293585205e+02, /* 0x445cbbe5 */ + 2.1266638184e+02, /* 0x4354aa98 */ + -5.3109550476e+00, /* 0xc0a9f358 */ +]; + +fn qzerof(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 6]; + let s: f32; + let r: f32; + let z: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x409173eb { + p = &QR5; + q = &QS5; + } else if ix >= 0x4036d917 { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (-0.125 + r / s) / x; +} diff --git a/vendor/compiler_builtins/libm/src/math/j1.rs b/vendor/compiler_builtins/libm/src/math/j1.rs new file mode 100644 index 000000000..02a65ca5a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/j1.rs @@ -0,0 +1,414 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j1(x), y1(x) + * Bessel function of the first and second kinds of order zero. + * Method -- j1(x): + * 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ... + * 2. Reduce x to |x| since j1(x)=-j1(-x), and + * for x in (0,2) + * j1(x) = x/2 + x*z*R0/S0, where z = x*x; + * (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) + * for x in (2,inf) + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * as follow: + * cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (sin(x) + cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j1(nan)= nan + * j1(0) = 0 + * j1(inf) = 0 + * + * Method -- y1(x): + * 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN + * 2. For x<2. + * Since + * y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...) + * therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. + * We use the following function to approximate y1, + * y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2 + * where for x in [0,2] (abs err less than 2**-65.89) + * U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4 + * V(z) = 1 + v0[0]*z + ... + v0[4]*z^5 + * Note: For tiny x, 1/x dominate y1 and hence + * y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) + * 3. For x>=2. + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * by method mentioned above. + */ + +use super::{cos, fabs, get_high_word, get_low_word, log, sin, sqrt}; + +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + +fn common(ix: u32, x: f64, y1: bool, sign: bool) -> f64 { + let z: f64; + let mut s: f64; + let c: f64; + let mut ss: f64; + let mut cc: f64; + + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x-3pi/4)-q1(x)*sin(x-3pi/4)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x-3pi/4)+q1(x)*cos(x-3pi/4)) + * + * sin(x-3pi/4) = -(sin(x) + cos(x))/sqrt(2) + * cos(x-3pi/4) = (sin(x) - cos(x))/sqrt(2) + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + */ + s = sin(x); + if y1 { + s = -s; + } + c = cos(x); + cc = s - c; + if ix < 0x7fe00000 { + /* avoid overflow in 2*x */ + ss = -s - c; + z = cos(2.0 * x); + if s * c > 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x48000000 { + if y1 { + ss = -ss; + } + cc = pone(x) * cc - qone(x) * ss; + } + } + if sign { + cc = -cc; + } + return INVSQRTPI * cc / sqrt(x); +} + +/* R0/S0 on [0,2] */ +const R00: f64 = -6.25000000000000000000e-02; /* 0xBFB00000, 0x00000000 */ +const R01: f64 = 1.40705666955189706048e-03; /* 0x3F570D9F, 0x98472C61 */ +const R02: f64 = -1.59955631084035597520e-05; /* 0xBEF0C5C6, 0xBA169668 */ +const R03: f64 = 4.96727999609584448412e-08; /* 0x3E6AAAFA, 0x46CA0BD9 */ +const S01: f64 = 1.91537599538363460805e-02; /* 0x3F939D0B, 0x12637E53 */ +const S02: f64 = 1.85946785588630915560e-04; /* 0x3F285F56, 0xB9CDF664 */ +const S03: f64 = 1.17718464042623683263e-06; /* 0x3EB3BFF8, 0x333F8498 */ +const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ +const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ + +pub fn j1(x: f64) -> f64 { + let mut z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + let sign: bool; + + ix = get_high_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + return 1.0 / (x * x); + } + if ix >= 0x40000000 { + /* |x| >= 2 */ + return common(ix, fabs(x), false, sign); + } + if ix >= 0x38000000 { + /* |x| >= 2**-127 */ + z = x * x; + r = z * (R00 + z * (R01 + z * (R02 + z * R03))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * (S04 + z * S05)))); + z = r / s; + } else { + /* avoid underflow, raise inexact if x!=0 */ + z = x; + } + return (0.5 + z) * x; +} + +const U0: [f64; 5] = [ + -1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */ + 5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */ + -1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */ + 2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */ + -9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */ +]; +const V0: [f64; 5] = [ + 1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */ + 2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */ + 1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */ + 6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */ + 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ +]; + +pub fn y1(x: f64) -> f64 { + let z: f64; + let u: f64; + let v: f64; + let ix: u32; + let lx: u32; + + ix = get_high_word(x); + lx = get_low_word(x); + + /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */ + if (ix << 1 | lx) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7ff00000 { + return 1.0 / x; + } + + if ix >= 0x40000000 { + /* x >= 2 */ + return common(ix, x, true, false); + } + if ix < 0x3c900000 { + /* x < 2**-54 */ + return -TPI / x; + } + z = x * x; + u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * U0[4]))); + v = 1.0 + z * (V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * V0[4])))); + return x * (u / v) + TPI * (j1(x) * log(x) - 1.0 / x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... + ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +const PR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */ + 1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */ + 4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */ + 3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */ + 7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */ +]; +const PS8: [f64; 5] = [ + 1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */ + 3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */ + 3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */ + 9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */ + 3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */ +]; + +const PR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */ + 1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */ + 6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */ + 1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */ + 5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */ + 5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */ +]; +const PS5: [f64; 5] = [ + 5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */ + 9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */ + 5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */ + 7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */ + 1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */ +]; + +const PR3: [f64; 6] = [ + 3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */ + 1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */ + 3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */ + 3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */ + 9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */ + 4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */ +]; +const PS3: [f64; 5] = [ + 3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */ + 3.36762458747825746741e+02, /* 0x40750C33, 0x07F1A75F */ + 1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */ + 8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */ + 1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */ +]; + +const PR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */ + 1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */ + 2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */ + 1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */ + 1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */ + 5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */ +]; +const PS2: [f64; 5] = [ + 2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */ + 1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */ + 2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */ + 1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */ + 8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */ +]; + +fn pone(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 5]; + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x40122E8B { + p = &PR5; + q = &PS5; + } else if ix >= 0x4006DB6D { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... + qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +const QR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */ + -1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */ + -7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */ + -1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */ + -4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */ +]; +const QS8: [f64; 6] = [ + 1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */ + 7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */ + 1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */ + 7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */ + 6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */ + -2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */ +]; + +const QR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */ + -1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */ + -8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */ + -1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */ + -1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */ + -2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */ +]; +const QS5: [f64; 6] = [ + 8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */ + 1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */ + 1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */ + 4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */ + 2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */ + -4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */ +]; + +const QR3: [f64; 6] = [ + -5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */ + -1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */ + -4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */ + -5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA */ + -2.28244540737631695038e+02, /* 0xC06C87D3, 0x4718D55F */ + -2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */ +]; +const QS3: [f64; 6] = [ + 4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */ + 6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */ + 3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */ + 5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */ + 1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */ + -1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */ +]; + +const QR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */ + -1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */ + -2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */ + -1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */ + -4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */ + -2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */ +]; +const QS2: [f64; 6] = [ + 2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */ + 2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */ + 7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */ + 7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */ + 1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */ + -4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */ +]; + +fn qone(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 6]; + let s: f64; + let r: f64; + let z: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x40122E8B { + p = &QR5; + q = &QS5; + } else if ix >= 0x4006DB6D { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (0.375 + r / s) / x; +} diff --git a/vendor/compiler_builtins/libm/src/math/j1f.rs b/vendor/compiler_builtins/libm/src/math/j1f.rs new file mode 100644 index 000000000..c39f8ff7e --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/j1f.rs @@ -0,0 +1,380 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{cosf, fabsf, logf, sinf, sqrtf}; + +const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ +const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ + +fn common(ix: u32, x: f32, y1: bool, sign: bool) -> f32 { + let z: f64; + let mut s: f64; + let c: f64; + let mut ss: f64; + let mut cc: f64; + + s = sinf(x) as f64; + if y1 { + s = -s; + } + c = cosf(x) as f64; + cc = s - c; + if ix < 0x7f000000 { + ss = -s - c; + z = cosf(2.0 * x) as f64; + if s * c > 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x58800000 { + if y1 { + ss = -ss; + } + cc = (ponef(x) as f64) * cc - (qonef(x) as f64) * ss; + } + } + if sign { + cc = -cc; + } + return (((INVSQRTPI as f64) * cc) / (sqrtf(x) as f64)) as f32; +} + +/* R0/S0 on [0,2] */ +const R00: f32 = -6.2500000000e-02; /* 0xbd800000 */ +const R01: f32 = 1.4070566976e-03; /* 0x3ab86cfd */ +const R02: f32 = -1.5995563444e-05; /* 0xb7862e36 */ +const R03: f32 = 4.9672799207e-08; /* 0x335557d2 */ +const S01: f32 = 1.9153760746e-02; /* 0x3c9ce859 */ +const S02: f32 = 1.8594678841e-04; /* 0x3942fab6 */ +const S03: f32 = 1.1771846857e-06; /* 0x359dffc2 */ +const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ +const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ + +pub fn j1f(x: f32) -> f32 { + let mut z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + let sign: bool; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + return 1.0 / (x * x); + } + if ix >= 0x40000000 { + /* |x| >= 2 */ + return common(ix, fabsf(x), false, sign); + } + if ix >= 0x39000000 { + /* |x| >= 2**-13 */ + z = x * x; + r = z * (R00 + z * (R01 + z * (R02 + z * R03))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * (S04 + z * S05)))); + z = 0.5 + r / s; + } else { + z = 0.5; + } + return z * x; +} + +const U0: [f32; 5] = [ + -1.9605709612e-01, /* 0xbe48c331 */ + 5.0443872809e-02, /* 0x3d4e9e3c */ + -1.9125689287e-03, /* 0xbafaaf2a */ + 2.3525259166e-05, /* 0x37c5581c */ + -9.1909917899e-08, /* 0xb3c56003 */ +]; +const V0: [f32; 5] = [ + 1.9916731864e-02, /* 0x3ca3286a */ + 2.0255257550e-04, /* 0x3954644b */ + 1.3560879779e-06, /* 0x35b602d4 */ + 6.2274145840e-09, /* 0x31d5f8eb */ + 1.6655924903e-11, /* 0x2d9281cf */ +]; + +pub fn y1f(x: f32) -> f32 { + let z: f32; + let u: f32; + let v: f32; + let ix: u32; + + ix = x.to_bits(); + if (ix & 0x7fffffff) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7f800000 { + return 1.0 / x; + } + if ix >= 0x40000000 { + /* |x| >= 2.0 */ + return common(ix, x, true, false); + } + if ix < 0x33000000 { + /* x < 2**-25 */ + return -TPI / x; + } + z = x * x; + u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * U0[4]))); + v = 1.0 + z * (V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * V0[4])))); + return x * (u / v) + TPI * (j1f(x) * logf(x) - 1.0 / x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... + ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +const PR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 1.1718750000e-01, /* 0x3df00000 */ + 1.3239480972e+01, /* 0x4153d4ea */ + 4.1205184937e+02, /* 0x43ce06a3 */ + 3.8747453613e+03, /* 0x45722bed */ + 7.9144794922e+03, /* 0x45f753d6 */ +]; +const PS8: [f32; 5] = [ + 1.1420736694e+02, /* 0x42e46a2c */ + 3.6509309082e+03, /* 0x45642ee5 */ + 3.6956207031e+04, /* 0x47105c35 */ + 9.7602796875e+04, /* 0x47bea166 */ + 3.0804271484e+04, /* 0x46f0a88b */ +]; + +const PR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.3199052094e-11, /* 0x2d68333f */ + 1.1718749255e-01, /* 0x3defffff */ + 6.8027510643e+00, /* 0x40d9b023 */ + 1.0830818176e+02, /* 0x42d89dca */ + 5.1763616943e+02, /* 0x440168b7 */ + 5.2871520996e+02, /* 0x44042dc6 */ +]; +const PS5: [f32; 5] = [ + 5.9280597687e+01, /* 0x426d1f55 */ + 9.9140142822e+02, /* 0x4477d9b1 */ + 5.3532670898e+03, /* 0x45a74a23 */ + 7.8446904297e+03, /* 0x45f52586 */ + 1.5040468750e+03, /* 0x44bc0180 */ +]; + +const PR3: [f32; 6] = [ + 3.0250391081e-09, /* 0x314fe10d */ + 1.1718686670e-01, /* 0x3defffab */ + 3.9329774380e+00, /* 0x407bb5e7 */ + 3.5119403839e+01, /* 0x420c7a45 */ + 9.1055007935e+01, /* 0x42b61c2a */ + 4.8559066772e+01, /* 0x42423c7c */ +]; +const PS3: [f32; 5] = [ + 3.4791309357e+01, /* 0x420b2a4d */ + 3.3676245117e+02, /* 0x43a86198 */ + 1.0468714600e+03, /* 0x4482dbe3 */ + 8.9081134033e+02, /* 0x445eb3ed */ + 1.0378793335e+02, /* 0x42cf936c */ +]; + +const PR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.0771083225e-07, /* 0x33e74ea8 */ + 1.1717621982e-01, /* 0x3deffa16 */ + 2.3685150146e+00, /* 0x401795c0 */ + 1.2242610931e+01, /* 0x4143e1bc */ + 1.7693971634e+01, /* 0x418d8d41 */ + 5.0735230446e+00, /* 0x40a25a4d */ +]; +const PS2: [f32; 5] = [ + 2.1436485291e+01, /* 0x41ab7dec */ + 1.2529022980e+02, /* 0x42fa9499 */ + 2.3227647400e+02, /* 0x436846c7 */ + 1.1767937469e+02, /* 0x42eb5bd7 */ + 8.3646392822e+00, /* 0x4105d590 */ +]; + +fn ponef(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 5]; + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x409173eb { + p = &PR5; + q = &PS5; + } else if ix >= 0x4036d917 { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... + qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +const QR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -1.0253906250e-01, /* 0xbdd20000 */ + -1.6271753311e+01, /* 0xc1822c8d */ + -7.5960174561e+02, /* 0xc43de683 */ + -1.1849806641e+04, /* 0xc639273a */ + -4.8438511719e+04, /* 0xc73d3683 */ +]; +const QS8: [f32; 6] = [ + 1.6139537048e+02, /* 0x43216537 */ + 7.8253862305e+03, /* 0x45f48b17 */ + 1.3387534375e+05, /* 0x4802bcd6 */ + 7.1965775000e+05, /* 0x492fb29c */ + 6.6660125000e+05, /* 0x4922be94 */ + -2.9449025000e+05, /* 0xc88fcb48 */ +]; + +const QR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.0897993405e-11, /* 0xadb7d219 */ + -1.0253904760e-01, /* 0xbdd1fffe */ + -8.0564479828e+00, /* 0xc100e736 */ + -1.8366960144e+02, /* 0xc337ab6b */ + -1.3731937256e+03, /* 0xc4aba633 */ + -2.6124443359e+03, /* 0xc523471c */ +]; +const QS5: [f32; 6] = [ + 8.1276550293e+01, /* 0x42a28d98 */ + 1.9917987061e+03, /* 0x44f8f98f */ + 1.7468484375e+04, /* 0x468878f8 */ + 4.9851425781e+04, /* 0x4742bb6d */ + 2.7948074219e+04, /* 0x46da5826 */ + -4.7191835938e+03, /* 0xc5937978 */ +]; + +const QR3: [f32; 6] = [ + -5.0783124372e-09, /* 0xb1ae7d4f */ + -1.0253783315e-01, /* 0xbdd1ff5b */ + -4.6101160049e+00, /* 0xc0938612 */ + -5.7847221375e+01, /* 0xc267638e */ + -2.2824453735e+02, /* 0xc3643e9a */ + -2.1921012878e+02, /* 0xc35b35cb */ +]; +const QS3: [f32; 6] = [ + 4.7665153503e+01, /* 0x423ea91e */ + 6.7386511230e+02, /* 0x4428775e */ + 3.3801528320e+03, /* 0x45534272 */ + 5.5477290039e+03, /* 0x45ad5dd5 */ + 1.9031191406e+03, /* 0x44ede3d0 */ + -1.3520118713e+02, /* 0xc3073381 */ +]; + +const QR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.7838172539e-07, /* 0xb43f8932 */ + -1.0251704603e-01, /* 0xbdd1f475 */ + -2.7522056103e+00, /* 0xc0302423 */ + -1.9663616180e+01, /* 0xc19d4f16 */ + -4.2325313568e+01, /* 0xc2294d1f */ + -2.1371921539e+01, /* 0xc1aaf9b2 */ +]; +const QS2: [f32; 6] = [ + 2.9533363342e+01, /* 0x41ec4454 */ + 2.5298155212e+02, /* 0x437cfb47 */ + 7.5750280762e+02, /* 0x443d602e */ + 7.3939318848e+02, /* 0x4438d92a */ + 1.5594900513e+02, /* 0x431bf2f2 */ + -4.9594988823e+00, /* 0xc09eb437 */ +]; + +fn qonef(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 6]; + let s: f32; + let r: f32; + let z: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x409173eb { + p = &QR5; + q = &QS5; + } else if ix >= 0x4036d917 { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (0.375 + r / s) / x; +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::{j1f, y1f}; + #[test] + fn test_j1f_2488() { + // 0x401F3E49 + assert_eq!(j1f(2.4881766_f32), 0.49999475_f32); + } + #[test] + fn test_y1f_2002() { + //allow slightly different result on x87 + let res = y1f(2.0000002_f32); + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) + { + return; + } + assert_eq!(res, -0.10703229_f32); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/jn.rs b/vendor/compiler_builtins/libm/src/math/jn.rs new file mode 100644 index 000000000..1be167f84 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/jn.rs @@ -0,0 +1,343 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * jn(n, x), yn(n, x) + * floating point Bessel's function of the 1st and 2nd kind + * of order n + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for n<=x, forward recursion is used starting + * from values of j0(x) and j1(x). + * for n>x, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. + */ + +use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0, y1}; + +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ + +pub fn jn(n: i32, mut x: f64) -> f64 { + let mut ix: u32; + let lx: u32; + let nm1: i32; + let mut i: i32; + let mut sign: bool; + let mut a: f64; + let mut b: f64; + let mut temp: f64; + + ix = get_high_word(x); + lx = get_low_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + // -lx == !lx + 1 + if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { + /* nan */ + return x; + } + + /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + /* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */ + if n == 0 { + return j0(x); + } + if n < 0 { + nm1 = -(n + 1); + x = -x; + sign = !sign; + } else { + nm1 = n - 1; + } + if nm1 == 0 { + return j1(x); + } + + sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabs(x); + if (ix | lx) == 0 || ix == 0x7ff00000 { + /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f64) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + if ix >= 0x52d00000 { + /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + temp = match nm1 & 3 { + 0 => -cos(x) + sin(x), + 1 => -cos(x) - sin(x), + 2 => cos(x) - sin(x), + 3 | _ => cos(x) + sin(x), + }; + b = INVSQRTPI * temp / sqrt(x); + } else { + a = j0(x); + b = j1(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b * (2.0 * (i as f64) / x) - a; /* avoid underflow */ + a = temp; + } + } + } else { + if ix < 0x3e100000 { + /* x < 2**-29 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 32 { + /* underflow */ + b = 0.0; + } else { + temp = x * 0.5; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f64; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f64; + let mut q0: f64; + let mut q1: f64; + let mut w: f64; + let h: f64; + let mut z: f64; + let mut tmp: f64; + let nf: f64; + + let mut k: i32; + + nf = (nm1 as f64) + 1.0; + w = 2.0 * nf / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e9 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f64) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * log(fabs(w)); + if tmp < 7.09782712893383973096e+02 { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 + if b > x1p500 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0(x); + w = j1(x); + if fabs(z) >= fabs(w) { + b = t * z / b; + } else { + b = t * w / a; + } + } + } + + if sign { + -b + } else { + b + } +} + +pub fn yn(n: i32, x: f64) -> f64 { + let mut ix: u32; + let lx: u32; + let mut ib: u32; + let nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f64; + let mut b: f64; + let mut temp: f64; + + ix = get_high_word(x); + lx = get_low_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + // -lx == !lx + 1 + if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { + /* nan */ + return x; + } + if sign && (ix | lx) != 0 { + /* x < 0 */ + return 0.0 / 0.0; + } + if ix == 0x7ff00000 { + return 0.0; + } + + if n == 0 { + return y0(x); + } + if n < 0 { + nm1 = -(n + 1); + sign = (n & 1) != 0; + } else { + nm1 = n - 1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1(x); + } else { + return y1(x); + } + } + + if ix >= 0x52d00000 { + /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + temp = match nm1 & 3 { + 0 => -sin(x) - cos(x), + 1 => -sin(x) + cos(x), + 2 => sin(x) + cos(x), + 3 | _ => sin(x) - cos(x), + }; + b = INVSQRTPI * temp / sqrt(x); + } else { + a = y0(x); + b = y1(x); + /* quit if b is -inf */ + ib = get_high_word(b); + i = 0; + while i < nm1 && ib != 0xfff00000 { + i += 1; + temp = b; + b = (2.0 * (i as f64) / x) * b - a; + ib = get_high_word(b); + a = temp; + } + } + + if sign { + -b + } else { + b + } +} diff --git a/vendor/compiler_builtins/libm/src/math/jnf.rs b/vendor/compiler_builtins/libm/src/math/jnf.rs new file mode 100644 index 000000000..360f62e20 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/jnf.rs @@ -0,0 +1,259 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{fabsf, j0f, j1f, logf, y0f, y1f}; + +pub fn jnf(n: i32, mut x: f32) -> f32 { + let mut ix: u32; + let mut nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { + /* nan */ + return x; + } + + /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ + if n == 0 { + return j0f(x); + } + if n < 0 { + nm1 = -(n + 1); + x = -x; + sign = !sign; + } else { + nm1 = n - 1; + } + if nm1 == 0 { + return j1f(x); + } + + sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabsf(x); + if ix == 0 || ix == 0x7f800000 { + /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f32) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + a = j0f(x); + b = j1f(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b * (2.0 * (i as f32) / x) - a; + a = temp; + } + } else { + if ix < 0x35800000 { + /* x < 2**-20 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 8 { + /* underflow */ + nm1 = 8; + } + temp = 0.5 * x; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f32; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f32; + let mut q0: f32; + let mut q1: f32; + let mut w: f32; + let h: f32; + let mut z: f32; + let mut tmp: f32; + let nf: f32; + let mut k: i32; + + nf = (nm1 as f32) + 1.0; + w = 2.0 * (nf as f32) / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e4 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f32) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * logf(fabsf(w)); + if tmp < 88.721679688 { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 + if b > x1p60 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0f(x); + w = j1f(x); + if fabsf(z) >= fabsf(w) { + b = t * z / b; + } else { + b = t * w / a; + } + } + } + + if sign { + -b + } else { + b + } +} + +pub fn ynf(n: i32, x: f32) -> f32 { + let mut ix: u32; + let mut ib: u32; + let nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { + /* nan */ + return x; + } + if sign && ix != 0 { + /* x < 0 */ + return 0.0 / 0.0; + } + if ix == 0x7f800000 { + return 0.0; + } + + if n == 0 { + return y0f(x); + } + if n < 0 { + nm1 = -(n + 1); + sign = (n & 1) != 0; + } else { + nm1 = n - 1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1f(x); + } else { + return y1f(x); + } + } + + a = y0f(x); + b = y1f(x); + /* quit if b is -inf */ + ib = b.to_bits(); + i = 0; + while i < nm1 && ib != 0xff800000 { + i += 1; + temp = b; + b = (2.0 * (i as f32) / x) * b - a; + ib = b.to_bits(); + a = temp; + } + + if sign { + -b + } else { + b + } +} diff --git a/vendor/compiler_builtins/libm/src/math/k_cos.rs b/vendor/compiler_builtins/libm/src/math/k_cos.rs new file mode 100644 index 000000000..49b2fc64d --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/k_cos.rs @@ -0,0 +1,62 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_cos.c +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunSoft, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +const C1: f64 = 4.16666666666666019037e-02; /* 0x3FA55555, 0x5555554C */ +const C2: f64 = -1.38888888888741095749e-03; /* 0xBF56C16C, 0x16C15177 */ +const C3: f64 = 2.48015872894767294178e-05; /* 0x3EFA01A0, 0x19CB1590 */ +const C4: f64 = -2.75573143513906633035e-07; /* 0xBE927E4F, 0x809C52AD */ +const C5: f64 = 2.08757232129817482790e-09; /* 0x3E21EE9E, 0xBDB4B1C4 */ +const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ + +// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// +// Algorithm +// 1. Since cos(-x) = cos(x), we need only to consider positive x. +// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. +// 3. cos(x) is approximated by a polynomial of degree 14 on +// [0,pi/4] +// 4 14 +// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x +// where the remez error is +// +// | 2 4 6 8 10 12 14 | -58 +// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 +// | | +// +// 4 6 8 10 12 14 +// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then +// cos(x) ~ 1 - x*x/2 + r +// since cos(x+y) ~ cos(x) - sin(x)*y +// ~ cos(x) - x*y, +// a correction term is necessary in cos(x) and hence +// cos(x+y) = 1 - (x*x/2 - (r - x*y)) +// For better accuracy, rearrange to +// cos(x+y) ~ w + (tmp + (r-x*y)) +// where w = 1 - x*x/2 and tmp is a tiny correction term +// (1 - x*x/2 == w + tmp exactly in infinite precision). +// The exactness of w + tmp in infinite precision depends on w +// and tmp having the same precision as x. If they have extra +// precision due to compiler bugs, then the extra precision is +// only good provided it is retained in all terms of the final +// expression for cos(). Retention happens in all cases tested +// under FreeBSD, so don't pessimize things by forcibly clipping +// any extra precision in w. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_cos(x: f64, y: f64) -> f64 { + let z = x * x; + let w = z * z; + let r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); + let hz = 0.5 * z; + let w = 1.0 - hz; + w + (((1.0 - w) - hz) + (z * r - x * y)) +} diff --git a/vendor/compiler_builtins/libm/src/math/k_cosf.rs b/vendor/compiler_builtins/libm/src/math/k_cosf.rs new file mode 100644 index 000000000..e99f2348c --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/k_cosf.rs @@ -0,0 +1,29 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ +const C0: f64 = -0.499999997251031003120; /* -0x1ffffffd0c5e81.0p-54 */ +const C1: f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ +const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ +const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_cosf(x: f64) -> f32 { + let z = x * x; + let w = z * z; + let r = C2 + z * C3; + (((1.0 + z * C0) + w * C1) + (w * z) * r) as f32 +} diff --git a/vendor/compiler_builtins/libm/src/math/k_expo2.rs b/vendor/compiler_builtins/libm/src/math/k_expo2.rs new file mode 100644 index 000000000..7345075f3 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/k_expo2.rs @@ -0,0 +1,14 @@ +use super::exp; + +/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ +const K: i32 = 2043; + +/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_expo2(x: f64) -> f64 { + let k_ln2 = f64::from_bits(0x40962066151add8b); + /* note that k is odd and scale*scale overflows */ + let scale = f64::from_bits(((((0x3ff + K / 2) as u32) << 20) as u64) << 32); + /* exp(x - k ln2) * 2**(k-1) */ + exp(x - k_ln2) * scale * scale +} diff --git a/vendor/compiler_builtins/libm/src/math/k_expo2f.rs b/vendor/compiler_builtins/libm/src/math/k_expo2f.rs new file mode 100644 index 000000000..fbd7b27d5 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/k_expo2f.rs @@ -0,0 +1,14 @@ +use super::expf; + +/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ +const K: i32 = 235; + +/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_expo2f(x: f32) -> f32 { + let k_ln2 = f32::from_bits(0x4322e3bc); + /* note that k is odd and scale*scale overflows */ + let scale = f32::from_bits(((0x7f + K / 2) as u32) << 23); + /* exp(x - k ln2) * 2**(k-1) */ + expf(x - k_ln2) * scale * scale +} diff --git a/vendor/compiler_builtins/libm/src/math/k_sin.rs b/vendor/compiler_builtins/libm/src/math/k_sin.rs new file mode 100644 index 000000000..9dd96c944 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/k_sin.rs @@ -0,0 +1,57 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_sin.c +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunSoft, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +const S1: f64 = -1.66666666666666324348e-01; /* 0xBFC55555, 0x55555549 */ +const S2: f64 = 8.33333333332248946124e-03; /* 0x3F811111, 0x1110F8A6 */ +const S3: f64 = -1.98412698298579493134e-04; /* 0xBF2A01A0, 0x19C161D5 */ +const S4: f64 = 2.75573137070700676789e-06; /* 0x3EC71DE3, 0x57B1FE7D */ +const S5: f64 = -2.50507602534068634195e-08; /* 0xBE5AE5E6, 0x8A2B9CEB */ +const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ + +// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// Input iy indicates whether y is 0. (if iy=0, y assume to be 0). +// +// Algorithm +// 1. Since sin(-x) = -sin(x), we need only to consider positive x. +// 2. Callers must return sin(-0) = -0 without calling here since our +// odd polynomial is not evaluated in a way that preserves -0. +// Callers may do the optimization sin(x) ~ x for tiny x. +// 3. sin(x) is approximated by a polynomial of degree 13 on +// [0,pi/4] +// 3 13 +// sin(x) ~ x + S1*x + ... + S6*x +// where +// +// |sin(x) 2 4 6 8 10 12 | -58 +// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 +// | x | +// +// 4. sin(x+y) = sin(x) + sin'(x')*y +// ~ sin(x) + (1-x*x/2)*y +// For better accuracy, let +// 3 2 2 2 2 +// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) +// then 3 2 +// sin(x) = x + (S1*x + (x *(r-y/2)+y)) +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 { + let z = x * x; + let w = z * z; + let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); + let v = z * x; + if iy == 0 { + x + v * (S1 + z * r) + } else { + x - ((z * (0.5 * y - v * r) - y) - v * S1) + } +} diff --git a/vendor/compiler_builtins/libm/src/math/k_sinf.rs b/vendor/compiler_builtins/libm/src/math/k_sinf.rs new file mode 100644 index 000000000..88d10caba --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/k_sinf.rs @@ -0,0 +1,30 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */ +const S1: f64 = -0.166666666416265235595; /* -0x15555554cbac77.0p-55 */ +const S2: f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ +const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ +const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_sinf(x: f64) -> f32 { + let z = x * x; + let w = z * z; + let r = S3 + z * S4; + let s = z * x; + ((x + s * (S1 + z * S2)) + s * w * r) as f32 +} diff --git a/vendor/compiler_builtins/libm/src/math/k_tan.rs b/vendor/compiler_builtins/libm/src/math/k_tan.rs new file mode 100644 index 000000000..d177010bb --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/k_tan.rs @@ -0,0 +1,105 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ +// +// ==================================================== +// Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. +// +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned. +// +// Algorithm +// 1. Since tan(-x) = -tan(x), we need only to consider positive x. +// 2. Callers must return tan(-0) = -0 without calling here since our +// odd polynomial is not evaluated in a way that preserves -0. +// Callers may do the optimization tan(x) ~ x for tiny x. +// 3. tan(x) is approximated by a odd polynomial of degree 27 on +// [0,0.67434] +// 3 27 +// tan(x) ~ x + T1*x + ... + T13*x +// where +// +// |tan(x) 2 4 26 | -59.2 +// |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 +// | x | +// +// Note: tan(x+y) = tan(x) + tan'(x)*y +// ~ tan(x) + (1+x*x)*y +// Therefore, for better accuracy in computing tan(x+y), let +// 3 2 2 2 2 +// r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) +// then +// 3 2 +// tan(x+y) = x + (T1*x + (x *(r+y)+y)) +// +// 4. For x in [0.67434,pi/4], let y = pi/4 - x, then +// tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) +// = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) +static T: [f64; 13] = [ + 3.33333333333334091986e-01, /* 3FD55555, 55555563 */ + 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */ + 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */ + 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */ + 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */ + 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */ + 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */ + 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */ + 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */ + 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */ + 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */ + -1.85586374855275456654e-05, /* BEF375CB, DB605373 */ + 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */ +]; +const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */ +const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { + let hx = (f64::to_bits(x) >> 32) as u32; + let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ + if big { + let sign = hx >> 31; + if sign != 0 { + x = -x; + y = -y; + } + x = (PIO4 - x) + (PIO4_LO - y); + y = 0.0; + } + let z = x * x; + let w = z * z; + /* + * Break x^5*(T[1]+x^2*T[2]+...) into + * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) + */ + let r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11])))); + let v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12]))))); + let s = z * x; + let r = y + z * (s * (r + v) + y) + s * T[0]; + let w = x + r; + if big { + let sign = hx >> 31; + let s = 1.0 - 2.0 * odd as f64; + let v = s - 2.0 * (x + (r - w * w / (w + s))); + return if sign != 0 { -v } else { v }; + } + if odd == 0 { + return w; + } + /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */ + let w0 = zero_low_word(w); + let v = r - (w0 - x); /* w0+v = r+x */ + let a = -1.0 / w; + let a0 = zero_low_word(a); + a0 + a * (1.0 + a0 * w0 + a0 * v) +} + +fn zero_low_word(x: f64) -> f64 { + f64::from_bits(f64::to_bits(x) & 0xFFFF_FFFF_0000_0000) +} diff --git a/vendor/compiler_builtins/libm/src/math/k_tanf.rs b/vendor/compiler_builtins/libm/src/math/k_tanf.rs new file mode 100644 index 000000000..af8db539d --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/k_tanf.rs @@ -0,0 +1,46 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ +/* + * ==================================================== + * Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). */ +const T: [f64; 6] = [ + 0.333331395030791399758, /* 0x15554d3418c99f.0p-54 */ + 0.133392002712976742718, /* 0x1112fd38999f72.0p-55 */ + 0.0533812378445670393523, /* 0x1b54c91d865afe.0p-57 */ + 0.0245283181166547278873, /* 0x191df3908c33ce.0p-58 */ + 0.00297435743359967304927, /* 0x185dadfcecf44e.0p-61 */ + 0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */ +]; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 { + let z = x * x; + /* + * Split up the polynomial into small independent terms to give + * opportunities for parallel evaluation. The chosen splitting is + * micro-optimized for Athlons (XP, X64). It costs 2 multiplications + * relative to Horner's method on sequential machines. + * + * We add the small terms from lowest degree up for efficiency on + * non-sequential machines (the lowest degree terms tend to be ready + * earlier). Apart from this, we don't care about order of + * operations, and don't need to to care since we have precision to + * spare. However, the chosen splitting is good for accuracy too, + * and would give results as accurate as Horner's method if the + * small terms were added from highest degree down. + */ + let mut r = T[4] + z * T[5]; + let t = T[2] + z * T[3]; + let w = z * z; + let s = z * x; + let u = T[0] + z * T[1]; + r = (x + s * u) + (s * w) * (t + w * r); + (if odd { -1. / r } else { r }) as f32 +} diff --git a/vendor/compiler_builtins/libm/src/math/ldexp.rs b/vendor/compiler_builtins/libm/src/math/ldexp.rs new file mode 100644 index 000000000..e46242e55 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/ldexp.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexp(x: f64, n: i32) -> f64 { + super::scalbn(x, n) +} diff --git a/vendor/compiler_builtins/libm/src/math/ldexpf.rs b/vendor/compiler_builtins/libm/src/math/ldexpf.rs new file mode 100644 index 000000000..95b27fc49 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/ldexpf.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf(x: f32, n: i32) -> f32 { + super::scalbnf(x, n) +} diff --git a/vendor/compiler_builtins/libm/src/math/lgamma.rs b/vendor/compiler_builtins/libm/src/math/lgamma.rs new file mode 100644 index 000000000..5bc87e85e --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/lgamma.rs @@ -0,0 +1,5 @@ +use super::lgamma_r; + +pub fn lgamma(x: f64) -> f64 { + lgamma_r(x).0 +} diff --git a/vendor/compiler_builtins/libm/src/math/lgamma_r.rs b/vendor/compiler_builtins/libm/src/math/lgamma_r.rs new file mode 100644 index 000000000..9533e882c --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/lgamma_r.rs @@ -0,0 +1,319 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* lgamma_r(x, signgamp) + * Reentrant version of the logarithm of the Gamma function + * with user provide pointer for the sign of Gamma(x). + * + * Method: + * 1. Argument Reduction for 0 < x <= 8 + * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may + * reduce x to a number in [1.5,2.5] by + * lgamma(1+s) = log(s) + lgamma(s) + * for example, + * lgamma(7.3) = log(6.3) + lgamma(6.3) + * = log(6.3*5.3) + lgamma(5.3) + * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) + * 2. Polynomial approximation of lgamma around its + * minimun ymin=1.461632144968362245 to maintain monotonicity. + * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use + * Let z = x-ymin; + * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) + * where + * poly(z) is a 14 degree polynomial. + * 2. Rational approximation in the primary interval [2,3] + * We use the following approximation: + * s = x-2.0; + * lgamma(x) = 0.5*s + s*P(s)/Q(s) + * with accuracy + * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 + * Our algorithms are based on the following observation + * + * zeta(2)-1 2 zeta(3)-1 3 + * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... + * 2 3 + * + * where Euler = 0.5771... is the Euler constant, which is very + * close to 0.5. + * + * 3. For x>=8, we have + * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... + * (better formula: + * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) + * Let z = 1/x, then we approximation + * f(z) = lgamma(x) - (x-0.5)(log(x)-1) + * by + * 3 5 11 + * w = w0 + w1*z + w2*z + w3*z + ... + w6*z + * where + * |w - f(z)| < 2**-58.74 + * + * 4. For negative x, since (G is gamma function) + * -x*G(-x)*G(x) = PI/sin(PI*x), + * we have + * G(x) = PI/(sin(PI*x)*(-x)*G(-x)) + * since G(-x) is positive, sign(G(x)) = sign(sin(PI*x)) for x<0 + * Hence, for x<0, signgam = sign(sin(PI*x)) and + * lgamma(x) = log(|Gamma(x)|) + * = log(PI/(|x*sin(PI*x)|)) - lgamma(-x); + * Note: one should avoid compute PI*(-x) directly in the + * computation of sin(PI*(-x)). + * + * 5. Special Cases + * lgamma(2+s) ~ s*(1-Euler) for tiny s + * lgamma(1) = lgamma(2) = 0 + * lgamma(x) ~ -log(|x|) for tiny x + * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero + * lgamma(inf) = inf + * lgamma(-inf) = inf (bug for bug compatible with C99!?) + * + */ + +use super::{floor, k_cos, k_sin, log}; + +const PI: f64 = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */ +const A0: f64 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */ +const A1: f64 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */ +const A2: f64 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */ +const A3: f64 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */ +const A4: f64 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */ +const A5: f64 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */ +const A6: f64 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */ +const A7: f64 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */ +const A8: f64 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */ +const A9: f64 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */ +const A10: f64 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */ +const A11: f64 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */ +const TC: f64 = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */ +const TF: f64 = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */ +/* tt = -(tail of TF) */ +const TT: f64 = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */ +const T0: f64 = 4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */ +const T1: f64 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */ +const T2: f64 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */ +const T3: f64 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */ +const T4: f64 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */ +const T5: f64 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */ +const T6: f64 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */ +const T7: f64 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */ +const T8: f64 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */ +const T9: f64 = -1.40346469989232843813e-03; /* 0xBF56FE8E, 0xBF2D1AF1 */ +const T10: f64 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */ +const T11: f64 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */ +const T12: f64 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */ +const T13: f64 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */ +const T14: f64 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */ +const U0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const U1: f64 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */ +const U2: f64 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */ +const U3: f64 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */ +const U4: f64 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */ +const U5: f64 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */ +const V1: f64 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */ +const V2: f64 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */ +const V3: f64 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */ +const V4: f64 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */ +const V5: f64 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */ +const S0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const S1: f64 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */ +const S2: f64 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */ +const S3: f64 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */ +const S4: f64 = 2.66422703033638609560e-02; /* 0x3F9B481C, 0x7E939961 */ +const S5: f64 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */ +const S6: f64 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */ +const R1: f64 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */ +const R2: f64 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */ +const R3: f64 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */ +const R4: f64 = 1.86459191715652901344e-02; /* 0x3F9317EA, 0x742ED475 */ +const R5: f64 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */ +const R6: f64 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */ +const W0: f64 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */ +const W1: f64 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */ +const W2: f64 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */ +const W3: f64 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */ +const W4: f64 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */ +const W5: f64 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */ +const W6: f64 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f64) -> f64 { + let mut n: i32; + + /* spurious inexact if odd int */ + x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */ + + n = (x * 4.0) as i32; + n = (n + 1) / 2; + x -= (n as f64) * 0.5; + x *= PI; + + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + 0 | _ => k_sin(x, 0.0, 0), + } +} + +pub fn lgamma_r(mut x: f64) -> (f64, i32) { + let u: u64 = x.to_bits(); + let mut t: f64; + let y: f64; + let mut z: f64; + let nadj: f64; + let p: f64; + let p1: f64; + let p2: f64; + let p3: f64; + let q: f64; + let mut r: f64; + let w: f64; + let ix: u32; + let sign: bool; + let i: i32; + let mut signgam: i32; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u >> 63) != 0; + ix = ((u >> 32) as u32) & 0x7fffffff; + if ix >= 0x7ff00000 { + return (x * x, signgam); + } + if ix < (0x3ff - 70) << 20 { + /* |x|<2**-70, return -log(|x|) */ + if sign { + x = -x; + signgam = -1; + } + return (-log(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { + /* -integer */ + return (1.0 / (x - x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = log(PI / (t * x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if (ix == 0x3ff00000 || ix == 0x40000000) && (u & 0xffffffff) == 0 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3feccccc { + /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -log(x); + if ix >= 0x3FE76944 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3FCDA661 { + y = x - (TC - 1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3FFBB4C3 { + /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3FF3B4C4 { + /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y * y; + p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); + p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); + p = y * p1 + p2; + r += p - 0.5 * y; + } + 1 => { + z = y * y; + w = z * y; + p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ + p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); + p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); + p = z * p1 - (TT - w * (p2 + y * p3)); + r += TF + p; + } + 2 => { + p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); + p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); + r += -0.5 * y + p1 / p2; + } + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => {} + } + } else if ix < 0x40200000 { + /* x < 8.0 */ + i = x as i32; + y = x - (i as f64); + p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); + q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); + r = 0.5 * y + p / q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? + if i >= 7 { + z *= y + 6.0; + } + if i >= 6 { + z *= y + 5.0; + } + if i >= 5 { + z *= y + 4.0; + } + if i >= 4 { + z *= y + 3.0; + } + if i >= 3 { + z *= y + 2.0; + r += log(z); + } + } else if ix < 0x43900000 { + /* 8.0 <= x < 2**58 */ + t = log(x); + z = 1.0 / x; + y = z * z; + w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); + r = (x - 0.5) * (t - 1.0) + w; + } else { + /* 2**58 <= x <= inf */ + r = x * (log(x) - 1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/vendor/compiler_builtins/libm/src/math/lgammaf.rs b/vendor/compiler_builtins/libm/src/math/lgammaf.rs new file mode 100644 index 000000000..dfdc87f96 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/lgammaf.rs @@ -0,0 +1,5 @@ +use super::lgammaf_r; + +pub fn lgammaf(x: f32) -> f32 { + lgammaf_r(x).0 +} diff --git a/vendor/compiler_builtins/libm/src/math/lgammaf_r.rs b/vendor/compiler_builtins/libm/src/math/lgammaf_r.rs new file mode 100644 index 000000000..c5e559f46 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/lgammaf_r.rs @@ -0,0 +1,254 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{floorf, k_cosf, k_sinf, logf}; + +const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ +const A0: f32 = 7.7215664089e-02; /* 0x3d9e233f */ +const A1: f32 = 3.2246702909e-01; /* 0x3ea51a66 */ +const A2: f32 = 6.7352302372e-02; /* 0x3d89f001 */ +const A3: f32 = 2.0580807701e-02; /* 0x3ca89915 */ +const A4: f32 = 7.3855509982e-03; /* 0x3bf2027e */ +const A5: f32 = 2.8905137442e-03; /* 0x3b3d6ec6 */ +const A6: f32 = 1.1927076848e-03; /* 0x3a9c54a1 */ +const A7: f32 = 5.1006977446e-04; /* 0x3a05b634 */ +const A8: f32 = 2.2086278477e-04; /* 0x39679767 */ +const A9: f32 = 1.0801156895e-04; /* 0x38e28445 */ +const A10: f32 = 2.5214456400e-05; /* 0x37d383a2 */ +const A11: f32 = 4.4864096708e-05; /* 0x383c2c75 */ +const TC: f32 = 1.4616321325e+00; /* 0x3fbb16c3 */ +const TF: f32 = -1.2148628384e-01; /* 0xbdf8cdcd */ +/* TT = -(tail of TF) */ +const TT: f32 = 6.6971006518e-09; /* 0x31e61c52 */ +const T0: f32 = 4.8383611441e-01; /* 0x3ef7b95e */ +const T1: f32 = -1.4758771658e-01; /* 0xbe17213c */ +const T2: f32 = 6.4624942839e-02; /* 0x3d845a15 */ +const T3: f32 = -3.2788541168e-02; /* 0xbd064d47 */ +const T4: f32 = 1.7970675603e-02; /* 0x3c93373d */ +const T5: f32 = -1.0314224288e-02; /* 0xbc28fcfe */ +const T6: f32 = 6.1005386524e-03; /* 0x3bc7e707 */ +const T7: f32 = -3.6845202558e-03; /* 0xbb7177fe */ +const T8: f32 = 2.2596477065e-03; /* 0x3b141699 */ +const T9: f32 = -1.4034647029e-03; /* 0xbab7f476 */ +const T10: f32 = 8.8108185446e-04; /* 0x3a66f867 */ +const T11: f32 = -5.3859531181e-04; /* 0xba0d3085 */ +const T12: f32 = 3.1563205994e-04; /* 0x39a57b6b */ +const T13: f32 = -3.1275415677e-04; /* 0xb9a3f927 */ +const T14: f32 = 3.3552918467e-04; /* 0x39afe9f7 */ +const U0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const U1: f32 = 6.3282704353e-01; /* 0x3f2200f4 */ +const U2: f32 = 1.4549225569e+00; /* 0x3fba3ae7 */ +const U3: f32 = 9.7771751881e-01; /* 0x3f7a4bb2 */ +const U4: f32 = 2.2896373272e-01; /* 0x3e6a7578 */ +const U5: f32 = 1.3381091878e-02; /* 0x3c5b3c5e */ +const V1: f32 = 2.4559779167e+00; /* 0x401d2ebe */ +const V2: f32 = 2.1284897327e+00; /* 0x4008392d */ +const V3: f32 = 7.6928514242e-01; /* 0x3f44efdf */ +const V4: f32 = 1.0422264785e-01; /* 0x3dd572af */ +const V5: f32 = 3.2170924824e-03; /* 0x3b52d5db */ +const S0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const S1: f32 = 2.1498242021e-01; /* 0x3e5c245a */ +const S2: f32 = 3.2577878237e-01; /* 0x3ea6cc7a */ +const S3: f32 = 1.4635047317e-01; /* 0x3e15dce6 */ +const S4: f32 = 2.6642270386e-02; /* 0x3cda40e4 */ +const S5: f32 = 1.8402845599e-03; /* 0x3af135b4 */ +const S6: f32 = 3.1947532989e-05; /* 0x3805ff67 */ +const R1: f32 = 1.3920053244e+00; /* 0x3fb22d3b */ +const R2: f32 = 7.2193557024e-01; /* 0x3f38d0c5 */ +const R3: f32 = 1.7193385959e-01; /* 0x3e300f6e */ +const R4: f32 = 1.8645919859e-02; /* 0x3c98bf54 */ +const R5: f32 = 7.7794247773e-04; /* 0x3a4beed6 */ +const R6: f32 = 7.3266842264e-06; /* 0x36f5d7bd */ +const W0: f32 = 4.1893854737e-01; /* 0x3ed67f1d */ +const W1: f32 = 8.3333335817e-02; /* 0x3daaaaab */ +const W2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ +const W3: f32 = 7.9365057172e-04; /* 0x3a500cfd */ +const W4: f32 = -5.9518753551e-04; /* 0xba1c065c */ +const W5: f32 = 8.3633989561e-04; /* 0x3a5b3dd2 */ +const W6: f32 = -1.6309292987e-03; /* 0xbad5c4e8 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f32) -> f32 { + let mut y: f64; + let mut n: isize; + + /* spurious inexact if odd int */ + x = 2.0 * (x * 0.5 - floorf(x * 0.5)); /* x mod 2.0 */ + + n = (x * 4.0) as isize; + n = (n + 1) / 2; + y = (x as f64) - (n as f64) * 0.5; + y *= 3.14159265358979323846; + match n { + 1 => k_cosf(y), + 2 => k_sinf(-y), + 3 => -k_cosf(y), + 0 | _ => k_sinf(y), + } +} + +pub fn lgammaf_r(mut x: f32) -> (f32, i32) { + let u = x.to_bits(); + let mut t: f32; + let y: f32; + let mut z: f32; + let nadj: f32; + let p: f32; + let p1: f32; + let p2: f32; + let p3: f32; + let q: f32; + let mut r: f32; + let w: f32; + let ix: u32; + let i: i32; + let sign: bool; + let mut signgam: i32; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u >> 31) != 0; + ix = u & 0x7fffffff; + if ix >= 0x7f800000 { + return (x * x, signgam); + } + if ix < 0x35000000 { + /* |x| < 2**-21, return -log(|x|) */ + if sign { + signgam = -1; + x = -x; + } + return (-logf(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { + /* -integer */ + return (1.0 / (x - x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = logf(PI / (t * x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if ix == 0x3f800000 || ix == 0x40000000 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3f666666 { + /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -logf(x); + if ix >= 0x3f3b4a20 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3e6d3308 { + y = x - (TC - 1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3fdda618 { + /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3F9da620 { + /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y * y; + p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); + p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); + p = y * p1 + p2; + r += p - 0.5 * y; + } + 1 => { + z = y * y; + w = z * y; + p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ + p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); + p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); + p = z * p1 - (TT - w * (p2 + y * p3)); + r += TF + p; + } + 2 => { + p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); + p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); + r += -0.5 * y + p1 / p2; + } + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => {} + } + } else if ix < 0x41000000 { + /* x < 8.0 */ + i = x as i32; + y = x - (i as f32); + p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); + q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); + r = 0.5 * y + p / q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? + if i >= 7 { + z *= y + 6.0; + } + if i >= 6 { + z *= y + 5.0; + } + if i >= 5 { + z *= y + 4.0; + } + if i >= 4 { + z *= y + 3.0; + } + if i >= 3 { + z *= y + 2.0; + r += logf(z); + } + } else if ix < 0x5c800000 { + /* 8.0 <= x < 2**58 */ + t = logf(x); + z = 1.0 / x; + y = z * z; + w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); + r = (x - 0.5) * (t - 1.0) + w; + } else { + /* 2**58 <= x <= inf */ + r = x * (logf(x) - 1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/vendor/compiler_builtins/libm/src/math/log.rs b/vendor/compiler_builtins/libm/src/math/log.rs new file mode 100644 index 000000000..27a26da60 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/log.rs @@ -0,0 +1,117 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* log(x) + * Return the logarithm of x + * + * Method : + * 1. Argument Reduction: find k and f such that + * x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . + * + * 2. Approximation of log(1+f). + * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) + * = 2s + 2/3 s**3 + 2/5 s**5 + ....., + * = 2s + s*R + * We use a special Remez algorithm on [0,0.1716] to generate + * a polynomial of degree 14 to approximate R The maximum error + * of this polynomial approximation is bounded by 2**-58.45. In + * other words, + * 2 4 6 8 10 12 14 + * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s + * (the values of Lg1 to Lg7 are listed in the program) + * and + * | 2 14 | -58.45 + * | Lg1*s +...+Lg7*s - R(z) | <= 2 + * | | + * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. + * In order to guarantee error in log below 1ulp, we compute log + * by + * log(1+f) = f - s*(f - R) (if f is not too large) + * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) + * + * 3. Finally, log(x) = k*ln2 + log(1+f). + * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) + * Here ln2 is split into two floating point number: + * ln2_hi + ln2_lo, + * where n*ln2_hi is always exact for |n| < 2000. + * + * Special cases: + * log(x) is NaN with signal if x < 0 (including -INF) ; + * log(+INF) is +INF; log(0) is -INF with signal; + * log(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui = x.to_bits(); + let mut hx: u32 = (ui >> 32) as u32; + let mut k: i32 = 0; + + if (hx < 0x00100000) || ((hx >> 31) != 0) { + /* x < 2**-126 */ + if ui << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if hx >> 31 != 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += ((hx >> 20) as i32) - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = ((hx as u64) << 32) | (ui & 0xffffffff); + x = f64::from_bits(ui); + + let f: f64 = x - 1.0; + let hfsq: f64 = 0.5 * f * f; + let s: f64 = f / (2.0 + f); + let z: f64 = s * s; + let w: f64 = z * z; + let t1: f64 = w * (LG2 + w * (LG4 + w * LG6)); + let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + let r: f64 = t2 + t1; + let dk: f64 = k as f64; + s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI +} diff --git a/vendor/compiler_builtins/libm/src/math/log10.rs b/vendor/compiler_builtins/libm/src/math/log10.rs new file mode 100644 index 000000000..40dacf2c9 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/log10.rs @@ -0,0 +1,117 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 10 logarithm of x. See log.c for most comments. + * + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2) + */ + +use core::f64; + +const IVLN10HI: f64 = 4.34294481878168880939e-01; /* 0x3fdbcb7b, 0x15200000 */ +const IVLN10LO: f64 = 2.50829467116452752298e-11; /* 0x3dbb9438, 0xca9aadd5 */ +const LOG10_2HI: f64 = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */ +const LOG10_2LO: f64 = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log10(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let f: f64; + let s: f64; + let z: f64; + let r: f64; + let mut w: f64; + let t1: f64; + let t2: f64; + let dk: f64; + let y: f64; + let mut hi: f64; + let lo: f64; + let mut val_hi: f64; + let mut val_lo: f64; + let mut hx: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 0; + if hx < 0x00100000 || (hx >> 31) > 0 { + if ui << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (hx >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (hx >> 20) as i32 - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = (hx as u64) << 32 | (ui & 0xffffffff); + x = f64::from_bits(ui); + + f = x - 1.0; + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + + /* See log2.c for details. */ + /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ + hi = f - hfsq; + ui = hi.to_bits(); + ui &= (-1i64 as u64) << 32; + hi = f64::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + + /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */ + val_hi = hi * IVLN10HI; + dk = k as f64; + y = dk * LOG10_2HI; + val_lo = dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI; + + /* + * Extra precision in for adding y is not strictly needed + * since there is no very large cancellation near x = sqrt(2) or + * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs + * with some parallelism and it reduces the error for many args. + */ + w = y + val_hi; + val_lo += (y - w) + val_hi; + val_hi = w; + + val_lo + val_hi +} diff --git a/vendor/compiler_builtins/libm/src/math/log10f.rs b/vendor/compiler_builtins/libm/src/math/log10f.rs new file mode 100644 index 000000000..108dfa8b5 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/log10f.rs @@ -0,0 +1,91 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log10.c. + */ + +use core::f32; + +const IVLN10HI: f32 = 4.3432617188e-01; /* 0x3ede6000 */ +const IVLN10LO: f32 = -3.1689971365e-05; /* 0xb804ead9 */ +const LOG10_2HI: f32 = 3.0102920532e-01; /* 0x3e9a2080 */ +const LOG10_2LO: f32 = 7.9034151668e-07; /* 0x355427db */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log10f(mut x: f32) -> f32 { + let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 + + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let f: f32; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let dk: f32; + let mut hi: f32; + let lo: f32; + let mut ix: u32; + let mut k: i32; + + ix = ui; + k = 0; + if ix < 0x00800000 || (ix >> 31) > 0 { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (ix >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale up x */ + k -= 25; + x *= x1p25f; + ui = x.to_bits(); + ix = ui; + } else if ix >= 0x7f800000 { + return x; + } else if ix == 0x3f800000 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (ix >> 23) as i32 - 0x7f; + ix = (ix & 0x007fffff) + 0x3f3504f3; + ui = ix; + x = f32::from_bits(ui); + + f = x - 1.0; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + + hi = f - hfsq; + ui = hi.to_bits(); + ui &= 0xfffff000; + hi = f32::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + dk = k as f32; + dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI + hi * IVLN10HI + dk * LOG10_2HI +} diff --git a/vendor/compiler_builtins/libm/src/math/log1p.rs b/vendor/compiler_builtins/libm/src/math/log1p.rs new file mode 100644 index 000000000..4fd1c73eb --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/log1p.rs @@ -0,0 +1,143 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* double log1p(double x) + * Return the natural logarithm of 1+x. + * + * Method : + * 1. Argument Reduction: find k and f such that + * 1+x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . + * + * Note. If k=0, then f=x is exact. However, if k!=0, then f + * may not be representable exactly. In that case, a correction + * term is need. Let u=1+x rounded. Let c = (1+x)-u, then + * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), + * and add back the correction term c/u. + * (Note: when x > 2**53, one can simply return log(x)) + * + * 2. Approximation of log(1+f): See log.c + * + * 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c + * + * Special cases: + * log1p(x) is NaN with signal if x < -1 (including -INF) ; + * log1p(+INF) is +INF; log1p(-1) is -INF with signal; + * log1p(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + * + * Note: Assuming log() return accurate answer, the following + * algorithm can be used to compute log1p(x) to within a few ULP: + * + * u = 1+x; + * if(u==1.0) return x ; else + * return log(u)*(x/(u-1.0)); + * + * See HP-15C Advanced Functions Handbook, p.193. + */ + +use core::f64; + +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log1p(x: f64) -> f64 { + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let mut f: f64 = 0.; + let mut c: f64 = 0.; + let s: f64; + let z: f64; + let r: f64; + let w: f64; + let t1: f64; + let t2: f64; + let dk: f64; + let hx: u32; + let mut hu: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 1; + if hx < 0x3fda827a || (hx >> 31) > 0 { + /* 1+x < sqrt(2)+ */ + if hx >= 0xbff00000 { + /* x <= -1.0 */ + if x == -1. { + return x / 0.0; /* log1p(-1) = -inf */ + } + return (x - x) / 0.0; /* log1p(x<-1) = NaN */ + } + if hx << 1 < 0x3ca00000 << 1 { + /* |x| < 2**-53 */ + /* underflow if subnormal */ + if (hx & 0x7ff00000) == 0 { + force_eval!(x as f32); + } + return x; + } + if hx <= 0xbfd2bec4 { + /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + k = 0; + c = 0.; + f = x; + } + } else if hx >= 0x7ff00000 { + return x; + } + if k > 0 { + ui = (1. + x).to_bits(); + hu = (ui >> 32) as u32; + hu += 0x3ff00000 - 0x3fe6a09e; + k = (hu >> 20) as i32 - 0x3ff; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if k < 54 { + c = if k >= 2 { + 1. - (f64::from_bits(ui) - x) + } else { + x - (f64::from_bits(ui) - 1.) + }; + c /= f64::from_bits(ui); + } else { + c = 0.; + } + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + hu = (hu & 0x000fffff) + 0x3fe6a09e; + ui = (hu as u64) << 32 | (ui & 0xffffffff); + f = f64::from_bits(ui) - 1.; + } + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + dk = k as f64; + s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI +} diff --git a/vendor/compiler_builtins/libm/src/math/log1pf.rs b/vendor/compiler_builtins/libm/src/math/log1pf.rs new file mode 100644 index 000000000..500e8eeaa --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/log1pf.rs @@ -0,0 +1,98 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_log1pf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f32; + +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log1pf(x: f32) -> f32 { + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let mut f: f32 = 0.; + let mut c: f32 = 0.; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let dk: f32; + let ix: u32; + let mut iu: u32; + let mut k: i32; + + ix = ui; + k = 1; + if ix < 0x3ed413d0 || (ix >> 31) > 0 { + /* 1+x < sqrt(2)+ */ + if ix >= 0xbf800000 { + /* x <= -1.0 */ + if x == -1. { + return x / 0.0; /* log1p(-1)=+inf */ + } + return (x - x) / 0.0; /* log1p(x<-1)=NaN */ + } + if ix << 1 < 0x33800000 << 1 { + /* |x| < 2**-24 */ + /* underflow if subnormal */ + if (ix & 0x7f800000) == 0 { + force_eval!(x * x); + } + return x; + } + if ix <= 0xbe95f619 { + /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + k = 0; + c = 0.; + f = x; + } + } else if ix >= 0x7f800000 { + return x; + } + if k > 0 { + ui = (1. + x).to_bits(); + iu = ui; + iu += 0x3f800000 - 0x3f3504f3; + k = (iu >> 23) as i32 - 0x7f; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if k < 25 { + c = if k >= 2 { + 1. - (f32::from_bits(ui) - x) + } else { + x - (f32::from_bits(ui) - 1.) + }; + c /= f32::from_bits(ui); + } else { + c = 0.; + } + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + iu = (iu & 0x007fffff) + 0x3f3504f3; + ui = iu; + f = f32::from_bits(ui) - 1.; + } + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + dk = k as f32; + s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI +} diff --git a/vendor/compiler_builtins/libm/src/math/log2.rs b/vendor/compiler_builtins/libm/src/math/log2.rs new file mode 100644 index 000000000..83da3a193 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/log2.rs @@ -0,0 +1,106 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 2 logarithm of x. See log.c for most comments. + * + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log2(x) = (f - f*f/2 + r)/log(2) + k + */ + +use core::f64; + +const IVLN2HI: f64 = 1.44269504072144627571e+00; /* 0x3ff71547, 0x65200000 */ +const IVLN2LO: f64 = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log2(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let f: f64; + let s: f64; + let z: f64; + let r: f64; + let mut w: f64; + let t1: f64; + let t2: f64; + let y: f64; + let mut hi: f64; + let lo: f64; + let mut val_hi: f64; + let mut val_lo: f64; + let mut hx: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 0; + if hx < 0x00100000 || (hx >> 31) > 0 { + if ui << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (hx >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (hx >> 20) as i32 - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = (hx as u64) << 32 | (ui & 0xffffffff); + x = f64::from_bits(ui); + + f = x - 1.0; + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + + /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ + hi = f - hfsq; + ui = hi.to_bits(); + ui &= (-1i64 as u64) << 32; + hi = f64::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + + val_hi = hi * IVLN2HI; + val_lo = (lo + hi) * IVLN2LO + lo * IVLN2HI; + + /* spadd(val_hi, val_lo, y), except for not using double_t: */ + y = k.into(); + w = y + val_hi; + val_lo += (y - w) + val_hi; + val_hi = w; + + val_lo + val_hi +} diff --git a/vendor/compiler_builtins/libm/src/math/log2f.rs b/vendor/compiler_builtins/libm/src/math/log2f.rs new file mode 100644 index 000000000..3a20fb15b --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/log2f.rs @@ -0,0 +1,87 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log2.c. + */ + +use core::f32; + +const IVLN2HI: f32 = 1.4428710938e+00; /* 0x3fb8b000 */ +const IVLN2LO: f32 = -1.7605285393e-04; /* 0xb9389ad4 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log2f(mut x: f32) -> f32 { + let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 + + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let f: f32; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let mut hi: f32; + let lo: f32; + let mut ix: u32; + let mut k: i32; + + ix = ui; + k = 0; + if ix < 0x00800000 || (ix >> 31) > 0 { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (ix >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale up x */ + k -= 25; + x *= x1p25f; + ui = x.to_bits(); + ix = ui; + } else if ix >= 0x7f800000 { + return x; + } else if ix == 0x3f800000 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (ix >> 23) as i32 - 0x7f; + ix = (ix & 0x007fffff) + 0x3f3504f3; + ui = ix; + x = f32::from_bits(ui); + + f = x - 1.0; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + + hi = f - hfsq; + ui = hi.to_bits(); + ui &= 0xfffff000; + hi = f32::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + (lo + hi) * IVLN2LO + lo * IVLN2HI + hi * IVLN2HI + k as f32 +} diff --git a/vendor/compiler_builtins/libm/src/math/logf.rs b/vendor/compiler_builtins/libm/src/math/logf.rs new file mode 100644 index 000000000..2b57b934f --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/logf.rs @@ -0,0 +1,65 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24*/ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn logf(mut x: f32) -> f32 { + let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 + + let mut ix = x.to_bits(); + let mut k = 0i32; + + if (ix < 0x00800000) || ((ix >> 31) != 0) { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (ix >> 31) != 0 { + return (x - x) / 0.; /* log(-#) = NaN */ + } + /* subnormal number, scale up x */ + k -= 25; + x *= x1p25; + ix = x.to_bits(); + } else if ix >= 0x7f800000 { + return x; + } else if ix == 0x3f800000 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += ((ix >> 23) as i32) - 0x7f; + ix = (ix & 0x007fffff) + 0x3f3504f3; + x = f32::from_bits(ix); + + let f = x - 1.; + let s = f / (2. + f); + let z = s * s; + let w = z * z; + let t1 = w * (LG2 + w * LG4); + let t2 = z * (LG1 + w * LG3); + let r = t2 + t1; + let hfsq = 0.5 * f * f; + let dk = k as f32; + s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI +} diff --git a/vendor/compiler_builtins/libm/src/math/mod.rs b/vendor/compiler_builtins/libm/src/math/mod.rs new file mode 100644 index 000000000..81bfc53ed --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/mod.rs @@ -0,0 +1,366 @@ +macro_rules! force_eval { + ($e:expr) => { + unsafe { ::core::ptr::read_volatile(&$e) } + }; +} + +#[cfg(not(debug_assertions))] +macro_rules! i { + ($array:expr, $index:expr) => { + unsafe { *$array.get_unchecked($index) } + }; + ($array:expr, $index:expr, = , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) = $rhs; + } + }; + ($array:expr, $index:expr, += , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) += $rhs; + } + }; + ($array:expr, $index:expr, -= , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) -= $rhs; + } + }; + ($array:expr, $index:expr, &= , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) &= $rhs; + } + }; + ($array:expr, $index:expr, == , $rhs:expr) => { + unsafe { *$array.get_unchecked_mut($index) == $rhs } + }; +} + +#[cfg(debug_assertions)] +macro_rules! i { + ($array:expr, $index:expr) => { + *$array.get($index).unwrap() + }; + ($array:expr, $index:expr, = , $rhs:expr) => { + *$array.get_mut($index).unwrap() = $rhs; + }; + ($array:expr, $index:expr, -= , $rhs:expr) => { + *$array.get_mut($index).unwrap() -= $rhs; + }; + ($array:expr, $index:expr, += , $rhs:expr) => { + *$array.get_mut($index).unwrap() += $rhs; + }; + ($array:expr, $index:expr, &= , $rhs:expr) => { + *$array.get_mut($index).unwrap() &= $rhs; + }; + ($array:expr, $index:expr, == , $rhs:expr) => { + *$array.get_mut($index).unwrap() == $rhs + }; +} + +// Temporary macro to avoid panic codegen for division (in debug mode too). At +// the time of this writing this is only used in a few places, and once +// rust-lang/rust#72751 is fixed then this macro will no longer be necessary and +// the native `/` operator can be used and panics won't be codegen'd. +#[cfg(any(debug_assertions, not(feature = "unstable")))] +macro_rules! div { + ($a:expr, $b:expr) => { + $a / $b + }; +} + +#[cfg(all(not(debug_assertions), feature = "unstable"))] +macro_rules! div { + ($a:expr, $b:expr) => { + unsafe { core::intrinsics::unchecked_div($a, $b) } + }; +} + +macro_rules! llvm_intrinsically_optimized { + (#[cfg($($clause:tt)*)] $e:expr) => { + #[cfg(all(feature = "unstable", $($clause)*))] + { + if true { // thwart the dead code lint + $e + } + } + }; +} + +// Public modules +mod acos; +mod acosf; +mod acosh; +mod acoshf; +mod asin; +mod asinf; +mod asinh; +mod asinhf; +mod atan; +mod atan2; +mod atan2f; +mod atanf; +mod atanh; +mod atanhf; +mod cbrt; +mod cbrtf; +mod ceil; +mod ceilf; +mod copysign; +mod copysignf; +mod cos; +mod cosf; +mod cosh; +mod coshf; +mod erf; +mod erff; +mod exp; +mod exp10; +mod exp10f; +mod exp2; +mod exp2f; +mod expf; +mod expm1; +mod expm1f; +mod fabs; +mod fabsf; +mod fdim; +mod fdimf; +mod floor; +mod floorf; +mod fma; +mod fmaf; +mod fmax; +mod fmaxf; +mod fmin; +mod fminf; +mod fmod; +mod fmodf; +mod frexp; +mod frexpf; +mod hypot; +mod hypotf; +mod ilogb; +mod ilogbf; +mod j0; +mod j0f; +mod j1; +mod j1f; +mod jn; +mod jnf; +mod ldexp; +mod ldexpf; +mod lgamma; +mod lgamma_r; +mod lgammaf; +mod lgammaf_r; +mod log; +mod log10; +mod log10f; +mod log1p; +mod log1pf; +mod log2; +mod log2f; +mod logf; +mod modf; +mod modff; +mod nextafter; +mod nextafterf; +mod pow; +mod powf; +mod remainder; +mod remainderf; +mod remquo; +mod remquof; +mod round; +mod roundf; +mod scalbn; +mod scalbnf; +mod sin; +mod sincos; +mod sincosf; +mod sinf; +mod sinh; +mod sinhf; +mod sqrt; +mod sqrtf; +mod tan; +mod tanf; +mod tanh; +mod tanhf; +mod tgamma; +mod tgammaf; +mod trunc; +mod truncf; + +// Use separated imports instead of {}-grouped imports for easier merging. +pub use self::acos::acos; +pub use self::acosf::acosf; +pub use self::acosh::acosh; +pub use self::acoshf::acoshf; +pub use self::asin::asin; +pub use self::asinf::asinf; +pub use self::asinh::asinh; +pub use self::asinhf::asinhf; +pub use self::atan::atan; +pub use self::atan2::atan2; +pub use self::atan2f::atan2f; +pub use self::atanf::atanf; +pub use self::atanh::atanh; +pub use self::atanhf::atanhf; +pub use self::cbrt::cbrt; +pub use self::cbrtf::cbrtf; +pub use self::ceil::ceil; +pub use self::ceilf::ceilf; +pub use self::copysign::copysign; +pub use self::copysignf::copysignf; +pub use self::cos::cos; +pub use self::cosf::cosf; +pub use self::cosh::cosh; +pub use self::coshf::coshf; +pub use self::erf::erf; +pub use self::erf::erfc; +pub use self::erff::erfcf; +pub use self::erff::erff; +pub use self::exp::exp; +pub use self::exp10::exp10; +pub use self::exp10f::exp10f; +pub use self::exp2::exp2; +pub use self::exp2f::exp2f; +pub use self::expf::expf; +pub use self::expm1::expm1; +pub use self::expm1f::expm1f; +pub use self::fabs::fabs; +pub use self::fabsf::fabsf; +pub use self::fdim::fdim; +pub use self::fdimf::fdimf; +pub use self::floor::floor; +pub use self::floorf::floorf; +pub use self::fma::fma; +pub use self::fmaf::fmaf; +pub use self::fmax::fmax; +pub use self::fmaxf::fmaxf; +pub use self::fmin::fmin; +pub use self::fminf::fminf; +pub use self::fmod::fmod; +pub use self::fmodf::fmodf; +pub use self::frexp::frexp; +pub use self::frexpf::frexpf; +pub use self::hypot::hypot; +pub use self::hypotf::hypotf; +pub use self::ilogb::ilogb; +pub use self::ilogbf::ilogbf; +pub use self::j0::j0; +pub use self::j0::y0; +pub use self::j0f::j0f; +pub use self::j0f::y0f; +pub use self::j1::j1; +pub use self::j1::y1; +pub use self::j1f::j1f; +pub use self::j1f::y1f; +pub use self::jn::jn; +pub use self::jn::yn; +pub use self::jnf::jnf; +pub use self::jnf::ynf; +pub use self::ldexp::ldexp; +pub use self::ldexpf::ldexpf; +pub use self::lgamma::lgamma; +pub use self::lgamma_r::lgamma_r; +pub use self::lgammaf::lgammaf; +pub use self::lgammaf_r::lgammaf_r; +pub use self::log::log; +pub use self::log10::log10; +pub use self::log10f::log10f; +pub use self::log1p::log1p; +pub use self::log1pf::log1pf; +pub use self::log2::log2; +pub use self::log2f::log2f; +pub use self::logf::logf; +pub use self::modf::modf; +pub use self::modff::modff; +pub use self::nextafter::nextafter; +pub use self::nextafterf::nextafterf; +pub use self::pow::pow; +pub use self::powf::powf; +pub use self::remainder::remainder; +pub use self::remainderf::remainderf; +pub use self::remquo::remquo; +pub use self::remquof::remquof; +pub use self::round::round; +pub use self::roundf::roundf; +pub use self::scalbn::scalbn; +pub use self::scalbnf::scalbnf; +pub use self::sin::sin; +pub use self::sincos::sincos; +pub use self::sincosf::sincosf; +pub use self::sinf::sinf; +pub use self::sinh::sinh; +pub use self::sinhf::sinhf; +pub use self::sqrt::sqrt; +pub use self::sqrtf::sqrtf; +pub use self::tan::tan; +pub use self::tanf::tanf; +pub use self::tanh::tanh; +pub use self::tanhf::tanhf; +pub use self::tgamma::tgamma; +pub use self::tgammaf::tgammaf; +pub use self::trunc::trunc; +pub use self::truncf::truncf; + +// Private modules +mod expo2; +mod fenv; +mod k_cos; +mod k_cosf; +mod k_expo2; +mod k_expo2f; +mod k_sin; +mod k_sinf; +mod k_tan; +mod k_tanf; +mod rem_pio2; +mod rem_pio2_large; +mod rem_pio2f; + +// Private re-imports +use self::expo2::expo2; +use self::k_cos::k_cos; +use self::k_cosf::k_cosf; +use self::k_expo2::k_expo2; +use self::k_expo2f::k_expo2f; +use self::k_sin::k_sin; +use self::k_sinf::k_sinf; +use self::k_tan::k_tan; +use self::k_tanf::k_tanf; +use self::rem_pio2::rem_pio2; +use self::rem_pio2_large::rem_pio2_large; +use self::rem_pio2f::rem_pio2f; + +#[inline] +fn get_high_word(x: f64) -> u32 { + (x.to_bits() >> 32) as u32 +} + +#[inline] +fn get_low_word(x: f64) -> u32 { + x.to_bits() as u32 +} + +#[inline] +fn with_set_high_word(f: f64, hi: u32) -> f64 { + let mut tmp = f.to_bits(); + tmp &= 0x00000000_ffffffff; + tmp |= (hi as u64) << 32; + f64::from_bits(tmp) +} + +#[inline] +fn with_set_low_word(f: f64, lo: u32) -> f64 { + let mut tmp = f.to_bits(); + tmp &= 0xffffffff_00000000; + tmp |= lo as u64; + f64::from_bits(tmp) +} + +#[inline] +fn combine_words(hi: u32, lo: u32) -> f64 { + f64::from_bits((hi as u64) << 32 | lo as u64) +} diff --git a/vendor/compiler_builtins/libm/src/math/modf.rs b/vendor/compiler_builtins/libm/src/math/modf.rs new file mode 100644 index 000000000..bcab33a81 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/modf.rs @@ -0,0 +1,34 @@ +pub fn modf(x: f64) -> (f64, f64) { + let rv2: f64; + let mut u = x.to_bits(); + let mask: u64; + let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff; + + /* no fractional part */ + if e >= 52 { + rv2 = x; + if e == 0x400 && (u << 12) != 0 { + /* nan */ + return (x, rv2); + } + u &= 1 << 63; + return (f64::from_bits(u), rv2); + } + + /* no integral part*/ + if e < 0 { + u &= 1 << 63; + rv2 = f64::from_bits(u); + return (x, rv2); + } + + mask = ((!0) >> 12) >> e; + if (u & mask) == 0 { + rv2 = x; + u &= 1 << 63; + return (f64::from_bits(u), rv2); + } + u &= !mask; + rv2 = f64::from_bits(u); + return (x - rv2, rv2); +} diff --git a/vendor/compiler_builtins/libm/src/math/modff.rs b/vendor/compiler_builtins/libm/src/math/modff.rs new file mode 100644 index 000000000..56ece12e3 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/modff.rs @@ -0,0 +1,33 @@ +pub fn modff(x: f32) -> (f32, f32) { + let rv2: f32; + let mut u: u32 = x.to_bits(); + let mask: u32; + let e = ((u >> 23 & 0xff) as i32) - 0x7f; + + /* no fractional part */ + if e >= 23 { + rv2 = x; + if e == 0x80 && (u << 9) != 0 { + /* nan */ + return (x, rv2); + } + u &= 0x80000000; + return (f32::from_bits(u), rv2); + } + /* no integral part */ + if e < 0 { + u &= 0x80000000; + rv2 = f32::from_bits(u); + return (x, rv2); + } + + mask = 0x007fffff >> e; + if (u & mask) == 0 { + rv2 = x; + u &= 0x80000000; + return (f32::from_bits(u), rv2); + } + u &= !mask; + rv2 = f32::from_bits(u); + return (x - rv2, rv2); +} diff --git a/vendor/compiler_builtins/libm/src/math/nextafter.rs b/vendor/compiler_builtins/libm/src/math/nextafter.rs new file mode 100644 index 000000000..13094a17c --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/nextafter.rs @@ -0,0 +1,37 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn nextafter(x: f64, y: f64) -> f64 { + if x.is_nan() || y.is_nan() { + return x + y; + } + + let mut ux_i = x.to_bits(); + let uy_i = y.to_bits(); + if ux_i == uy_i { + return y; + } + + let ax = ux_i & !1_u64 / 2; + let ay = uy_i & !1_u64 / 2; + if ax == 0 { + if ay == 0 { + return y; + } + ux_i = (uy_i & 1_u64 << 63) | 1; + } else if ax > ay || ((ux_i ^ uy_i) & 1_u64 << 63) != 0 { + ux_i -= 1; + } else { + ux_i += 1; + } + + let e = ux_i.wrapping_shr(52 & 0x7ff); + // raise overflow if ux.f is infinite and x is finite + if e == 0x7ff { + force_eval!(x + x); + } + let ux_f = f64::from_bits(ux_i); + // raise underflow if ux.f is subnormal or zero + if e == 0 { + force_eval!(x * x + ux_f * ux_f); + } + ux_f +} diff --git a/vendor/compiler_builtins/libm/src/math/nextafterf.rs b/vendor/compiler_builtins/libm/src/math/nextafterf.rs new file mode 100644 index 000000000..df9b10829 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/nextafterf.rs @@ -0,0 +1,37 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn nextafterf(x: f32, y: f32) -> f32 { + if x.is_nan() || y.is_nan() { + return x + y; + } + + let mut ux_i = x.to_bits(); + let uy_i = y.to_bits(); + if ux_i == uy_i { + return y; + } + + let ax = ux_i & 0x7fff_ffff_u32; + let ay = uy_i & 0x7fff_ffff_u32; + if ax == 0 { + if ay == 0 { + return y; + } + ux_i = (uy_i & 0x8000_0000_u32) | 1; + } else if ax > ay || ((ux_i ^ uy_i) & 0x8000_0000_u32) != 0 { + ux_i -= 1; + } else { + ux_i += 1; + } + + let e = ux_i.wrapping_shr(0x7f80_0000_u32); + // raise overflow if ux_f is infinite and x is finite + if e == 0x7f80_0000_u32 { + force_eval!(x + x); + } + let ux_f = f32::from_bits(ux_i); + // raise underflow if ux_f is subnormal or zero + if e == 0 { + force_eval!(x * x + ux_f * ux_f); + } + ux_f +} diff --git a/vendor/compiler_builtins/libm/src/math/pow.rs b/vendor/compiler_builtins/libm/src/math/pow.rs new file mode 100644 index 000000000..6a19ae601 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/pow.rs @@ -0,0 +1,637 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +// pow(x,y) return x**y +// +// n +// Method: Let x = 2 * (1+f) +// 1. Compute and return log2(x) in two pieces: +// log2(x) = w1 + w2, +// where w1 has 53-24 = 29 bit trailing zeros. +// 2. Perform y*log2(x) = n+y' by simulating muti-precision +// arithmetic, where |y'|<=0.5. +// 3. Return x**y = 2**n*exp(y'*log2) +// +// Special cases: +// 1. (anything) ** 0 is 1 +// 2. 1 ** (anything) is 1 +// 3. (anything except 1) ** NAN is NAN +// 4. NAN ** (anything except 0) is NAN +// 5. +-(|x| > 1) ** +INF is +INF +// 6. +-(|x| > 1) ** -INF is +0 +// 7. +-(|x| < 1) ** +INF is +0 +// 8. +-(|x| < 1) ** -INF is +INF +// 9. -1 ** +-INF is 1 +// 10. +0 ** (+anything except 0, NAN) is +0 +// 11. -0 ** (+anything except 0, NAN, odd integer) is +0 +// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero +// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero +// 14. -0 ** (+odd integer) is -0 +// 15. -0 ** (-odd integer) is -INF, raise divbyzero +// 16. +INF ** (+anything except 0,NAN) is +INF +// 17. +INF ** (-anything except 0,NAN) is +0 +// 18. -INF ** (+odd integer) is -INF +// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) +// 20. (anything) ** 1 is (anything) +// 21. (anything) ** -1 is 1/(anything) +// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) +// 23. (-anything except 0 and inf) ** (non-integer) is NAN +// +// Accuracy: +// pow(x,y) returns x**y nearly rounded. In particular +// pow(integer,integer) +// always returns the correct integer provided it is +// representable. +// +// Constants : +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. +// +use super::{fabs, get_high_word, scalbn, sqrt, with_set_high_word, with_set_low_word}; + +const BP: [f64; 2] = [1.0, 1.5]; +const DP_H: [f64; 2] = [0.0, 5.84962487220764160156e-01]; /* 0x3fe2b803_40000000 */ +const DP_L: [f64; 2] = [0.0, 1.35003920212974897128e-08]; /* 0x3E4CFDEB, 0x43CFD006 */ +const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */ +const HUGE: f64 = 1.0e300; +const TINY: f64 = 1.0e-300; + +// poly coefs for (3/2)*(log(x)-2s-2/3*s**3: +const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */ +const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */ +const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */ +const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */ +const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */ +const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */ +const P2: f64 = -2.77777777770155933842e-03; /* 0xbf66c16c_16bebd93 */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */ +const P4: f64 = -1.65339022054652515390e-06; /* 0xbebbbd41_c5d26bf1 */ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */ +const LG2: f64 = 6.93147180559945286227e-01; /* 0x3fe62e42_fefa39ef */ +const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */ +const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */ +const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */ +const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */ +const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */ +const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/ +const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ +const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ +const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn pow(x: f64, y: f64) -> f64 { + let t1: f64; + let t2: f64; + + let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); + let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); + + let mut ix: i32 = (hx & 0x7fffffff) as i32; + let iy: i32 = (hy & 0x7fffffff) as i32; + + /* x**0 = 1, even if x is NaN */ + if ((iy as u32) | ly) == 0 { + return 1.0; + } + + /* 1**y = 1, even if y is NaN */ + if hx == 0x3ff00000 && lx == 0 { + return 1.0; + } + + /* NaN if either arg is NaN */ + if ix > 0x7ff00000 + || (ix == 0x7ff00000 && lx != 0) + || iy > 0x7ff00000 + || (iy == 0x7ff00000 && ly != 0) + { + return x + y; + } + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + let mut yisint: i32 = 0; + let mut k: i32; + let mut j: i32; + if hx < 0 { + if iy >= 0x43400000 { + yisint = 2; /* even integer y */ + } else if iy >= 0x3ff00000 { + k = (iy >> 20) - 0x3ff; /* exponent */ + + if k > 20 { + j = (ly >> (52 - k)) as i32; + + if (j << (52 - k)) == (ly as i32) { + yisint = 2 - (j & 1); + } + } else if ly == 0 { + j = iy >> (20 - k); + + if (j << (20 - k)) == iy { + yisint = 2 - (j & 1); + } + } + } + } + + if ly == 0 { + /* special value of y */ + if iy == 0x7ff00000 { + /* y is +-inf */ + + return if ((ix - 0x3ff00000) | (lx as i32)) == 0 { + /* (-1)**+-inf is 1 */ + 1.0 + } else if ix >= 0x3ff00000 { + /* (|x|>1)**+-inf = inf,0 */ + if hy >= 0 { + y + } else { + 0.0 + } + } else { + /* (|x|<1)**+-inf = 0,inf */ + if hy >= 0 { + 0.0 + } else { + -y + } + }; + } + + if iy == 0x3ff00000 { + /* y is +-1 */ + return if hy >= 0 { x } else { 1.0 / x }; + } + + if hy == 0x40000000 { + /* y is 2 */ + return x * x; + } + + if hy == 0x3fe00000 { + /* y is 0.5 */ + if hx >= 0 { + /* x >= +0 */ + return sqrt(x); + } + } + } + + let mut ax: f64 = fabs(x); + if lx == 0 { + /* special value of x */ + if ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000 { + /* x is +-0,+-inf,+-1 */ + let mut z: f64 = ax; + + if hy < 0 { + /* z = (1/|x|) */ + z = 1.0 / z; + } + + if hx < 0 { + if ((ix - 0x3ff00000) | yisint) == 0 { + z = (z - z) / (z - z); /* (-1)**non-int is NaN */ + } else if yisint == 1 { + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + } + + return z; + } + } + + let mut s: f64 = 1.0; /* sign of result */ + if hx < 0 { + if yisint == 0 { + /* (x<0)**(non-int) is NaN */ + return (x - x) / (x - x); + } + + if yisint == 1 { + /* (x<0)**(odd int) */ + s = -1.0; + } + } + + /* |y| is HUGE */ + if iy > 0x41e00000 { + /* if |y| > 2**31 */ + if iy > 0x43f00000 { + /* if |y| > 2**64, must o/uflow */ + if ix <= 0x3fefffff { + return if hy < 0 { HUGE * HUGE } else { TINY * TINY }; + } + + if ix >= 0x3ff00000 { + return if hy > 0 { HUGE * HUGE } else { TINY * TINY }; + } + } + + /* over/underflow if x is not close to one */ + if ix < 0x3fefffff { + return if hy < 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; + } + if ix > 0x3ff00000 { + return if hy > 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; + } + + /* now |1-x| is TINY <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ + let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25)); + let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. bits */ + let v: f64 = t * IVLN2_L - w * IVLN2; + t1 = with_set_low_word(u + v, 0); + t2 = v - (t1 - u); + } else { + // double ss,s2,s_h,s_l,t_h,t_l; + let mut n: i32 = 0; + + if ix < 0x00100000 { + /* take care subnormal number */ + ax *= TWO53; + n -= 53; + ix = get_high_word(ax) as i32; + } + + n += (ix >> 20) - 0x3ff; + j = ix & 0x000fffff; + + /* determine interval */ + let k: i32; + ix = j | 0x3ff00000; /* normalize ix */ + if j <= 0x3988E { + /* |x|<sqrt(3/2) */ + k = 0; + } else if j < 0xBB67A { + /* |x|<sqrt(3) */ + k = 1; + } else { + k = 0; + n += 1; + ix -= 0x00100000; + } + ax = with_set_high_word(ax, ix as u32); + + /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ + let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + let v: f64 = 1.0 / (ax + i!(BP, k as usize)); + let ss: f64 = u * v; + let s_h = with_set_low_word(ss, 0); + + /* t_h=ax+bp[k] High */ + let t_h: f64 = with_set_high_word( + 0.0, + ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18), + ); + let t_l: f64 = ax - (t_h - i!(BP, k as usize)); + let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l); + + /* compute log(ax) */ + let s2: f64 = ss * ss; + let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); + r += s_l * (s_h + ss); + let s2: f64 = s_h * s_h; + let t_h: f64 = with_set_low_word(3.0 + s2 + r, 0); + let t_l: f64 = r - ((t_h - 3.0) - s2); + + /* u+v = ss*(1+...) */ + let u: f64 = s_h * t_h; + let v: f64 = s_l * t_h + t_l * ss; + + /* 2/(3log2)*(ss+...) */ + let p_h: f64 = with_set_low_word(u + v, 0); + let p_l = v - (p_h - u); + let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ + let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); + + /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + let t: f64 = n as f64; + t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); + } + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + let y1: f64 = with_set_low_word(y, 0); + let p_l: f64 = (y - y1) * t1 + y * t2; + let mut p_h: f64 = y1 * t1; + let z: f64 = p_l + p_h; + let mut j: i32 = (z.to_bits() >> 32) as i32; + let i: i32 = z.to_bits() as i32; + // let (j, i): (i32, i32) = ((z.to_bits() >> 32) as i32, z.to_bits() as i32); + + if j >= 0x40900000 { + /* z >= 1024 */ + if (j - 0x40900000) | i != 0 { + /* if z > 1024 */ + return s * HUGE * HUGE; /* overflow */ + } + + if p_l + OVT > z - p_h { + return s * HUGE * HUGE; /* overflow */ + } + } else if (j & 0x7fffffff) >= 0x4090cc00 { + /* z <= -1075 */ + // FIXME: instead of abs(j) use unsigned j + + if (((j as u32) - 0xc090cc00) | (i as u32)) != 0 { + /* z < -1075 */ + return s * TINY * TINY; /* underflow */ + } + + if p_l <= z - p_h { + return s * TINY * TINY; /* underflow */ + } + } + + /* compute 2**(p_h+p_l) */ + let i: i32 = j & (0x7fffffff as i32); + k = (i >> 20) - 0x3ff; + let mut n: i32 = 0; + + if i > 0x3fe00000 { + /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00100000 >> (k + 1)); + k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */ + let t: f64 = with_set_high_word(0.0, (n & !(0x000fffff >> k)) as u32); + n = ((n & 0x000fffff) | 0x00100000) >> (20 - k); + if j < 0 { + n = -n; + } + p_h -= t; + } + + let t: f64 = with_set_low_word(p_l + p_h, 0); + let u: f64 = t * LG2_H; + let v: f64 = (p_l - (t - p_h)) * LG2 + t * LG2_L; + let mut z: f64 = u + v; + let w: f64 = v - (z - u); + let t: f64 = z * z; + let t1: f64 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); + let r: f64 = (z * t1) / (t1 - 2.0) - (w + z * w); + z = 1.0 - (r - z); + j = get_high_word(z) as i32; + j += n << 20; + + if (j >> 20) <= 0 { + /* subnormal output */ + z = scalbn(z, n); + } else { + z = with_set_high_word(z, j as u32); + } + + s * z +} + +#[cfg(test)] +mod tests { + extern crate core; + + use self::core::f64::consts::{E, PI}; + use self::core::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY}; + use super::pow; + + const POS_ZERO: &[f64] = &[0.0]; + const NEG_ZERO: &[f64] = &[-0.0]; + const POS_ONE: &[f64] = &[1.0]; + const NEG_ONE: &[f64] = &[-1.0]; + const POS_FLOATS: &[f64] = &[99.0 / 70.0, E, PI]; + const NEG_FLOATS: &[f64] = &[-99.0 / 70.0, -E, -PI]; + const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), MIN_POSITIVE, EPSILON]; + const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -MIN_POSITIVE, -EPSILON]; + const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, MAX]; + const NEG_EVENS: &[f64] = &[MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0]; + const POS_ODDS: &[f64] = &[3.0, 7.0]; + const NEG_ODDS: &[f64] = &[-7.0, -3.0]; + const NANS: &[f64] = &[NAN]; + const POS_INF: &[f64] = &[INFINITY]; + const NEG_INF: &[f64] = &[NEG_INFINITY]; + + const ALL: &[&[f64]] = &[ + POS_ZERO, + NEG_ZERO, + NANS, + NEG_SMALL_FLOATS, + POS_SMALL_FLOATS, + NEG_FLOATS, + POS_FLOATS, + NEG_EVENS, + POS_EVENS, + NEG_ODDS, + POS_ODDS, + NEG_INF, + POS_INF, + NEG_ONE, + POS_ONE, + ]; + const POS: &[&[f64]] = &[POS_ZERO, POS_ODDS, POS_ONE, POS_FLOATS, POS_EVENS, POS_INF]; + const NEG: &[&[f64]] = &[NEG_ZERO, NEG_ODDS, NEG_ONE, NEG_FLOATS, NEG_EVENS, NEG_INF]; + + fn pow_test(base: f64, exponent: f64, expected: f64) { + let res = pow(base, exponent); + assert!( + if expected.is_nan() { + res.is_nan() + } else { + pow(base, exponent) == expected + }, + "{} ** {} was {} instead of {}", + base, + exponent, + res, + expected + ); + } + + fn test_sets_as_base(sets: &[&[f64]], exponent: f64, expected: f64) { + sets.iter() + .for_each(|s| s.iter().for_each(|val| pow_test(*val, exponent, expected))); + } + + fn test_sets_as_exponent(base: f64, sets: &[&[f64]], expected: f64) { + sets.iter() + .for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); + } + + fn test_sets(sets: &[&[f64]], computed: &dyn Fn(f64) -> f64, expected: &dyn Fn(f64) -> f64) { + sets.iter().for_each(|s| { + s.iter().for_each(|val| { + let exp = expected(*val); + let res = computed(*val); + + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let exp = force_eval!(exp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let res = force_eval!(res); + assert!( + if exp.is_nan() { + res.is_nan() + } else { + exp == res + }, + "test for {} was {} instead of {}", + val, + res, + exp + ); + }) + }); + } + + #[test] + fn zero_as_exponent() { + test_sets_as_base(ALL, 0.0, 1.0); + test_sets_as_base(ALL, -0.0, 1.0); + } + + #[test] + fn one_as_base() { + test_sets_as_exponent(1.0, ALL, 1.0); + } + + #[test] + fn nan_inputs() { + // NAN as the base: + // (NAN ^ anything *but 0* should be NAN) + test_sets_as_exponent(NAN, &ALL[2..], NAN); + + // NAN as the exponent: + // (anything *but 1* ^ NAN should be NAN) + test_sets_as_base(&ALL[..(ALL.len() - 2)], NAN, NAN); + } + + #[test] + fn infinity_as_base() { + // Positive Infinity as the base: + // (+Infinity ^ positive anything but 0 and NAN should be +Infinity) + test_sets_as_exponent(INFINITY, &POS[1..], INFINITY); + + // (+Infinity ^ negative anything except 0 and NAN should be 0.0) + test_sets_as_exponent(INFINITY, &NEG[1..], 0.0); + + // Negative Infinity as the base: + // (-Infinity ^ positive odd ints should be -Infinity) + test_sets_as_exponent(NEG_INFINITY, &[POS_ODDS], NEG_INFINITY); + + // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything)) + // We can lump in pos/neg odd ints here because they don't seem to + // cause panics (div by zero) in release mode (I think). + test_sets(ALL, &|v: f64| pow(NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); + } + + #[test] + fn infinity_as_exponent() { + // Positive/Negative base greater than 1: + // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes NAN as the base) + test_sets_as_base(&ALL[5..(ALL.len() - 2)], INFINITY, INFINITY); + + // (pos/neg > 1 ^ -Infinity should be 0.0) + test_sets_as_base(&ALL[5..ALL.len() - 2], NEG_INFINITY, 0.0); + + // Positive/Negative base less than 1: + let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS]; + + // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes NAN as the base) + test_sets_as_base(base_below_one, INFINITY, 0.0); + + // (pos/neg < 1 ^ -Infinity should be Infinity) + test_sets_as_base(base_below_one, NEG_INFINITY, INFINITY); + + // Positive/Negative 1 as the base: + // (pos/neg 1 ^ Infinity should be 1) + test_sets_as_base(&[NEG_ONE, POS_ONE], INFINITY, 1.0); + + // (pos/neg 1 ^ -Infinity should be 1) + test_sets_as_base(&[NEG_ONE, POS_ONE], NEG_INFINITY, 1.0); + } + + #[test] + fn zero_as_base() { + // Positive Zero as the base: + // (+0 ^ anything positive but 0 and NAN should be +0) + test_sets_as_exponent(0.0, &POS[1..], 0.0); + + // (+0 ^ anything negative but 0 and NAN should be Infinity) + // (this should panic because we're dividing by zero) + test_sets_as_exponent(0.0, &NEG[1..], INFINITY); + + // Negative Zero as the base: + // (-0 ^ anything positive but 0, NAN, and odd ints should be +0) + test_sets_as_exponent(-0.0, &POS[3..], 0.0); + + // (-0 ^ anything negative but 0, NAN, and odd ints should be Infinity) + // (should panic because of divide by zero) + test_sets_as_exponent(-0.0, &NEG[3..], INFINITY); + + // (-0 ^ positive odd ints should be -0) + test_sets_as_exponent(-0.0, &[POS_ODDS], -0.0); + + // (-0 ^ negative odd ints should be -Infinity) + // (should panic because of divide by zero) + test_sets_as_exponent(-0.0, &[NEG_ODDS], NEG_INFINITY); + } + + #[test] + fn special_cases() { + // One as the exponent: + // (anything ^ 1 should be anything - i.e. the base) + test_sets(ALL, &|v: f64| pow(v, 1.0), &|v: f64| v); + + // Negative One as the exponent: + // (anything ^ -1 should be 1/anything) + test_sets(ALL, &|v: f64| pow(v, -1.0), &|v: f64| 1.0 / v); + + // Factoring -1 out: + // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) + (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]) + .iter() + .for_each(|int_set| { + int_set.iter().for_each(|int| { + test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| { + pow(-1.0, *int) * pow(v, *int) + }); + }) + }); + + // Negative base (imaginary results): + // (-anything except 0 and Infinity ^ non-integer should be NAN) + (&NEG[1..(NEG.len() - 1)]).iter().for_each(|set| { + set.iter().for_each(|val| { + test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN); + }) + }); + } + + #[test] + fn normal_cases() { + assert_eq!(pow(2.0, 20.0), (1 << 20) as f64); + assert_eq!(pow(-1.0, 9.0), -1.0); + assert!(pow(-1.0, 2.2).is_nan()); + assert!(pow(-1.0, -1.14).is_nan()); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/powf.rs b/vendor/compiler_builtins/libm/src/math/powf.rs new file mode 100644 index 000000000..68d2083bb --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/powf.rs @@ -0,0 +1,342 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{fabsf, scalbnf, sqrtf}; + +const BP: [f32; 2] = [1.0, 1.5]; +const DP_H: [f32; 2] = [0.0, 5.84960938e-01]; /* 0x3f15c000 */ +const DP_L: [f32; 2] = [0.0, 1.56322085e-06]; /* 0x35d1cfdc */ +const TWO24: f32 = 16777216.0; /* 0x4b800000 */ +const HUGE: f32 = 1.0e30; +const TINY: f32 = 1.0e-30; +const L1: f32 = 6.0000002384e-01; /* 0x3f19999a */ +const L2: f32 = 4.2857143283e-01; /* 0x3edb6db7 */ +const L3: f32 = 3.3333334327e-01; /* 0x3eaaaaab */ +const L4: f32 = 2.7272811532e-01; /* 0x3e8ba305 */ +const L5: f32 = 2.3066075146e-01; /* 0x3e6c3255 */ +const L6: f32 = 2.0697501302e-01; /* 0x3e53f142 */ +const P1: f32 = 1.6666667163e-01; /* 0x3e2aaaab */ +const P2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ +const P3: f32 = 6.6137559770e-05; /* 0x388ab355 */ +const P4: f32 = -1.6533901999e-06; /* 0xb5ddea0e */ +const P5: f32 = 4.1381369442e-08; /* 0x3331bb4c */ +const LG2: f32 = 6.9314718246e-01; /* 0x3f317218 */ +const LG2_H: f32 = 6.93145752e-01; /* 0x3f317200 */ +const LG2_L: f32 = 1.42860654e-06; /* 0x35bfbe8c */ +const OVT: f32 = 4.2995665694e-08; /* -(128-log2(ovfl+.5ulp)) */ +const CP: f32 = 9.6179670095e-01; /* 0x3f76384f =2/(3ln2) */ +const CP_H: f32 = 9.6191406250e-01; /* 0x3f764000 =12b cp */ +const CP_L: f32 = -1.1736857402e-04; /* 0xb8f623c6 =tail of cp_h */ +const IVLN2: f32 = 1.4426950216e+00; +const IVLN2_H: f32 = 1.4426879883e+00; +const IVLN2_L: f32 = 7.0526075433e-06; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn powf(x: f32, y: f32) -> f32 { + let mut z: f32; + let mut ax: f32; + let z_h: f32; + let z_l: f32; + let mut p_h: f32; + let mut p_l: f32; + let y1: f32; + let mut t1: f32; + let t2: f32; + let mut r: f32; + let s: f32; + let mut sn: f32; + let mut t: f32; + let mut u: f32; + let mut v: f32; + let mut w: f32; + let i: i32; + let mut j: i32; + let mut k: i32; + let mut yisint: i32; + let mut n: i32; + let hx: i32; + let hy: i32; + let mut ix: i32; + let iy: i32; + let mut is: i32; + + hx = x.to_bits() as i32; + hy = y.to_bits() as i32; + + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x**0 = 1, even if x is NaN */ + if iy == 0 { + return 1.0; + } + + /* 1**y = 1, even if y is NaN */ + if hx == 0x3f800000 { + return 1.0; + } + + /* NaN if either arg is NaN */ + if ix > 0x7f800000 || iy > 0x7f800000 { + return x + y; + } + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + yisint = 0; + if hx < 0 { + if iy >= 0x4b800000 { + yisint = 2; /* even integer y */ + } else if iy >= 0x3f800000 { + k = (iy >> 23) - 0x7f; /* exponent */ + j = iy >> (23 - k); + if (j << (23 - k)) == iy { + yisint = 2 - (j & 1); + } + } + } + + /* special value of y */ + if iy == 0x7f800000 { + /* y is +-inf */ + if ix == 0x3f800000 { + /* (-1)**+-inf is 1 */ + return 1.0; + } else if ix > 0x3f800000 { + /* (|x|>1)**+-inf = inf,0 */ + return if hy >= 0 { y } else { 0.0 }; + } else { + /* (|x|<1)**+-inf = 0,inf */ + return if hy >= 0 { 0.0 } else { -y }; + } + } + if iy == 0x3f800000 { + /* y is +-1 */ + return if hy >= 0 { x } else { 1.0 / x }; + } + + if hy == 0x40000000 { + /* y is 2 */ + return x * x; + } + + if hy == 0x3f000000 + /* y is 0.5 */ + && hx >= 0 + { + /* x >= +0 */ + return sqrtf(x); + } + + ax = fabsf(x); + /* special value of x */ + if ix == 0x7f800000 || ix == 0 || ix == 0x3f800000 { + /* x is +-0,+-inf,+-1 */ + z = ax; + if hy < 0 { + /* z = (1/|x|) */ + z = 1.0 / z; + } + + if hx < 0 { + if ((ix - 0x3f800000) | yisint) == 0 { + z = (z - z) / (z - z); /* (-1)**non-int is NaN */ + } else if yisint == 1 { + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + } + return z; + } + + sn = 1.0; /* sign of result */ + if hx < 0 { + if yisint == 0 { + /* (x<0)**(non-int) is NaN */ + return (x - x) / (x - x); + } + + if yisint == 1 { + /* (x<0)**(odd int) */ + sn = -1.0; + } + } + + /* |y| is HUGE */ + if iy > 0x4d000000 { + /* if |y| > 2**27 */ + /* over/underflow if x is not close to one */ + if ix < 0x3f7ffff8 { + return if hy < 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; + } + + if ix > 0x3f800007 { + return if hy > 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; + } + + /* now |1-x| is TINY <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + t = ax - 1.; /* t has 20 trailing zeros */ + w = (t * t) * (0.5 - t * (0.333333333333 - t * 0.25)); + u = IVLN2_H * t; /* IVLN2_H has 16 sig. bits */ + v = t * IVLN2_L - w * IVLN2; + t1 = u + v; + is = t1.to_bits() as i32; + t1 = f32::from_bits(is as u32 & 0xfffff000); + t2 = v - (t1 - u); + } else { + let mut s2: f32; + let mut s_h: f32; + let s_l: f32; + let mut t_h: f32; + let mut t_l: f32; + + n = 0; + /* take care subnormal number */ + if ix < 0x00800000 { + ax *= TWO24; + n -= 24; + ix = ax.to_bits() as i32; + } + n += ((ix) >> 23) - 0x7f; + j = ix & 0x007fffff; + /* determine interval */ + ix = j | 0x3f800000; /* normalize ix */ + if j <= 0x1cc471 { + /* |x|<sqrt(3/2) */ + k = 0; + } else if j < 0x5db3d7 { + /* |x|<sqrt(3) */ + k = 1; + } else { + k = 0; + n += 1; + ix -= 0x00800000; + } + ax = f32::from_bits(ix as u32); + + /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ + u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + v = 1.0 / (ax + i!(BP, k as usize)); + s = u * v; + s_h = s; + is = s_h.to_bits() as i32; + s_h = f32::from_bits(is as u32 & 0xfffff000); + /* t_h=ax+bp[k] High */ + is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32; + t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21)); + t_l = ax - (t_h - i!(BP, k as usize)); + s_l = v * ((u - s_h * t_h) - s_h * t_l); + /* compute log(ax) */ + s2 = s * s; + r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); + r += s_l * (s_h + s); + s2 = s_h * s_h; + t_h = 3.0 + s2 + r; + is = t_h.to_bits() as i32; + t_h = f32::from_bits(is as u32 & 0xfffff000); + t_l = r - ((t_h - 3.0) - s2); + /* u+v = s*(1+...) */ + u = s_h * t_h; + v = s_l * t_h + t_l * s; + /* 2/(3log2)*(s+...) */ + p_h = u + v; + is = p_h.to_bits() as i32; + p_h = f32::from_bits(is as u32 & 0xfffff000); + p_l = v - (p_h - u); + z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ + z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); + /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + t = n as f32; + t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t; + is = t1.to_bits() as i32; + t1 = f32::from_bits(is as u32 & 0xfffff000); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); + }; + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + is = y.to_bits() as i32; + y1 = f32::from_bits(is as u32 & 0xfffff000); + p_l = (y - y1) * t1 + y * t2; + p_h = y1 * t1; + z = p_l + p_h; + j = z.to_bits() as i32; + if j > 0x43000000 { + /* if z > 128 */ + return sn * HUGE * HUGE; /* overflow */ + } else if j == 0x43000000 { + /* if z == 128 */ + if p_l + OVT > z - p_h { + return sn * HUGE * HUGE; /* overflow */ + } + } else if (j & 0x7fffffff) > 0x43160000 { + /* z < -150 */ + // FIXME: check should be (uint32_t)j > 0xc3160000 + return sn * TINY * TINY; /* underflow */ + } else if j as u32 == 0xc3160000 + /* z == -150 */ + && p_l <= z - p_h + { + return sn * TINY * TINY; /* underflow */ + } + + /* + * compute 2**(p_h+p_l) + */ + i = j & 0x7fffffff; + k = (i >> 23) - 0x7f; + n = 0; + if i > 0x3f000000 { + /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00800000 >> (k + 1)); + k = ((n & 0x7fffffff) >> 23) - 0x7f; /* new k for n */ + t = f32::from_bits(n as u32 & !(0x007fffff >> k)); + n = ((n & 0x007fffff) | 0x00800000) >> (23 - k); + if j < 0 { + n = -n; + } + p_h -= t; + } + t = p_l + p_h; + is = t.to_bits() as i32; + t = f32::from_bits(is as u32 & 0xffff8000); + u = t * LG2_H; + v = (p_l - (t - p_h)) * LG2 + t * LG2_L; + z = u + v; + w = v - (z - u); + t = z * z; + t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); + r = (z * t1) / (t1 - 2.0) - (w + z * w); + z = 1.0 - (r - z); + j = z.to_bits() as i32; + j += n << 23; + if (j >> 23) <= 0 { + /* subnormal output */ + z = scalbnf(z, n); + } else { + z = f32::from_bits(j as u32); + } + sn * z +} diff --git a/vendor/compiler_builtins/libm/src/math/rem_pio2.rs b/vendor/compiler_builtins/libm/src/math/rem_pio2.rs new file mode 100644 index 000000000..644616f2d --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/rem_pio2.rs @@ -0,0 +1,233 @@ +// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// Optimized by Bruce D. Evans. */ +use super::rem_pio2_large; + +// #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 +// #define EPS DBL_EPSILON +const EPS: f64 = 2.2204460492503131e-16; +// #elif FLT_EVAL_METHOD==2 +// #define EPS LDBL_EPSILON +// #endif + +// TODO: Support FLT_EVAL_METHOD? + +const TO_INT: f64 = 1.5 / EPS; +/// 53 bits of 2/pi +const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +/// first 33 bits of pi/2 +const PIO2_1: f64 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */ +/// pi/2 - PIO2_1 +const PIO2_1T: f64 = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ +/// second 33 bits of pi/2 +const PIO2_2: f64 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */ +/// pi/2 - (PIO2_1+PIO2_2) +const PIO2_2T: f64 = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */ +/// third 33 bits of pi/2 +const PIO2_3: f64 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */ +/// pi/2 - (PIO2_1+PIO2_2+PIO2_3) +const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ + +// return the remainder of x rem pi/2 in y[0]+y[1] +// use rem_pio2_large() for large x +// +// caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { + let x1p24 = f64::from_bits(0x4170000000000000); + + let sign = (f64::to_bits(x) >> 63) as i32; + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + fn medium(x: f64, ix: u32) -> (i32, f64, f64) { + /* rint(x/(pi/2)), Assume round-to-nearest. */ + let tmp = x as f64 * INV_PIO2 + TO_INT; + // force rounding of tmp to it's storage format on x87 to avoid + // excess precision issues. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let tmp = force_eval!(tmp); + let f_n = tmp - TO_INT; + let n = f_n as i32; + let mut r = x - f_n * PIO2_1; + let mut w = f_n * PIO2_1T; /* 1st round, good to 85 bits */ + let mut y0 = r - w; + let ui = f64::to_bits(y0); + let ey = (ui >> 52) as i32 & 0x7ff; + let ex = (ix >> 20) as i32; + if ex - ey > 16 { + /* 2nd round, good to 118 bits */ + let t = r; + w = f_n * PIO2_2; + r = t - w; + w = f_n * PIO2_2T - ((t - r) - w); + y0 = r - w; + let ey = (f64::to_bits(y0) >> 52) as i32 & 0x7ff; + if ex - ey > 49 { + /* 3rd round, good to 151 bits, covers all cases */ + let t = r; + w = f_n * PIO2_3; + r = t - w; + w = f_n * PIO2_3T - ((t - r) - w); + y0 = r - w; + } + } + let y1 = (r - y0) - w; + (n, y0, y1) + } + + if ix <= 0x400f6a7a { + /* |x| ~<= 5pi/4 */ + if (ix & 0xfffff) == 0x921fb { + /* |x| ~= pi/2 or 2pi/2 */ + return medium(x, ix); /* cancellation -- use medium case */ + } + if ix <= 0x4002d97c { + /* |x| ~<= 3pi/4 */ + if sign == 0 { + let z = x - PIO2_1; /* one round good to 85 bits */ + let y0 = z - PIO2_1T; + let y1 = (z - y0) - PIO2_1T; + return (1, y0, y1); + } else { + let z = x + PIO2_1; + let y0 = z + PIO2_1T; + let y1 = (z - y0) + PIO2_1T; + return (-1, y0, y1); + } + } else if sign == 0 { + let z = x - 2.0 * PIO2_1; + let y0 = z - 2.0 * PIO2_1T; + let y1 = (z - y0) - 2.0 * PIO2_1T; + return (2, y0, y1); + } else { + let z = x + 2.0 * PIO2_1; + let y0 = z + 2.0 * PIO2_1T; + let y1 = (z - y0) + 2.0 * PIO2_1T; + return (-2, y0, y1); + } + } + if ix <= 0x401c463b { + /* |x| ~<= 9pi/4 */ + if ix <= 0x4015fdbc { + /* |x| ~<= 7pi/4 */ + if ix == 0x4012d97c { + /* |x| ~= 3pi/2 */ + return medium(x, ix); + } + if sign == 0 { + let z = x - 3.0 * PIO2_1; + let y0 = z - 3.0 * PIO2_1T; + let y1 = (z - y0) - 3.0 * PIO2_1T; + return (3, y0, y1); + } else { + let z = x + 3.0 * PIO2_1; + let y0 = z + 3.0 * PIO2_1T; + let y1 = (z - y0) + 3.0 * PIO2_1T; + return (-3, y0, y1); + } + } else { + if ix == 0x401921fb { + /* |x| ~= 4pi/2 */ + return medium(x, ix); + } + if sign == 0 { + let z = x - 4.0 * PIO2_1; + let y0 = z - 4.0 * PIO2_1T; + let y1 = (z - y0) - 4.0 * PIO2_1T; + return (4, y0, y1); + } else { + let z = x + 4.0 * PIO2_1; + let y0 = z + 4.0 * PIO2_1T; + let y1 = (z - y0) + 4.0 * PIO2_1T; + return (-4, y0, y1); + } + } + } + if ix < 0x413921fb { + /* |x| ~< 2^20*(pi/2), medium size */ + return medium(x, ix); + } + /* + * all other (large) arguments + */ + if ix >= 0x7ff00000 { + /* x is inf or NaN */ + let y0 = x - x; + let y1 = y0; + return (0, y0, y1); + } + /* set z = scalbn(|x|,-ilogb(x)+23) */ + let mut ui = f64::to_bits(x); + ui &= (!1) >> 12; + ui |= (0x3ff + 23) << 52; + let mut z = f64::from_bits(ui); + let mut tx = [0.0; 3]; + for i in 0..2 { + i!(tx,i, =, z as i32 as f64); + z = (z - i!(tx, i)) * x1p24; + } + i!(tx,2, =, z); + /* skip zero terms, first term is non-zero */ + let mut i = 2; + while i != 0 && i!(tx, i) == 0.0 { + i -= 1; + } + let mut ty = [0.0; 3]; + let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1); + if sign != 0 { + return (-n, -i!(ty, 0), -i!(ty, 1)); + } + (n, i!(ty, 0), i!(ty, 1)) +} + +#[cfg(test)] +mod tests { + use super::rem_pio2; + + #[test] + fn test_near_pi() { + let arg = 3.141592025756836; + let arg = force_eval!(arg); + assert_eq!( + rem_pio2(arg), + (2, -6.278329573009626e-7, -2.1125998133974653e-23) + ); + let arg = 3.141592033207416; + let arg = force_eval!(arg); + assert_eq!( + rem_pio2(arg), + (2, -6.20382377148128e-7, -2.1125998133974653e-23) + ); + let arg = 3.141592144966125; + let arg = force_eval!(arg); + assert_eq!( + rem_pio2(arg), + (2, -5.086236681942706e-7, -2.1125998133974653e-23) + ); + let arg = 3.141592979431152; + let arg = force_eval!(arg); + assert_eq!( + rem_pio2(arg), + (2, 3.2584135866119817e-7, -2.1125998133974653e-23) + ); + } + + #[test] + fn test_overflow_b9b847() { + let _ = rem_pio2(-3054214.5490637687); + } + + #[test] + fn test_overflow_4747b9() { + let _ = rem_pio2(917340800458.2274); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/rem_pio2_large.rs b/vendor/compiler_builtins/libm/src/math/rem_pio2_large.rs new file mode 100644 index 000000000..65473f0ab --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/rem_pio2_large.rs @@ -0,0 +1,470 @@ +#![allow(unused_unsafe)] +/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::floor; +use super::scalbn; + +// initial value for jk +const INIT_JK: [usize; 4] = [3, 4, 4, 6]; + +// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi +// +// integer array, contains the (24*i)-th to (24*i+23)-th +// bit of 2/pi after binary point. The corresponding +// floating value is +// +// ipio2[i] * 2^(-24(i+1)). +// +// NB: This table must have at least (e0-3)/24 + jk terms. +// For quad precision (e0 <= 16360, jk = 6), this is 686. +#[cfg(target_pointer_width = "32")] +const IPIO2: [i32; 66] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, + 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, + 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, + 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, + 0x73A8C9, 0x60E27B, 0xC08C6B, +]; + +#[cfg(target_pointer_width = "64")] +const IPIO2: [i32; 690] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, + 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, + 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, + 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, + 0x73A8C9, 0x60E27B, 0xC08C6B, 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, 0xDE4F98, 0x327DBB, 0xC33D26, + 0xEF6B1E, 0x5EF89F, 0x3A1F35, 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, 0x467D86, 0x2D71E3, 0x9AC69B, + 0x006233, 0x7CD2B4, 0x97A7B4, 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, 0xCB2324, 0x778AD6, 0x23545A, + 0xB91F00, 0x1B0AF1, 0xDFCE19, 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, 0xDE3B58, 0x929BDE, 0x2822D2, + 0xE88628, 0x4D58E2, 0x32CAC6, 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, 0xD36710, 0xD8DDAA, 0x425FAE, + 0xCE616A, 0xA4280A, 0xB499D3, 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, 0x36D9CA, 0xD2A828, 0x8D61C2, + 0x77C912, 0x142604, 0x9B4612, 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, 0xC3E7B3, 0x28F8C7, 0x940593, + 0x3E71C1, 0xB3092E, 0xF3450B, 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, 0x9794E8, 0x84E6E2, 0x973199, + 0x6BED88, 0x365F5F, 0x0EFDBB, 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, 0x90AA47, 0x02E774, 0x24D6BD, + 0xA67DF7, 0x72486E, 0xEF169F, 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, 0x10D86D, 0x324832, 0x754C5B, + 0xD4714E, 0x6E5445, 0xC1090B, 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, 0x6AE290, 0x89D988, 0x50722C, + 0xBEA404, 0x940777, 0x7030F3, 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, 0x3BDF08, 0x2B3715, 0xA0805C, + 0x93805A, 0x921110, 0xD8E80F, 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, 0xAA140A, 0x2F2689, 0x768364, + 0x333B09, 0x1A940E, 0xAA3A51, 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, 0x5BC3D8, 0xC492F5, 0x4BADC6, + 0xA5CA4E, 0xCD37A7, 0x36A9E6, 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, 0x306529, 0xBF5657, 0x3AFF47, + 0xB9F96A, 0xF3BE75, 0xDF9328, 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, 0xA8654F, 0xA5C1D2, 0x0F3F0B, + 0xCD785B, 0x76F923, 0x048B7B, 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, 0xDA4886, 0xA05DF7, 0xF480C6, + 0x2FF0AC, 0x9AECDD, 0xBC5C3F, 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, 0x2A1216, 0x2DB7DC, 0xFDE5FA, + 0xFEDB89, 0xFDBE89, 0x6C76E4, 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, 0x48D784, 0x16DF30, 0x432DC7, + 0x356125, 0xCE70C9, 0xB8CB30, 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, 0xC4F133, 0x5F6E13, 0xE4305D, + 0xA92E85, 0xC3B21D, 0x3632A1, 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, 0xCBDA11, 0xD0BE7D, 0xC1DB9B, + 0xBD17AB, 0x81A2CA, 0x5C6A08, 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, 0x4F6A68, 0xA82A4A, 0x5AC44F, + 0xBCF82D, 0x985AD7, 0x95C7F4, 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, 0xD0C0B2, 0x485551, 0x0EFB1E, + 0xC37295, 0x3B06A3, 0x3540C0, 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, 0x3C3ABA, 0x461846, 0x5F7555, + 0xF5BDD2, 0xC6926E, 0x5D2EAC, 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, 0x745D7C, 0xB2AD6B, 0x9D6ECD, + 0x7B723E, 0x6A11C6, 0xA9CFF7, 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, 0xBEFDFD, 0xEF4556, 0x367ED9, + 0x13D9EC, 0xB9BA8B, 0xFC97C4, 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, 0x9C2A3E, 0xCC5F11, 0x4A0BFD, + 0xFBF4E1, 0x6D3B8E, 0x2C86E2, 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, 0xCC2254, 0xDC552A, 0xD6C6C0, + 0x96190B, 0xB8701A, 0x649569, 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, 0x9B5861, 0xBC57E1, 0xC68351, + 0x103ED8, 0x4871DD, 0xDD1C2D, 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, 0x382682, 0x9BE7CA, 0xA40D51, + 0xB13399, 0x0ED7A9, 0x480569, 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, 0x5FD45E, 0xA4677B, 0x7AACBA, + 0xA2F655, 0x23882B, 0x55BA41, 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, 0xAE5ADB, 0x86C547, 0x624385, + 0x3B8621, 0x94792C, 0x876110, 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, 0xB1933D, 0x0B7CBD, 0xDC51A4, + 0x63DD27, 0xDDE169, 0x19949A, 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, 0x4D7E6F, 0x5119A5, 0xABF9B5, + 0xD6DF82, 0x61DD96, 0x023616, 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, +]; + +const PIO2: [f64; 8] = [ + 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ + 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ + 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ + 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ + 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ + 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ + 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ + 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ +]; + +// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 +// +// Input parameters: +// x[] The input value (must be positive) is broken into nx +// pieces of 24-bit integers in double precision format. +// x[i] will be the i-th 24 bit of x. The scaled exponent +// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +// match x's up to 24 bits. +// +// Example of breaking a double positive z into x[0]+x[1]+x[2]: +// e0 = ilogb(z)-23 +// z = scalbn(z,-e0) +// for i = 0,1,2 +// x[i] = floor(z) +// z = (z-x[i])*2**24 +// +// y[] ouput result in an array of double precision numbers. +// The dimension of y[] is: +// 24-bit precision 1 +// 53-bit precision 2 +// 64-bit precision 2 +// 113-bit precision 3 +// The actual value is the sum of them. Thus for 113-bit +// precison, one may have to do something like: +// +// long double t,w,r_head, r_tail; +// t = (long double)y[2] + (long double)y[1]; +// w = (long double)y[0]; +// r_head = t+w; +// r_tail = w - (r_head - t); +// +// e0 The exponent of x[0]. Must be <= 16360 or you need to +// expand the ipio2 table. +// +// prec an integer indicating the precision: +// 0 24 bits (single) +// 1 53 bits (double) +// 2 64 bits (extended) +// 3 113 bits (quad) +// +// Here is the description of some local variables: +// +// jk jk+1 is the initial number of terms of ipio2[] needed +// in the computation. The minimum and recommended value +// for jk is 3,4,4,6 for single, double, extended, and quad. +// jk+1 must be 2 larger than you might expect so that our +// recomputation test works. (Up to 24 bits in the integer +// part (the 24 bits of it that we compute) and 23 bits in +// the fraction part may be lost to cancelation before we +// recompute.) +// +// jz local integer variable indicating the number of +// terms of ipio2[] used. +// +// jx nx - 1 +// +// jv index for pointing to the suitable ipio2[] for the +// computation. In general, we want +// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +// is an integer. Thus +// e0-3-24*jv >= 0 or (e0-3)/24 >= jv +// Hence jv = max(0,(e0-3)/24). +// +// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. +// +// q[] double array with integral value, representing the +// 24-bits chunk of the product of x and 2/pi. +// +// q0 the corresponding exponent of q[0]. Note that the +// exponent for q[i] would be q0-24*i. +// +// PIo2[] double precision array, obtained by cutting pi/2 +// into 24 bits chunks. +// +// f[] ipio2[] in floating point +// +// iq[] integer array by breaking up q[] in 24-bits chunk. +// +// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] +// +// ih integer. If >0 it indicates q[] is >= 0.5, hence +// it also indicates the *sign* of the result. + +/// Return the last three digits of N with y = x - N*pi/2 +/// so that |y| < pi/2. +/// +/// The method is to compute the integer (mod 8) and fraction parts of +/// (2/pi)*x without doing the full multiplication. In general we +/// skip the part of the product that are known to be a huge integer ( +/// more accurately, = 0 mod 8 ). Thus the number of operations are +/// independent of the exponent of the input. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { + let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 + let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) + + #[cfg(all(target_pointer_width = "64", feature = "checked"))] + assert!(e0 <= 16360); + + let nx = x.len(); + + let mut fw: f64; + let mut n: i32; + let mut ih: i32; + let mut z: f64; + let mut f: [f64; 20] = [0.; 20]; + let mut fq: [f64; 20] = [0.; 20]; + let mut q: [f64; 20] = [0.; 20]; + let mut iq: [i32; 20] = [0; 20]; + + /* initialize jk*/ + let jk = i!(INIT_JK, prec); + let jp = jk; + + /* determine jx,jv,q0, note that 3>q0 */ + let jx = nx - 1; + let mut jv = div!(e0 - 3, 24); + if jv < 0 { + jv = 0; + } + let mut q0 = e0 - 24 * (jv + 1); + let jv = jv as usize; + + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ + let mut j = (jv as i32) - (jx as i32); + let m = jx + jk; + for i in 0..=m { + i!(f, i, =, if j < 0 { + 0. + } else { + i!(IPIO2, j as usize) as f64 + }); + j += 1; + } + + /* compute q[0],q[1],...q[jk] */ + for i in 0..=jk { + fw = 0f64; + for j in 0..=jx { + fw += i!(x, j) * i!(f, jx + i - j); + } + i!(q, i, =, fw); + } + + let mut jz = jk; + + 'recompute: loop { + /* distill q[] into iq[] reversingly */ + let mut i = 0i32; + z = i!(q, jz); + for j in (1..=jz).rev() { + fw = (x1p_24 * z) as i32 as f64; + i!(iq, i as usize, =, (z - x1p24 * fw) as i32); + z = i!(q, j - 1) + fw; + i += 1; + } + + /* compute n */ + z = scalbn(z, q0); /* actual value of z */ + z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */ + n = z as i32; + z -= n as f64; + ih = 0; + if q0 > 0 { + /* need iq[jz-1] to determine n */ + i = i!(iq, jz - 1) >> (24 - q0); + n += i; + i!(iq, jz - 1, -=, i << (24 - q0)); + ih = i!(iq, jz - 1) >> (23 - q0); + } else if q0 == 0 { + ih = i!(iq, jz - 1) >> 23; + } else if z >= 0.5 { + ih = 2; + } + + if ih > 0 { + /* q > 0.5 */ + n += 1; + let mut carry = 0i32; + for i in 0..jz { + /* compute 1-q */ + let j = i!(iq, i); + if carry == 0 { + if j != 0 { + carry = 1; + i!(iq, i, =, 0x1000000 - j); + } + } else { + i!(iq, i, =, 0xffffff - j); + } + } + if q0 > 0 { + /* rare case: chance is 1 in 12 */ + match q0 { + 1 => { + i!(iq, jz - 1, &=, 0x7fffff); + } + 2 => { + i!(iq, jz - 1, &=, 0x3fffff); + } + _ => {} + } + } + if ih == 2 { + z = 1. - z; + if carry != 0 { + z -= scalbn(1., q0); + } + } + } + + /* check if recomputation is needed */ + if z == 0. { + let mut j = 0; + for i in (jk..=jz - 1).rev() { + j |= i!(iq, i); + } + if j == 0 { + /* need recomputation */ + let mut k = 1; + while i!(iq, jk - k, ==, 0) { + k += 1; /* k = no. of terms needed */ + } + + for i in (jz + 1)..=(jz + k) { + /* add q[jz+1] to q[jz+k] */ + i!(f, jx + i, =, i!(IPIO2, jv + i) as f64); + fw = 0f64; + for j in 0..=jx { + fw += i!(x, j) * i!(f, jx + i - j); + } + i!(q, i, =, fw); + } + jz += k; + continue 'recompute; + } + } + + break; + } + + /* chop off zero terms */ + if z == 0. { + jz -= 1; + q0 -= 24; + while i!(iq, jz) == 0 { + jz -= 1; + q0 -= 24; + } + } else { + /* break z into 24-bit if necessary */ + z = scalbn(z, -q0); + if z >= x1p24 { + fw = (x1p_24 * z) as i32 as f64; + i!(iq, jz, =, (z - x1p24 * fw) as i32); + jz += 1; + q0 += 24; + i!(iq, jz, =, fw as i32); + } else { + i!(iq, jz, =, z as i32); + } + } + + /* convert integer "bit" chunk to floating-point value */ + fw = scalbn(1., q0); + for i in (0..=jz).rev() { + i!(q, i, =, fw * (i!(iq, i) as f64)); + fw *= x1p_24; + } + + /* compute PIo2[0,...,jp]*q[jz,...,0] */ + for i in (0..=jz).rev() { + fw = 0f64; + let mut k = 0; + while (k <= jp) && (k <= jz - i) { + fw += i!(PIO2, k) * i!(q, i + k); + k += 1; + } + i!(fq, jz - i, =, fw); + } + + /* compress fq[] into y[] */ + match prec { + 0 => { + fw = 0f64; + for i in (0..=jz).rev() { + fw += i!(fq, i); + } + i!(y, 0, =, if ih == 0 { fw } else { -fw }); + } + 1 | 2 => { + fw = 0f64; + for i in (0..=jz).rev() { + fw += i!(fq, i); + } + // TODO: drop excess precision here once double_t is used + fw = fw as f64; + i!(y, 0, =, if ih == 0 { fw } else { -fw }); + fw = i!(fq, 0) - fw; + for i in 1..=jz { + fw += i!(fq, i); + } + i!(y, 1, =, if ih == 0 { fw } else { -fw }); + } + 3 => { + /* painful */ + for i in (1..=jz).rev() { + fw = i!(fq, i - 1) + i!(fq, i); + i!(fq, i, +=, i!(fq, i - 1) - fw); + i!(fq, i - 1, =, fw); + } + for i in (2..=jz).rev() { + fw = i!(fq, i - 1) + i!(fq, i); + i!(fq, i, +=, i!(fq, i - 1) - fw); + i!(fq, i - 1, =, fw); + } + fw = 0f64; + for i in (2..=jz).rev() { + fw += i!(fq, i); + } + if ih == 0 { + i!(y, 0, =, i!(fq, 0)); + i!(y, 1, =, i!(fq, 1)); + i!(y, 2, =, fw); + } else { + i!(y, 0, =, -i!(fq, 0)); + i!(y, 1, =, -i!(fq, 1)); + i!(y, 2, =, -fw); + } + } + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => {} + } + n & 7 +} diff --git a/vendor/compiler_builtins/libm/src/math/rem_pio2f.rs b/vendor/compiler_builtins/libm/src/math/rem_pio2f.rs new file mode 100644 index 000000000..775f5d750 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/rem_pio2f.rs @@ -0,0 +1,67 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::rem_pio2_large; + +use core::f64; + +const TOINT: f64 = 1.5 / f64::EPSILON; + +/// 53 bits of 2/pi +const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +/// first 25 bits of pi/2 +const PIO2_1: f64 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ +/// pi/2 - pio2_1 +const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ + +/// Return the remainder of x rem pi/2 in *y +/// +/// use double precision for everything except passing x +/// use __rem_pio2_large() for large x +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { + let x64 = x as f64; + + let mut tx: [f64; 1] = [0.]; + let mut ty: [f64; 1] = [0.]; + + let ix = x.to_bits() & 0x7fffffff; + /* 25+53 bit pi is good enough for medium size */ + if ix < 0x4dc90fdb { + /* |x| ~< 2^28*(pi/2), medium size */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + let tmp = x64 * INV_PIO2 + TOINT; + // force rounding of tmp to it's storage format on x87 to avoid + // excess precision issues. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let tmp = force_eval!(tmp); + let f_n = tmp - TOINT; + return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); + } + if ix >= 0x7f800000 { + /* x is inf or NaN */ + return (0, x64 - x64); + } + /* scale x into [2^23, 2^24-1] */ + let sign = (x.to_bits() >> 31) != 0; + let e0 = ((ix >> 23) - (0x7f + 23)) as i32; /* e0 = ilogb(|x|)-23, positive */ + tx[0] = f32::from_bits(ix - (e0 << 23) as u32) as f64; + let n = rem_pio2_large(&tx, &mut ty, e0, 0); + if sign { + return (-n, -ty[0]); + } + (n, ty[0]) +} diff --git a/vendor/compiler_builtins/libm/src/math/remainder.rs b/vendor/compiler_builtins/libm/src/math/remainder.rs new file mode 100644 index 000000000..9e966c9ed --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/remainder.rs @@ -0,0 +1,5 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remainder(x: f64, y: f64) -> f64 { + let (result, _) = super::remquo(x, y); + result +} diff --git a/vendor/compiler_builtins/libm/src/math/remainderf.rs b/vendor/compiler_builtins/libm/src/math/remainderf.rs new file mode 100644 index 000000000..b1407cf2a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/remainderf.rs @@ -0,0 +1,5 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remainderf(x: f32, y: f32) -> f32 { + let (result, _) = super::remquof(x, y); + result +} diff --git a/vendor/compiler_builtins/libm/src/math/remquo.rs b/vendor/compiler_builtins/libm/src/math/remquo.rs new file mode 100644 index 000000000..0afd1f7f5 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/remquo.rs @@ -0,0 +1,110 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { + let ux: u64 = x.to_bits(); + let mut uy: u64 = y.to_bits(); + let mut ex = ((ux >> 52) & 0x7ff) as i32; + let mut ey = ((uy >> 52) & 0x7ff) as i32; + let sx = (ux >> 63) != 0; + let sy = (uy >> 63) != 0; + let mut q: u32; + let mut i: u64; + let mut uxi: u64 = ux; + + if (uy << 1) == 0 || y.is_nan() || ex == 0x7ff { + return ((x * y) / (x * y), 0); + } + if (ux << 1) == 0 { + return (x, 0); + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 12; + while (i >> 63) == 0 { + ex -= 1; + i <<= 1; + } + uxi <<= -ex + 1; + } else { + uxi &= (!0) >> 12; + uxi |= 1 << 52; + } + if ey == 0 { + i = uy << 12; + while (i >> 63) == 0 { + ey -= 1; + i <<= 1; + } + uy <<= -ey + 1; + } else { + uy &= (!0) >> 12; + uy |= 1 << 52; + } + + q = 0; + + if ex + 1 != ey { + if ex < ey { + return (x, 0); + } + /* x mod y */ + while ex > ey { + i = uxi.wrapping_sub(uy); + if (i >> 63) == 0 { + uxi = i; + q += 1; + } + uxi <<= 1; + q <<= 1; + ex -= 1; + } + i = uxi.wrapping_sub(uy); + if (i >> 63) == 0 { + uxi = i; + q += 1; + } + if uxi == 0 { + ex = -60; + } else { + while (uxi >> 52) == 0 { + uxi <<= 1; + ex -= 1; + } + } + } + + /* scale result and decide between |x| and |x|-|y| */ + if ex > 0 { + uxi -= 1 << 52; + uxi |= (ex as u64) << 52; + } else { + uxi >>= -ex + 1; + } + x = f64::from_bits(uxi); + if sy { + y = -y; + } + if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { + x -= y; + // TODO: this matches musl behavior, but it is incorrect + q = q.wrapping_add(1); + } + q &= 0x7fffffff; + let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; + if sx { + (-x, quo) + } else { + (x, quo) + } +} + +#[cfg(test)] +mod tests { + use super::remquo; + + #[test] + fn test_q_overflow() { + // 0xc000000000000001, 0x04c0000000000004 + let _ = remquo(-2.0000000000000004, 8.406091369059082e-286); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/remquof.rs b/vendor/compiler_builtins/libm/src/math/remquof.rs new file mode 100644 index 000000000..d71bd38e3 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/remquof.rs @@ -0,0 +1,97 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { + let ux: u32 = x.to_bits(); + let mut uy: u32 = y.to_bits(); + let mut ex = ((ux >> 23) & 0xff) as i32; + let mut ey = ((uy >> 23) & 0xff) as i32; + let sx = (ux >> 31) != 0; + let sy = (uy >> 31) != 0; + let mut q: u32; + let mut i: u32; + let mut uxi: u32 = ux; + + if (uy << 1) == 0 || y.is_nan() || ex == 0xff { + return ((x * y) / (x * y), 0); + } + if (ux << 1) == 0 { + return (x, 0); + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 9; + while (i >> 31) == 0 { + ex -= 1; + i <<= 1; + } + uxi <<= -ex + 1; + } else { + uxi &= (!0) >> 9; + uxi |= 1 << 23; + } + if ey == 0 { + i = uy << 9; + while (i >> 31) == 0 { + ey -= 1; + i <<= 1; + } + uy <<= -ey + 1; + } else { + uy &= (!0) >> 9; + uy |= 1 << 23; + } + + q = 0; + if ex + 1 != ey { + if ex < ey { + return (x, 0); + } + /* x mod y */ + while ex > ey { + i = uxi.wrapping_sub(uy); + if (i >> 31) == 0 { + uxi = i; + q += 1; + } + uxi <<= 1; + q <<= 1; + ex -= 1; + } + i = uxi.wrapping_sub(uy); + if (i >> 31) == 0 { + uxi = i; + q += 1; + } + if uxi == 0 { + ex = -30; + } else { + while (uxi >> 23) == 0 { + uxi <<= 1; + ex -= 1; + } + } + } + + /* scale result and decide between |x| and |x|-|y| */ + if ex > 0 { + uxi -= 1 << 23; + uxi |= (ex as u32) << 23; + } else { + uxi >>= -ex + 1; + } + x = f32::from_bits(uxi); + if sy { + y = -y; + } + if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { + x -= y; + q += 1; + } + q &= 0x7fffffff; + let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; + if sx { + (-x, quo) + } else { + (x, quo) + } +} diff --git a/vendor/compiler_builtins/libm/src/math/round.rs b/vendor/compiler_builtins/libm/src/math/round.rs new file mode 100644 index 000000000..46fabc90f --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/round.rs @@ -0,0 +1,28 @@ +use super::copysign; +use super::trunc; +use core::f64; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn round(x: f64) -> f64 { + trunc(x + copysign(0.5 - 0.25 * f64::EPSILON, x)) +} + +#[cfg(test)] +mod tests { + use super::round; + + #[test] + fn negative_zero() { + assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); + } + + #[test] + fn sanity_check() { + assert_eq!(round(-1.0), -1.0); + assert_eq!(round(2.8), 3.0); + assert_eq!(round(-0.5), -1.0); + assert_eq!(round(0.5), 1.0); + assert_eq!(round(-1.5), -2.0); + assert_eq!(round(1.5), 2.0); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/roundf.rs b/vendor/compiler_builtins/libm/src/math/roundf.rs new file mode 100644 index 000000000..becdb5620 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/roundf.rs @@ -0,0 +1,30 @@ +use super::copysignf; +use super::truncf; +use core::f32; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf(x: f32) -> f32 { + truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x)) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::roundf; + + #[test] + fn negative_zero() { + assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); + } + + #[test] + fn sanity_check() { + assert_eq!(roundf(-1.0), -1.0); + assert_eq!(roundf(2.8), 3.0); + assert_eq!(roundf(-0.5), -1.0); + assert_eq!(roundf(0.5), 1.0); + assert_eq!(roundf(-1.5), -2.0); + assert_eq!(roundf(1.5), 2.0); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/scalbn.rs b/vendor/compiler_builtins/libm/src/math/scalbn.rs new file mode 100644 index 000000000..00c455a10 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/scalbn.rs @@ -0,0 +1,33 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbn(x: f64, mut n: i32) -> f64 { + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 + let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 + let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022) + + let mut y = x; + + if n > 1023 { + y *= x1p1023; + n -= 1023; + if n > 1023 { + y *= x1p1023; + n -= 1023; + if n > 1023 { + n = 1023; + } + } + } else if n < -1022 { + /* make sure final n < -53 to avoid double + rounding in the subnormal range */ + y *= x1p_1022 * x1p53; + n += 1022 - 53; + if n < -1022 { + y *= x1p_1022 * x1p53; + n += 1022 - 53; + if n < -1022 { + n = -1022; + } + } + } + y * f64::from_bits(((0x3ff + n) as u64) << 52) +} diff --git a/vendor/compiler_builtins/libm/src/math/scalbnf.rs b/vendor/compiler_builtins/libm/src/math/scalbnf.rs new file mode 100644 index 000000000..73f4bb57a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/scalbnf.rs @@ -0,0 +1,29 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 + let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 + + if n > 127 { + x *= x1p127; + n -= 127; + if n > 127 { + x *= x1p127; + n -= 127; + if n > 127 { + n = 127; + } + } + } else if n < -126 { + x *= x1p_126 * x1p24; + n += 126 - 24; + if n < -126 { + x *= x1p_126 * x1p24; + n += 126 - 24; + if n < -126 { + n = -126; + } + } + } + x * f32::from_bits(((0x7f + n) as u32) << 23) +} diff --git a/vendor/compiler_builtins/libm/src/math/sin.rs b/vendor/compiler_builtins/libm/src/math/sin.rs new file mode 100644 index 000000000..a53843dcd --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/sin.rs @@ -0,0 +1,88 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +use super::{k_cos, k_sin, rem_pio2}; + +// sin(x) +// Return sine function of x. +// +// kernel function: +// k_sin ... sine function on [-pi/4,pi/4] +// k_cos ... cose function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. +// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sin(x: f64) -> f64 { + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 + + /* High word of x. */ + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e500000 { + /* |x| < 2**-26 */ + /* raise inexact if x != 0 and underflow if subnormal*/ + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return x; + } + return k_sin(x, 0.0, 0); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + match n & 3 { + 0 => k_sin(y0, y1, 1), + 1 => k_cos(y0, y1), + 2 => -k_sin(y0, y1, 1), + _ => -k_cos(y0, y1), + } +} + +#[test] +fn test_near_pi() { + let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 + let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 + let result = sin(x); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); + assert_eq!(result, sx); +} diff --git a/vendor/compiler_builtins/libm/src/math/sincos.rs b/vendor/compiler_builtins/libm/src/math/sincos.rs new file mode 100644 index 000000000..4ab588412 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/sincos.rs @@ -0,0 +1,133 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{get_high_word, k_cos, k_sin, rem_pio2}; + +pub fn sincos(x: f64) -> (f64, f64) { + let s: f64; + let c: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + /* if |x| < 2**-27 * sqrt(2) */ + if ix < 0x3e46a09e { + /* raise inexact if x!=0 and underflow if subnormal */ + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120 == 2^120 + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return (x, 1.0); + } + return (k_sin(x, 0.0, 0), k_cos(x, 0.0)); + } + + /* sincos(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + let rv = x - x; + return (rv, rv); + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + s = k_sin(y0, y1, 1); + c = k_cos(y0, y1); + match n & 3 { + 0 => (s, c), + 1 => (c, -s), + 2 => (-s, -c), + 3 => (-c, s), + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => (0.0, 1.0), + } +} + +// These tests are based on those from sincosf.rs +#[cfg(test)] +mod tests { + use super::sincos; + + const TOLERANCE: f64 = 1e-6; + + #[test] + fn with_pi() { + let (s, c) = sincos(core::f64::consts::PI); + assert!( + (s - 0.0).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + 0.0, + (s - 0.0).abs(), + TOLERANCE + ); + assert!( + (c + 1.0).abs() < TOLERANCE, + "|{} + {}| = {} >= {}", + c, + 1.0, + (s + 1.0).abs(), + TOLERANCE + ); + } + + #[test] + fn rotational_symmetry() { + use core::f64::consts::PI; + const N: usize = 24; + for n in 0..N { + let theta = 2. * PI * (n as f64) / (N as f64); + let (s, c) = sincos(theta); + let (s_plus, c_plus) = sincos(theta + 2. * PI); + let (s_minus, c_minus) = sincos(theta - 2. * PI); + + assert!( + (s - s_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_plus, + (s - s_plus).abs(), + TOLERANCE + ); + assert!( + (s - s_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_minus, + (s - s_minus).abs(), + TOLERANCE + ); + assert!( + (c - c_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_plus, + (c - c_plus).abs(), + TOLERANCE + ); + assert!( + (c - c_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_minus, + (c - c_minus).abs(), + TOLERANCE + ); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/sincosf.rs b/vendor/compiler_builtins/libm/src/math/sincosf.rs new file mode 100644 index 000000000..5304e8ca0 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/sincosf.rs @@ -0,0 +1,184 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{k_cosf, k_sinf, rem_pio2f}; + +/* Small multiples of pi/2 rounded to double precision. */ +const PI_2: f32 = 0.5 * 3.1415926535897931160E+00; +const S1PIO2: f32 = 1.0 * PI_2; /* 0x3FF921FB, 0x54442D18 */ +const S2PIO2: f32 = 2.0 * PI_2; /* 0x400921FB, 0x54442D18 */ +const S3PIO2: f32 = 3.0 * PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const S4PIO2: f32 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ + +pub fn sincosf(x: f32) -> (f32, f32) { + let s: f32; + let c: f32; + let mut ix: u32; + let sign: bool; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + /* |x| ~<= pi/4 */ + if ix <= 0x3f490fda { + /* |x| < 2**-12 */ + if ix < 0x39800000 { + /* raise inexact if x!=0 and underflow if subnormal */ + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120 == 2^120 + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return (x, 1.0); + } + return (k_sinf(x as f64), k_cosf(x as f64)); + } + + /* |x| ~<= 5*pi/4 */ + if ix <= 0x407b53d1 { + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + if sign { + s = -k_cosf((x + S1PIO2) as f64); + c = k_sinf((x + S1PIO2) as f64); + } else { + s = k_cosf((S1PIO2 - x) as f64); + c = k_sinf((S1PIO2 - x) as f64); + } + } + /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ + else { + if sign { + s = -k_sinf((x + S2PIO2) as f64); + c = -k_cosf((x + S2PIO2) as f64); + } else { + s = -k_sinf((x - S2PIO2) as f64); + c = -k_cosf((x - S2PIO2) as f64); + } + } + + return (s, c); + } + + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40e231d5 { + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + if sign { + s = k_cosf((x + S3PIO2) as f64); + c = -k_sinf((x + S3PIO2) as f64); + } else { + s = -k_cosf((x - S3PIO2) as f64); + c = k_sinf((x - S3PIO2) as f64); + } + } else { + if sign { + s = k_sinf((x + S4PIO2) as f64); + c = k_cosf((x + S4PIO2) as f64); + } else { + s = k_sinf((x - S4PIO2) as f64); + c = k_cosf((x - S4PIO2) as f64); + } + } + + return (s, c); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + let rv = x - x; + return (rv, rv); + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + s = k_sinf(y); + c = k_cosf(y); + match n & 3 { + 0 => (s, c), + 1 => (c, -s), + 2 => (-s, -c), + 3 => (-c, s), + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => (0.0, 1.0), + } +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::sincosf; + use crate::_eqf; + + #[test] + fn with_pi() { + let (s, c) = sincosf(core::f32::consts::PI); + _eqf(s.abs(), 0.0).unwrap(); + _eqf(c, -1.0).unwrap(); + } + + #[test] + fn rotational_symmetry() { + use core::f32::consts::PI; + const N: usize = 24; + for n in 0..N { + let theta = 2. * PI * (n as f32) / (N as f32); + let (s, c) = sincosf(theta); + let (s_plus, c_plus) = sincosf(theta + 2. * PI); + let (s_minus, c_minus) = sincosf(theta - 2. * PI); + + const TOLERANCE: f32 = 1e-6; + assert!( + (s - s_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_plus, + (s - s_plus).abs(), + TOLERANCE + ); + assert!( + (s - s_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_minus, + (s - s_minus).abs(), + TOLERANCE + ); + assert!( + (c - c_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_plus, + (c - c_plus).abs(), + TOLERANCE + ); + assert!( + (c - c_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_minus, + (c - c_minus).abs(), + TOLERANCE + ); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/sinf.rs b/vendor/compiler_builtins/libm/src/math/sinf.rs new file mode 100644 index 000000000..6e20be2ae --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/sinf.rs @@ -0,0 +1,93 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{k_cosf, k_sinf, rem_pio2f}; + +use core::f64::consts::FRAC_PI_2; + +/* Small multiples of pi/2 rounded to double precision. */ +const S1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const S2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const S3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const S4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sinf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00800000 { + x / x1p120 + } else { + x + x1p120 + }); + return x; + } + return k_sinf(x64); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + if sign { + return -k_cosf(x64 + S1_PIO2); + } else { + return k_cosf(x64 - S1_PIO2); + } + } + return k_sinf(if sign { + -(x64 + S2_PIO2) + } else { + -(x64 - S2_PIO2) + }); + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + if sign { + return k_cosf(x64 + S3_PIO2); + } else { + return -k_cosf(x64 - S3_PIO2); + } + } + return k_sinf(if sign { x64 + S4_PIO2 } else { x64 - S4_PIO2 }); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + match n & 3 { + 0 => k_sinf(y), + 1 => k_cosf(y), + 2 => k_sinf(-y), + _ => -k_cosf(y), + } +} diff --git a/vendor/compiler_builtins/libm/src/math/sinh.rs b/vendor/compiler_builtins/libm/src/math/sinh.rs new file mode 100644 index 000000000..fd24fd20c --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/sinh.rs @@ -0,0 +1,49 @@ +use super::{expm1, expo2}; + +// sinh(x) = (exp(x) - 1/exp(x))/2 +// = (exp(x)-1 + (exp(x)-1)/exp(x))/2 +// = x + x^3/6 + o(x^5) +// +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sinh(x: f64) -> f64 { + // union {double f; uint64_t i;} u = {.f = x}; + // uint32_t w; + // double t, h, absx; + + let mut uf: f64 = x; + let mut ui: u64 = f64::to_bits(uf); + let w: u32; + let t: f64; + let mut h: f64; + let absx: f64; + + h = 0.5; + if ui >> 63 != 0 { + h = -h; + } + /* |x| */ + ui &= !1 / 2; + uf = f64::from_bits(ui); + absx = uf; + w = (ui >> 32) as u32; + + /* |x| < log(DBL_MAX) */ + if w < 0x40862e42 { + t = expm1(absx); + if w < 0x3ff00000 { + if w < 0x3ff00000 - (26 << 20) { + /* note: inexact and underflow are raised by expm1 */ + /* note: this branch avoids spurious underflow */ + return x; + } + return h * (2.0 * t - t * t / (t + 1.0)); + } + /* note: |x|>log(0x1p26)+eps could be just h*exp(x) */ + return h * (t + t / (t + 1.0)); + } + + /* |x| > log(DBL_MAX) or nan */ + /* note: the result is stored to handle overflow */ + t = 2.0 * h * expo2(absx); + t +} diff --git a/vendor/compiler_builtins/libm/src/math/sinhf.rs b/vendor/compiler_builtins/libm/src/math/sinhf.rs new file mode 100644 index 000000000..24f863c44 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/sinhf.rs @@ -0,0 +1,30 @@ +use super::expm1f; +use super::k_expo2f; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sinhf(x: f32) -> f32 { + let mut h = 0.5f32; + let mut ix = x.to_bits(); + if (ix >> 31) != 0 { + h = -h; + } + /* |x| */ + ix &= 0x7fffffff; + let absx = f32::from_bits(ix); + let w = ix; + + /* |x| < log(FLT_MAX) */ + if w < 0x42b17217 { + let t = expm1f(absx); + if w < 0x3f800000 { + if w < (0x3f800000 - (12 << 23)) { + return x; + } + return h * (2. * t - t * t / (t + 1.)); + } + return h * (t + t / (t + 1.)); + } + + /* |x| > logf(FLT_MAX) or nan */ + 2. * h * k_expo2f(absx) +} diff --git a/vendor/compiler_builtins/libm/src/math/sqrt.rs b/vendor/compiler_builtins/libm/src/math/sqrt.rs new file mode 100644 index 000000000..f06b209a4 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/sqrt.rs @@ -0,0 +1,264 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* sqrt(x) + * Return correctly rounded sqrt. + * ------------------------------------------ + * | Use the hardware sqrt if you have one | + * ------------------------------------------ + * Method: + * Bit by bit method using integer arithmetic. (Slow, but portable) + * 1. Normalization + * Scale x to y in [1,4) with even powers of 2: + * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then + * sqrt(x) = 2^k * sqrt(y) + * 2. Bit by bit computation + * Let q = sqrt(y) truncated to i bit after binary point (q = 1), + * i 0 + * i+1 2 + * s = 2*q , and y = 2 * ( y - q ). (1) + * i i i i + * + * To compute q from q , one checks whether + * i+1 i + * + * -(i+1) 2 + * (q + 2 ) <= y. (2) + * i + * -(i+1) + * If (2) is false, then q = q ; otherwise q = q + 2 . + * i+1 i i+1 i + * + * With some algebraic manipulation, it is not difficult to see + * that (2) is equivalent to + * -(i+1) + * s + 2 <= y (3) + * i i + * + * The advantage of (3) is that s and y can be computed by + * i i + * the following recurrence formula: + * if (3) is false + * + * s = s , y = y ; (4) + * i+1 i i+1 i + * + * otherwise, + * -i -(i+1) + * s = s + 2 , y = y - s - 2 (5) + * i+1 i i+1 i i + * + * One may easily use induction to prove (4) and (5). + * Note. Since the left hand side of (3) contain only i+2 bits, + * it does not necessary to do a full (53-bit) comparison + * in (3). + * 3. Final rounding + * After generating the 53 bits result, we compute one more bit. + * Together with the remainder, we can decide whether the + * result is exact, bigger than 1/2ulp, or less than 1/2ulp + * (it will never equal to 1/2ulp). + * The rounding mode can be detected by checking whether + * huge + tiny is equal to huge, and whether huge - tiny is + * equal to huge for some floating point number "huge" and "tiny". + * + * Special cases: + * sqrt(+-0) = +-0 ... exact + * sqrt(inf) = inf + * sqrt(-ve) = NaN ... with invalid signal + * sqrt(NaN) = NaN ... with invalid signal for signaling NaN + */ + +use core::f64; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrt(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.sqrt` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return if x < 0.0 { + f64::NAN + } else { + unsafe { ::core::intrinsics::sqrtf64(x) } + } + } + } + #[cfg(target_feature = "sse2")] + { + // Note: This path is unlikely since LLVM will usually have already + // optimized sqrt calls into hardware instructions if sse2 is available, + // but if someone does end up here they'll apprected the speed increase. + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + unsafe { + let m = _mm_set_sd(x); + let m_sqrt = _mm_sqrt_pd(m); + _mm_cvtsd_f64(m_sqrt) + } + } + #[cfg(not(target_feature = "sse2"))] + { + use core::num::Wrapping; + + const TINY: f64 = 1.0e-300; + + let mut z: f64; + let sign: Wrapping<u32> = Wrapping(0x80000000); + let mut ix0: i32; + let mut s0: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: Wrapping<u32>; + let mut t1: Wrapping<u32>; + let mut s1: Wrapping<u32>; + let mut ix1: Wrapping<u32>; + let mut q1: Wrapping<u32>; + + ix0 = (x.to_bits() >> 32) as i32; + ix1 = Wrapping(x.to_bits() as u32); + + /* take care of Inf and NaN */ + if (ix0 & 0x7ff00000) == 0x7ff00000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if ix0 <= 0 { + if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix0 < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } + /* normalize x */ + m = ix0 >> 20; + if m == 0 { + /* subnormal x */ + while ix0 == 0 { + m -= 21; + ix0 |= (ix1 >> 11).0 as i32; + ix1 <<= 21; + } + i = 0; + while (ix0 & 0x00100000) == 0 { + i += 1; + ix0 <<= 1; + } + m -= i - 1; + ix0 |= (ix1 >> (32 - i) as usize).0 as i32; + ix1 = ix1 << i as usize; + } + m -= 1023; /* unbias exponent */ + ix0 = (ix0 & 0x000fffff) | 0x00100000; + if (m & 1) == 1 { + /* odd m, double x to make it even */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = Wrapping(0); + s0 = 0; + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ + + while r != Wrapping(0) { + t = s0 + r.0 as i32; + if t <= ix0 { + s0 = t + r.0 as i32; + ix0 -= t; + q += r.0 as i32; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } + + r = sign; + while r != Wrapping(0) { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { + s0 += 1; + } + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if (ix0 as u32 | ix1.0) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); + q += 1; + } else if z > 1.0 { + if q1.0 == 0xfffffffe { + q += 1; + } + q1 += Wrapping(2); + } else { + q1 += q1 & Wrapping(1); + } + } + } + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { + ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::*; + + #[test] + fn sanity_check() { + assert_eq!(sqrt(100.0), 10.0); + assert_eq!(sqrt(4.0), 2.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt + #[test] + fn spec_tests() { + // Not Asserted: FE_INVALID exception is raised if argument is negative. + assert!(sqrt(-1.0).is_nan()); + assert!(sqrt(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY].iter().copied() { + assert_eq!(sqrt(f), f); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/sqrtf.rs b/vendor/compiler_builtins/libm/src/math/sqrtf.rs new file mode 100644 index 000000000..00b20e578 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/sqrtf.rs @@ -0,0 +1,154 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.sqrt` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return if x < 0.0 { + ::core::f32::NAN + } else { + unsafe { ::core::intrinsics::sqrtf32(x) } + } + } + } + #[cfg(target_feature = "sse")] + { + // Note: This path is unlikely since LLVM will usually have already + // optimized sqrt calls into hardware instructions if sse is available, + // but if someone does end up here they'll apprected the speed increase. + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } + } + #[cfg(not(target_feature = "sse"))] + { + const TINY: f32 = 1.0e-30; + + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; + + ix = x.to_bits() as i32; + + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } + + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; + } + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ + ix += ix; + } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ + + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) + } +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::*; + use core::f32::*; + + #[test] + fn sanity_check() { + assert_eq!(sqrtf(100.0), 10.0); + assert_eq!(sqrtf(4.0), 2.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt + #[test] + fn spec_tests() { + // Not Asserted: FE_INVALID exception is raised if argument is negative. + assert!(sqrtf(-1.0).is_nan()); + assert!(sqrtf(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY].iter().copied() { + assert_eq!(sqrtf(f), f); + } + } +} diff --git a/vendor/compiler_builtins/libm/src/math/tan.rs b/vendor/compiler_builtins/libm/src/math/tan.rs new file mode 100644 index 000000000..5a72f6801 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/tan.rs @@ -0,0 +1,70 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */ +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +use super::{k_tan, rem_pio2}; + +// tan(x) +// Return tangent function of x. +// +// kernel function: +// k_tan ... tangent function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. +// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tan(x: f64) -> f64 { + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e400000 { + /* |x| < 2**-27 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00100000 { + x / x1p120 as f64 + } else { + x + x1p120 as f64 + }); + return x; + } + return k_tan(x, 0.0, 0); + } + + /* tan(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction */ + let (n, y0, y1) = rem_pio2(x); + k_tan(y0, y1, n & 1) +} diff --git a/vendor/compiler_builtins/libm/src/math/tanf.rs b/vendor/compiler_builtins/libm/src/math/tanf.rs new file mode 100644 index 000000000..10de59c39 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/tanf.rs @@ -0,0 +1,78 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_tanf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{k_tanf, rem_pio2f}; + +use core::f64::consts::FRAC_PI_2; + +/* Small multiples of pi/2 rounded to double precision. */ +const T1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const T2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const T3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tanf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00800000 { + x / x1p120 + } else { + x + x1p120 + }); + return x; + } + return k_tanf(x64, false); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + return k_tanf(if sign { x64 + T1_PIO2 } else { x64 - T1_PIO2 }, true); + } else { + return k_tanf(if sign { x64 + T2_PIO2 } else { x64 - T2_PIO2 }, false); + } + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + return k_tanf(if sign { x64 + T3_PIO2 } else { x64 - T3_PIO2 }, true); + } else { + return k_tanf(if sign { x64 + T4_PIO2 } else { x64 - T4_PIO2 }, false); + } + } + + /* tan(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* argument reduction */ + let (n, y) = rem_pio2f(x); + k_tanf(y, n & 1 != 0) +} diff --git a/vendor/compiler_builtins/libm/src/math/tanh.rs b/vendor/compiler_builtins/libm/src/math/tanh.rs new file mode 100644 index 000000000..980c68554 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/tanh.rs @@ -0,0 +1,53 @@ +use super::expm1; + +/* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x)) + * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) + * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) + */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tanh(mut x: f64) -> f64 { + let mut uf: f64 = x; + let mut ui: u64 = f64::to_bits(uf); + + let w: u32; + let sign: bool; + let mut t: f64; + + /* x = |x| */ + sign = ui >> 63 != 0; + ui &= !1 / 2; + uf = f64::from_bits(ui); + x = uf; + w = (ui >> 32) as u32; + + if w > 0x3fe193ea { + /* |x| > log(3)/2 ~= 0.5493 or nan */ + if w > 0x40340000 { + /* |x| > 20 or nan */ + /* note: this branch avoids raising overflow */ + t = 1.0 - 0.0 / x; + } else { + t = expm1(2.0 * x); + t = 1.0 - 2.0 / (t + 2.0); + } + } else if w > 0x3fd058ae { + /* |x| > log(5/3)/2 ~= 0.2554 */ + t = expm1(2.0 * x); + t = t / (t + 2.0); + } else if w >= 0x00100000 { + /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */ + t = expm1(-2.0 * x); + t = -t / (t + 2.0); + } else { + /* |x| is subnormal */ + /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */ + force_eval!(x as f32); + t = x; + } + + if sign { + -t + } else { + t + } +} diff --git a/vendor/compiler_builtins/libm/src/math/tanhf.rs b/vendor/compiler_builtins/libm/src/math/tanhf.rs new file mode 100644 index 000000000..fc94e3ddd --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/tanhf.rs @@ -0,0 +1,39 @@ +use super::expm1f; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tanhf(mut x: f32) -> f32 { + /* x = |x| */ + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + x = f32::from_bits(ix); + let w = ix; + + let tt = if w > 0x3f0c9f54 { + /* |x| > log(3)/2 ~= 0.5493 or nan */ + if w > 0x41200000 { + /* |x| > 10 */ + 1. + 0. / x + } else { + let t = expm1f(2. * x); + 1. - 2. / (t + 2.) + } + } else if w > 0x3e82c578 { + /* |x| > log(5/3)/2 ~= 0.2554 */ + let t = expm1f(2. * x); + t / (t + 2.) + } else if w >= 0x00800000 { + /* |x| >= 0x1p-126 */ + let t = expm1f(-2. * x); + -t / (t + 2.) + } else { + /* |x| is subnormal */ + force_eval!(x * x); + x + }; + if sign { + -tt + } else { + tt + } +} diff --git a/vendor/compiler_builtins/libm/src/math/tgamma.rs b/vendor/compiler_builtins/libm/src/math/tgamma.rs new file mode 100644 index 000000000..f8ccf669a --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/tgamma.rs @@ -0,0 +1,207 @@ +/* +"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964) +"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001) +"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004) + +approximation method: + + (x - 0.5) S(x) +Gamma(x) = (x + g - 0.5) * ---------------- + exp(x + g - 0.5) + +with + a1 a2 a3 aN +S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ] + x + 1 x + 2 x + 3 x + N + +with a0, a1, a2, a3,.. aN constants which depend on g. + +for x < 0 the following reflection formula is used: + +Gamma(x)*Gamma(-x) = -pi/(x sin(pi x)) + +most ideas and constants are from boost and python +*/ +extern crate core; +use super::{exp, floor, k_cos, k_sin, pow}; + +const PI: f64 = 3.141592653589793238462643383279502884; + +/* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */ +fn sinpi(mut x: f64) -> f64 { + let mut n: isize; + + /* argument reduction: x = |x| mod 2 */ + /* spurious inexact when x is odd int */ + x = x * 0.5; + x = 2.0 * (x - floor(x)); + + /* reduce x into [-.25,.25] */ + n = (4.0 * x) as isize; + n = (n + 1) / 2; + x -= (n as f64) * 0.5; + + x *= PI; + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + 0 | _ => k_sin(x, 0.0, 0), + } +} + +const N: usize = 12; +//static const double g = 6.024680040776729583740234375; +const GMHALF: f64 = 5.524680040776729583740234375; +const SNUM: [f64; N + 1] = [ + 23531376880.410759688572007674451636754734846804940, + 42919803642.649098768957899047001988850926355848959, + 35711959237.355668049440185451547166705960488635843, + 17921034426.037209699919755754458931112671403265390, + 6039542586.3520280050642916443072979210699388420708, + 1439720407.3117216736632230727949123939715485786772, + 248874557.86205415651146038641322942321632125127801, + 31426415.585400194380614231628318205362874684987640, + 2876370.6289353724412254090516208496135991145378768, + 186056.26539522349504029498971604569928220784236328, + 8071.6720023658162106380029022722506138218516325024, + 210.82427775157934587250973392071336271166969580291, + 2.5066282746310002701649081771338373386264310793408, +]; +const SDEN: [f64; N + 1] = [ + 0.0, + 39916800.0, + 120543840.0, + 150917976.0, + 105258076.0, + 45995730.0, + 13339535.0, + 2637558.0, + 357423.0, + 32670.0, + 1925.0, + 66.0, + 1.0, +]; +/* n! for small integer n */ +const FACT: [f64; 23] = [ + 1.0, + 1.0, + 2.0, + 6.0, + 24.0, + 120.0, + 720.0, + 5040.0, + 40320.0, + 362880.0, + 3628800.0, + 39916800.0, + 479001600.0, + 6227020800.0, + 87178291200.0, + 1307674368000.0, + 20922789888000.0, + 355687428096000.0, + 6402373705728000.0, + 121645100408832000.0, + 2432902008176640000.0, + 51090942171709440000.0, + 1124000727777607680000.0, +]; + +/* S(x) rational function for positive x */ +fn s(x: f64) -> f64 { + let mut num: f64 = 0.0; + let mut den: f64 = 0.0; + + /* to avoid overflow handle large x differently */ + if x < 8.0 { + for i in (0..=N).rev() { + num = num * x + SNUM[i]; + den = den * x + SDEN[i]; + } + } else { + for i in 0..=N { + num = num / x + SNUM[i]; + den = den / x + SDEN[i]; + } + } + return num / den; +} + +pub fn tgamma(mut x: f64) -> f64 { + let u: u64 = x.to_bits(); + let absx: f64; + let mut y: f64; + let mut dy: f64; + let mut z: f64; + let mut r: f64; + let ix: u32 = ((u >> 32) as u32) & 0x7fffffff; + let sign: bool = (u >> 63) != 0; + + /* special cases */ + if ix >= 0x7ff00000 { + /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ + return x + core::f64::INFINITY; + } + if ix < ((0x3ff - 54) << 20) { + /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ + return 1.0 / x; + } + + /* integer arguments */ + /* raise inexact when non-integer */ + if x == floor(x) { + if sign { + return 0.0 / 0.0; + } + if x <= FACT.len() as f64 { + return FACT[(x as usize) - 1]; + } + } + + /* x >= 172: tgamma(x)=inf with overflow */ + /* x =< -184: tgamma(x)=+-0 with underflow */ + if ix >= 0x40670000 { + /* |x| >= 184 */ + if sign { + let x1p_126 = f64::from_bits(0x3810000000000000); // 0x1p-126 == 2^-126 + force_eval!((x1p_126 / x) as f32); + if floor(x) * 0.5 == floor(x * 0.5) { + return 0.0; + } else { + return -0.0; + } + } + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 == 2^1023 + x *= x1p1023; + return x; + } + + absx = if sign { -x } else { x }; + + /* handle the error of x + g - 0.5 */ + y = absx + GMHALF; + if absx > GMHALF { + dy = y - absx; + dy -= GMHALF; + } else { + dy = y - GMHALF; + dy -= absx; + } + + z = absx - 0.5; + r = s(absx) * exp(-y); + if x < 0.0 { + /* reflection formula for negative x */ + /* sinpi(absx) is not 0, integers are already handled */ + r = -PI / (sinpi(absx) * absx * r); + dy = -dy; + z = -z; + } + r += dy * (GMHALF + 0.5) * r / y; + z = pow(y, 0.5 * z); + y = r * z * z; + return y; +} diff --git a/vendor/compiler_builtins/libm/src/math/tgammaf.rs b/vendor/compiler_builtins/libm/src/math/tgammaf.rs new file mode 100644 index 000000000..a8f161f0c --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/tgammaf.rs @@ -0,0 +1,5 @@ +use super::tgamma; + +pub fn tgammaf(x: f32) -> f32 { + tgamma(x as f64) as f32 +} diff --git a/vendor/compiler_builtins/libm/src/math/trunc.rs b/vendor/compiler_builtins/libm/src/math/trunc.rs new file mode 100644 index 000000000..f7892a2c5 --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/trunc.rs @@ -0,0 +1,40 @@ +use core::f64; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn trunc(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.trunc` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::truncf64(x) } + } + } + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 + + let mut i: u64 = x.to_bits(); + let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12; + let m: u64; + + if e >= 52 + 12 { + return x; + } + if e < 12 { + e = 1; + } + m = -1i64 as u64 >> e; + if (i & m) == 0 { + return x; + } + force_eval!(x + x1p120); + i &= !m; + f64::from_bits(i) +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::trunc(1.1), 1.0); + } +} diff --git a/vendor/compiler_builtins/libm/src/math/truncf.rs b/vendor/compiler_builtins/libm/src/math/truncf.rs new file mode 100644 index 000000000..20d5b73bd --- /dev/null +++ b/vendor/compiler_builtins/libm/src/math/truncf.rs @@ -0,0 +1,42 @@ +use core::f32; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.trunc` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::truncf32(x) } + } + } + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut i: u32 = x.to_bits(); + let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9; + let m: u32; + + if e >= 23 + 9 { + return x; + } + if e < 9 { + e = 1; + } + m = -1i32 as u32 >> e; + if (i & m) == 0 { + return x; + } + force_eval!(x + x1p120); + i &= !m; + f32::from_bits(i) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::truncf(1.1), 1.0); + } +} diff --git a/vendor/compiler_builtins/src/arm.rs b/vendor/compiler_builtins/src/arm.rs new file mode 100644 index 000000000..9c1b6ad12 --- /dev/null +++ b/vendor/compiler_builtins/src/arm.rs @@ -0,0 +1,183 @@ +#![cfg(not(feature = "no-asm"))] +#![allow(unused_imports)] + +use core::intrinsics; + +// iOS symbols have a leading underscore. +#[cfg(target_os = "ios")] +macro_rules! bl { + ($func:literal) => { + concat!("bl _", $func) + }; +} +#[cfg(not(target_os = "ios"))] +macro_rules! bl { + ($func:literal) => { + concat!("bl ", $func) + }; +} + +intrinsics! { + // NOTE This function and the ones below are implemented using assembly because they are using a + // custom calling convention which can't be implemented using a normal Rust function. + #[naked] + #[cfg(not(target_env = "msvc"))] + pub unsafe extern "C" fn __aeabi_uidivmod() { + core::arch::asm!( + "push {{lr}}", + "sub sp, sp, #4", + "mov r2, sp", + bl!("__udivmodsi4"), + "ldr r1, [sp]", + "add sp, sp, #4", + "pop {{pc}}", + options(noreturn) + ); + } + + #[naked] + pub unsafe extern "C" fn __aeabi_uldivmod() { + core::arch::asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + bl!("__udivmoddi4"), + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); + } + + #[naked] + pub unsafe extern "C" fn __aeabi_idivmod() { + core::arch::asm!( + "push {{r0, r1, r4, lr}}", + bl!("__aeabi_idiv"), + "pop {{r1, r2}}", + "muls r2, r2, r0", + "subs r1, r1, r2", + "pop {{r4, pc}}", + options(noreturn) + ); + } + + #[naked] + pub unsafe extern "C" fn __aeabi_ldivmod() { + core::arch::asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + bl!("__divmoddi4"), + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); + } + + // The following functions use weak linkage to allow users to override + // with custom implementation. + // FIXME: The `*4` and `*8` variants should be defined as aliases. + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { + ::mem::memcpy(dest, src, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { + // We are guaranteed 4-alignment, so accessing at u32 is okay. + let mut dest = dest as *mut u32; + let mut src = src as *mut u32; + let mut n = n; + + while n >= 4 { + *dest = *src; + dest = dest.offset(1); + src = src.offset(1); + n -= 4; + } + + __aeabi_memcpy(dest as *mut u8, src as *const u8, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memcpy4(dest, src, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { + ::mem::memmove(dest, src, n); + } + + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memmove(dest, src, n); + } + + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memmove(dest, src, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { + // Note the different argument order + ::mem::memset(dest, c, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { + let mut dest = dest as *mut u32; + let mut n = n; + + let byte = (c as u32) & 0xff; + let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; + + while n >= 4 { + *dest = c; + dest = dest.offset(1); + n -= 4; + } + + __aeabi_memset(dest as *mut u8, n, byte as i32); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { + __aeabi_memset4(dest, n, c); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { + __aeabi_memset(dest, n, 0); + } + + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { + __aeabi_memset4(dest, n, 0); + } + + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { + __aeabi_memset4(dest, n, 0); + } +} diff --git a/vendor/compiler_builtins/src/arm_linux.rs b/vendor/compiler_builtins/src/arm_linux.rs new file mode 100644 index 000000000..8fe09485b --- /dev/null +++ b/vendor/compiler_builtins/src/arm_linux.rs @@ -0,0 +1,214 @@ +use core::intrinsics; +use core::mem; + +// Kernel-provided user-mode helper functions: +// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt +unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { + let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ()); + f(oldval, newval, ptr) == 0 +} +unsafe fn __kuser_memory_barrier() { + let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ()); + f(); +} + +// Word-align a pointer +fn align_ptr<T>(ptr: *mut T) -> *mut u32 { + // This gives us a mask of 0 when T == u32 since the pointer is already + // supposed to be aligned, which avoids any masking in that case. + let ptr_mask = 3 & (4 - mem::size_of::<T>()); + (ptr as usize & !ptr_mask) as *mut u32 +} + +// Calculate the shift and mask of a value inside an aligned word +fn get_shift_mask<T>(ptr: *mut T) -> (u32, u32) { + // Mask to get the low byte/halfword/word + let mask = match mem::size_of::<T>() { + 1 => 0xff, + 2 => 0xffff, + 4 => 0xffffffff, + _ => unreachable!(), + }; + + // If we are on big-endian then we need to adjust the shift accordingly + let endian_adjust = if cfg!(target_endian = "little") { + 0 + } else { + 4 - mem::size_of::<T>() as u32 + }; + + // Shift to get the desired element in the word + let ptr_mask = 3 & (4 - mem::size_of::<T>()); + let shift = ((ptr as usize & ptr_mask) as u32 ^ endian_adjust) * 8; + + (shift, mask) +} + +// Extract a value from an aligned word +fn extract_aligned(aligned: u32, shift: u32, mask: u32) -> u32 { + (aligned >> shift) & mask +} + +// Insert a value into an aligned word +fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 { + (aligned & !(mask << shift)) | ((val & mask) << shift) +} + +// Generic atomic read-modify-write operation +unsafe fn atomic_rmw<T, F: Fn(u32) -> u32>(ptr: *mut T, f: F) -> u32 { + let aligned_ptr = align_ptr(ptr); + let (shift, mask) = get_shift_mask(ptr); + + loop { + let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); + let curval = extract_aligned(curval_aligned, shift, mask); + let newval = f(curval); + let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); + if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { + return curval; + } + } +} + +// Generic atomic compare-exchange operation +unsafe fn atomic_cmpxchg<T>(ptr: *mut T, oldval: u32, newval: u32) -> u32 { + let aligned_ptr = align_ptr(ptr); + let (shift, mask) = get_shift_mask(ptr); + + loop { + let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); + let curval = extract_aligned(curval_aligned, shift, mask); + if curval != oldval { + return curval; + } + let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); + if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { + return oldval; + } + } +} + +macro_rules! atomic_rmw { + ($name:ident, $ty:ty, $op:expr) => { + intrinsics! { + pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { + atomic_rmw(ptr, |x| $op(x as $ty, val) as u32) as $ty + } + } + }; +} +macro_rules! atomic_cmpxchg { + ($name:ident, $ty:ty) => { + intrinsics! { + pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty { + atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty + } + } + }; +} + +atomic_rmw!(__sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b)); +atomic_rmw!(__sync_fetch_and_add_2, u16, |a: u16, b: u16| a + .wrapping_add(b)); +atomic_rmw!(__sync_fetch_and_add_4, u32, |a: u32, b: u32| a + .wrapping_add(b)); + +atomic_rmw!(__sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); +atomic_rmw!(__sync_fetch_and_sub_2, u16, |a: u16, b: u16| a + .wrapping_sub(b)); +atomic_rmw!(__sync_fetch_and_sub_4, u32, |a: u32, b: u32| a + .wrapping_sub(b)); + +atomic_rmw!(__sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b); +atomic_rmw!(__sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b); +atomic_rmw!(__sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b); + +atomic_rmw!(__sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b); +atomic_rmw!(__sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b); +atomic_rmw!(__sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b); + +atomic_rmw!(__sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b); +atomic_rmw!(__sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b); +atomic_rmw!(__sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b); + +atomic_rmw!(__sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b)); +atomic_rmw!(__sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b)); +atomic_rmw!(__sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b)); + +atomic_rmw!(__sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { + a +} else { + b +}); + +atomic_rmw!(__sync_lock_test_and_set_1, u8, |_: u8, b: u8| b); +atomic_rmw!(__sync_lock_test_and_set_2, u16, |_: u16, b: u16| b); +atomic_rmw!(__sync_lock_test_and_set_4, u32, |_: u32, b: u32| b); + +atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8); +atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16); +atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32); + +intrinsics! { + pub unsafe extern "C" fn __sync_synchronize() { + __kuser_memory_barrier(); + } +} diff --git a/vendor/compiler_builtins/src/float/add.rs b/vendor/compiler_builtins/src/float/add.rs new file mode 100644 index 000000000..67f6c2c14 --- /dev/null +++ b/vendor/compiler_builtins/src/float/add.rs @@ -0,0 +1,213 @@ +use float::Float; +use int::{CastInto, Int}; + +/// Returns `a + b` +fn add<F: Float>(a: F, b: F) -> F +where + u32: CastInto<F::Int>, + F::Int: CastInto<u32>, + i32: CastInto<F::Int>, + F::Int: CastInto<i32>, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + let bits = F::BITS.cast(); + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + + let mut a_rep = a.repr(); + let mut b_rep = b.repr(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // Detect if a or b is zero, infinity, or NaN. + if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one { + // NaN + anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_abs | quiet_bit); + } + // anything + NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_abs | quiet_bit); + } + + if a_abs == inf_rep { + // +/-infinity + -/+infinity = qNaN + if (a.repr() ^ b.repr()) == sign_bit { + return F::from_repr(qnan_rep); + } else { + // +/-infinity + anything remaining = +/- infinity + return a; + } + } + + // anything remaining + +/-infinity = +/-infinity + if b_abs == inf_rep { + return b; + } + + // zero + anything = anything + if a_abs == Int::ZERO { + // but we need to get the sign right for zero + zero + if b_abs == Int::ZERO { + return F::from_repr(a.repr() & b.repr()); + } else { + return b; + } + } + + // anything + zero = anything + if b_abs == Int::ZERO { + return a; + } + } + + // Swap a and b if necessary so that a has the larger absolute value. + if b_abs > a_abs { + // Don't use mem::swap because it may generate references to memcpy in unoptimized code. + let tmp = a_rep; + a_rep = b_rep; + b_rep = tmp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. + let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits).cast(); + let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits).cast(); + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + + // normalize any denormals, and adjust the exponent accordingly. + if a_exponent == 0 { + let (exponent, significand) = F::normalize(a_significand); + a_exponent = exponent; + a_significand = significand; + } + if b_exponent == 0 { + let (exponent, significand) = F::normalize(b_significand); + b_exponent = exponent; + b_significand = significand; + } + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction; otherwise addition. + let result_sign = a_rep & sign_bit; + let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero; + + // Shift the significands to give us round, guard and sticky, and or in the + // implicit significand bit. (If we fell through from the denormal path it + // was already set by normalize(), but setting it twice won't hurt + // anything.) + a_significand = (a_significand | implicit_bit) << 3; + b_significand = (b_significand | implicit_bit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + let align = a_exponent.wrapping_sub(b_exponent).cast(); + if align != Int::ZERO { + if align < bits { + let sticky = + F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO); + b_significand = (b_significand >> align.cast()) | sticky; + } else { + b_significand = one; // sticky; b is known to be non-zero. + } + } + if subtraction { + a_significand = a_significand.wrapping_sub(b_significand); + // If a == -b, return +zero. + if a_significand == Int::ZERO { + return F::from_repr(Int::ZERO); + } + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + if a_significand < implicit_bit << 3 { + let shift = + a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32; + a_significand <<= shift; + a_exponent -= shift; + } + } else { + // addition + a_significand += b_significand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent: + if a_significand & implicit_bit << 4 != Int::ZERO { + let sticky = F::Int::from_bool(a_significand & one != Int::ZERO); + a_significand = a_significand >> 1 | sticky; + a_exponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity: + if a_exponent >= max_exponent as i32 { + return F::from_repr(inf_rep | result_sign); + } + + if a_exponent <= 0 { + // Result is denormal before rounding; the exponent is zero and we + // need to shift the significand. + let shift = (1 - a_exponent).cast(); + let sticky = + F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO); + a_significand = a_significand >> shift.cast() | sticky; + a_exponent = 0; + } + + // Low three bits are round, guard, and sticky. + let a_significand_i32: i32 = a_significand.cast(); + let round_guard_sticky: i32 = a_significand_i32 & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + let mut result = a_significand >> 3 & significand_mask; + + // Insert the exponent and sign. + result |= a_exponent.cast() << significand_bits; + result |= result_sign; + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. + if round_guard_sticky > 0x4 { + result += one; + } + if round_guard_sticky == 0x4 { + result += result & one; + } + + F::from_repr(result) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_fadd] + pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 { + add(a, b) + } + + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_dadd] + pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 { + add(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __addsf3vfp(a: f32, b: f32) -> f32 { + a + b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __adddf3vfp(a: f64, b: f64) -> f64 { + a + b + } +} diff --git a/vendor/compiler_builtins/src/float/cmp.rs b/vendor/compiler_builtins/src/float/cmp.rs new file mode 100644 index 000000000..1d4e38433 --- /dev/null +++ b/vendor/compiler_builtins/src/float/cmp.rs @@ -0,0 +1,253 @@ +#![allow(unreachable_code)] + +use float::Float; +use int::Int; + +#[derive(Clone, Copy)] +enum Result { + Less, + Equal, + Greater, + Unordered, +} + +impl Result { + fn to_le_abi(self) -> i32 { + match self { + Result::Less => -1, + Result::Equal => 0, + Result::Greater => 1, + Result::Unordered => 1, + } + } + + fn to_ge_abi(self) -> i32 { + match self { + Result::Less => -1, + Result::Equal => 0, + Result::Greater => 1, + Result::Unordered => -1, + } + } +} + +fn cmp<F: Float>(a: F, b: F) -> Result { + let one = F::Int::ONE; + let zero = F::Int::ZERO; + let szero = F::SignedInt::ZERO; + + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + + let a_rep = a.repr(); + let b_rep = b.repr(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // If either a or b is NaN, they are unordered. + if a_abs > inf_rep || b_abs > inf_rep { + return Result::Unordered; + } + + // If a and b are both zeros, they are equal. + if a_abs | b_abs == zero { + return Result::Equal; + } + + let a_srep = a.signed_repr(); + let b_srep = b.signed_repr(); + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. + if a_srep & b_srep >= szero { + if a_srep < b_srep { + Result::Less + } else if a_srep == b_srep { + Result::Equal + } else { + Result::Greater + } + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + } else if a_srep > b_srep { + Result::Less + } else if a_srep == b_srep { + Result::Equal + } else { + Result::Greater + } +} + +fn unord<F: Float>(a: F, b: F) -> bool { + let one = F::Int::ONE; + + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + + let a_rep = a.repr(); + let b_rep = b.repr(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + a_abs > inf_rep || b_abs > inf_rep +} + +intrinsics! { + pub extern "C" fn __lesf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gesf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_ge_abi() + } + + #[arm_aeabi_alias = __aeabi_fcmpun] + pub extern "C" fn __unordsf2(a: f32, b: f32) -> i32 { + unord(a, b) as i32 + } + + pub extern "C" fn __eqsf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __ltsf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __nesf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gtsf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_ge_abi() + } + + pub extern "C" fn __ledf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gedf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_ge_abi() + } + + #[arm_aeabi_alias = __aeabi_dcmpun] + pub extern "C" fn __unorddf2(a: f64, b: f64) -> i32 { + unord(a, b) as i32 + } + + pub extern "C" fn __eqdf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __ltdf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __nedf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gtdf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_ge_abi() + } +} + +#[cfg(target_arch = "arm")] +intrinsics! { + pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { + (__lesf2(a, b) <= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 { + (__gesf2(a, b) >= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 { + (__eqsf2(a, b) == 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 { + (__ltsf2(a, b) < 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 { + (__gtsf2(a, b) > 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 { + (__ledf2(a, b) <= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 { + (__gedf2(a, b) >= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 { + (__eqdf2(a, b) == 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 { + (__ltdf2(a, b) < 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 { + (__gtdf2(a, b) > 0) as i32 + } + + // On hard-float targets LLVM will use native instructions + // for all VFP intrinsics below + + pub extern "C" fn __gesf2vfp(a: f32, b: f32) -> i32 { + (a >= b) as i32 + } + + pub extern "C" fn __gedf2vfp(a: f64, b: f64) -> i32 { + (a >= b) as i32 + } + + pub extern "C" fn __gtsf2vfp(a: f32, b: f32) -> i32 { + (a > b) as i32 + } + + pub extern "C" fn __gtdf2vfp(a: f64, b: f64) -> i32 { + (a > b) as i32 + } + + pub extern "C" fn __ltsf2vfp(a: f32, b: f32) -> i32 { + (a < b) as i32 + } + + pub extern "C" fn __ltdf2vfp(a: f64, b: f64) -> i32 { + (a < b) as i32 + } + + pub extern "C" fn __lesf2vfp(a: f32, b: f32) -> i32 { + (a <= b) as i32 + } + + pub extern "C" fn __ledf2vfp(a: f64, b: f64) -> i32 { + (a <= b) as i32 + } + + pub extern "C" fn __nesf2vfp(a: f32, b: f32) -> i32 { + (a != b) as i32 + } + + pub extern "C" fn __nedf2vfp(a: f64, b: f64) -> i32 { + (a != b) as i32 + } + + pub extern "C" fn __eqsf2vfp(a: f32, b: f32) -> i32 { + (a == b) as i32 + } + + pub extern "C" fn __eqdf2vfp(a: f64, b: f64) -> i32 { + (a == b) as i32 + } +} diff --git a/vendor/compiler_builtins/src/float/conv.rs b/vendor/compiler_builtins/src/float/conv.rs new file mode 100644 index 000000000..07b58f3d2 --- /dev/null +++ b/vendor/compiler_builtins/src/float/conv.rs @@ -0,0 +1,351 @@ +/// Conversions from integers to floats. +/// +/// These are hand-optimized bit twiddling code, +/// which unfortunately isn't the easiest kind of code to read. +/// +/// The algorithm is explained here: https://blog.m-ou.se/floats/ +mod int_to_float { + pub fn u32_to_f32_bits(i: u32) -> u32 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact. + let b = (i << n) << 24; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = 157 - n as u32; // Exponent plus 127, minus one. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u32_to_f64_bits(i: u32) -> u64 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact. + let e = 1053 - n as u64; // Exponent plus 1023, minus one. + (e << 52) + m // Bit 53 of m will overflow into e. + } + + pub fn u64_to_f32_bits(i: u64) -> u32 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact. + let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u64_to_f64_bits(i: u64) -> u64 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + let a = ((i << n) >> 11) as u64; // Significant bits, with bit 53 still in tact. + let b = ((i << n) << 53) as u64; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. + let e = 1085 - n as u64; // Exponent plus 1023, minus one. + (e << 52) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u128_to_f32_bits(i: u128) -> u32 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact. + let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u128_to_f64_bits(i: u128) -> u64 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact. + let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero. + (e << 52) + m // + not |, so the mantissa can overflow into the exponent. + } +} + +// Conversions from unsigned integers to floats. +intrinsics! { + #[arm_aeabi_alias = __aeabi_ui2f] + pub extern "C" fn __floatunsisf(i: u32) -> f32 { + f32::from_bits(int_to_float::u32_to_f32_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ui2d] + pub extern "C" fn __floatunsidf(i: u32) -> f64 { + f64::from_bits(int_to_float::u32_to_f64_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ul2f] + pub extern "C" fn __floatundisf(i: u64) -> f32 { + f32::from_bits(int_to_float::u64_to_f32_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ul2d] + pub extern "C" fn __floatundidf(i: u64) -> f64 { + f64::from_bits(int_to_float::u64_to_f64_bits(i)) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floatuntisf(i: u128) -> f32 { + f32::from_bits(int_to_float::u128_to_f32_bits(i)) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floatuntidf(i: u128) -> f64 { + f64::from_bits(int_to_float::u128_to_f64_bits(i)) + } +} + +// Conversions from signed integers to floats. +intrinsics! { + #[arm_aeabi_alias = __aeabi_i2f] + pub extern "C" fn __floatsisf(i: i32) -> f32 { + let sign_bit = ((i >> 31) as u32) << 31; + f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit) + } + + #[arm_aeabi_alias = __aeabi_i2d] + pub extern "C" fn __floatsidf(i: i32) -> f64 { + let sign_bit = ((i >> 31) as u64) << 63; + f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit) + } + + #[arm_aeabi_alias = __aeabi_l2f] + pub extern "C" fn __floatdisf(i: i64) -> f32 { + let sign_bit = ((i >> 63) as u32) << 31; + f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit) + } + + #[arm_aeabi_alias = __aeabi_l2d] + pub extern "C" fn __floatdidf(i: i64) -> f64 { + let sign_bit = ((i >> 63) as u64) << 63; + f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floattisf(i: i128) -> f32 { + let sign_bit = ((i >> 127) as u32) << 31; + f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floattidf(i: i128) -> f64 { + let sign_bit = ((i >> 127) as u64) << 63; + f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit) + } +} + +// Conversions from floats to unsigned integers. +intrinsics! { + #[arm_aeabi_alias = __aeabi_f2uiz] + pub extern "C" fn __fixunssfsi(f: f32) -> u32 { + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 159 << 23 { // >= 1, < max + let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. + let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 255 << 23 { // >= max (incl. inf) + u32::MAX + } else { // Negative or NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_f2ulz] + pub extern "C" fn __fixunssfdi(f: f32) -> u64 { + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 191 << 23 { // >= 1, < max + let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. + let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 255 << 23 { // >= max (incl. inf) + u64::MAX + } else { // Negative or NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixunssfti(f: f32) -> u128 { + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 255 << 23 { // >= 1, < inf + let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. + let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits == 255 << 23 { // == inf + u128::MAX + } else { // Negative or NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_d2uiz] + pub extern "C" fn __fixunsdfsi(f: f64) -> u32 { + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1055 << 52 { // >= 1, < max + let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. + let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + u32::MAX + } else { // Negative or NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_d2ulz] + pub extern "C" fn __fixunsdfdi(f: f64) -> u64 { + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1087 << 52 { // >= 1, < max + let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. + let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + u64::MAX + } else { // Negative or NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixunsdfti(f: f64) -> u128 { + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1151 << 52 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. + let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + u128::MAX + } else { // Negative or NaN + 0 + } + } +} + +// Conversions from floats to signed integers. +intrinsics! { + #[arm_aeabi_alias = __aeabi_f2iz] + pub extern "C" fn __fixsfsi(f: f32) -> i32 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 158 << 23 { // >= 1, < max + let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. + let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i32; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. inf) + if f.is_sign_negative() { i32::MIN } else { i32::MAX } + } else { // NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_f2lz] + pub extern "C" fn __fixsfdi(f: f32) -> i64 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 190 << 23 { // >= 1, < max + let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. + let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i64; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. inf) + if f.is_sign_negative() { i64::MIN } else { i64::MAX } + } else { // NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixsfti(f: f32) -> i128 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 254 << 23 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. + let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i128; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. inf) + if f.is_sign_negative() { i128::MIN } else { i128::MAX } + } else { // NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_d2iz] + pub extern "C" fn __fixdfsi(f: f64) -> i32 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1054 << 52 { // >= 1, < max + let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. + let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i32; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + if f.is_sign_negative() { i32::MIN } else { i32::MAX } + } else { // NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_d2lz] + pub extern "C" fn __fixdfdi(f: f64) -> i64 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1086 << 52 { // >= 1, < max + let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. + let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i64; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + if f.is_sign_negative() { i64::MIN } else { i64::MAX } + } else { // NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixdfti(f: f64) -> i128 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1150 << 52 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. + let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i128; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + if f.is_sign_negative() { i128::MIN } else { i128::MAX } + } else { // NaN + 0 + } + } +} diff --git a/vendor/compiler_builtins/src/float/div.rs b/vendor/compiler_builtins/src/float/div.rs new file mode 100644 index 000000000..528a8368d --- /dev/null +++ b/vendor/compiler_builtins/src/float/div.rs @@ -0,0 +1,467 @@ +// The functions are complex with many branches, and explicit +// `return`s makes it clear where function exit points are +#![allow(clippy::needless_return)] + +use float::Float; +use int::{CastInto, DInt, HInt, Int}; + +fn div32<F: Float>(a: F, b: F) -> F +where + u32: CastInto<F::Int>, + F::Int: CastInto<u32>, + i32: CastInto<F::Int>, + F::Int: CastInto<i32>, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + // let bits = F::BITS; + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let exponent_bias = F::EXPONENT_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + + #[inline(always)] + fn negate_u32(a: u32) -> u32 { + (<i32>::wrapping_neg(a as i32)) as u32 + } + + let a_rep = a.repr(); + let b_rep = b.repr(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let quotient_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN / anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_rep | quiet_bit); + } + // anything / NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs == inf_rep { + // infinity / infinity = NaN + return F::from_repr(qnan_rep); + } else { + // infinity / anything else = +/- infinity + return F::from_repr(a_abs | quotient_sign); + } + } + + // anything else / infinity = +/- 0 + if b_abs == inf_rep { + return F::from_repr(quotient_sign); + } + + if a_abs == zero { + if b_abs == zero { + // zero / zero = NaN + return F::from_repr(qnan_rep); + } else { + // zero / anything else = +/- zero + return F::from_repr(quotient_sign); + } + } + + // anything else / zero = +/- infinity + if b_abs == zero { + return F::from_repr(inf_rep | quotient_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale -= exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + a_significand |= implicit_bit; + b_significand |= implicit_bit; + let mut quotient_exponent: i32 = CastInto::<i32>::cast(a_exponent) + .wrapping_sub(CastInto::<i32>::cast(b_exponent)) + .wrapping_add(scale); + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + let q31b = CastInto::<u32>::cast(b_significand << 8.cast()); + let mut reciprocal = (0x7504f333u32).wrapping_sub(q31b); + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + + let mut correction: u32 = + negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + + // Exhaustive testing shows that the error in reciprocal after three steps + // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our + // expectations. We bump the reciprocal by a tiny value to force the error + // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to + // be specific). This also causes 1/1 to give a sensible approximation + // instead of zero (due to overflow). + reciprocal = reciprocal.wrapping_sub(2); + + // The numerical reciprocal is accurate to within 2^-28, lies in the + // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller + // than the true reciprocal of b. Multiplying a by this reciprocal thus + // gives a numerical q = a/b in Q24 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) + // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes + // from the fact that we truncate the product, and the 2^27 term + // is the error in the reciprocal of b scaled by the maximum + // possible value of a. As a consequence of this error bound, + // either q or nextafter(q) is the correctly rounded + let mut quotient = (a_significand << 1).widen_mul(reciprocal.cast()).hi(); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + let residual = if quotient < (implicit_bit << 1) { + quotient_exponent = quotient_exponent.wrapping_sub(1); + (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand)) + } else { + quotient >>= 1; + (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand)) + }; + + let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32); + + if written_exponent >= max_exponent as i32 { + // If we have overflowed the exponent, return infinity. + return F::from_repr(inf_rep | quotient_sign); + } else if written_exponent < 1 { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return F::from_repr(quotient_sign); + } else { + let round = ((residual << 1) > b_significand) as u32; + // Clear the implicit bits + let mut abs_result = quotient & significand_mask; + // Insert the exponent + abs_result |= written_exponent.cast() << significand_bits; + // Round + abs_result = abs_result.wrapping_add(round.cast()); + // Insert the sign and return + return F::from_repr(abs_result | quotient_sign); + } +} + +fn div64<F: Float>(a: F, b: F) -> F +where + u32: CastInto<F::Int>, + F::Int: CastInto<u32>, + i32: CastInto<F::Int>, + F::Int: CastInto<i32>, + u64: CastInto<F::Int>, + F::Int: CastInto<u64>, + i64: CastInto<F::Int>, + F::Int: CastInto<i64>, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + // let bits = F::BITS; + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let exponent_bias = F::EXPONENT_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + // let exponent_bits = F::EXPONENT_BITS; + + #[inline(always)] + fn negate_u32(a: u32) -> u32 { + (<i32>::wrapping_neg(a as i32)) as u32 + } + + #[inline(always)] + fn negate_u64(a: u64) -> u64 { + (<i64>::wrapping_neg(a as i64)) as u64 + } + + let a_rep = a.repr(); + let b_rep = b.repr(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let quotient_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN / anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_rep | quiet_bit); + } + // anything / NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs == inf_rep { + // infinity / infinity = NaN + return F::from_repr(qnan_rep); + } else { + // infinity / anything else = +/- infinity + return F::from_repr(a_abs | quotient_sign); + } + } + + // anything else / infinity = +/- 0 + if b_abs == inf_rep { + return F::from_repr(quotient_sign); + } + + if a_abs == zero { + if b_abs == zero { + // zero / zero = NaN + return F::from_repr(qnan_rep); + } else { + // zero / anything else = +/- zero + return F::from_repr(quotient_sign); + } + } + + // anything else / zero = +/- infinity + if b_abs == zero { + return F::from_repr(inf_rep | quotient_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale -= exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + a_significand |= implicit_bit; + b_significand |= implicit_bit; + let mut quotient_exponent: i32 = CastInto::<i32>::cast(a_exponent) + .wrapping_sub(CastInto::<i32>::cast(b_exponent)) + .wrapping_add(scale); + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + let q31b = CastInto::<u32>::cast(b_significand >> 21.cast()); + let mut recip32 = (0x7504f333u32).wrapping_sub(q31b); + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + + let mut correction32: u32 = + negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + + // recip32 might have overflowed to exactly zero in the preceeding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip32 downward by one bit. + recip32 = recip32.wrapping_sub(1); + + // We need to perform one more iteration to get us to 56 binary digits; + // The last iteration needs to happen with extra precision. + let q63blo = CastInto::<u32>::cast(b_significand << 11.cast()); + + let correction: u64 = negate_u64( + (recip32 as u64) + .wrapping_mul(q31b as u64) + .wrapping_add((recip32 as u64).wrapping_mul(q63blo as u64) >> 32), + ); + let c_hi = (correction >> 32) as u32; + let c_lo = correction as u32; + let mut reciprocal: u64 = (recip32 as u64) + .wrapping_mul(c_hi as u64) + .wrapping_add((recip32 as u64).wrapping_mul(c_lo as u64) >> 32); + + // We already adjusted the 32-bit estimate, now we need to adjust the final + // 64-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. + reciprocal = reciprocal.wrapping_sub(2); + + // The numerical reciprocal is accurate to within 2^-56, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q53 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-53 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + // let mut quotient: F::Int = ((((reciprocal as u64) + // .wrapping_mul(CastInto::<u32>::cast(a_significand << 1) as u64)) + // >> 32) as u32) + // .cast(); + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + let mut quotient = (a_significand << 2).widen_mul(reciprocal.cast()).hi(); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + let residual = if quotient < (implicit_bit << 1) { + quotient_exponent = quotient_exponent.wrapping_sub(1); + (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand)) + } else { + quotient >>= 1; + (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand)) + }; + + let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32); + + if written_exponent >= max_exponent as i32 { + // If we have overflowed the exponent, return infinity. + return F::from_repr(inf_rep | quotient_sign); + } else if written_exponent < 1 { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return F::from_repr(quotient_sign); + } else { + let round = ((residual << 1) > b_significand) as u32; + // Clear the implicit bits + let mut abs_result = quotient & significand_mask; + // Insert the exponent + abs_result |= written_exponent.cast() << significand_bits; + // Round + abs_result = abs_result.wrapping_add(round.cast()); + // Insert the sign and return + return F::from_repr(abs_result | quotient_sign); + } +} + +intrinsics! { + #[arm_aeabi_alias = __aeabi_fdiv] + pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { + div32(a, b) + } + + #[arm_aeabi_alias = __aeabi_ddiv] + pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { + div64(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 { + a / b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 { + a / b + } +} diff --git a/vendor/compiler_builtins/src/float/extend.rs b/vendor/compiler_builtins/src/float/extend.rs new file mode 100644 index 000000000..39633773b --- /dev/null +++ b/vendor/compiler_builtins/src/float/extend.rs @@ -0,0 +1,83 @@ +use float::Float; +use int::{CastInto, Int}; + +/// Generic conversion from a narrower to a wider IEEE-754 floating-point type +fn extend<F: Float, R: Float>(a: F) -> R +where + F::Int: CastInto<u64>, + u64: CastInto<F::Int>, + u32: CastInto<R::Int>, + R::Int: CastInto<u32>, + R::Int: CastInto<u64>, + u64: CastInto<R::Int>, + F::Int: CastInto<R::Int>, +{ + let src_zero = F::Int::ZERO; + let src_one = F::Int::ONE; + let src_bits = F::BITS; + let src_sign_bits = F::SIGNIFICAND_BITS; + let src_exp_bias = F::EXPONENT_BIAS; + let src_min_normal = F::IMPLICIT_BIT; + let src_infinity = F::EXPONENT_MASK; + let src_sign_mask = F::SIGN_MASK as F::Int; + let src_abs_mask = src_sign_mask - src_one; + let src_qnan = F::SIGNIFICAND_MASK; + let src_nan_code = src_qnan - src_one; + + let dst_bits = R::BITS; + let dst_sign_bits = R::SIGNIFICAND_BITS; + let dst_inf_exp = R::EXPONENT_MAX; + let dst_exp_bias = R::EXPONENT_BIAS; + let dst_min_normal = R::IMPLICIT_BIT; + + let sign_bits_delta = dst_sign_bits - src_sign_bits; + let exp_bias_delta = dst_exp_bias - src_exp_bias; + let a_abs = a.repr() & src_abs_mask; + let mut abs_result = R::Int::ZERO; + + if a_abs.wrapping_sub(src_min_normal) < src_infinity.wrapping_sub(src_min_normal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + let abs_dst: R::Int = a_abs.cast(); + let bias_dst: R::Int = exp_bias_delta.cast(); + abs_result = abs_dst.wrapping_shl(sign_bits_delta); + abs_result += bias_dst.wrapping_shl(dst_sign_bits); + } else if a_abs >= src_infinity { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + let qnan_dst: R::Int = (a_abs & src_qnan).cast(); + let nan_code_dst: R::Int = (a_abs & src_nan_code).cast(); + let inf_exp_dst: R::Int = dst_inf_exp.cast(); + abs_result = inf_exp_dst.wrapping_shl(dst_sign_bits); + abs_result |= qnan_dst.wrapping_shl(sign_bits_delta); + abs_result |= nan_code_dst.wrapping_shl(sign_bits_delta); + } else if a_abs != src_zero { + // a is denormal. + // Renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + let scale = a_abs.leading_zeros() - src_min_normal.leading_zeros(); + let abs_dst: R::Int = a_abs.cast(); + let bias_dst: R::Int = (exp_bias_delta - scale + 1).cast(); + abs_result = abs_dst.wrapping_shl(sign_bits_delta + scale); + abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sign_bits)); + } + + let sign_result: R::Int = (a.repr() & src_sign_mask).cast(); + R::from_repr(abs_result | (sign_result.wrapping_shl(dst_bits - src_bits))) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_f2d] + pub extern "C" fn __extendsfdf2(a: f32) -> f64 { + extend(a) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __extendsfdf2vfp(a: f32) -> f64 { + a as f64 // LLVM generate 'fcvtds' + } +} diff --git a/vendor/compiler_builtins/src/float/mod.rs b/vendor/compiler_builtins/src/float/mod.rs new file mode 100644 index 000000000..01a5504d5 --- /dev/null +++ b/vendor/compiler_builtins/src/float/mod.rs @@ -0,0 +1,175 @@ +use core::ops; + +use super::int::Int; + +pub mod add; +pub mod cmp; +pub mod conv; +pub mod div; +pub mod extend; +pub mod mul; +pub mod pow; +pub mod sub; +pub mod trunc; + +public_test_dep! { +/// Trait for some basic operations on floats +pub(crate) trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add<Output = Self> + + ops::Sub<Output = Self> + + ops::Div<Output = Self> + + ops::Rem<Output = Self> +{ + /// A uint of the same with as the float + type Int: Int; + + /// A int of the same with as the float + type SignedInt: Int; + + /// An int capable of containing the exponent bits plus a sign bit. This is signed. + type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// The bitwidth of the exponent + const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + + /// The maximum value of the exponent + const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; + + /// The exponent bias value + const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIGNIFICAND_MASK: Self::Int; + + // The implicit bit of the float format + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent + const EXPONENT_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn repr(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn signed_repr(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. + fn eq_repr(self, rhs: Self) -> bool; + + /// Returns the sign bit + fn sign(self) -> bool; + + /// Returns the exponent with bias + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_repr(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. + fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; + const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + + fn repr(self) -> Self::Int { + self.to_bits() + } + fn signed_repr(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + if self.is_nan() && rhs.is_nan() { + true + } else { + self.repr() == rhs.repr() + } + } + fn sign(self) -> bool { + self.signed_repr() < Self::SignedInt::ZERO + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIGNIFICAND_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_repr(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_repr( + ((sign as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) + | (significand & Self::SIGNIFICAND_MASK), + ) + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand + .leading_zeros() + .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + fn is_subnormal(self) -> bool { + (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO + } + } + }; +} + +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); diff --git a/vendor/compiler_builtins/src/float/mul.rs b/vendor/compiler_builtins/src/float/mul.rs new file mode 100644 index 000000000..c89f22756 --- /dev/null +++ b/vendor/compiler_builtins/src/float/mul.rs @@ -0,0 +1,209 @@ +use float::Float; +use int::{CastInto, DInt, HInt, Int}; + +fn mul<F: Float>(a: F, b: F) -> F +where + u32: CastInto<F::Int>, + F::Int: CastInto<u32>, + i32: CastInto<F::Int>, + F::Int: CastInto<i32>, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + let bits = F::BITS; + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let exponent_bias = F::EXPONENT_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + let exponent_bits = F::EXPONENT_BITS; + + let a_rep = a.repr(); + let b_rep = b.repr(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let product_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN + anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_rep | quiet_bit); + } + // anything + NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs != zero { + // infinity * non-zero = +/- infinity + return F::from_repr(a_abs | product_sign); + } else { + // infinity * zero = NaN + return F::from_repr(qnan_rep); + } + } + + if b_abs == inf_rep { + if a_abs != zero { + // infinity * non-zero = +/- infinity + return F::from_repr(b_abs | product_sign); + } else { + // infinity * zero = NaN + return F::from_repr(qnan_rep); + } + } + + // zero * anything = +/- zero + if a_abs == zero { + return F::from_repr(product_sign); + } + + // anything * zero = +/- zero + if b_abs == zero { + return F::from_repr(product_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale += exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + a_significand |= implicit_bit; + b_significand |= implicit_bit; + + // Get the significand of a*b. Before multiplying the significands, shift + // one of them left to left-align it in the field. Thus, the product will + // have (exponentBits + 2) integral digits, all but two of which must be + // zero. Normalizing this result is just a conditional left-shift by one + // and bumping the exponent accordingly. + let (mut product_low, mut product_high) = a_significand + .widen_mul(b_significand << exponent_bits) + .lo_hi(); + + let a_exponent_i32: i32 = a_exponent.cast(); + let b_exponent_i32: i32 = b_exponent.cast(); + let mut product_exponent: i32 = a_exponent_i32 + .wrapping_add(b_exponent_i32) + .wrapping_add(scale) + .wrapping_sub(exponent_bias as i32); + + // Normalize the significand, adjust exponent if needed. + if (product_high & implicit_bit) != zero { + product_exponent = product_exponent.wrapping_add(1); + } else { + product_high = (product_high << 1) | (product_low >> (bits - 1)); + product_low <<= 1; + } + + // If we have overflowed the type, return +/- infinity. + if product_exponent >= max_exponent as i32 { + return F::from_repr(inf_rep | product_sign); + } + + if product_exponent <= 0 { + // Result is denormal before rounding + // + // If the result is so small that it just underflows to zero, return + // a zero of the appropriate sign. Mathematically there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + let shift = one.wrapping_sub(product_exponent.cast()).cast(); + if shift >= bits { + return F::from_repr(product_sign); + } + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of productLo. + if shift < bits { + let sticky = product_low << (bits - shift); + product_low = product_high << (bits - shift) | product_low >> shift | sticky; + product_high >>= shift; + } else if shift < (2 * bits) { + let sticky = product_high << (2 * bits - shift) | product_low; + product_low = product_high >> (shift - bits) | sticky; + product_high = zero; + } else { + product_high = zero; + } + } else { + // Result is normal before rounding; insert the exponent. + product_high &= significand_mask; + product_high |= product_exponent.cast() << significand_bits; + } + + // Insert the sign of the result: + product_high |= product_sign; + + // Final rounding. The final result may overflow to infinity, or underflow + // to zero, but those are the correct results in those cases. We use the + // default IEEE-754 round-to-nearest, ties-to-even rounding mode. + if product_low > sign_bit { + product_high += one; + } + + if product_low == sign_bit { + product_high += product_high & one; + } + + F::from_repr(product_high) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_fmul] + pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { + mul(a, b) + } + + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_dmul] + pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 { + mul(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 { + a * b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __muldf3vfp(a: f64, b: f64) -> f64 { + a * b + } +} diff --git a/vendor/compiler_builtins/src/float/pow.rs b/vendor/compiler_builtins/src/float/pow.rs new file mode 100644 index 000000000..a75340c30 --- /dev/null +++ b/vendor/compiler_builtins/src/float/pow.rs @@ -0,0 +1,36 @@ +use float::Float; +use int::Int; + +/// Returns `a` raised to the power `b` +fn pow<F: Float>(a: F, b: i32) -> F { + let mut a = a; + let recip = b < 0; + let mut pow = Int::abs_diff(b, 0); + let mut mul = F::ONE; + loop { + if (pow & 1) != 0 { + mul *= a; + } + pow >>= 1; + if pow == 0 { + break; + } + a *= a; + } + + if recip { + F::ONE / mul + } else { + mul + } +} + +intrinsics! { + pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 { + pow(a, b) + } + + pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 { + pow(a, b) + } +} diff --git a/vendor/compiler_builtins/src/float/sub.rs b/vendor/compiler_builtins/src/float/sub.rs new file mode 100644 index 000000000..8d300e9d2 --- /dev/null +++ b/vendor/compiler_builtins/src/float/sub.rs @@ -0,0 +1,25 @@ +use float::add::__adddf3; +use float::add::__addsf3; +use float::Float; + +intrinsics! { + #[arm_aeabi_alias = __aeabi_fsub] + pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { + __addsf3(a, f32::from_repr(b.repr() ^ f32::SIGN_MASK)) + } + + #[arm_aeabi_alias = __aeabi_dsub] + pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 { + __adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __subsf3vfp(a: f32, b: f32) -> f32 { + a - b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __subdf3vfp(a: f64, b: f64) -> f64 { + a - b + } +} diff --git a/vendor/compiler_builtins/src/float/trunc.rs b/vendor/compiler_builtins/src/float/trunc.rs new file mode 100644 index 000000000..d73713084 --- /dev/null +++ b/vendor/compiler_builtins/src/float/trunc.rs @@ -0,0 +1,125 @@ +use float::Float; +use int::{CastInto, Int}; + +fn trunc<F: Float, R: Float>(a: F) -> R +where + F::Int: CastInto<u64>, + F::Int: CastInto<u32>, + u64: CastInto<F::Int>, + u32: CastInto<F::Int>, + + R::Int: CastInto<u32>, + u32: CastInto<R::Int>, + F::Int: CastInto<R::Int>, +{ + let src_zero = F::Int::ZERO; + let src_one = F::Int::ONE; + let src_bits = F::BITS; + let src_exp_bias = F::EXPONENT_BIAS; + + let src_min_normal = F::IMPLICIT_BIT; + let src_significand_mask = F::SIGNIFICAND_MASK; + let src_infinity = F::EXPONENT_MASK; + let src_sign_mask = F::SIGN_MASK; + let src_abs_mask = src_sign_mask - src_one; + let round_mask = (src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)) - src_one; + let halfway = src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS - 1); + let src_qnan = src_one << (F::SIGNIFICAND_BITS - 1); + let src_nan_code = src_qnan - src_one; + + let dst_zero = R::Int::ZERO; + let dst_one = R::Int::ONE; + let dst_bits = R::BITS; + let dst_inf_exp = R::EXPONENT_MAX; + let dst_exp_bias = R::EXPONENT_BIAS; + + let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast(); + let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast(); + let underflow: F::Int = underflow_exponent << F::SIGNIFICAND_BITS; + let overflow: F::Int = overflow_exponent << F::SIGNIFICAND_BITS; + + let dst_qnan = R::Int::ONE << (R::SIGNIFICAND_BITS - 1); + let dst_nan_code = dst_qnan - dst_one; + + let sign_bits_delta = F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS; + // Break a into a sign and representation of the absolute value. + let a_abs = a.repr() & src_abs_mask; + let sign = a.repr() & src_sign_mask; + let mut abs_result: R::Int; + + if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + abs_result = (a_abs >> sign_bits_delta).cast(); + let tmp = src_exp_bias.wrapping_sub(dst_exp_bias) << R::SIGNIFICAND_BITS; + abs_result = abs_result.wrapping_sub(tmp.cast()); + + let round_bits = a_abs & round_mask; + if round_bits > halfway { + // Round to nearest. + abs_result += dst_one; + } else if round_bits == halfway { + // Tie to even. + abs_result += abs_result & dst_one; + }; + } else if a_abs > src_infinity { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); + abs_result |= dst_qnan; + abs_result |= dst_nan_code + & ((a_abs & src_nan_code) >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); + } else if a_abs >= overflow { + // a overflows to infinity. + abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + let a_exp: u32 = (a_abs >> F::SIGNIFICAND_BITS).cast(); + let shift = src_exp_bias - dst_exp_bias - a_exp + 1; + + let significand = (a.repr() & src_significand_mask) | src_min_normal; + + // Right shift by the denormalization amount with sticky. + if shift > F::SIGNIFICAND_BITS { + abs_result = dst_zero; + } else { + let sticky = if (significand << (src_bits - shift)) != src_zero { + src_one + } else { + src_zero + }; + let denormalized_significand: F::Int = significand >> shift | sticky; + abs_result = + (denormalized_significand >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); + let round_bits = denormalized_significand & round_mask; + // Round to nearest + if round_bits > halfway { + abs_result += dst_one; + } + // Ties to even + else if round_bits == halfway { + abs_result += abs_result & dst_one; + }; + } + } + + // Apply the signbit to the absolute value. + R::from_repr(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast()) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_d2f] + pub extern "C" fn __truncdfsf2(a: f64) -> f32 { + trunc(a) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 { + a as f32 + } +} diff --git a/vendor/compiler_builtins/src/int/addsub.rs b/vendor/compiler_builtins/src/int/addsub.rs new file mode 100644 index 000000000..f4841e90f --- /dev/null +++ b/vendor/compiler_builtins/src/int/addsub.rs @@ -0,0 +1,96 @@ +use int::{DInt, Int}; + +trait UAddSub: DInt { + fn uadd(self, other: Self) -> Self { + let (lo, carry) = self.lo().overflowing_add(other.lo()); + let hi = self.hi().wrapping_add(other.hi()); + let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; + Self::from_lo_hi(lo, hi.wrapping_add(carry)) + } + fn uadd_one(self) -> Self { + let (lo, carry) = self.lo().overflowing_add(Self::H::ONE); + let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; + Self::from_lo_hi(lo, self.hi().wrapping_add(carry)) + } + fn usub(self, other: Self) -> Self { + let uneg = (!other).uadd_one(); + self.uadd(uneg) + } +} + +impl UAddSub for u128 {} + +trait AddSub: Int +where + <Self as Int>::UnsignedInt: UAddSub, +{ + fn add(self, other: Self) -> Self { + Self::from_unsigned(self.unsigned().uadd(other.unsigned())) + } + fn sub(self, other: Self) -> Self { + Self::from_unsigned(self.unsigned().usub(other.unsigned())) + } +} + +impl AddSub for u128 {} +impl AddSub for i128 {} + +trait Addo: AddSub +where + <Self as Int>::UnsignedInt: UAddSub, +{ + fn addo(self, other: Self) -> (Self, bool) { + let sum = AddSub::add(self, other); + (sum, (other < Self::ZERO) != (sum < self)) + } +} + +impl Addo for i128 {} +impl Addo for u128 {} + +trait Subo: AddSub +where + <Self as Int>::UnsignedInt: UAddSub, +{ + fn subo(self, other: Self) -> (Self, bool) { + let sum = AddSub::sub(self, other); + (sum, (other < Self::ZERO) != (self < sum)) + } +} + +impl Subo for i128 {} +impl Subo for u128 {} + +intrinsics! { + pub extern "C" fn __rust_i128_add(a: i128, b: i128) -> i128 { + AddSub::add(a,b) + } + + pub extern "C" fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool) { + a.addo(b) + } + + pub extern "C" fn __rust_u128_add(a: u128, b: u128) -> u128 { + AddSub::add(a,b) + } + + pub extern "C" fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool) { + a.addo(b) + } + + pub extern "C" fn __rust_i128_sub(a: i128, b: i128) -> i128 { + AddSub::sub(a,b) + } + + pub extern "C" fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool) { + a.subo(b) + } + + pub extern "C" fn __rust_u128_sub(a: u128, b: u128) -> u128 { + AddSub::sub(a,b) + } + + pub extern "C" fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool) { + a.subo(b) + } +} diff --git a/vendor/compiler_builtins/src/int/leading_zeros.rs b/vendor/compiler_builtins/src/int/leading_zeros.rs new file mode 100644 index 000000000..9e60ab0d7 --- /dev/null +++ b/vendor/compiler_builtins/src/int/leading_zeros.rs @@ -0,0 +1,149 @@ +// Note: these functions happen to produce the correct `usize::leading_zeros(0)` value +// without a explicit zero check. Zero is probably common enough that it could warrant +// adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. +// Compilers will insert the check for zero in cases where it is needed. + +public_test_dep! { +/// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. + let mut x = x; + // the number of potential leading zeros + let mut z = usize::MAX.count_ones() as usize; + // a temporary + let mut t: usize; + #[cfg(target_pointer_width = "64")] + { + t = x >> 32; + if t != 0 { + z -= 32; + x = t; + } + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + t = x >> 16; + if t != 0 { + z -= 16; + x = t; + } + } + t = x >> 8; + if t != 0 { + z -= 8; + x = t; + } + t = x >> 4; + if t != 0 { + z -= 4; + x = t; + } + t = x >> 2; + if t != 0 { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != 0 { + z - 2 + } else { + z - x + } + + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". + // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. +} +} + +// The above method does not compile well on RISC-V (because of the lack of predicated +// instructions), producing code with many branches or using an excessively long +// branchless solution. This method takes advantage of the set-if-less-than instruction on +// RISC-V that allows `(x >= power-of-two) as usize` to be branchless. + +public_test_dep! { +/// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = usize::MAX.count_ones() as usize; + // a temporary + let mut t: usize; + + // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. + #[cfg(target_pointer_width = "64")] + { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (1 << 32)) as usize) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. + x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros + z -= t; + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + t = ((x >= (1 << 16)) as usize) << 4; + x >>= t; + z -= t; + } + t = ((x >= (1 << 8)) as usize) << 3; + x >>= t; + z -= t; + t = ((x >= (1 << 4)) as usize) << 2; + x >>= t; + z -= t; + t = ((x >= (1 << 2)) as usize) << 1; + x >>= t; + z -= t; + t = (x >= (1 << 1)) as usize; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. + z - x +} +} + +intrinsics! { + #[maybe_use_optimized_c_shim] + #[cfg(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64" + ))] + /// Returns the number of leading binary zeros in `x`. + pub extern "C" fn __clzsi2(x: usize) -> usize { + if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + usize_leading_zeros_riscv(x) + } else { + usize_leading_zeros_default(x) + } + } +} diff --git a/vendor/compiler_builtins/src/int/mod.rs b/vendor/compiler_builtins/src/int/mod.rs new file mode 100644 index 000000000..509f9fdae --- /dev/null +++ b/vendor/compiler_builtins/src/int/mod.rs @@ -0,0 +1,390 @@ +use core::ops; + +mod specialized_div_rem; + +pub mod addsub; +pub mod leading_zeros; +pub mod mul; +pub mod sdiv; +pub mod shift; +pub mod udiv; + +pub use self::leading_zeros::__clzsi2; + +public_test_dep! { +/// Trait for some basic operations on integers +pub(crate) trait Int: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign<i32> + + ops::ShrAssign<u32> + + ops::Add<Output = Self> + + ops::Sub<Output = Self> + + ops::Div<Output = Self> + + ops::Shl<u32, Output = Self> + + ops::Shr<u32, Output = Self> + + ops::BitOr<Output = Self> + + ops::BitXor<Output = Self> + + ops::BitAnd<Output = Self> + + ops::Not<Output = Self> +{ + /// Type with the same width but other signedness + type OtherSign: Int; + /// Unsigned version of Self + type UnsignedInt: Int; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; + + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, + /// 112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20]; + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. + const FUZZ_NUM: usize; + + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; +} +} + +macro_rules! int_impl_common { + ($ty:ty) => { + const BITS: u32 = <Self as Int>::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = <Self>::MIN; + const MAX: Self = <Self>::MAX; + + const FUZZ_LENGTHS: [u8; 20] = { + let bits = <Self as Int>::BITS; + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. + let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. + let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v + }; + + const FUZZ_NUM: usize = { + let log2 = (<Self as Int>::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. + 8 + (4 * (log2 - 4)) + } + }; + + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + <Self>::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + <Self>::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + <Self>::wrapping_mul(self, other) + } + + fn wrapping_sub(self, other: Self) -> Self { + <Self>::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + <Self>::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + <Self>::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + <Self>::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + <Self>::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + <Self>::leading_zeros(self) + } + }; +} + +macro_rules! int_impl { + ($ity:ty, $uty:ty) => { + impl Int for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn abs_diff(self, other: Self) -> Self { + if self < other { + other.wrapping_sub(self) + } else { + self.wrapping_sub(other) + } + } + + int_impl_common!($uty); + } + + impl Int for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn abs_diff(self, other: Self) -> $uty { + self.wrapping_sub(other).wrapping_abs() as $uty + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +public_test_dep! { +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. +pub(crate) trait DInt: Int { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt<D = Self> + Int; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H); + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self; +} +} + +public_test_dep! { +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub(crate) trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt<H = Self> + Int; + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int<Othersign: DInt>`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as Int>::BITS) as $X + } + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } + } + )* + }; +} + +macro_rules! impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn widen_hi(self) -> Self::D { + (self as $X) << <$H as Int>::BITS + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +public_test_dep! { +/// Trait to express (possibly lossy) casting of integers +pub(crate) trait CastInto<T: Copy>: Copy { + fn cast(self) -> T; +} +} + +macro_rules! cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/vendor/compiler_builtins/src/int/mul.rs b/vendor/compiler_builtins/src/int/mul.rs new file mode 100644 index 000000000..07ce061c9 --- /dev/null +++ b/vendor/compiler_builtins/src/int/mul.rs @@ -0,0 +1,138 @@ +use int::{DInt, HInt, Int}; + +trait Mul: DInt +where + Self::H: DInt, +{ + fn mul(self, rhs: Self) -> Self { + // In order to prevent infinite recursion, we cannot use the `widen_mul` in this: + //self.lo().widen_mul(rhs.lo()) + // .wrapping_add(self.lo().wrapping_mul(rhs.hi()).widen_hi()) + // .wrapping_add(self.hi().wrapping_mul(rhs.lo()).widen_hi()) + + let lhs_lo = self.lo(); + let rhs_lo = rhs.lo(); + // construct the widening multiplication using only `Self::H` sized multiplications + let tmp_0 = lhs_lo.lo().zero_widen_mul(rhs_lo.lo()); + let tmp_1 = lhs_lo.lo().zero_widen_mul(rhs_lo.hi()); + let tmp_2 = lhs_lo.hi().zero_widen_mul(rhs_lo.lo()); + let tmp_3 = lhs_lo.hi().zero_widen_mul(rhs_lo.hi()); + // sum up all widening partials + let mul = Self::from_lo_hi(tmp_0, tmp_3) + .wrapping_add(tmp_1.zero_widen() << (Self::BITS / 4)) + .wrapping_add(tmp_2.zero_widen() << (Self::BITS / 4)); + // add the higher partials + mul.wrapping_add(lhs_lo.wrapping_mul(rhs.hi()).widen_hi()) + .wrapping_add(self.hi().wrapping_mul(rhs_lo).widen_hi()) + } +} + +impl Mul for u64 {} +impl Mul for i128 {} + +pub(crate) trait UMulo: Int + DInt { + fn mulo(self, rhs: Self) -> (Self, bool) { + match (self.hi().is_zero(), rhs.hi().is_zero()) { + // overflow is guaranteed + (false, false) => (self.wrapping_mul(rhs), true), + (true, false) => { + let mul_lo = self.lo().widen_mul(rhs.lo()); + let mul_hi = self.lo().widen_mul(rhs.hi()); + let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); + (mul, o || !mul_hi.hi().is_zero()) + } + (false, true) => { + let mul_lo = rhs.lo().widen_mul(self.lo()); + let mul_hi = rhs.lo().widen_mul(self.hi()); + let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); + (mul, o || !mul_hi.hi().is_zero()) + } + // overflow is guaranteed to not happen, and use a smaller widening multiplication + (true, true) => (self.lo().widen_mul(rhs.lo()), false), + } + } +} + +impl UMulo for u32 {} +impl UMulo for u64 {} +impl UMulo for u128 {} + +macro_rules! impl_signed_mulo { + ($fn:ident, $iD:ident, $uD:ident) => { + fn $fn(lhs: $iD, rhs: $iD) -> ($iD, bool) { + let mut lhs = lhs; + let mut rhs = rhs; + // the test against `mul_neg` below fails without this early return + if lhs == 0 || rhs == 0 { + return (0, false); + } + + let lhs_neg = lhs < 0; + let rhs_neg = rhs < 0; + if lhs_neg { + lhs = lhs.wrapping_neg(); + } + if rhs_neg { + rhs = rhs.wrapping_neg(); + } + let mul_neg = lhs_neg != rhs_neg; + + let (mul, o) = (lhs as $uD).mulo(rhs as $uD); + let mut mul = mul as $iD; + + if mul_neg { + mul = mul.wrapping_neg(); + } + if (mul < 0) != mul_neg { + // this one check happens to catch all edge cases related to `$iD::MIN` + (mul, true) + } else { + (mul, o) + } + } + }; +} + +impl_signed_mulo!(i32_overflowing_mul, i32, u32); +impl_signed_mulo!(i64_overflowing_mul, i64, u64); +impl_signed_mulo!(i128_overflowing_mul, i128, u128); + +intrinsics! { + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_lmul] + #[cfg(any(not(any(target_arch = "riscv32", target_arch = "riscv64")), target_feature = "m"))] + pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { + a.mul(b) + } + + pub extern "C" fn __multi3(a: i128, b: i128) -> i128 { + a.mul(b) + } + + pub extern "C" fn __mulosi4(a: i32, b: i32, oflow: &mut i32) -> i32 { + let (mul, o) = i32_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + pub extern "C" fn __mulodi4(a: i64, b: i64, oflow: &mut i32) -> i64 { + let (mul, o) = i64_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + #[unadjusted_on_win64] + pub extern "C" fn __muloti4(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (mul, o) = i128_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + pub extern "C" fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool) { + i128_overflowing_mul(a, b) + } + + pub extern "C" fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool) { + a.mulo(b) + } +} diff --git a/vendor/compiler_builtins/src/int/sdiv.rs b/vendor/compiler_builtins/src/int/sdiv.rs new file mode 100644 index 000000000..f1822f0f8 --- /dev/null +++ b/vendor/compiler_builtins/src/int/sdiv.rs @@ -0,0 +1,169 @@ +use int::udiv::*; + +macro_rules! sdivmod { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + #[avr_skip] + $( + #[$attr] + )* + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let mut r = *rem as $uX; + let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX; + let mut r = r as $iX; + if a_neg { + r = r.wrapping_neg(); + } + *rem = r; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } + } + } +} + +macro_rules! sdiv { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + #[avr_skip] + $( + #[$attr] + )* + /// Returns `n / d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let t = $unsigned_fn(a as $uX, b as $uX) as $iX; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } + } + } +} + +macro_rules! smod { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + #[avr_skip] + $( + #[$attr] + )* + /// Returns `n % d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let r = $unsigned_fn(a as $uX, b as $uX) as $iX; + if a_neg { + r.wrapping_neg() + } else { + r + } + } + } + } +} + +sdivmod!( + __udivmodsi4, + __divmodsi4, + u32, + i32, + maybe_use_optimized_c_shim +); +// The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros +intrinsics! { + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_idiv] + /// Returns `n / d` + pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let t = __udivsi3(a as u32, b as u32) as i32; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } +} +smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim); + +sdivmod!( + __udivmoddi4, + __divmoddi4, + u64, + i64, + maybe_use_optimized_c_shim +); +sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim); +smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim); + +// LLVM does not currently have a `__divmodti4` function, but GCC does +sdivmod!( + __udivmodti4, + __divmodti4, + u128, + i128, + maybe_use_optimized_c_shim +); +sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack); +smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack); diff --git a/vendor/compiler_builtins/src/int/shift.rs b/vendor/compiler_builtins/src/int/shift.rs new file mode 100644 index 000000000..908e619e1 --- /dev/null +++ b/vendor/compiler_builtins/src/int/shift.rs @@ -0,0 +1,116 @@ +use int::{DInt, HInt, Int}; + +trait Ashl: DInt { + /// Returns `a << b`, requires `b < Self::BITS` + fn ashl(self, shl: u32) -> Self { + let n_h = Self::H::BITS; + if shl & n_h != 0 { + // we only need `self.lo()` because `self.hi()` will be shifted out entirely + self.lo().wrapping_shl(shl - n_h).widen_hi() + } else if shl == 0 { + self + } else { + Self::from_lo_hi( + self.lo().wrapping_shl(shl), + self.lo().logical_shr(n_h - shl) | self.hi().wrapping_shl(shl), + ) + } + } +} + +impl Ashl for u32 {} +impl Ashl for u64 {} +impl Ashl for u128 {} + +trait Ashr: DInt { + /// Returns arithmetic `a >> b`, requires `b < Self::BITS` + fn ashr(self, shr: u32) -> Self { + let n_h = Self::H::BITS; + if shr & n_h != 0 { + Self::from_lo_hi( + self.hi().wrapping_shr(shr - n_h), + // smear the sign bit + self.hi().wrapping_shr(n_h - 1), + ) + } else if shr == 0 { + self + } else { + Self::from_lo_hi( + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.hi().wrapping_shr(shr), + ) + } + } +} + +impl Ashr for i32 {} +impl Ashr for i64 {} +impl Ashr for i128 {} + +trait Lshr: DInt { + /// Returns logical `a >> b`, requires `b < Self::BITS` + fn lshr(self, shr: u32) -> Self { + let n_h = Self::H::BITS; + if shr & n_h != 0 { + self.hi().logical_shr(shr - n_h).zero_widen() + } else if shr == 0 { + self + } else { + Self::from_lo_hi( + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.hi().logical_shr(shr), + ) + } + } +} + +impl Lshr for u32 {} +impl Lshr for u64 {} +impl Lshr for u128 {} + +intrinsics! { + #[maybe_use_optimized_c_shim] + pub extern "C" fn __ashlsi3(a: u32, b: u32) -> u32 { + a.ashl(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_llsl] + pub extern "C" fn __ashldi3(a: u64, b: u32) -> u64 { + a.ashl(b) + } + + pub extern "C" fn __ashlti3(a: u128, b: u32) -> u128 { + a.ashl(b) + } + + #[maybe_use_optimized_c_shim] + pub extern "C" fn __ashrsi3(a: i32, b: u32) -> i32 { + a.ashr(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_lasr] + pub extern "C" fn __ashrdi3(a: i64, b: u32) -> i64 { + a.ashr(b) + } + + pub extern "C" fn __ashrti3(a: i128, b: u32) -> i128 { + a.ashr(b) + } + + #[maybe_use_optimized_c_shim] + pub extern "C" fn __lshrsi3(a: u32, b: u32) -> u32 { + a.lshr(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_llsr] + pub extern "C" fn __lshrdi3(a: u64, b: u32) -> u64 { + a.lshr(b) + } + + pub extern "C" fn __lshrti3(a: u128, b: u32) -> u128 { + a.lshr(b) + } +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/asymmetric.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/asymmetric.rs new file mode 100644 index 000000000..56ce188a3 --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/asymmetric.rs @@ -0,0 +1,69 @@ +/// Creates an unsigned division function optimized for dividing integers with the same +/// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an +/// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits +/// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to +/// construct a full 128 bit by 128 bit division. +#[allow(unused_macros)] +macro_rules! impl_asymmetric { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_division:ident, // function for division of a $uX by a $uX + $asymmetric_division:ident, // function for division of a $uD by a $uX + $n_h:expr, // the number of bits in a $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD + $uD:ident // unsigned integer type for the inputs and outputs of `$fn` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + let n: u32 = $n_h * 2; + + let duo_lo = duo as $uX; + let duo_hi = (duo >> n) as $uX; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + if div_hi == 0 { + if div_lo == 0 { + $zero_div_fn() + } + if duo_hi < div_lo { + // `$uD` by `$uX` division with a quotient that will fit into a `$uX` + let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) }; + return (quo as $uD, rem as $uD); + } else { + // Short division using the $uD by $uX division + let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo); + let tmp = unsafe { + $asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo) + }; + return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD); + } + } + + // This has been adapted from + // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn + // adapted from Hacker's Delight. This is similar to the two possibility algorithm + // in that it uses only more significant parts of `duo` and `div` to divide a large + // integer with a smaller division instruction. + let div_lz = div_hi.leading_zeros(); + let div_extra = n - div_lz; + let div_sig_n = (div >> div_extra) as $uX; + let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) }; + + let mut quo = tmp.0 >> ((n - 1) - div_lz); + if quo != 0 { + quo -= 1; + } + + // Note that this is a full `$uD` multiplication being used here + let mut rem = duo - (quo as $uD).wrapping_mul(div); + if div <= rem { + quo += 1; + rem -= div; + } + return (quo as $uD, rem); + } + }; +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/binary_long.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/binary_long.rs new file mode 100644 index 000000000..0d7822882 --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/binary_long.rs @@ -0,0 +1,548 @@ +/// Creates an unsigned division function that uses binary long division, designed for +/// computer architectures without division instructions. These functions have good performance for +/// microarchitectures with large branch miss penalties and architectures without the ability to +/// predicate instructions. For architectures with predicated instructions, one of the algorithms +/// described in the documentation of these functions probably has higher performance, and a custom +/// assembly routine should be used instead. +#[allow(unused_macros)] +macro_rules! impl_binary_long { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $normalization_shift:ident, // function for finding the normalization shift + $n:tt, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer type for the inputs and outputs of `$fn` + $iX:ident // signed integer type with same bitwidth as `$uX` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) { + let mut duo = duo; + // handle edge cases before calling `$normalization_shift` + if div == 0 { + $zero_div_fn() + } + if duo < div { + return (0, duo); + } + + // There are many variations of binary division algorithm that could be used. This + // documentation gives a tour of different methods so that future readers wanting to + // optimize further do not have to painstakingly derive them. The SWAR variation is + // especially hard to understand without reading the less convoluted methods first. + + // You may notice that a `duo < div_original` check is included in many these + // algorithms. A critical optimization that many algorithms miss is handling of + // quotients that will turn out to have many trailing zeros or many leading zeros. This + // happens in cases of exact or close-to-exact divisions, divisions by power of two, and + // in cases where the quotient is small. The `duo < div_original` check handles these + // cases of early returns and ends up replacing other kinds of mundane checks that + // normally terminate a binary division algorithm. + // + // Something you may see in other algorithms that is not special-cased here is checks + // for division by powers of two. The `duo < div_original` check handles this case and + // more, however it can be checked up front before the bisection using the + // `((div > 0) && ((div & (div - 1)) == 0))` trick. This is not special-cased because + // compilers should handle most cases where divisions by power of two occur, and we do + // not want to add on a few cycles for every division operation just to save a few + // cycles rarely. + + // The following example is the most straightforward translation from the way binary + // long division is typically visualized: + // Dividing 178u8 (0b10110010) by 6u8 (0b110). `div` is shifted left by 5, according to + // the result from `$normalization_shift(duo, div, false)`. + // + // Step 0: `sub` is negative, so there is not full normalization, so no `quo` bit is set + // and `duo` is kept unchanged. + // duo:10110010, div_shifted:11000000, sub:11110010, quo:00000000, shl:5 + // + // Step 1: `sub` is positive, set a `quo` bit and update `duo` for next step. + // duo:10110010, div_shifted:01100000, sub:01010010, quo:00010000, shl:4 + // + // Step 2: Continue based on `sub`. The `quo` bits start accumulating. + // duo:01010010, div_shifted:00110000, sub:00100010, quo:00011000, shl:3 + // duo:00100010, div_shifted:00011000, sub:00001010, quo:00011100, shl:2 + // duo:00001010, div_shifted:00001100, sub:11111110, quo:00011100, shl:1 + // duo:00001010, div_shifted:00000110, sub:00000100, quo:00011100, shl:0 + // The `duo < div_original` check terminates the algorithm with the correct quotient of + // 29u8 and remainder of 4u8 + /* + let div_original = div; + let mut shl = $normalization_shift(duo, div, false); + let mut quo = 0; + loop { + let div_shifted = div << shl; + let sub = duo.wrapping_sub(div_shifted); + // it is recommended to use `println!`s like this if functionality is unclear + /* + println!("duo:{:08b}, div_shifted:{:08b}, sub:{:08b}, quo:{:08b}, shl:{}", + duo, + div_shifted, + sub, + quo, + shl + ); + */ + if 0 <= (sub as $iX) { + duo = sub; + quo += 1 << shl; + if duo < div_original { + // this branch is optional + return (quo, duo) + } + } + if shl == 0 { + return (quo, duo) + } + shl -= 1; + } + */ + + // This restoring binary long division algorithm reduces the number of operations + // overall via: + // - `pow` can be shifted right instead of recalculating from `shl` + // - starting `div` shifted left and shifting it right for each step instead of + // recalculating from `shl` + // - The `duo < div_original` branch is used to terminate the algorithm instead of the + // `shl == 0` branch. This check is strong enough to prevent set bits of `pow` and + // `div` from being shifted off the end. This check also only occurs on half of steps + // on average, since it is behind the `(sub as $iX) >= 0` branch. + // - `shl` is now not needed by any aspect of of the loop and thus only 3 variables are + // being updated between steps + // + // There are many variations of this algorithm, but this encompases the largest number + // of architectures and does not rely on carry flags, add-with-carry, or SWAR + // complications to be decently fast. + /* + let div_original = div; + let shl = $normalization_shift(duo, div, false); + let mut div: $uX = div << shl; + let mut pow: $uX = 1 << shl; + let mut quo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iX) { + duo = sub; + quo |= pow; + if duo < div_original { + return (quo, duo) + } + } + div >>= 1; + pow >>= 1; + } + */ + + // If the architecture has flags and predicated arithmetic instructions, it is possible + // to do binary long division without branching and in only 3 or 4 instructions. This is + // a variation of a 3 instruction central loop from + // http://www.chiark.greenend.org.uk/~theom/riscos/docs/ultimate/a252div.txt. + // + // What allows doing division in only 3 instructions is realizing that instead of + // keeping `duo` in place and shifting `div` right to align bits, `div` can be kept in + // place and `duo` can be shifted left. This means `div` does not have to be updated, + // but causes edge case problems and makes `duo < div_original` tests harder. Some + // architectures have an option to shift an argument in an arithmetic operation, which + // means `duo` can be shifted left and subtracted from in one instruction. The other two + // instructions are updating `quo` and undoing the subtraction if it turns out things + // were not normalized. + + /* + // Perform one binary long division step on the already normalized arguments, because + // the main. Note that this does a full normalization since the central loop needs + // `duo.leading_zeros()` to be at least 1 more than `div.leading_zeros()`. The original + // variation only did normalization to the nearest 4 steps, but this makes handling edge + // cases much harder. We do a full normalization and perform a binary long division + // step. In the edge case where the msbs of `duo` and `div` are set, it clears the msb + // of `duo`, then the edge case handler shifts `div` right and does another long + // division step to always insure `duo.leading_zeros() + 1 >= div.leading_zeros()`. + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + let mut quo: $uX = 1; + duo = duo.wrapping_sub(div); + if duo < div_original { + return (1 << shl, duo); + } + let div_neg: $uX; + if (div as $iX) < 0 { + // A very ugly edge case where the most significant bit of `div` is set (after + // shifting to match `duo` when its most significant bit is at the sign bit), which + // leads to the sign bit of `div_neg` being cut off and carries not happening when + // they should. This branch performs a long division step that keeps `duo` in place + // and shifts `div` down. + div >>= 1; + div_neg = div.wrapping_neg(); + let (sub, carry) = duo.overflowing_add(div_neg); + duo = sub; + quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); + if !carry { + duo = duo.wrapping_add(div); + } + shl -= 1; + } else { + div_neg = div.wrapping_neg(); + } + // The add-with-carry that updates `quo` needs to have the carry set when a normalized + // subtract happens. Using `duo.wrapping_shl(1).overflowing_sub(div)` to do the + // subtraction generates a carry when an unnormalized subtract happens, which is the + // opposite of what we want. Instead, we use + // `duo.wrapping_shl(1).overflowing_add(div_neg)`, where `div_neg` is negative `div`. + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // `ADDS duo, div, duo, LSL #1` + // (add `div` to `duo << 1` and set flags) + let (sub, carry) = duo.wrapping_shl(1).overflowing_add(div_neg); + duo = sub; + // `ADC quo, quo, quo` + // (add with carry). Effectively shifts `quo` left by 1 and sets the least + // significant bit to the carry. + quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); + // `ADDCC duo, duo, div` + // (add if carry clear). Undoes the subtraction if no carry was generated. + if !carry { + duo = duo.wrapping_add(div); + } + } + return (quo, duo >> shl); + */ + + // This is the SWAR (SIMD within in a register) restoring division algorithm. + // This combines several ideas of the above algorithms: + // - If `duo` is shifted left instead of shifting `div` right like in the 3 instruction + // restoring division algorithm, some architectures can do the shifting and + // subtraction step in one instruction. + // - `quo` can be constructed by adding powers-of-two to it or shifting it left by one + // and adding one. + // - Every time `duo` is shifted left, there is another unused 0 bit shifted into the + // LSB, so what if we use those bits to store `quo`? + // Through a complex setup, it is possible to manage `duo` and `quo` in the same + // register, and perform one step with 2 or 3 instructions. The only major downsides are + // that there is significant setup (it is only saves instructions if `shl` is + // approximately more than 4), `duo < div_original` checks are impractical once SWAR is + // initiated, and the number of division steps taken has to be exact (we cannot do more + // division steps than `shl`, because it introduces edge cases where quotient bits in + // `duo` start to collide with the real part of `div`. + /* + // first step. The quotient bit is stored in `quo` for now + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + + let mask: $uX; + if (div as $iX) < 0 { + // deal with same edge case as the 3 instruction restoring division algorithm, but + // the quotient bit from this step also has to be stored in `quo` + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + mask = tmp - 1; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + // restore + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + } else { + mask = quo - 1; + } + // There is now room for quotient bits in `duo`. + + // Note that `div` is already shifted left and has `shl` unset bits. We subtract 1 from + // `div` and end up with the subset of `shl` bits being all being set. This subset acts + // just like a two's complement negative one. The subset of `div` containing the divisor + // had 1 subtracted from it, but a carry will always be generated from the `shl` subset + // as long as the quotient stays positive. + // + // When the modified `div` is subtracted from `duo.wrapping_shl(1)`, the `shl` subset + // adds a quotient bit to the least significant bit. + // For example, 89 (0b01011001) divided by 3 (0b11): + // + // shl:4, div:0b00110000 + // first step: + // duo:0b01011001 + // + div_neg:0b11010000 + // ____________________ + // 0b00101001 + // quo is set to 0b00010000 and mask is set to 0b00001111 for later + // + // 1 is subtracted from `div`. I will differentiate the `shl` part of `div` and the + // quotient part of `duo` with `^`s. + // chars. + // div:0b00110000 + // ^^^^ + // + 0b11111111 + // ________________ + // 0b00101111 + // ^^^^ + // div_neg:0b11010001 + // + // first SWAR step: + // duo_shl1:0b01010010 + // ^ + // + div_neg:0b11010001 + // ____________________ + // 0b00100011 + // ^ + // second: + // duo_shl1:0b01000110 + // ^^ + // + div_neg:0b11010001 + // ____________________ + // 0b00010111 + // ^^ + // third: + // duo_shl1:0b00101110 + // ^^^ + // + div_neg:0b11010001 + // ____________________ + // 0b11111111 + // ^^^ + // 3 steps resulted in the quotient with 3 set bits as expected, but currently the real + // part of `duo` is negative and the third step was an unnormalized step. The restore + // branch then restores `duo`. Note that the restore branch does not shift `duo` left. + // + // duo:0b11111111 + // ^^^ + // + div:0b00101111 + // ^^^^ + // ________________ + // 0b00101110 + // ^^^ + // `duo` is now back in the `duo_shl1` state it was at in the the third step, with an + // unset quotient bit. + // + // final step (`shl` was 4, so exactly 4 steps must be taken) + // duo_shl1:0b01011100 + // ^^^^ + // + div_neg:0b11010001 + // ____________________ + // 0b00101101 + // ^^^^ + // The quotient includes the `^` bits added with the `quo` bits from the beginning that + // contained the first step and potential edge case step, + // `quo:0b00010000 + (duo:0b00101101 & mask:0b00001111) == 0b00011101 == 29u8`. + // The remainder is the bits remaining in `duo` that are not part of the quotient bits, + // `duo:0b00101101 >> shl == 0b0010 == 2u8`. + let div: $uX = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + duo = duo.wrapping_shl(1).wrapping_sub(div); + if (duo as $iX) < 0 { + // restore + duo = duo.wrapping_add(div); + } + } + // unpack the results of SWAR + return ((duo & mask) | quo, duo >> shl); + */ + + // The problem with the conditional restoring SWAR algorithm above is that, in practice, + // it requires assembly code to bring out its full unrolled potential (It seems that + // LLVM can't use unrolled conditionals optimally and ends up erasing all the benefit + // that my algorithm intends. On architectures without predicated instructions, the code + // gen is especially bad. We need a default software division algorithm that is + // guaranteed to get decent code gen for the central loop. + + // For non-SWAR algorithms, there is a way to do binary long division without + // predication or even branching. This involves creating a mask from the sign bit and + // performing different kinds of steps using that. + /* + let shl = $normalization_shift(duo, div, true); + let mut div: $uX = div << shl; + let mut pow: $uX = 1 << shl; + let mut quo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + let sign_mask = !((sub as $iX).wrapping_shr($n - 1) as $uX); + duo -= div & sign_mask; + quo |= pow & sign_mask; + div >>= 1; + pow >>= 1; + if pow == 0 { + break; + } + } + return (quo, duo); + */ + // However, it requires about 4 extra operations (smearing the sign bit, negating the + // mask, and applying the mask twice) on top of the operations done by the actual + // algorithm. With SWAR however, just 2 extra operations are needed, making it + // practical and even the most optimal algorithm for some architectures. + + // What we do is use custom assembly for predicated architectures that need software + // division, and for the default algorithm use a mask based restoring SWAR algorithm + // without conditionals or branches. On almost all architectures, this Rust code is + // guaranteed to compile down to 5 assembly instructions or less for each step, and LLVM + // will unroll it in a decent way. + + // standard opening for SWAR algorithm with first step and edge case handling + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + let mask: $uX; + if (div as $iX) < 0 { + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + mask = tmp - 1; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + } else { + mask = quo - 1; + } + + // central loop + div = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // shift left 1 and subtract + duo = duo.wrapping_shl(1).wrapping_sub(div); + // create mask + let mask = (duo as $iX).wrapping_shr($n - 1) as $uX; + // restore + duo = duo.wrapping_add(div & mask); + } + // unpack + return ((duo & mask) | quo, duo >> shl); + + // miscellanious binary long division algorithms that might be better for specific + // architectures + + // Another kind of long division uses an interesting fact that `div` and `pow` can be + // negated when `duo` is negative to perform a "negated" division step that works in + // place of any normalization mechanism. This is a non-restoring division algorithm that + // is very similar to the non-restoring division algorithms that can be found on the + // internet, except there is only one test for `duo < 0`. The subtraction from `quo` can + // be viewed as shifting the least significant set bit right (e.x. if we enter a series + // of negated binary long division steps starting with `quo == 0b1011_0000` and + // `pow == 0b0000_1000`, `quo` will progress like this: 0b1010_1000, 0b1010_0100, + // 0b1010_0010, 0b1010_0001). + /* + let div_original = div; + let shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + let mut pow: $uX = 1 << shl; + let mut quo: $uX = pow; + duo = duo.wrapping_sub(div); + if duo < div_original { + return (quo, duo); + } + div >>= 1; + pow >>= 1; + loop { + if (duo as $iX) < 0 { + // Negated binary long division step. + duo = duo.wrapping_add(div); + quo = quo.wrapping_sub(pow); + } else { + // Normal long division step. + if duo < div_original { + return (quo, duo) + } + duo = duo.wrapping_sub(div); + quo = quo.wrapping_add(pow); + } + pow >>= 1; + div >>= 1; + } + */ + + // This is the Nonrestoring SWAR algorithm, combining the nonrestoring algorithm with + // SWAR techniques that makes the only difference between steps be negation of `div`. + // If there was an architecture with an instruction that negated inputs to an adder + // based on conditionals, and in place shifting (or a three input addition operation + // that can have `duo` as two of the inputs to effectively shift it left by 1), then a + // single instruction central loop is possible. Microarchitectures often have inputs to + // their ALU that can invert the arguments and carry in of adders, but the architectures + // unfortunately do not have an instruction to dynamically invert this input based on + // conditionals. + /* + // SWAR opening + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + let mask: $uX; + if (div as $iX) < 0 { + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + // restore + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + mask = tmp - 1; + } else { + mask = quo - 1; + } + + // central loop + let div: $uX = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // note: the `wrapping_shl(1)` can be factored out, but would require another + // restoring division step to prevent `(duo as $iX)` from overflowing + if (duo as $iX) < 0 { + // Negated binary long division step. + duo = duo.wrapping_shl(1).wrapping_add(div); + } else { + // Normal long division step. + duo = duo.wrapping_shl(1).wrapping_sub(div); + } + } + if (duo as $iX) < 0 { + // Restore. This was not needed in the original nonrestoring algorithm because of + // the `duo < div_original` checks. + duo = duo.wrapping_add(div); + } + // unpack + return ((duo & mask) | quo, duo >> shl); + */ + } + }; +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/delegate.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/delegate.rs new file mode 100644 index 000000000..330c6e4f8 --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/delegate.rs @@ -0,0 +1,319 @@ +/// Creates an unsigned division function that uses a combination of hardware division and +/// binary long division to divide integers larger than what hardware division by itself can do. This +/// function is intended for microarchitectures that have division hardware, but not fast enough +/// multiplication hardware for `impl_trifecta` to be faster. +#[allow(unused_macros)] +macro_rules! impl_delegate { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_normalization_shift:ident, // function for finding the normalization shift of $uX + $half_division:ident, // function for division of a $uX by a $uX + $n_h:expr, // the number of bits in $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD. + $uD:ident, // unsigned integer type for the inputs and outputs of `$fn` + $iD:ident // signed integer type with the same bitwidth as `$uD` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + // The two possibility algorithm, undersubtracting long division algorithm, or any kind + // of reciprocal based algorithm will not be fastest, because they involve large + // multiplications that we assume to not be fast enough relative to the divisions to + // outweigh setup times. + + // the number of bits in a $uX + let n = $n_h * 2; + + let duo_lo = duo as $uX; + let duo_hi = (duo >> n) as $uX; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + + match (div_lo == 0, div_hi == 0, duo_hi == 0) { + (true, true, _) => $zero_div_fn(), + (_, false, true) => { + // `duo` < `div` + return (0, duo); + } + (false, true, true) => { + // delegate to smaller division + let tmp = $half_division(duo_lo, div_lo); + return (tmp.0 as $uD, tmp.1 as $uD); + } + (false, true, false) => { + if duo_hi < div_lo { + // `quo_hi` will always be 0. This performs a binary long division algorithm + // to zero `duo_hi` followed by a half division. + + // We can calculate the normalization shift using only `$uX` size functions. + // If we calculated the normalization shift using + // `$half_normalization_shift(duo_hi, div_lo false)`, it would break the + // assumption the function has that the first argument is more than the + // second argument. If the arguments are switched, the assumption holds true + // since `duo_hi < div_lo`. + let norm_shift = $half_normalization_shift(div_lo, duo_hi, false); + let shl = if norm_shift == 0 { + // Consider what happens if the msbs of `duo_hi` and `div_lo` align with + // no shifting. The normalization shift will always return + // `norm_shift == 0` regardless of whether it is fully normalized, + // because `duo_hi < div_lo`. In that edge case, `n - norm_shift` would + // result in shift overflow down the line. For the edge case, because + // both `duo_hi < div_lo` and we are comparing all the significant bits + // of `duo_hi` and `div`, we can make `shl = n - 1`. + n - 1 + } else { + // We also cannot just use `shl = n - norm_shift - 1` in the general + // case, because when we are not in the edge case comparing all the + // significant bits, then the full `duo < div` may not be true and thus + // breaks the division algorithm. + n - norm_shift + }; + + // The 3 variable restoring division algorithm (see binary_long.rs) is ideal + // for this task, since `pow` and `quo` can be `$uX` and the delegation + // check is simple. + let mut div: $uD = div << shl; + let mut pow_lo: $uX = 1 << shl; + let mut quo_lo: $uX = 0; + let mut duo = duo; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> n) as $uX; + if duo_hi == 0 { + // Delegate to get the rest of the quotient. Note that the + // `div_lo` here is the original unshifted `div`. + let tmp = $half_division(duo as $uX, div_lo); + return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD); + } + } + div >>= 1; + pow_lo >>= 1; + } + } else if duo_hi == div_lo { + // `quo_hi == 1`. This branch is cheap and helps with edge cases. + let tmp = $half_division(duo as $uX, div as $uX); + return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD); + } else { + // `div_lo < duo_hi` + // `rem_hi == 0` + if (div_lo >> $n_h) == 0 { + // Short division of $uD by a $uH, using $uX by $uX division + let div_0 = div_lo as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), + rem_1 as $uD, + ); + } + + // This is basically a short division composed of a half division for the hi + // part, specialized 3 variable binary long division in the middle, and + // another half division for the lo part. + let duo_lo = duo as $uX; + let tmp = $half_division(duo_hi, div_lo); + let quo_hi = tmp.0; + let mut duo = (duo_lo as $uD) | ((tmp.1 as $uD) << n); + // This check is required to avoid breaking the long division below. + if duo < div { + return ((quo_hi as $uD) << n, duo); + } + + // The half division handled all shift alignments down to `n`, so this + // division can continue with a shift of `n - 1`. + let mut div: $uD = div << (n - 1); + let mut pow_lo: $uX = 1 << (n - 1); + let mut quo_lo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> n) as $uX; + if duo_hi == 0 { + // Delegate to get the rest of the quotient. Note that the + // `div_lo` here is the original unshifted `div`. + let tmp = $half_division(duo as $uX, div_lo); + return ( + (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n), + tmp.1 as $uD, + ); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + (_, false, false) => { + // Full $uD by $uD binary long division. `quo_hi` will always be 0. + if duo < div { + return (0, duo); + } + let div_original = div; + let shl = $half_normalization_shift(duo_hi, div_hi, false); + let mut duo = duo; + let mut div: $uD = div << shl; + let mut pow_lo: $uX = 1 << shl; + let mut quo_lo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + if duo < div_original { + return (quo_lo as $uD, duo); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + } + }; +} + +public_test_dep! { +/// Returns `n / d` and sets `*rem = n % d`. +/// +/// This specialization exists because: +/// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`, +/// so we have to use an old fashioned `&mut u128` argument to return the remainder. +/// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the +/// delegate algorithm strategy the only reasonably fast way to perform `u128` division. +// used on SPARC +#[allow(dead_code)] +pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { + use super::*; + let duo_lo = duo as u64; + let duo_hi = (duo >> 64) as u64; + let div_lo = div as u64; + let div_hi = (div >> 64) as u64; + + match (div_lo == 0, div_hi == 0, duo_hi == 0) { + (true, true, _) => zero_div_fn(), + (_, false, true) => { + *rem = duo; + return 0; + } + (false, true, true) => { + let tmp = u64_by_u64_div_rem(duo_lo, div_lo); + *rem = tmp.1 as u128; + return tmp.0 as u128; + } + (false, true, false) => { + if duo_hi < div_lo { + let norm_shift = u64_normalization_shift(div_lo, duo_hi, false); + let shl = if norm_shift == 0 { + 64 - 1 + } else { + 64 - norm_shift + }; + + let mut div: u128 = div << shl; + let mut pow_lo: u64 = 1 << shl; + let mut quo_lo: u64 = 0; + let mut duo = duo; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> 64) as u64; + if duo_hi == 0 { + let tmp = u64_by_u64_div_rem(duo as u64, div_lo); + *rem = tmp.1 as u128; + return (quo_lo | tmp.0) as u128; + } + } + div >>= 1; + pow_lo >>= 1; + } + } else if duo_hi == div_lo { + let tmp = u64_by_u64_div_rem(duo as u64, div as u64); + *rem = tmp.1 as u128; + return (1 << 64) | (tmp.0 as u128); + } else { + if (div_lo >> 32) == 0 { + let div_0 = div_lo as u32 as u64; + let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0); + + let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32); + let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0); + + let duo_lo = (duo as u32 as u64) | (rem_2 << 32); + let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0); + + *rem = rem_1 as u128; + return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64); + } + + let duo_lo = duo as u64; + let tmp = u64_by_u64_div_rem(duo_hi, div_lo); + let quo_hi = tmp.0; + let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64); + if duo < div { + *rem = duo; + return (quo_hi as u128) << 64; + } + + let mut div: u128 = div << (64 - 1); + let mut pow_lo: u64 = 1 << (64 - 1); + let mut quo_lo: u64 = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> 64) as u64; + if duo_hi == 0 { + let tmp = u64_by_u64_div_rem(duo as u64, div_lo); + *rem = tmp.1 as u128; + return (tmp.0) as u128 | (quo_lo as u128) | ((quo_hi as u128) << 64); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + (_, false, false) => { + if duo < div { + *rem = duo; + return 0; + } + let div_original = div; + let shl = u64_normalization_shift(duo_hi, div_hi, false); + let mut duo = duo; + let mut div: u128 = div << shl; + let mut pow_lo: u64 = 1 << shl; + let mut quo_lo: u64 = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + if duo < div_original { + *rem = duo; + return quo_lo as u128; + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } +} +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs new file mode 100644 index 000000000..6ec4675df --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs @@ -0,0 +1,306 @@ +// TODO: when `unsafe_block_in_unsafe_fn` is stabilized, remove this +#![allow(unused_unsafe)] +// The functions are complex with many branches, and explicit +// `return`s makes it clear where function exit points are +#![allow(clippy::needless_return)] +#![allow(clippy::comparison_chain)] +// Clippy is confused by the complex configuration +#![allow(clippy::if_same_then_else)] +#![allow(clippy::needless_bool)] + +//! This `specialized_div_rem` module is originally from version 1.0.0 of the +//! `specialized-div-rem` crate. Note that `for` loops with ranges are not used in this +//! module, since unoptimized compilation may generate references to `memcpy`. +//! +//! The purpose of these macros is to easily change the both the division algorithm used +//! for a given integer size and the half division used by that algorithm. The way +//! functions call each other is also constructed such that linkers will find the chain of +//! software and hardware divisions needed for every size of signed and unsigned division. +//! For example, most target compilations do the following: +//! +//! - Many 128 bit division functions like `u128::wrapping_div` use +//! `std::intrinsics::unchecked_div`, which gets replaced by `__udivti3` because there +//! is not a 128 bit by 128 bit hardware division function in most architectures. +//! `__udivti3` uses `u128_div_rem` (this extra level of function calls exists because +//! `__umodti3` and `__udivmodti4` also exist, and `specialized_div_rem` supplies just +//! one function to calculate both the quotient and remainder. If configuration flags +//! enable it, `impl_trifecta!` defines `u128_div_rem` to use the trifecta algorithm, +//! which requires the half sized division `u64_by_u64_div_rem`. If the architecture +//! supplies a 64 bit hardware division instruction, `u64_by_u64_div_rem` will be +//! reduced to those instructions. Note that we do not specify the half size division +//! directly to be `__udivdi3`, because hardware division would never be introduced. +//! - If the architecture does not supply a 64 bit hardware division instruction, u64 +//! divisions will use functions such as `__udivdi3`. This will call `u64_div_rem` +//! which is defined by `impl_delegate!`. The half division for this algorithm is +//! `u32_by_u32_div_rem` which in turn becomes hardware division instructions or more +//! software division algorithms. +//! - If the architecture does not supply a 32 bit hardware instruction, linkers will +//! look for `__udivsi3`. `impl_binary_long!` is used, but this algorithm uses no half +//! division, so the chain of calls ends here. +//! +//! On some architectures like x86_64, an asymmetrically sized division is supplied, in +//! which 128 bit numbers can be divided by 64 bit numbers. `impl_asymmetric!` is used to +//! extend the 128 by 64 bit division to a full 128 by 128 bit division. + +// `allow(dead_code)` is used in various places, because the configuration code would otherwise be +// ridiculously complex + +#[macro_use] +mod norm_shift; + +#[macro_use] +mod binary_long; + +#[macro_use] +mod delegate; + +// used on SPARC +#[allow(unused_imports)] +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use self::delegate::u128_divide_sparc; + +#[cfg(feature = "public-test-deps")] +pub use self::delegate::u128_divide_sparc; + +#[macro_use] +mod trifecta; + +#[macro_use] +mod asymmetric; + +/// The behavior of all divisions by zero is controlled by this function. This function should be +/// impossible to reach by Rust users, unless `compiler-builtins` public division functions or +/// `core/std::unchecked_div/rem` are directly used without a zero check in front. +fn zero_div_fn() -> ! { + unsafe { core::hint::unreachable_unchecked() } +} + +const USE_LZ: bool = { + if cfg!(target_arch = "arm") { + if cfg!(target_feature = "thumb-mode") { + // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is + // supported. This is needed to successfully differentiate between targets like + // `thumbv8.base` and `thumbv8.main`. + cfg!(target_feature = "v6t2") + } else { + // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is + // supported. Technically, ARMv5T was the first to have CLZ, but the "v5t" target + // feature does not seem to work. + cfg!(target_feature = "v5te") + } + } else if cfg!(any(target_arch = "sparc", target_arch = "sparc64")) { + // LZD or LZCNT on SPARC only exists for the VIS 3 extension and later. + cfg!(target_feature = "vis3") + } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + // The `B` extension on RISC-V determines if a CLZ assembly instruction exists + cfg!(target_feature = "b") + } else { + // All other common targets Rust supports should have CLZ instructions + true + } +}; + +impl_normalization_shift!( + u32_normalization_shift, + USE_LZ, + 32, + u32, + i32, + allow(dead_code) +); +impl_normalization_shift!( + u64_normalization_shift, + USE_LZ, + 64, + u64, + i64, + allow(dead_code) +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// `checked_div` and `checked_rem` are used to avoid bringing in panic function +/// dependencies. +#[inline] +fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { + if let Some(quo) = duo.checked_div(div) { + if let Some(rem) = duo.checked_rem(div) { + return (quo, rem); + } + } + zero_div_fn() +} + +// Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a +// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is +// faster if the target pointer width is at least 64. +#[cfg(all( + not(any(target_pointer_width = "16", target_pointer_width = "32")), + not(all(not(feature = "no-asm"), target_arch = "x86_64")), + not(any(target_arch = "sparc", target_arch = "sparc64")) +))] +impl_trifecta!( + u128_div_rem, + zero_div_fn, + u64_by_u64_div_rem, + 32, + u32, + u64, + u128 +); + +// If the pointer width less than 64, then the target architecture almost certainly does not have +// the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster. +#[cfg(all( + any(target_pointer_width = "16", target_pointer_width = "32"), + not(all(not(feature = "no-asm"), target_arch = "x86_64")), + not(any(target_arch = "sparc", target_arch = "sparc64")) +))] +impl_delegate!( + u128_div_rem, + zero_div_fn, + u64_normalization_shift, + u64_by_u64_div_rem, + 32, + u32, + u64, + u128, + i128 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// +/// # Safety +/// +/// If the quotient does not fit in a `u64`, a floating point exception occurs. +/// If `div == 0`, then a division by zero exception occurs. +#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] +#[inline] +unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { + let duo_lo = duo as u64; + let duo_hi = (duo >> 64) as u64; + let quo: u64; + let rem: u64; + unsafe { + // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this) + // by `div`. The quotient is stored in rax and the remainder in rdx. + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "div {0}", + in(reg) div, + inlateout("rax") duo_lo => quo, + inlateout("rdx") duo_hi => rem, + options(att_syntax, pure, nomem, nostack) + ); + } + (quo, rem) +} + +// use `asymmetric` instead of `trifecta` on x86_64 +#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] +impl_asymmetric!( + u128_div_rem, + zero_div_fn, + u64_by_u64_div_rem, + u128_by_u64_div_rem, + 32, + u32, + u64, + u128 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// `checked_div` and `checked_rem` are used to avoid bringing in panic function +/// dependencies. +#[inline] +#[allow(dead_code)] +fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) { + if let Some(quo) = duo.checked_div(div) { + if let Some(rem) = duo.checked_rem(div) { + return (quo, rem); + } + } + zero_div_fn() +} + +// When not on x86 and the pointer width is not 64, use `delegate` since the division size is larger +// than register size. +#[cfg(all( + not(all(not(feature = "no-asm"), target_arch = "x86")), + not(target_pointer_width = "64") +))] +impl_delegate!( + u64_div_rem, + zero_div_fn, + u32_normalization_shift, + u32_by_u32_div_rem, + 16, + u16, + u32, + u64, + i64 +); + +// When not on x86 and the pointer width is 64, use `binary_long`. +#[cfg(all( + not(all(not(feature = "no-asm"), target_arch = "x86")), + target_pointer_width = "64" +))] +impl_binary_long!( + u64_div_rem, + zero_div_fn, + u64_normalization_shift, + 64, + u64, + i64 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// +/// # Safety +/// +/// If the quotient does not fit in a `u32`, a floating point exception occurs. +/// If `div == 0`, then a division by zero exception occurs. +#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] +#[inline] +unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { + let duo_lo = duo as u32; + let duo_hi = (duo >> 32) as u32; + let quo: u32; + let rem: u32; + unsafe { + // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this) + // by `div`. The quotient is stored in rax and the remainder in rdx. + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "div {0}", + in(reg) div, + inlateout("rax") duo_lo => quo, + inlateout("rdx") duo_hi => rem, + options(att_syntax, pure, nomem, nostack) + ); + } + (quo, rem) +} + +// use `asymmetric` instead of `delegate` on x86 +#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] +impl_asymmetric!( + u64_div_rem, + zero_div_fn, + u32_by_u32_div_rem, + u64_by_u32_div_rem, + 16, + u16, + u32, + u64 +); + +// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division +impl_binary_long!( + u32_div_rem, + zero_div_fn, + u32_normalization_shift, + 32, + u32, + i32 +); diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/norm_shift.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/norm_shift.rs new file mode 100644 index 000000000..61b67b6bc --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/norm_shift.rs @@ -0,0 +1,106 @@ +/// Creates a function used by some division algorithms to compute the "normalization shift". +#[allow(unused_macros)] +macro_rules! impl_normalization_shift { + ( + $name:ident, // name of the normalization shift function + // boolean for if `$uX::leading_zeros` should be used (if an architecture does not have a + // hardware instruction for `usize::leading_zeros`, then this should be `true`) + $use_lz:ident, + $n:tt, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer type for the inputs of `$name` + $iX:ident, // signed integer type for the inputs of `$name` + $($unsigned_attr:meta),* // attributes for the function + ) => { + /// Finds the shift left that the divisor `div` would need to be normalized for a binary + /// long division step with the dividend `duo`. NOTE: This function assumes that these edge + /// cases have been handled before reaching it: + /// ` + /// if div == 0 { + /// panic!("attempt to divide by zero") + /// } + /// if duo < div { + /// return (0, duo) + /// } + /// ` + /// + /// Normalization is defined as (where `shl` is the output of this function): + /// ` + /// if duo.leading_zeros() != (div << shl).leading_zeros() { + /// // If the most significant bits of `duo` and `div << shl` are not in the same place, + /// // then `div << shl` has one more leading zero than `duo`. + /// assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); + /// // Also, `2*(div << shl)` is not more than `duo` (otherwise the first division step + /// // would not be able to clear the msb of `duo`) + /// assert!(duo < (div << (shl + 1))); + /// } + /// if full_normalization { + /// // Some algorithms do not need "full" normalization, which means that `duo` is + /// // larger than `div << shl` when the most significant bits are aligned. + /// assert!((div << shl) <= duo); + /// } + /// ` + /// + /// Note: If the software bisection algorithm is being used in this function, it happens + /// that full normalization always occurs, so be careful that new algorithms are not + /// invisibly depending on this invariant when `full_normalization` is set to `false`. + $( + #[$unsigned_attr] + )* + fn $name(duo: $uX, div: $uX, full_normalization: bool) -> usize { + // We have to find the leading zeros of `div` to know where its msb (most significant + // set bit) is to even begin binary long division. It is also good to know where the msb + // of `duo` is so that useful work can be started instead of shifting `div` for all + // possible quotients (many division steps are wasted if `duo.leading_zeros()` is large + // and `div` starts out being shifted all the way to the msb). Aligning the msbs of + // `div` and `duo` could be done by shifting `div` left by + // `div.leading_zeros() - duo.leading_zeros()`, but some CPUs without division hardware + // also do not have single instructions for calculating `leading_zeros`. Instead of + // software doing two bisections to find the two `leading_zeros`, we do one bisection to + // find `div.leading_zeros() - duo.leading_zeros()` without actually knowing either of + // the leading zeros values. + + let mut shl: usize; + if $use_lz { + shl = (div.leading_zeros() - duo.leading_zeros()) as usize; + if full_normalization { + if duo < (div << shl) { + // when the msb of `duo` and `div` are aligned, the resulting `div` may be + // larger than `duo`, so we decrease the shift by 1. + shl -= 1; + } + } + } else { + let mut test = duo; + shl = 0usize; + let mut lvl = $n >> 1; + loop { + let tmp = test >> lvl; + // It happens that a final `duo < (div << shl)` check is not needed, because the + // `div <= tmp` check insures that the msb of `test` never passes the msb of + // `div`, and any set bits shifted off the end of `test` would still keep + // `div <= tmp` true. + if div <= tmp { + test = tmp; + shl += lvl; + } + // narrow down bisection + lvl >>= 1; + if lvl == 0 { + break + } + } + } + // tests the invariants that should hold before beginning binary long division + /* + if full_normalization { + assert!((div << shl) <= duo); + } + if duo.leading_zeros() != (div << shl).leading_zeros() { + assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); + assert!(duo < (div << (shl + 1))); + } + */ + shl + } + } +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/trifecta.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/trifecta.rs new file mode 100644 index 000000000..7e104053b --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/trifecta.rs @@ -0,0 +1,386 @@ +/// Creates an unsigned division function optimized for division of integers with bitwidths +/// larger than the largest hardware integer division supported. These functions use large radix +/// division algorithms that require both fast division and very fast widening multiplication on the +/// target microarchitecture. Otherwise, `impl_delegate` should be used instead. +#[allow(unused_macros)] +macro_rules! impl_trifecta { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_division:ident, // function for division of a $uX by a $uX + $n_h:expr, // the number of bits in $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD + $uD:ident // unsigned integer type for the inputs and outputs of `$unsigned_name` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + // This is called the trifecta algorithm because it uses three main algorithms: short + // division for small divisors, the two possibility algorithm for large divisors, and an + // undersubtracting long division algorithm for intermediate cases. + + // This replicates `carrying_mul` (rust-lang rfc #2417). LLVM correctly optimizes this + // to use a widening multiply to 128 bits on the relevant architectures. + fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD).wrapping_mul(rhs as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD) + .wrapping_mul(mul as $uD) + .wrapping_add(add as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + + // the number of bits in a $uX + let n = $n_h * 2; + + if div == 0 { + $zero_div_fn() + } + + // Trying to use a normalization shift function will cause inelegancies in the code and + // inefficiencies for architectures with a native count leading zeros instruction. The + // undersubtracting algorithm needs both values (keeping the original `div_lz` but + // updating `duo_lz` multiple times), so we assume hardware support for fast + // `leading_zeros` calculation. + let div_lz = div.leading_zeros(); + let mut duo_lz = duo.leading_zeros(); + + // the possible ranges of `duo` and `div` at this point: + // `0 <= duo < 2^n_d` + // `1 <= div < 2^n_d` + + // quotient is 0 or 1 branch + if div_lz <= duo_lz { + // The quotient cannot be more than 1. The highest set bit of `duo` needs to be at + // least one place higher than `div` for the quotient to be more than 1. + if duo >= div { + return (1, duo - div); + } else { + return (0, duo); + } + } + + // `_sb` is the number of significant bits (from the ones place to the highest set bit) + // `{2, 2^div_sb} <= duo < 2^n_d` + // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` + // smaller division branch + if duo_lz >= n { + // `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the + // `div_lz <= duo_lz` branch) so no numerical error. + let (quo, rem) = $half_division(duo as $uX, div as $uX); + return (quo as $uD, rem as $uD); + } + + // `{2^n, 2^div_sb} <= duo < 2^n_d` + // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` + // short division branch + if div_lz >= (n + $n_h) { + // `1 <= div < {2^duo_sb, 2^n_h}` + + // It is barely possible to improve the performance of this by calculating the + // reciprocal and removing one `$half_division`, but only if the CPU can do fast + // multiplications in parallel. Other reciprocal based methods can remove two + // `$half_division`s, but have multiplications that cannot be done in parallel and + // reduce performance. I have decided to use this trivial short division method and + // rely on the CPU having quick divisions. + + let duo_hi = (duo >> n) as $uX; + let div_0 = div as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), + rem_1 as $uD, + ); + } + + // relative leading significant bits, cannot overflow because of above branches + let lz_diff = div_lz - duo_lz; + + // `{2^n, 2^div_sb} <= duo < 2^n_d` + // `2^n_h <= div < {2^duo_sb, 2^(n_d - 1)}` + // `mul` or `mul - 1` branch + if lz_diff < $n_h { + // Two possibility division algorithm + + // The most significant bits of `duo` and `div` are within `$n_h` bits of each + // other. If we take the `n` most significant bits of `duo` and divide them by the + // corresponding bits in `div`, it produces a quotient value `quo`. It happens that + // `quo` or `quo - 1` will always be the correct quotient for the whole number. In + // other words, the bits less significant than the `n` most significant bits of + // `duo` and `div` can only influence the quotient to be one of two values. + // Because there are only two possibilities, there only needs to be one `$uH` sized + // division, a `$uH` by `$uD` multiplication, and only one branch with a few simple + // operations. + // + // Proof that the true quotient can only be `quo` or `quo - 1`. + // All `/` operators here are floored divisions. + // + // `shift` is the number of bits not in the higher `n` significant bits of `duo`. + // (definitions) + // 0. shift = n - duo_lz + // 1. duo_sig_n == duo / 2^shift + // 2. div_sig_n == div / 2^shift + // 3. quo == duo_sig_n / div_sig_n + // + // + // We are trying to find the true quotient, `true_quo`. + // 4. true_quo = duo / div. (definition) + // + // This is true because of the bits that are cut off during the bit shift. + // 5. duo_sig_n * 2^shift <= duo < (duo_sig_n + 1) * 2^shift. + // 6. div_sig_n * 2^shift <= div < (div_sig_n + 1) * 2^shift. + // + // Dividing each bound of (5) by each bound of (6) gives 4 possibilities for what + // `true_quo == duo / div` is bounded by: + // (duo_sig_n * 2^shift) / (div_sig_n * 2^shift) + // (duo_sig_n * 2^shift) / ((div_sig_n + 1) * 2^shift) + // ((duo_sig_n + 1) * 2^shift) / (div_sig_n * 2^shift) + // ((duo_sig_n + 1) * 2^shift) / ((div_sig_n + 1) * 2^shift) + // + // Simplifying each of these four: + // duo_sig_n / div_sig_n + // duo_sig_n / (div_sig_n + 1) + // (duo_sig_n + 1) / div_sig_n + // (duo_sig_n + 1) / (div_sig_n + 1) + // + // Taking the smallest and the largest of these as the low and high bounds + // and replacing `duo / div` with `true_quo`: + // 7. duo_sig_n / (div_sig_n + 1) <= true_quo < (duo_sig_n + 1) / div_sig_n + // + // The `lz_diff < n_h` conditional on this branch makes sure that `div_sig_n` is at + // least `2^n_h`, and the `div_lz <= duo_lz` branch makes sure that the highest bit + // of `div_sig_n` is not the `2^(n - 1)` bit. + // 8. `2^(n - 1) <= duo_sig_n < 2^n` + // 9. `2^n_h <= div_sig_n < 2^(n - 1)` + // + // We want to prove that either + // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1)` or that + // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1) + 1`. + // + // We also want to prove that `quo` is one of these: + // `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or + // `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n`. + // + // When 1 is added to the numerator of `duo_sig_n / div_sig_n` to produce + // `(duo_sig_n + 1) / div_sig_n`, it is not possible that the value increases by + // more than 1 with floored integer arithmetic and `div_sig_n != 0`. Consider + // `x/y + 1 < (x + 1)/y` <=> `x/y + 1 < x/y + 1/y` <=> `1 < 1/y` <=> `y < 1`. + // `div_sig_n` is a nonzero integer. Thus, + // 10. `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n` or + // `(duo_sig_n / div_sig_n) + 1 == (duo_sig_n + 1) / div_sig_n. + // + // When 1 is added to the denominator of `duo_sig_n / div_sig_n` to produce + // `duo_sig_n / (div_sig_n + 1)`, it is not possible that the value decreases by + // more than 1 with the bounds (8) and (9). Consider `x/y - 1 <= x/(y + 1)` <=> + // `(x - y)/y < x/(y + 1)` <=> `(y + 1)*(x - y) < x*y` <=> `x*y - y*y + x - y < x*y` + // <=> `x < y*y + y`. The smallest value of `div_sig_n` is `2^n_h` and the largest + // value of `duo_sig_n` is `2^n - 1`. Substituting reveals `2^n - 1 < 2^n + 2^n_h`. + // Thus, + // 11. `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or + // `(duo_sig_n / div_sig_n) - 1` == duo_sig_n / (div_sig_n + 1)` + // + // Combining both (10) and (11), we know that + // `quo - 1 <= duo_sig_n / (div_sig_n + 1) <= true_quo + // < (duo_sig_n + 1) / div_sig_n <= quo + 1` and therefore: + // 12. quo - 1 <= true_quo < quo + 1 + // + // In a lot of division algorithms using smaller divisions to construct a larger + // division, we often encounter a situation where the approximate `quo` value + // calculated from a smaller division is multiple increments away from the true + // `quo` value. In those algorithms, multiple correction steps have to be applied. + // Those correction steps may need more multiplications to test `duo - (quo*div)` + // again. Because of the fact that our `quo` can only be one of two values, we can + // see if `duo - (quo*div)` overflows. If it did overflow, then we know that we have + // the larger of the two values (since the true quotient is unique, and any larger + // quotient will cause `duo - (quo*div)` to be negative). Also because there is only + // one correction needed, we can calculate the remainder `duo - (true_quo*div) == + // duo - ((quo - 1)*div) == duo - (quo*div - div) == duo + div - quo*div`. + // If `duo - (quo*div)` did not overflow, then we have the correct answer. + let shift = n - duo_lz; + let duo_sig_n = (duo >> shift) as $uX; + let div_sig_n = (div >> shift) as $uX; + let quo = $half_division(duo_sig_n, div_sig_n).0; + + // The larger `quo` value can overflow `$uD` in the right circumstances. This is a + // manual `carrying_mul_add` with overflow checking. + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + let (tmp_lo, carry) = carrying_mul(quo, div_lo); + let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry); + let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); + if (overflow != 0) || (duo < tmp) { + return ( + (quo - 1) as $uD, + // Both the addition and subtraction can overflow, but when combined end up + // as a correct positive number. + duo.wrapping_add(div).wrapping_sub(tmp), + ); + } else { + return (quo as $uD, duo - tmp); + } + } + + // Undersubtracting long division algorithm. + // Instead of clearing a minimum of 1 bit from `duo` per iteration via binary long + // division, `n_h - 1` bits are cleared per iteration with this algorithm. It is a more + // complicated version of regular long division. Most integer division algorithms tend + // to guess a part of the quotient, and may have a larger quotient than the true + // quotient (which when multiplied by `div` will "oversubtract" the original dividend). + // They then check if the quotient was in fact too large and then have to correct it. + // This long division algorithm has been carefully constructed to always underguess the + // quotient by slim margins. This allows different subalgorithms to be blindly jumped to + // without needing an extra correction step. + // + // The only problem is that this subalgorithm will not work for many ranges of `duo` and + // `div`. Fortunately, the short division, two possibility algorithm, and other simple + // cases happen to exactly fill these gaps. + // + // For an example, consider the division of 76543210 by 213 and assume that `n_h` is + // equal to two decimal digits (note: we are working with base 10 here for readability). + // The first `sig_n_h` part of the divisor (21) is taken and is incremented by 1 to + // prevent oversubtraction. We also record the number of extra places not a part of + // the `sig_n` or `sig_n_h` parts. + // + // sig_n_h == 2 digits, sig_n == 4 digits + // + // vvvv <- `duo_sig_n` + // 76543210 + // ^^^^ <- extra places in duo, `duo_extra == 4` + // + // vv <- `div_sig_n_h` + // 213 + // ^ <- extra places in div, `div_extra == 1` + // + // The difference in extra places, `duo_extra - div_extra == extra_shl == 3`, is used + // for shifting partial sums in the long division. + // + // In the first step, the first `sig_n` part of duo (7654) is divided by + // `div_sig_n_h_add_1` (22), which results in a partial quotient of 347. This is + // multiplied by the whole divisor to make 73911, which is shifted left by `extra_shl` + // and subtracted from duo. The partial quotient is also shifted left by `extra_shl` to + // be added to `quo`. + // + // 347 + // ________ + // |76543210 + // -73911 + // 2632210 + // + // Variables dependent on duo have to be updated: + // + // vvvv <- `duo_sig_n == 2632` + // 2632210 + // ^^^ <- `duo_extra == 3` + // + // `extra_shl == 2` + // + // Two more steps are taken after this and then duo fits into `n` bits, and then a final + // normal long division step is made. The partial quotients are all progressively added + // to each other in the actual algorithm, but here I have left them all in a tower that + // can be added together to produce the quotient, 359357. + // + // 14 + // 443 + // 119 + // 347 + // ________ + // |76543210 + // -73911 + // 2632210 + // -25347 + // 97510 + // -94359 + // 3151 + // -2982 + // 169 <- the remainder + + let mut duo = duo; + let mut quo: $uD = 0; + + // The number of lesser significant bits not a part of `div_sig_n_h` + let div_extra = (n + $n_h) - div_lz; + + // The most significant `n_h` bits of div + let div_sig_n_h = (div >> div_extra) as $uH; + + // This needs to be a `$uX` in case of overflow from the increment + let div_sig_n_h_add1 = (div_sig_n_h as $uX) + 1; + + // `{2^n, 2^(div_sb + n_h)} <= duo < 2^n_d` + // `2^n_h <= div < {2^(duo_sb - n_h), 2^n}` + loop { + // The number of lesser significant bits not a part of `duo_sig_n` + let duo_extra = n - duo_lz; + + // The most significant `n` bits of `duo` + let duo_sig_n = (duo >> duo_extra) as $uX; + + // the two possibility algorithm requires that the difference between msbs is less + // than `n_h`, so the comparison is `<=` here. + if div_extra <= duo_extra { + // Undersubtracting long division step + let quo_part = $half_division(duo_sig_n, div_sig_n_h_add1).0 as $uD; + let extra_shl = duo_extra - div_extra; + + // Addition to the quotient. + quo += (quo_part << extra_shl); + + // Subtraction from `duo`. At least `n_h - 1` bits are cleared from `duo` here. + duo -= (div.wrapping_mul(quo_part) << extra_shl); + } else { + // Two possibility algorithm + let shift = n - duo_lz; + let duo_sig_n = (duo >> shift) as $uX; + let div_sig_n = (div >> shift) as $uX; + let quo_part = $half_division(duo_sig_n, div_sig_n).0; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + + let (tmp_lo, carry) = carrying_mul(quo_part, div_lo); + // The undersubtracting long division algorithm has already run once, so + // overflow beyond `$uD` bits is not possible here + let (tmp_hi, _) = carrying_mul_add(quo_part, div_hi, carry); + let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); + + if duo < tmp { + return ( + quo + ((quo_part - 1) as $uD), + duo.wrapping_add(div).wrapping_sub(tmp), + ); + } else { + return (quo + (quo_part as $uD), duo - tmp); + } + } + + duo_lz = duo.leading_zeros(); + + if div_lz <= duo_lz { + // quotient can have 0 or 1 added to it + if div <= duo { + return (quo + 1, duo - div); + } else { + return (quo, duo); + } + } + + // This can only happen if `div_sd < n` (because of previous "quo = 0 or 1" + // branches), but it is not worth it to unroll further. + if n <= duo_lz { + // simple division and addition + let tmp = $half_division(duo as $uX, div as $uX); + return (quo + (tmp.0 as $uD), tmp.1 as $uD); + } + } + } + }; +} diff --git a/vendor/compiler_builtins/src/int/udiv.rs b/vendor/compiler_builtins/src/int/udiv.rs new file mode 100644 index 000000000..fb09f87d8 --- /dev/null +++ b/vendor/compiler_builtins/src/int/udiv.rs @@ -0,0 +1,106 @@ +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use int::specialized_div_rem::*; + +#[cfg(feature = "public-test-deps")] +pub use int::specialized_div_rem::*; + +intrinsics! { + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_uidiv] + /// Returns `n / d` + pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { + u32_div_rem(n, d).0 + } + + #[maybe_use_optimized_c_shim] + /// Returns `n % d` + pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { + u32_div_rem(n, d).1 + } + + #[avr_skip] + #[maybe_use_optimized_c_shim] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { + let quo_rem = u32_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + + #[avr_skip] + #[maybe_use_optimized_c_shim] + /// Returns `n / d` + pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 { + u64_div_rem(n, d).0 + } + + #[avr_skip] + #[maybe_use_optimized_c_shim] + /// Returns `n % d` + pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 { + u64_div_rem(n, d).1 + } + + #[avr_skip] + #[maybe_use_optimized_c_shim] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { + let quo_rem = u64_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + + // Note: we use block configuration and not `if cfg!(...)`, because we need to entirely disable + // the existence of `u128_div_rem` to get 32-bit SPARC to compile, see `u128_divide_sparc` docs. + + #[avr_skip] + #[win64_128bit_abi_hack] + /// Returns `n / d` + pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + u128_div_rem(n, d).0 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + u128_divide_sparc(n, d, &mut 0) + } + } + + #[avr_skip] + #[win64_128bit_abi_hack] + /// Returns `n % d` + pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + u128_div_rem(n, d).1 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + let mut rem = 0; + u128_divide_sparc(n, d, &mut rem); + rem + } + } + + #[avr_skip] + #[win64_128bit_abi_hack] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + let quo_rem = u128_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + let mut tmp = 0; + let quo = u128_divide_sparc(n, d, &mut tmp); + if let Some(rem) = rem { + *rem = tmp; + } + quo + } + } +} diff --git a/vendor/compiler_builtins/src/lib.rs b/vendor/compiler_builtins/src/lib.rs new file mode 100644 index 000000000..009923d27 --- /dev/null +++ b/vendor/compiler_builtins/src/lib.rs @@ -0,0 +1,72 @@ +#![cfg_attr(feature = "compiler-builtins", compiler_builtins)] +#![cfg_attr(not(feature = "no-asm"), feature(asm))] +#![feature(abi_unadjusted)] +#![cfg_attr(not(feature = "no-asm"), feature(global_asm))] +#![feature(cfg_target_has_atomic)] +#![feature(compiler_builtins)] +#![feature(core_ffi_c)] +#![feature(core_intrinsics)] +#![feature(lang_items)] +#![feature(linkage)] +#![feature(naked_functions)] +#![feature(repr_simd)] +#![no_builtins] +#![no_std] +#![allow(unused_features)] +// We use `u128` in a whole bunch of places which we currently agree with the +// compiler on ABIs and such, so we should be "good enough" for now and changes +// to the `u128` ABI will be reflected here. +#![allow(improper_ctypes, improper_ctypes_definitions)] +// `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code. +#![allow(clippy::manual_swap)] +// Support compiling on both stage0 and stage1 which may differ in supported stable features. +#![allow(stable_features)] + +// We disable #[no_mangle] for tests so that we can verify the test results +// against the native compiler-rt implementations of the builtins. + +// NOTE cfg(all(feature = "c", ..)) indicate that compiler-rt provides an arch optimized +// implementation of that intrinsic and we'll prefer to use that + +// NOTE(aapcs, aeabi, arm) ARM targets use intrinsics named __aeabi_* instead of the intrinsics +// that follow "x86 naming convention" (e.g. addsf3). Those aeabi intrinsics must adhere to the +// AAPCS calling convention (`extern "aapcs"`) because that's how LLVM will call them. + +#[cfg(test)] +extern crate core; + +#[macro_use] +mod macros; + +pub mod float; +pub mod int; + +#[cfg(any( + all(target_family = "wasm", target_os = "unknown"), + all(target_arch = "x86_64", target_os = "uefi"), + all(target_arch = "arm", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] +pub mod math; +pub mod mem; + +#[cfg(target_arch = "arm")] +pub mod arm; + +#[cfg(all( + kernel_user_helpers, + any(target_os = "linux", target_os = "android"), + target_arch = "arm" +))] +pub mod arm_linux; + +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +pub mod riscv; + +#[cfg(target_arch = "x86")] +pub mod x86; + +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + +pub mod probestack; diff --git a/vendor/compiler_builtins/src/macros.rs b/vendor/compiler_builtins/src/macros.rs new file mode 100644 index 000000000..518a18d4d --- /dev/null +++ b/vendor/compiler_builtins/src/macros.rs @@ -0,0 +1,448 @@ +//! Macros shared throughout the compiler-builtins implementation + +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(not(feature = "public-test-deps"))] +macro_rules! public_test_dep { + ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { + $(#[$($meta)*])* pub(crate) $ident $($tokens)* + }; +} + +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(feature = "public-test-deps")] +macro_rules! public_test_dep { + {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { + $(#[$($meta)*])* pub $ident $($tokens)* + }; +} + +/// The "main macro" used for defining intrinsics. +/// +/// The compiler-builtins library is super platform-specific with tons of crazy +/// little tweaks for various platforms. As a result it *could* involve a lot of +/// #[cfg] and macro soup, but the intention is that this macro alleviates a lot +/// of that complexity. Ideally this macro has all the weird ABI things +/// platforms need and elsewhere in this library it just looks like normal Rust +/// code. +/// +/// This macro is structured to be invoked with a bunch of functions that looks +/// like: +/// +/// intrinsics! { +/// pub extern "C" fn foo(a: i32) -> u32 { +/// // ... +/// } +/// +/// #[nonstandard_attribute] +/// pub extern "C" fn bar(a: i32) -> u32 { +/// // ... +/// } +/// } +/// +/// Each function is defined in a manner that looks like a normal Rust function. +/// The macro then accepts a few nonstandard attributes that can decorate +/// various functions. Each of the attributes is documented below with what it +/// can do, and each of them slightly tweaks how further expansion happens. +/// +/// A quick overview of attributes supported right now are: +/// +/// * `maybe_use_optimized_c_shim` - indicates that the Rust implementation is +/// ignored if an optimized C version was compiled. +/// * `aapcs_on_arm` - forces the ABI of the function to be `"aapcs"` on ARM and +/// the specified ABI everywhere else. +/// * `unadjusted_on_win64` - like `aapcs_on_arm` this switches to the +/// `"unadjusted"` abi on Win64 and the specified abi elsewhere. +/// * `win64_128bit_abi_hack` - this attribute is used for 128-bit integer +/// intrinsics where the ABI is slightly tweaked on Windows platforms, but +/// it's a normal ABI elsewhere for returning a 128 bit integer. +/// * `arm_aeabi_alias` - handles the "aliasing" of various intrinsics on ARM +/// their otherwise typical names to other prefixed ones. +/// +macro_rules! intrinsics { + () => (); + + // Support cfg_attr: + ( + #[cfg_attr($e:meta, $($attr:tt)*)] + $(#[$($attrs:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + $($rest:tt)* + ) => ( + #[cfg($e)] + intrinsics! { + #[$($attr)*] + $(#[$($attrs)*])* + pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not($e))] + intrinsics! { + $(#[$($attrs)*])* + pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Right now there's a bunch of architecture-optimized intrinsics in the + // stock compiler-rt implementation. Not all of these have been ported over + // to Rust yet so when the `c` feature of this crate is enabled we fall back + // to the architecture-specific versions which should be more optimized. The + // purpose of this macro is to easily allow specifying this. + // + // The `#[maybe_use_optimized_c_shim]` attribute indicates that this + // intrinsic may have an optimized C version. In these situations the build + // script, if the C code is enabled and compiled, will emit a cfg directive + // to get passed to rustc for our compilation. If that cfg is set we skip + // the Rust implementation, but if the attribute is not enabled then we + // compile in the Rust implementation. + ( + #[maybe_use_optimized_c_shim] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg($name = "optimized-c")] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + extern $abi { + fn $name($($argname: $ty),*) $(-> $ret)?; + } + unsafe { + $name($($argname),*) + } + } + + #[cfg(not($name = "optimized-c"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // We recognize the `#[aapcs_on_arm]` attribute here and generate the + // same intrinsic but force it to have the `"aapcs"` calling convention on + // ARM and `"C"` elsewhere. + ( + #[aapcs_on_arm] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(target_arch = "arm")] + intrinsics! { + $(#[$($attr)*])* + pub extern "aapcs" fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not(target_arch = "arm"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Like aapcs above we recognize an attribute for the "unadjusted" abi on + // win64 for some methods. + ( + #[unadjusted_on_win64] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(windows, target_pointer_width = "64"))] + intrinsics! { + $(#[$($attr)*])* + pub extern "unadjusted" fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not(all(windows, target_pointer_width = "64")))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Some intrinsics on win64 which return a 128-bit integer have an.. unusual + // calling convention. That's managed here with this "abi hack" which alters + // the generated symbol's ABI. + // + // This will still define a function in this crate with the given name and + // signature, but the actual symbol for the intrinsic may have a slightly + // different ABI on win64. + ( + #[win64_128bit_abi_hack] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(windows, target_arch = "x86_64"))] + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(all(windows, target_arch = "x86_64"))] + pub mod $name { + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) + -> ::macros::win64_128bit_abi_hack::U64x2 + { + let e: $($ret)? = super::$name($($argname),*); + ::macros::win64_128bit_abi_hack::U64x2::from(e) + } + } + + #[cfg(not(all(windows, target_arch = "x86_64")))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // A bunch of intrinsics on ARM are aliased in the standard compiler-rt + // build under `__aeabi_*` aliases, and LLVM will call these instead of the + // original function. The aliasing here is used to generate these symbols in + // the object file. + ( + #[arm_aeabi_alias = $alias:ident] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(target_arch = "arm")] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(target_arch = "arm")] + pub mod $name { + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + #[cfg(target_arch = "arm")] + pub mod $alias { + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + #[cfg(not(target_arch = "arm"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // C mem* functions are only generated when the "mem" feature is enabled. + ( + #[mem_builtin] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(feature = "mem")] + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); + + // Naked functions are special: we can't generate wrappers for them since + // they use a custom calling convention. + ( + #[naked] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + pub mod $name { + #[naked] + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // For division and modulo, AVR uses a custom calling conventionĀ¹ that does + // not match our definitions here. Ideally we would just use hand-written + // naked functions, but that's quite a lot of code to portĀ² - so for the + // time being we are just ignoring the problematic functions, letting + // avr-gcc (which is required to compile to AVR anyway) link them from + // libgcc. + // + // Ā¹ https://gcc.gnu.org/wiki/avr-gcc (see "Exceptions to the Calling + // Convention") + // Ā² https://github.com/gcc-mirror/gcc/blob/31048012db98f5ec9c2ba537bfd850374bdd771f/libgcc/config/avr/lib1funcs.S + ( + #[avr_skip] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(not(target_arch = "avr"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // This is the final catch-all rule. At this point we generate an + // intrinsic with a conditional `#[no_mangle]` directive to avoid + // interfering with duplicate symbols and whatnot during testing. + // + // The implementation is placed in a separate module, to take advantage + // of the fact that rustc partitions functions into code generation + // units based on module they are defined in. As a result we will have + // a separate object file for each intrinsic. For further details see + // corresponding PR in rustc https://github.com/rust-lang/rust/pull/70846 + // + // After the intrinsic is defined we just continue with the rest of the + // input we were given. + ( + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); + + // Same as the above for unsafe functions. + ( + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); +} + +// Hack for LLVM expectations for ABI on windows. This is used by the +// `#[win64_128bit_abi_hack]` attribute recognized above +#[cfg(all(windows, target_pointer_width = "64"))] +pub mod win64_128bit_abi_hack { + #[repr(simd)] + pub struct U64x2(u64, u64); + + impl From<i128> for U64x2 { + fn from(i: i128) -> U64x2 { + use int::DInt; + let j = i as u128; + U64x2(j.lo(), j.hi()) + } + } + + impl From<u128> for U64x2 { + fn from(i: u128) -> U64x2 { + use int::DInt; + U64x2(i.lo(), i.hi()) + } + } +} diff --git a/vendor/compiler_builtins/src/math.rs b/vendor/compiler_builtins/src/math.rs new file mode 100644 index 000000000..fa59753f8 --- /dev/null +++ b/vendor/compiler_builtins/src/math.rs @@ -0,0 +1,117 @@ +#[allow(dead_code)] +#[path = "../libm/src/math/mod.rs"] +mod libm; + +macro_rules! no_mangle { + ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { + intrinsics! { + $( + pub extern "C" fn $fun($($iid: $ity),+) -> $oty { + self::libm::$fun($($iid),+) + } + )+ + } + } +} + +#[cfg(any( + all( + target_family = "wasm", + target_os = "unknown", + not(target_env = "wasi") + ), + all(target_arch = "x86_64", target_os = "uefi"), + all(target_arch = "xtensa", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] +no_mangle! { + fn acos(x: f64) -> f64; + fn asin(x: f64) -> f64; + fn cbrt(x: f64) -> f64; + fn expm1(x: f64) -> f64; + fn hypot(x: f64, y: f64) -> f64; + fn tan(x: f64) -> f64; + fn cos(x: f64) -> f64; + fn expf(x: f32) -> f32; + fn log2(x: f64) -> f64; + fn log2f(x: f32) -> f32; + fn log10(x: f64) -> f64; + fn log10f(x: f32) -> f32; + fn log(x: f64) -> f64; + fn logf(x: f32) -> f32; + fn fmin(x: f64, y: f64) -> f64; + fn fminf(x: f32, y: f32) -> f32; + fn fmax(x: f64, y: f64) -> f64; + fn fmaxf(x: f32, y: f32) -> f32; + fn round(x: f64) -> f64; + fn roundf(x: f32) -> f32; + fn sin(x: f64) -> f64; + fn pow(x: f64, y: f64) -> f64; + fn powf(x: f32, y: f32) -> f32; + fn fmod(x: f64, y: f64) -> f64; + fn fmodf(x: f32, y: f32) -> f32; + fn acosf(n: f32) -> f32; + fn atan2f(a: f32, b: f32) -> f32; + fn atanf(n: f32) -> f32; + fn coshf(n: f32) -> f32; + fn expm1f(n: f32) -> f32; + fn fdim(a: f64, b: f64) -> f64; + fn fdimf(a: f32, b: f32) -> f32; + fn log1pf(n: f32) -> f32; + fn sinhf(n: f32) -> f32; + fn tanhf(n: f32) -> f32; + fn ldexp(f: f64, n: i32) -> f64; + fn ldexpf(f: f32, n: i32) -> f32; +} + +#[cfg(any( + all( + target_family = "wasm", + target_os = "unknown", + not(target_env = "wasi") + ), + all(target_arch = "xtensa", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] +no_mangle! { + fn atan(x: f64) -> f64; + fn atan2(x: f64, y: f64) -> f64; + fn cosh(x: f64) -> f64; + fn log1p(x: f64) -> f64; + fn sinh(x: f64) -> f64; + fn tanh(x: f64) -> f64; + fn cosf(x: f32) -> f32; + fn exp(x: f64) -> f64; + fn sinf(x: f32) -> f32; + fn exp2(x: f64) -> f64; + fn exp2f(x: f32) -> f32; + fn fma(x: f64, y: f64, z: f64) -> f64; + fn fmaf(x: f32, y: f32, z: f32) -> f32; + fn asinf(n: f32) -> f32; + fn cbrtf(n: f32) -> f32; + fn hypotf(x: f32, y: f32) -> f32; + fn tanf(n: f32) -> f32; +} + +#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] +no_mangle! { + fn ceil(x: f64) -> f64; + fn ceilf(x: f32) -> f32; + fn floor(x: f64) -> f64; + fn floorf(x: f32) -> f32; + fn trunc(x: f64) -> f64; + fn truncf(x: f32) -> f32; +} + +// only for the thumb*-none-eabi* targets +#[cfg(all(target_arch = "arm", target_os = "none"))] +no_mangle! { + fn fmin(x: f64, y: f64) -> f64; + fn fminf(x: f32, y: f32) -> f32; + fn fmax(x: f64, y: f64) -> f64; + fn fmaxf(x: f32, y: f32) -> f32; + // `f64 % f64` + fn fmod(x: f64, y: f64) -> f64; + // `f32 % f32` + fn fmodf(x: f32, y: f32) -> f32; +} diff --git a/vendor/compiler_builtins/src/mem/impls.rs b/vendor/compiler_builtins/src/mem/impls.rs new file mode 100644 index 000000000..815132425 --- /dev/null +++ b/vendor/compiler_builtins/src/mem/impls.rs @@ -0,0 +1,267 @@ +use core::intrinsics::likely; + +const WORD_SIZE: usize = core::mem::size_of::<usize>(); +const WORD_MASK: usize = WORD_SIZE - 1; + +// If the number of bytes involved exceed this threshold we will opt in word-wise copy. +// The value here selected is max(2 * WORD_SIZE, 16): +// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through +// word-wise copy. +// * The word-wise copy logic needs to perform some checks so it has some small overhead. +// ensures that even on 32-bit platforms we have copied at least 8 bytes through +// word-wise copy so the saving of word-wise copy outweights the fixed overhead. +const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 { + 2 * WORD_SIZE +} else { + 16 +}; + +#[cfg(feature = "mem-unaligned")] +unsafe fn read_usize_unaligned(x: *const usize) -> usize { + // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which + // is translated to memcpy in LLVM. + let x_read = (x as *const [u8; core::mem::size_of::<usize>()]).read(); + core::mem::transmute(x_read) +} + +#[inline(always)] +pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) { + #[inline(always)] + unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { + let dest_end = dest.add(n); + while dest < dest_end { + *dest = *src; + dest = dest.add(1); + src = src.add(1); + } + } + + #[inline(always)] + unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + while dest_usize < dest_end { + *dest_usize = *src_usize; + dest_usize = dest_usize.add(1); + src_usize = src_usize.add(1); + } + } + + #[cfg(not(feature = "mem-unaligned"))] + #[inline(always)] + unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + // Calculate the misalignment offset and shift needed to reassemble value. + let offset = src as usize & WORD_MASK; + let shift = offset * 8; + + // Realign src + let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; + // This will read (but won't use) bytes out of bound. + // cfg needed because not all targets will have atomic loads that can be lowered + // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I) + #[cfg(target_has_atomic_load_store = "ptr")] + let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); + #[cfg(not(target_has_atomic_load_store = "ptr"))] + let mut prev_word = core::ptr::read_volatile(src_aligned); + + while dest_usize < dest_end { + src_aligned = src_aligned.add(1); + let cur_word = *src_aligned; + #[cfg(target_endian = "little")] + let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift); + #[cfg(target_endian = "big")] + let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift); + prev_word = cur_word; + + *dest_usize = resembled; + dest_usize = dest_usize.add(1); + } + } + + #[cfg(feature = "mem-unaligned")] + #[inline(always)] + unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + while dest_usize < dest_end { + *dest_usize = read_usize_unaligned(src_usize); + dest_usize = dest_usize.add(1); + src_usize = src_usize.add(1); + } + } + + if n >= WORD_COPY_THRESHOLD { + // Align dest + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK; + copy_forward_bytes(dest, src, dest_misalignment); + dest = dest.add(dest_misalignment); + src = src.add(dest_misalignment); + n -= dest_misalignment; + + let n_words = n & !WORD_MASK; + let src_misalignment = src as usize & WORD_MASK; + if likely(src_misalignment == 0) { + copy_forward_aligned_words(dest, src, n_words); + } else { + copy_forward_misaligned_words(dest, src, n_words); + } + dest = dest.add(n_words); + src = src.add(n_words); + n -= n_words; + } + copy_forward_bytes(dest, src, n); +} + +#[inline(always)] +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { + // The following backward copy helper functions uses the pointers past the end + // as their inputs instead of pointers to the start! + #[inline(always)] + unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { + let dest_start = dest.sub(n); + while dest_start < dest { + dest = dest.sub(1); + src = src.sub(1); + *dest = *src; + } + } + + #[inline(always)] + unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + while dest_start < dest_usize { + dest_usize = dest_usize.sub(1); + src_usize = src_usize.sub(1); + *dest_usize = *src_usize; + } + } + + #[cfg(not(feature = "mem-unaligned"))] + #[inline(always)] + unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + // Calculate the misalignment offset and shift needed to reassemble value. + let offset = src as usize & WORD_MASK; + let shift = offset * 8; + + // Realign src_aligned + let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; + // This will read (but won't use) bytes out of bound. + // cfg needed because not all targets will have atomic loads that can be lowered + // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I) + #[cfg(target_has_atomic_load_store = "ptr")] + let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); + #[cfg(not(target_has_atomic_load_store = "ptr"))] + let mut prev_word = core::ptr::read_volatile(src_aligned); + + while dest_start < dest_usize { + src_aligned = src_aligned.sub(1); + let cur_word = *src_aligned; + #[cfg(target_endian = "little")] + let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift; + #[cfg(target_endian = "big")] + let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift; + prev_word = cur_word; + + dest_usize = dest_usize.sub(1); + *dest_usize = resembled; + } + } + + #[cfg(feature = "mem-unaligned")] + #[inline(always)] + unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + while dest_start < dest_usize { + dest_usize = dest_usize.sub(1); + src_usize = src_usize.sub(1); + *dest_usize = read_usize_unaligned(src_usize); + } + } + + let mut dest = dest.add(n); + let mut src = src.add(n); + + if n >= WORD_COPY_THRESHOLD { + // Align dest + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let dest_misalignment = dest as usize & WORD_MASK; + copy_backward_bytes(dest, src, dest_misalignment); + dest = dest.sub(dest_misalignment); + src = src.sub(dest_misalignment); + n -= dest_misalignment; + + let n_words = n & !WORD_MASK; + let src_misalignment = src as usize & WORD_MASK; + if likely(src_misalignment == 0) { + copy_backward_aligned_words(dest, src, n_words); + } else { + copy_backward_misaligned_words(dest, src, n_words); + } + dest = dest.sub(n_words); + src = src.sub(n_words); + n -= n_words; + } + copy_backward_bytes(dest, src, n); +} + +#[inline(always)] +pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { + #[inline(always)] + pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) { + let end = s.add(n); + while s < end { + *s = c; + s = s.add(1); + } + } + + #[inline(always)] + pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) { + let mut broadcast = c as usize; + let mut bits = 8; + while bits < WORD_SIZE * 8 { + broadcast |= broadcast << bits; + bits *= 2; + } + + let mut s_usize = s as *mut usize; + let end = s.add(n) as *mut usize; + + while s_usize < end { + *s_usize = broadcast; + s_usize = s_usize.add(1); + } + } + + if likely(n >= WORD_COPY_THRESHOLD) { + // Align s + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let misalignment = (s as usize).wrapping_neg() & WORD_MASK; + set_bytes_bytes(s, c, misalignment); + s = s.add(misalignment); + n -= misalignment; + + let n_words = n & !WORD_MASK; + set_bytes_words(s, c, n_words); + s = s.add(n_words); + n -= n_words; + } + set_bytes_bytes(s, c, n); +} diff --git a/vendor/compiler_builtins/src/mem/mod.rs b/vendor/compiler_builtins/src/mem/mod.rs new file mode 100644 index 000000000..a55113861 --- /dev/null +++ b/vendor/compiler_builtins/src/mem/mod.rs @@ -0,0 +1,211 @@ +// Trying to satisfy clippy here is hopeless +#![allow(clippy::style)] + +#[allow(warnings)] +#[cfg(target_pointer_width = "16")] +type c_int = i16; +#[allow(warnings)] +#[cfg(not(target_pointer_width = "16"))] +type c_int = i32; + +use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div}; +use core::mem; +use core::ops::{BitOr, Shl}; + +// memcpy/memmove/memset have optimized implementations on some architectures +#[cfg_attr( + all(not(feature = "no-asm"), target_arch = "x86_64"), + path = "x86_64.rs" +)] +mod impls; + +intrinsics! { + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + impls::copy_forward(dest, src, n); + dest + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + let delta = (dest as usize).wrapping_sub(src as usize); + if delta >= n { + // We can copy forwards because either dest is far enough ahead of src, + // or src is ahead of dest (and delta overflowed). + impls::copy_forward(dest, src, n); + } else { + impls::copy_backward(dest, src, n); + } + dest + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 { + impls::set_bytes(s, c as u8, n); + s + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + let mut i = 0; + while i < n { + let a = *s1.add(i); + let b = *s2.add(i); + if a != b { + return a as i32 - b as i32; + } + i += 1; + } + 0 + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + memcmp(s1, s2, n) + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { + let mut n = 0; + let mut s = s; + while *s != 0 { + n += 1; + s = s.offset(1); + } + n + } +} + +// `bytes` must be a multiple of `mem::size_of::<T>()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] +fn memcpy_element_unordered_atomic<T: Copy>(dest: *mut T, src: *const T, bytes: usize) { + unsafe { + let n = exact_div(bytes, mem::size_of::<T>()); + let mut i = 0; + while i < n { + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + i += 1; + } + } +} + +// `bytes` must be a multiple of `mem::size_of::<T>()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] +fn memmove_element_unordered_atomic<T: Copy>(dest: *mut T, src: *const T, bytes: usize) { + unsafe { + let n = exact_div(bytes, mem::size_of::<T>()); + if src < dest as *const T { + // copy from end + let mut i = n; + while i != 0 { + i -= 1; + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + } + } else { + // copy from beginning + let mut i = 0; + while i < n { + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + i += 1; + } + } + } +} + +// `T` must be a primitive integer type, and `bytes` must be a multiple of `mem::size_of::<T>()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] +fn memset_element_unordered_atomic<T>(s: *mut T, c: u8, bytes: usize) +where + T: Copy + From<u8> + Shl<u32, Output = T> + BitOr<T, Output = T>, +{ + unsafe { + let n = exact_div(bytes, mem::size_of::<T>()); + + // Construct a value of type `T` consisting of repeated `c` + // bytes, to let us ensure we write each `T` atomically. + let mut x = T::from(c); + let mut i = 1; + while i < mem::size_of::<T>() { + x = x << 8 | T::from(c); + i += 1; + } + + // Write it to `s` + let mut i = 0; + while i < n { + atomic_store_unordered(s.add(i), x); + i += 1; + } + } +} + +intrinsics! { + #[cfg(target_has_atomic_load_store = "8")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "16")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "32")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "64")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "128")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + + #[cfg(target_has_atomic_load_store = "8")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "16")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "32")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "64")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "128")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + + #[cfg(target_has_atomic_load_store = "8")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + #[cfg(target_has_atomic_load_store = "16")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + #[cfg(target_has_atomic_load_store = "32")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + #[cfg(target_has_atomic_load_store = "64")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + #[cfg(target_has_atomic_load_store = "128")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } +} diff --git a/vendor/compiler_builtins/src/mem/x86_64.rs b/vendor/compiler_builtins/src/mem/x86_64.rs new file mode 100644 index 000000000..a7ab6f605 --- /dev/null +++ b/vendor/compiler_builtins/src/mem/x86_64.rs @@ -0,0 +1,100 @@ +// On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have +// been enhanced to perform better than an simple qword loop, making them ideal +// for implementing memcpy/memset. Note that "rep cmps" has received no such +// enhancement, so it is not used to implement memcmp. +// +// On certain recent Intel processors, "rep movsb" and "rep stosb" have been +// further enhanced to automatically select the best microarchitectural +// implementation based on length and alignment. See the following features from +// the "IntelĀ® 64 and IA-32 Architectures Optimization Reference Manual": +// - ERMSB - Enhanced REP MOVSB and STOSB (Ivy Bridge and later) +// - FSRM - Fast Short REP MOV (Ice Lake and later) +// - Fast Zero-Length MOVSB (On no current hardware) +// - Fast Short STOSB (On no current hardware) +// +// To simplify things, we switch to using the byte-based variants if the "ermsb" +// feature is present at compile-time. We don't bother detecting other features. +// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". + +#[inline(always)] +#[cfg(target_feature = "ermsb")] +pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "repe movsb (%rsi), (%rdi)", + inout("rcx") count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(att_syntax, nostack, preserves_flags) + ); +} + +#[inline(always)] +#[cfg(not(target_feature = "ermsb"))] +pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { + let qword_count = count >> 3; + let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "repe movsq (%rsi), (%rdi)", + "mov {byte_count:e}, %ecx", + "repe movsb (%rsi), (%rdi)", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(att_syntax, nostack, preserves_flags) + ); +} + +#[inline(always)] +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { + let qword_count = count >> 3; + let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "std", + "repe movsq (%rsi), (%rdi)", + "movl {byte_count:e}, %ecx", + "addq $7, %rdi", + "addq $7, %rsi", + "repe movsb (%rsi), (%rdi)", + "cld", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest.add(count).wrapping_sub(8) => _, + inout("rsi") src.add(count).wrapping_sub(8) => _, + options(att_syntax, nostack) + ); +} + +#[inline(always)] +#[cfg(target_feature = "ermsb")] +pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "repe stosb %al, (%rdi)", + inout("rcx") count => _, + inout("rdi") dest => _, + inout("al") c => _, + options(att_syntax, nostack, preserves_flags) + ) +} + +#[inline(always)] +#[cfg(not(target_feature = "ermsb"))] +pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { + let qword_count = count >> 3; + let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "repe stosq %rax, (%rdi)", + "mov {byte_count:e}, %ecx", + "repe stosb %al, (%rdi)", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest => _, + in("rax") (c as u64) * 0x0101010101010101, + options(att_syntax, nostack, preserves_flags) + ); +} diff --git a/vendor/compiler_builtins/src/probestack.rs b/vendor/compiler_builtins/src/probestack.rs new file mode 100644 index 000000000..0c30384db --- /dev/null +++ b/vendor/compiler_builtins/src/probestack.rs @@ -0,0 +1,350 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! This module defines the `__rust_probestack` intrinsic which is used in the +//! implementation of "stack probes" on certain platforms. +//! +//! The purpose of a stack probe is to provide a static guarantee that if a +//! thread has a guard page then a stack overflow is guaranteed to hit that +//! guard page. If a function did not have a stack probe then there's a risk of +//! having a stack frame *larger* than the guard page, so a function call could +//! skip over the guard page entirely and then later hit maybe the heap or +//! another thread, possibly leading to security vulnerabilities such as [The +//! Stack Clash], for example. +//! +//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash +//! +//! The `__rust_probestack` is called in the prologue of functions whose stack +//! size is larger than the guard page, for example larger than 4096 bytes on +//! x86. This function is then responsible for "touching" all pages relevant to +//! the stack to ensure that that if any of them are the guard page we'll hit +//! them guaranteed. +//! +//! The precise ABI for how this function operates is defined by LLVM. There's +//! no real documentation as to what this is, so you'd basically need to read +//! the LLVM source code for reference. Often though the test cases can be +//! illuminating as to the ABI that's generated, or just looking at the output +//! of `llc`. +//! +//! Note that `#[naked]` is typically used here for the stack probe because the +//! ABI corresponds to no actual ABI. +//! +//! Finally it's worth noting that at the time of this writing LLVM only has +//! support for stack probes on x86 and x86_64. There's no support for stack +//! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would +//! be more than welcome to accept such a change! + +#![cfg(not(feature = "mangled-names"))] +// Windows already has builtins to do this. +#![cfg(not(windows))] +// All these builtins require assembly +#![cfg(not(feature = "no-asm"))] +// We only define stack probing for these architectures today. +#![cfg(any(target_arch = "x86_64", target_arch = "x86"))] + +extern "C" { + pub fn __rust_probestack(); +} + +// A wrapper for our implementation of __rust_probestack, which allows us to +// keep the assembly inline while controlling all CFI directives in the assembly +// emitted for the function. +// +// This is the ELF version. +#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .pushsection .text.__rust_probestack + .globl __rust_probestack + .type __rust_probestack, @function + .hidden __rust_probestack + __rust_probestack: + ", + $body, + " + .size __rust_probestack, . - __rust_probestack + .popsection + " + ) + }; +} + +#[cfg(all(target_os = "uefi", target_arch = "x86_64"))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl __rust_probestack + __rust_probestack: + ", + $body + ) + }; +} + +// Same as above, but for Mach-O. Note that the triple underscore +// is deliberate +#[cfg(target_vendor = "apple")] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; +} + +// In UEFI x86 arch, triple underscore is deliberate. +#[cfg(all(target_os = "uefi", target_arch = "x86"))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; +} + +// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, +// ensuring that if any pages are unmapped we'll make a page fault. +// +// The ABI here is that the stack frame size is located in `%rax`. Upon +// return we're not supposed to modify `%rsp` or `%rax`. +// +// Any changes to this function should be replicated to the SGX version below. +#[cfg(all( + target_arch = "x86_64", + not(all(target_env = "sgx", target_vendor = "fortanix")) +))] +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. + cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc + " + ), + options(att_syntax) +); + +// This function is the same as above, except that some instructions are +// [manually patched for LVI]. +// +// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions +#[cfg(all( + target_arch = "x86_64", + all(target_env = "sgx", target_vendor = "fortanix") +))] +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. + cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + pop %r11 + lfence + jmp *%r11 + .cfi_endproc + " + ), + options(att_syntax) +); + +#[cfg(all(target_arch = "x86", not(target_os = "uefi")))] +// This is the same as x86_64 above, only translated for 32-bit sizes. Note +// that on Unix we're expected to restore everything as it was, this +// function basically can't tamper with anything. +// +// The ABI here is the same as x86_64, except everything is 32-bits large. +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + + add %eax,%esp + pop %ecx + leave + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " + ), + options(att_syntax) +); + +#[cfg(all(target_arch = "x86", target_os = "uefi"))] +// UEFI target is windows like target. LLVM will do _chkstk things like windows. +// probestack function will also do things like _chkstk in MSVC. +// So we need to sub %ax %sp in probestack when arch is x86. +// +// REF: Rust commit(74e80468347) +// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 +// Comments in LLVM: +// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. +// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp +// themselves. +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + push %edx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + mov 4(%ebp),%edx + mov %edx, 12(%esp) + add %eax,%esp + pop %edx + pop %ecx + leave + + sub %eax, %esp + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " + ), + options(att_syntax) +); diff --git a/vendor/compiler_builtins/src/riscv.rs b/vendor/compiler_builtins/src/riscv.rs new file mode 100644 index 000000000..ee78b9dba --- /dev/null +++ b/vendor/compiler_builtins/src/riscv.rs @@ -0,0 +1,34 @@ +intrinsics! { + // Implementation from gcc + // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c + pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { + let (mut a, mut b) = (a, b); + let mut r = 0; + + while a > 0 { + if a & 1 > 0 { + r += b; + } + a >>= 1; + b <<= 1; + } + + r + } + + #[cfg(not(target_feature = "m"))] + pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { + let (mut a, mut b) = (a, b); + let mut r = 0; + + while a > 0 { + if a & 1 > 0 { + r += b; + } + a >>= 1; + b <<= 1; + } + + r + } +} diff --git a/vendor/compiler_builtins/src/x86.rs b/vendor/compiler_builtins/src/x86.rs new file mode 100644 index 000000000..fd1f32e3a --- /dev/null +++ b/vendor/compiler_builtins/src/x86.rs @@ -0,0 +1,85 @@ +#![allow(unused_imports)] + +use core::intrinsics; + +// NOTE These functions are implemented using assembly because they using a custom +// calling convention which can't be implemented using a normal Rust function + +// NOTE These functions are never mangled as they are not tested against compiler-rt +// and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca + +intrinsics! { + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk_ms() { + core::arch::asm!( + "push %ecx", + "push %eax", + "cmp $0x1000,%eax", + "lea 12(%esp),%ecx", + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "pop %eax", + "pop %ecx", + "ret", + options(noreturn, att_syntax) + ); + } + + // FIXME: __alloca should be an alias to __chkstk + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn __alloca() { + core::arch::asm!( + "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); + } + + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk() { + core::arch::asm!( + "push %ecx", + "cmp $0x1000,%eax", + "lea 8(%esp),%ecx", // esp before calling this routine -> ecx + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "lea 4(%esp),%eax", // load pointer to the return address into eax + "mov %ecx,%esp", // install the new top of stack pointer into esp + "mov -4(%eax),%ecx", // restore ecx + "push (%eax)", // push return address onto the stack + "sub %esp,%eax", // restore the original value in eax + "ret", + options(noreturn, att_syntax) + ); + } +} diff --git a/vendor/compiler_builtins/src/x86_64.rs b/vendor/compiler_builtins/src/x86_64.rs new file mode 100644 index 000000000..393eeddd8 --- /dev/null +++ b/vendor/compiler_builtins/src/x86_64.rs @@ -0,0 +1,94 @@ +#![allow(unused_imports)] + +use core::intrinsics; + +// NOTE These functions are implemented using assembly because they using a custom +// calling convention which can't be implemented using a normal Rust function + +// NOTE These functions are never mangled as they are not tested against compiler-rt +// and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca + +intrinsics! { + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk_ms() { + core::arch::asm!( + "push %rcx", + "push %rax", + "cmp $0x1000,%rax", + "lea 24(%rsp),%rcx", + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "pop %rax", + "pop %rcx", + "ret", + options(noreturn, att_syntax) + ); + } + + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn __alloca() { + core::arch::asm!( + "mov %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx + "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); + } + + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk() { + core::arch::asm!( + "push %rcx", + "cmp $0x1000,%rax", + "lea 16(%rsp),%rcx", // rsp before calling this routine -> rcx + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "lea 8(%rsp),%rax", // load pointer to the return address into rax + "mov %rcx,%rsp", // install the new top of stack pointer into rsp + "mov -8(%rax),%rcx", // restore rcx + "push (%rax)", // push return address onto the stack + "sub %rsp,%rax", // restore the original value in rax + "ret", + options(noreturn, att_syntax) + ); + } +} + +// HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM +// support unless we emit the _fltused +mod _fltused { + #[no_mangle] + #[used] + #[cfg(target_os = "uefi")] + static _fltused: i32 = 0; +} |