diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:02:30 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:02:30 +0000 |
commit | 76cb841cb886eef6b3bee341a2266c76578724ad (patch) | |
tree | f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /arch/x86/math-emu | |
parent | Initial commit. (diff) | |
download | linux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip |
Adding upstream version 4.19.249.upstream/4.19.249
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/math-emu')
47 files changed, 13016 insertions, 0 deletions
diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile new file mode 100644 index 000000000..02211fc6f --- /dev/null +++ b/arch/x86/math-emu/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for wm-FPU-emu +# + +#DEBUG = -DDEBUGGING +DEBUG = +PARANOID = -DPARANOID +ccflags-y += $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) +asflags-y += $(PARANOID) + +# From 'C' language sources: +C_OBJS =fpu_entry.o errors.o \ + fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \ + load_store.o get_address.o \ + poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \ + reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \ + reg_ld_str.o reg_divide.o reg_mul.o + +# From 80x86 assembler sources: +A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \ + div_small.o reg_norm.o reg_round.o \ + wm_shrx.o wm_sqrt.o \ + div_Xsig.o polynom_Xsig.o round_Xsig.o \ + shr_Xsig.o mul_Xsig.o + +obj-y =$(C_OBJS) $(A_OBJS) + +proto: + cproto -e -DMAKING_PROTO *.c >fpu_proto.h diff --git a/arch/x86/math-emu/README b/arch/x86/math-emu/README new file mode 100644 index 000000000..e6235491d --- /dev/null +++ b/arch/x86/math-emu/README @@ -0,0 +1,427 @@ + +---------------------------------------------------------------------------+ + | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. | + | | + | Copyright (C) 1992,1993,1994,1995,1996,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | This program is free software; you can redistribute it and/or modify | + | it under the terms of the GNU General Public License version 2 as | + | published by the Free Software Foundation. | + | | + | This program is distributed in the hope that it will be useful, | + | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | GNU General Public License for more details. | + | | + | You should have received a copy of the GNU General Public License | + | along with this program; if not, write to the Free Software | + | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | + | | + +---------------------------------------------------------------------------+ + + + +wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387 +which was my 80387 emulator for early versions of djgpp (gcc under +msdos); wm-emu387 was in turn based upon emu387 which was written by +DJ Delorie for djgpp. The interface to the Linux kernel is based upon +the original Linux math emulator by Linus Torvalds. + +My target FPU for wm-FPU-emu is that described in the Intel486 +Programmer's Reference Manual (1992 edition). Unfortunately, numerous +facets of the functioning of the FPU are not well covered in the +Reference Manual. The information in the manual has been supplemented +with measurements on real 80486's. Unfortunately, it is simply not +possible to be sure that all of the peculiarities of the 80486 have +been discovered, so there is always likely to be obscure differences +in the detailed behaviour of the emulator and a real 80486. + +wm-FPU-emu does not implement all of the behaviour of the 80486 FPU, +but is very close. See "Limitations" later in this file for a list of +some differences. + +Please report bugs, etc to me at: + billm@melbpc.org.au +or b.metzenthen@medoto.unimelb.edu.au + +For more information on the emulator and on floating point topics, see +my web pages, currently at http://www.suburbia.net/~billm/ + + +--Bill Metzenthen + December 1999 + + +----------------------- Internals of wm-FPU-emu ----------------------- + +Numeric algorithms: +(1) Add, subtract, and multiply. Nothing remarkable in these. +(2) Divide has been tuned to get reasonable performance. The algorithm + is not the obvious one which most people seem to use, but is designed + to take advantage of the characteristics of the 80386. I expect that + it has been invented many times before I discovered it, but I have not + seen it. It is based upon one of those ideas which one carries around + for years without ever bothering to check it out. +(3) The sqrt function has been tuned to get good performance. It is based + upon Newton's classic method. Performance was improved by capitalizing + upon the properties of Newton's method, and the code is once again + structured taking account of the 80386 characteristics. +(4) The trig, log, and exp functions are based in each case upon quasi- + "optimal" polynomial approximations. My definition of "optimal" was + based upon getting good accuracy with reasonable speed. +(5) The argument reducing code for the trig function effectively uses + a value of pi which is accurate to more than 128 bits. As a consequence, + the reduced argument is accurate to more than 64 bits for arguments up + to a few pi, and accurate to more than 64 bits for most arguments, + even for arguments approaching 2^63. This is far superior to an + 80486, which uses a value of pi which is accurate to 66 bits. + +The code of the emulator is complicated slightly by the need to +account for a limited form of re-entrancy. Normally, the emulator will +emulate each FPU instruction to completion without interruption. +However, it may happen that when the emulator is accessing the user +memory space, swapping may be needed. In this case the emulator may be +temporarily suspended while disk i/o takes place. During this time +another process may use the emulator, thereby perhaps changing static +variables. The code which accesses user memory is confined to five +files: + fpu_entry.c + reg_ld_str.c + load_store.c + get_address.c + errors.c +As from version 1.12 of the emulator, no static variables are used +(apart from those in the kernel's per-process tables). The emulator is +therefore now fully re-entrant, rather than having just the restricted +form of re-entrancy which is required by the Linux kernel. + +----------------------- Limitations of wm-FPU-emu ----------------------- + +There are a number of differences between the current wm-FPU-emu +(version 2.01) and the 80486 FPU (apart from bugs). The differences +are fewer than those which applied to the 1.xx series of the emulator. +Some of the more important differences are listed below: + +The Roundup flag does not have much meaning for the transcendental +functions and its 80486 value with these functions is likely to differ +from its emulator value. + +In a few rare cases the Underflow flag obtained with the emulator will +be different from that obtained with an 80486. This occurs when the +following conditions apply simultaneously: +(a) the operands have a higher precision than the current setting of the + precision control (PC) flags. +(b) the underflow exception is masked. +(c) the magnitude of the exact result (before rounding) is less than 2^-16382. +(d) the magnitude of the final result (after rounding) is exactly 2^-16382. +(e) the magnitude of the exact result would be exactly 2^-16382 if the + operands were rounded to the current precision before the arithmetic + operation was performed. +If all of these apply, the emulator will set the Underflow flag but a real +80486 will not. + +NOTE: Certain formats of Extended Real are UNSUPPORTED. They are +unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities, +and Unnormals. None of these will be generated by an 80486 or by the +emulator. Do not use them. The emulator treats them differently in +detail from the way an 80486 does. + +Self modifying code can cause the emulator to fail. An example of such +code is: + movl %esp,[%ebx] + fld1 +The FPU instruction may be (usually will be) loaded into the pre-fetch +queue of the CPU before the mov instruction is executed. If the +destination of the 'movl' overlaps the FPU instruction then the bytes +in the prefetch queue and memory will be inconsistent when the FPU +instruction is executed. The emulator will be invoked but will not be +able to find the instruction which caused the device-not-present +exception. For this case, the emulator cannot emulate the behaviour of +an 80486DX. + +Handling of the address size override prefix byte (0x67) has not been +extensively tested yet. A major problem exists because using it in +vm86 mode can cause a general protection fault. Address offsets +greater than 0xffff appear to be illegal in vm86 mode but are quite +acceptable (and work) in real mode. A small test program developed to +check the addressing, and which runs successfully in real mode, +crashes dosemu under Linux and also brings Windows down with a general +protection fault message when run under the MS-DOS prompt of Windows +3.1. (The program simply reads data from a valid address). + +The emulator supports 16-bit protected mode, with one difference from +an 80486DX. A 80486DX will allow some floating point instructions to +write a few bytes below the lowest address of the stack. The emulator +will not allow this in 16-bit protected mode: no instructions are +allowed to write outside the bounds set by the protection. + +----------------------- Performance of wm-FPU-emu ----------------------- + +Speed. +----- + +The speed of floating point computation with the emulator will depend +upon instruction mix. Relative performance is best for the instructions +which require most computation. The simple instructions are adversely +affected by the FPU instruction trap overhead. + + +Timing: Some simple timing tests have been made on the emulator functions. +The times include load/store instructions. All times are in microseconds +measured on a 33MHz 386 with 64k cache. The Turbo C tests were under +ms-dos, the next two columns are for emulators running with the djgpp +ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97, +using libm4.0 (hard). + +function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu + + + 60.5 154.8 76.5 139.4 + - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7 + * 71.0 190.8 79.6 146.6 + / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1 + + sin() 310.8 4692.0 319.0 398.5 + cos() 284.4 4855.2 308.0 388.7 + tan() 495.0 8807.1 394.9 504.7 + atan() 328.9 4866.4 601.1 419.5-491.9 + + sqrt() 128.7 crashed 145.2 227.0 + log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1 + exp() 479.1 6619.2 469.1 850.8 + + +The performance under Linux is improved by the use of look-ahead code. +The following results show the improvement which is obtained under +Linux due to the look-ahead code. Also given are the times for the +original Linux emulator with the 4.1 'soft' lib. + + [ Linus' note: I changed look-ahead to be the default under linux, as + there was no reason not to use it after I had edited it to be + disabled during tracing ] + + wm-FPU-emu w original w + look-ahead 'soft' lib + + 106.4 190.2 + - 108.6-111.6 192.4-216.2 + * 113.4 193.1 + / 108.8-124.4 700.1-706.2 + + sin() 390.5 2642.0 + cos() 381.5 2767.4 + tan() 496.5 3153.3 + atan() 367.2-435.5 2439.4-3396.8 + + sqrt() 195.1 4732.5 + log() 358.0-387.5 3359.2-3390.3 + exp() 619.3 4046.4 + + +These figures are now somewhat out-of-date. The emulator has become +progressively slower for most functions as more of the 80486 features +have been implemented. + + +----------------------- Accuracy of wm-FPU-emu ----------------------- + + +The accuracy of the emulator is in almost all cases equal to or better +than that of an Intel 80486 FPU. + +The results of the basic arithmetic functions (+,-,*,/), and fsqrt +match those of an 80486 FPU. They are the best possible; the error for +these never exceeds 1/2 an lsb. The fprem and fprem1 instructions +return exact results; they have no error. + + +The following table compares the emulator accuracy for the sqrt(), +trig and log functions against the Turbo C "emulator". For this table, +each function was tested at about 400 points. Ideal worst-case results +would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for +arguments greater than pi/4 can be thought of as being related to the +precision of the argument x; e.g. an argument of pi/2-(1e-10) which is +accurate to 64 bits can result in a relative accuracy in cos() of +about 64 + log2(cos(x)) = 31 bits. + + +Function Tested x range Worst result Turbo C + (relative bits) + +sqrt(x) 1 .. 2 64.1 63.2 +atan(x) 1e-10 .. 200 64.2 62.8 +cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4 + 64.1 (x = pi/2-(1e-10)) 31.9 +sin(x) 1e-10 .. pi/2 64.0 62.8 +tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1 + 64.1 (x = pi/2-(1e-10)) 31.9 +exp(x) 0 .. 1 63.1 ** 62.9 +log(x) 1+1e-6 .. 2 63.8 ** 62.1 + +** The accuracy for exp() and log() is low because the FPU (emulator) +does not compute them directly; two operations are required. + + +The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or +later) for 'float' variables (24 bit precision numbers) when precision +control is set to 24, 53 or 64 bits, and for 'double' variables (53 +bit precision numbers) when precision control is set to 53 bits (a +properly performing FPU cannot pass the 'paranoia' tests for 'double' +variables when precision control is set to 64 bits). + +The code for reducing the argument for the trig functions (fsin, fcos, +fptan and fsincos) has been improved and now effectively uses a value +for pi which is accurate to more than 128 bits precision. As a +consequence, the accuracy of these functions for large arguments has +been dramatically improved (and is now very much better than an 80486 +FPU). There is also now no degradation of accuracy for fcos and fptan +for operands close to pi/2. Measured results are (note that the +definition of accuracy has changed slightly from that used for the +above table): + +Function Tested x range Worst result + (absolute bits) + +cos(x) 0 .. 9.22e+18 62.0 +sin(x) 1e-16 .. 9.22e+18 62.1 +tan(x) 1e-16 .. 9.22e+18 61.8 + +It is possible with some effort to find very large arguments which +give much degraded precision. For example, the integer number + 8227740058411162616.0 +is within about 10e-7 of a multiple of pi. To find the tan (for +example) of this number to 64 bits precision it would be necessary to +have a value of pi which had about 150 bits precision. The FPU +emulator computes the result to about 42.6 bits precision (the correct +result is about -9.739715e-8). On the other hand, an 80486 FPU returns +0.01059, which in relative terms is hopelessly inaccurate. + +For arguments close to critical angles (which occur at multiples of +pi/2) the emulator is more accurate than an 80486 FPU. For very large +arguments, the emulator is far more accurate. + + +Prior to version 1.20 of the emulator, the accuracy of the results for +the transcendental functions (in their principal range) was not as +good as the results from an 80486 FPU. From version 1.20, the accuracy +has been considerably improved and these functions now give measured +worst-case results which are better than the worst-case results given +by an 80486 FPU. + +The following table gives the measured results for the emulator. The +number of randomly selected arguments in each case is about half a +million. The group of three columns gives the frequency of the given +accuracy in number of times per million, thus the second of these +columns shows that an accuracy of between 63.80 and 63.89 bits was +found at a rate of 133 times per one million measurements for fsin. +The results show that the fsin, fcos and fptan instructions return +results which are in error (i.e. less accurate than the best possible +result (which is 64 bits)) for about one per cent of all arguments +between -pi/2 and +pi/2. The other instructions have a lower +frequency of results which are in error. The last two columns give +the worst accuracy which was found (in bits) and the approximate value +of the argument which produced it. + + frequency (per M) + ------------------- --------------- +instr arg range # tests 63.7 63.8 63.9 worst at arg + bits bits bits bits +----- ------------ ------- ---- ---- ----- ----- -------- +fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317 +fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801 +fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876 +fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q) +fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x) +fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x) +f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709 + + +Tests performed on an 80486 FPU showed results of lower accuracy. The +following table gives the results which were obtained with an AMD +486DX2/66 (other tests indicate that an Intel 486DX produces +identical results). The tests were basically the same as those used +to measure the emulator (the values, being random, were in general not +the same). The total number of tests for each instruction are given +at the end of the table, in case each about 100k tests were performed. +Another line of figures at the end of the table shows that most of the +instructions return results which are in error for more than 10 +percent of the arguments tested. + +The numbers in the body of the table give the approx number of times a +result of the given accuracy in bits (given in the left-most column) +was obtained per one million arguments. For three of the instructions, +two columns of results are given: * The second column for f2xm1 gives +the number cases where the results of the first column were for a +positive argument, this shows that this instruction gives better +results for positive arguments than it does for negative. * In the +cases of fcos and fptan, the first column gives the results when all +cases where arguments greater than 1.5 were removed from the results +given in the second column. Unlike the emulator, an 80486 FPU returns +results of relatively poor accuracy for these instructions when the +argument approaches pi/2. The table does not show those cases when the +accuracy of the results were less than 62 bits, which occurs quite +often for fsin and fptan when the argument approaches pi/2. This poor +accuracy is discussed above in relation to the Turbo C "emulator", and +the accuracy of the value of pi. + + +bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan +62.0 0 0 0 0 437 0 0 0 0 925 +62.1 0 0 10 0 894 0 0 0 0 1023 +62.2 14 0 0 0 1033 0 0 0 0 945 +62.3 57 0 0 0 1202 0 0 0 0 1023 +62.4 385 0 0 10 1292 0 23 0 0 1178 +62.5 1140 0 0 119 1649 0 39 0 0 1149 +62.6 2037 0 0 189 1620 0 16 0 0 1169 +62.7 5086 14 0 646 2315 10 101 35 39 1402 +62.8 8818 86 0 984 3050 59 287 131 224 2036 +62.9 11340 1355 0 2126 4153 79 605 357 321 1948 +63.0 15557 4750 0 3319 5376 246 1281 862 808 2688 +63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302 +63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724 +63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236 +63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587 +63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262 +63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346 +63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209 +63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329 +63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601 + +Per cent with error: + 30.9 3.2 18.5 9.8 13.1 11.6 17.4 +Total arguments tested: + 70194 70099 101784 100641 100641 101799 128853 114893 102675 102675 + + +------------------------- Contributors ------------------------------- + +A number of people have contributed to the development of the +emulator, often by just reporting bugs, sometimes with suggested +fixes, and a few kind people have provided me with access in one way +or another to an 80486 machine. Contributors include (to those people +who I may have forgotten, please forgive me): + +Linus Torvalds +Tommy.Thorn@daimi.aau.dk +Andrew.Tridgell@anu.edu.au +Nick Holloway, alfie@dcs.warwick.ac.uk +Hermano Moura, moura@dcs.gla.ac.uk +Jon Jagger, J.Jagger@scp.ac.uk +Lennart Benschop +Brian Gallew, geek+@CMU.EDU +Thomas Staniszewski, ts3v+@andrew.cmu.edu +Martin Howell, mph@plasma.apana.org.au +M Saggaf, alsaggaf@athena.mit.edu +Peter Barker, PETER@socpsy.sci.fau.edu +tom@vlsivie.tuwien.ac.at +Dan Russel, russed@rpi.edu +Daniel Carosone, danielce@ee.mu.oz.au +cae@jpmorgan.com +Hamish Coleman, t933093@minyos.xx.rmit.oz.au +Bruce Evans, bde@kralizec.zeta.org.au +Timo Korvola, Timo.Korvola@hut.fi +Rick Lyons, rick@razorback.brisnet.org.au +Rick, jrs@world.std.com + +...and numerous others who responded to my request for help with +a real 80486. + diff --git a/arch/x86/math-emu/control_w.h b/arch/x86/math-emu/control_w.h new file mode 100644 index 000000000..60f4dcc5e --- /dev/null +++ b/arch/x86/math-emu/control_w.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | control_w.h | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _CONTROLW_H_ +#define _CONTROLW_H_ + +#ifdef __ASSEMBLY__ +#define _Const_(x) $##x +#else +#define _Const_(x) x +#endif + +#define CW_RC _Const_(0x0C00) /* rounding control */ +#define CW_PC _Const_(0x0300) /* precision control */ + +#define CW_Precision Const_(0x0020) /* loss of precision mask */ +#define CW_Underflow Const_(0x0010) /* underflow mask */ +#define CW_Overflow Const_(0x0008) /* overflow mask */ +#define CW_ZeroDiv Const_(0x0004) /* divide by zero mask */ +#define CW_Denormal Const_(0x0002) /* denormalized operand mask */ +#define CW_Invalid Const_(0x0001) /* invalid operation mask */ + +#define CW_Exceptions _Const_(0x003f) /* all masks */ + +#define RC_RND _Const_(0x0000) +#define RC_DOWN _Const_(0x0400) +#define RC_UP _Const_(0x0800) +#define RC_CHOP _Const_(0x0C00) + +/* p 15-5: Precision control bits affect only the following: + ADD, SUB(R), MUL, DIV(R), and SQRT */ +#define PR_24_BITS _Const_(0x000) +#define PR_53_BITS _Const_(0x200) +#define PR_64_BITS _Const_(0x300) +#define PR_RESERVED_BITS _Const_(0x100) +/* FULL_PRECISION simulates all exceptions masked */ +#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f) + +#endif /* _CONTROLW_H_ */ diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S new file mode 100644 index 000000000..ee08449d2 --- /dev/null +++ b/arch/x86/math-emu/div_Xsig.S @@ -0,0 +1,367 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "div_Xsig.S" +/*---------------------------------------------------------------------------+ + | div_Xsig.S | + | | + | Division subroutine for 96 bit quantities | + | | + | Copyright (C) 1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | + | put the 96 bit result at the location d. | + | | + | The result may not be accurate to 96 bits. It is intended for use where | + | a result better than 64 bits is required. The result should usually be | + | good to at least 94 bits. | + | The returned result is actually divided by one half. This is done to | + | prevent overflow. | + | | + | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | + | | + | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + + +#define XsigLL(x) (x) +#define XsigL(x) 4(x) +#define XsigH(x) 8(x) + + +#ifndef NON_REENTRANT_FPU +/* + Local storage on the stack: + Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + */ +#define FPU_accum_3 -4(%ebp) +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) +#define FPU_result_3 -20(%ebp) +#define FPU_result_2 -24(%ebp) +#define FPU_result_1 -28(%ebp) + +#else +.data +/* + Local storage in a static area: + Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + */ + .align 4,0 +FPU_accum_3: + .long 0 +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 +FPU_result_3: + .long 0 +FPU_result_2: + .long 0 +FPU_result_1: + .long 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(div_Xsig) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ebx /* pointer to denom */ + +#ifdef PARANOID + testl $0x80000000, XsigH(%ebx) /* Divisor */ + je L_bugged +#endif /* PARANOID */ + + +/*---------------------------------------------------------------------------+ + | Divide: Return arg1/arg2 to arg3. | + | | + | The maximum returned value is (ignoring exponents) | + | .ffffffff ffffffff | + | ------------------ = 1.ffffffff fffffffe | + | .80000000 00000000 | + | and the minimum is | + | .80000000 00000000 | + | ------------------ = .80000000 00000001 (rounded) | + | .ffffffff ffffffff | + | | + +---------------------------------------------------------------------------*/ + + /* Save extended dividend in local register */ + + /* Divide by 2 to prevent overflow */ + clc + movl XsigH(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_3 + movl XsigL(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_2 + movl XsigLL(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_1 + movl $0,%eax + rcrl %eax + movl %eax,FPU_accum_0 + + movl FPU_accum_2,%eax /* Get the current num */ + movl FPU_accum_3,%edx + +/*----------------------------------------------------------------------*/ +/* Initialization done. + Do the first 32 bits. */ + + /* We will divide by a number which is too large */ + movl XsigH(%ebx),%ecx + addl $1,%ecx + jnc LFirst_div_not_1 + + /* here we need to divide by 100000000h, + i.e., no division at all.. */ + mov %edx,%eax + jmp LFirst_div_done + +LFirst_div_not_1: + divl %ecx /* Divide the numerator by the augmented + denom ms dw */ + +LFirst_div_done: + movl %eax,FPU_result_3 /* Put the result in the answer */ + + mull XsigH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_2 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_3 + + movl FPU_result_3,%eax /* Get the result back */ + mull XsigL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + sbbl $0,FPU_accum_3 + je LDo_2nd_32_bits /* Must check for non-zero result here */ + +#ifdef PARANOID + jb L_bugged_1 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + incl FPU_result_3 /* Correct the answer */ + + movl XsigL(%ebx),%eax + movl XsigH(%ebx),%edx + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + sbbl $0,FPU_accum_3 + jne L_bugged_1 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* Half of the main problem is done, there is just a reduced numerator + to handle now. + Work with the second 32 bits, FPU_accum_0 not used from now on */ +LDo_2nd_32_bits: + movl FPU_accum_2,%edx /* get the reduced num */ + movl FPU_accum_1,%eax + + /* need to check for possible subsequent overflow */ + cmpl XsigH(%ebx),%edx + jb LDo_2nd_div + ja LPrevent_2nd_overflow + + cmpl XsigL(%ebx),%eax + jb LDo_2nd_div + +LPrevent_2nd_overflow: +/* The numerator is greater or equal, would cause overflow */ + /* prevent overflow */ + subl XsigL(%ebx),%eax + sbbl XsigH(%ebx),%edx + movl %edx,FPU_accum_2 + movl %eax,FPU_accum_1 + + incl FPU_result_3 /* Reflect the subtraction in the answer */ + +#ifdef PARANOID + je L_bugged_2 /* Can't bump the result to 1.0 */ +#endif /* PARANOID */ + +LDo_2nd_div: + cmpl $0,%ecx /* augmented denom msw */ + jnz LSecond_div_not_1 + + /* %ecx == 0, we are dividing by 1.0 */ + mov %edx,%eax + jmp LSecond_div_done + +LSecond_div_not_1: + divl %ecx /* Divide the numerator by the denom ms dw */ + +LSecond_div_done: + movl %eax,FPU_result_2 /* Put the result in the answer */ + + mull XsigH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + movl FPU_result_2,%eax /* Get the result back */ + mull XsigL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + jz LDo_3rd_32_bits + +#ifdef PARANOID + cmpl $1,FPU_accum_2 + jne L_bugged_2 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + movl XsigL(%ebx),%eax + movl XsigH(%ebx),%edx + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 + jne L_bugged_2 +#endif /* PARANOID */ + + addl $1,FPU_result_2 /* Correct the answer */ + adcl $0,FPU_result_3 + +#ifdef PARANOID + jc L_bugged_2 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* The division is essentially finished here, we just need to perform + tidying operations. + Deal with the 3rd 32 bits */ +LDo_3rd_32_bits: + /* We use an approximation for the third 32 bits. + To take account of the 3rd 32 bits of the divisor + (call them del), we subtract del * (a/b) */ + + movl FPU_result_3,%eax /* a/b */ + mull XsigLL(%ebx) /* del */ + + subl %edx,FPU_accum_1 + + /* A borrow indicates that the result is negative */ + jnb LTest_over + + movl XsigH(%ebx),%edx + addl %edx,FPU_accum_1 + + subl $1,FPU_result_2 /* Adjust the answer */ + sbbl $0,FPU_result_3 + + /* The above addition might not have been enough, check again. */ + movl FPU_accum_1,%edx /* get the reduced num */ + cmpl XsigH(%ebx),%edx /* denom */ + jb LDo_3rd_div + + movl XsigH(%ebx),%edx + addl %edx,FPU_accum_1 + + subl $1,FPU_result_2 /* Adjust the answer */ + sbbl $0,FPU_result_3 + jmp LDo_3rd_div + +LTest_over: + movl FPU_accum_1,%edx /* get the reduced num */ + + /* need to check for possible subsequent overflow */ + cmpl XsigH(%ebx),%edx /* denom */ + jb LDo_3rd_div + + /* prevent overflow */ + subl XsigH(%ebx),%edx + movl %edx,FPU_accum_1 + + addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ + adcl $0,FPU_result_3 + +LDo_3rd_div: + movl FPU_accum_0,%eax + movl FPU_accum_1,%edx + divl XsigH(%ebx) + + movl %eax,FPU_result_1 /* Rough estimate of third word */ + + movl PARAM3,%esi /* pointer to answer */ + + movl FPU_result_1,%eax + movl %eax,XsigLL(%esi) + movl FPU_result_2,%eax + movl %eax,XsigL(%esi) + movl FPU_result_3,%eax + movl %eax,XsigH(%esi) + +L_exit: + popl %ebx + popl %edi + popl %esi + + leave + ret + + +#ifdef PARANOID +/* The logic is wrong if we got here */ +L_bugged: + pushl EX_INTERNAL|0x240 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_1: + pushl EX_INTERNAL|0x241 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_2: + pushl EX_INTERNAL|0x242 + call EXCEPTION + pop %ebx + jmp L_exit +#endif /* PARANOID */ +ENDPROC(div_Xsig) diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S new file mode 100644 index 000000000..8f5025c80 --- /dev/null +++ b/arch/x86/math-emu/div_small.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "div_small.S" +/*---------------------------------------------------------------------------+ + | div_small.S | + | | + | Divide a 64 bit integer by a 32 bit integer & return remainder. | + | | + | Copyright (C) 1992,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | unsigned long FPU_div_small(unsigned long long *x, unsigned long y) | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +ENTRY(FPU_div_small) + pushl %ebp + movl %esp,%ebp + + pushl %esi + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ecx /* The denominator */ + + movl 4(%esi),%eax /* Get the current num msw */ + xorl %edx,%edx + divl %ecx + + movl %eax,4(%esi) + + movl (%esi),%eax /* Get the num lsw */ + divl %ecx + + movl %eax,(%esi) + + movl %edx,%eax /* Return the remainder in eax */ + + popl %esi + + leave + ret +ENDPROC(FPU_div_small) diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c new file mode 100644 index 000000000..6b468517a --- /dev/null +++ b/arch/x86/math-emu/errors.c @@ -0,0 +1,685 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | errors.c | + | | + | The error handling functions for wm-FPU-emu | + | | + | Copyright (C) 1992,1993,1994,1996 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include <linux/signal.h> + +#include <linux/uaccess.h> + +#include "fpu_emu.h" +#include "fpu_system.h" +#include "exception.h" +#include "status_w.h" +#include "control_w.h" +#include "reg_constant.h" +#include "version.h" + +/* */ +#undef PRINT_MESSAGES +/* */ + +#if 0 +void Un_impl(void) +{ + u_char byte1, FPU_modrm; + unsigned long address = FPU_ORIG_EIP; + + RE_ENTRANT_CHECK_OFF; + /* No need to check access_ok(), we have previously fetched these bytes. */ + printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *)address); + if (FPU_CS == __USER_CS) { + while (1) { + FPU_get_user(byte1, (u_char __user *) address); + if ((byte1 & 0xf8) == 0xd8) + break; + printk("[%02x]", byte1); + address++; + } + printk("%02x ", byte1); + FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); + + if (FPU_modrm >= 0300) + printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, + FPU_modrm & 7); + else + printk("/%d\n", (FPU_modrm >> 3) & 7); + } else { + printk("cs selector = %04x\n", FPU_CS); + } + + RE_ENTRANT_CHECK_ON; + + EXCEPTION(EX_Invalid); + +} +#endif /* 0 */ + +/* + Called for opcodes which are illegal and which are known to result in a + SIGILL with a real 80486. + */ +void FPU_illegal(void) +{ + math_abort(FPU_info, SIGILL); +} + +void FPU_printall(void) +{ + int i; + static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty", + "DeNorm", "Inf", "NaN" + }; + u_char byte1, FPU_modrm; + unsigned long address = FPU_ORIG_EIP; + + RE_ENTRANT_CHECK_OFF; + /* No need to check access_ok(), we have previously fetched these bytes. */ + printk("At %p:", (void *)address); + if (FPU_CS == __USER_CS) { +#define MAX_PRINTED_BYTES 20 + for (i = 0; i < MAX_PRINTED_BYTES; i++) { + FPU_get_user(byte1, (u_char __user *) address); + if ((byte1 & 0xf8) == 0xd8) { + printk(" %02x", byte1); + break; + } + printk(" [%02x]", byte1); + address++; + } + if (i == MAX_PRINTED_BYTES) + printk(" [more..]\n"); + else { + FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); + + if (FPU_modrm >= 0300) + printk(" %02x (%02x+%d)\n", FPU_modrm, + FPU_modrm & 0xf8, FPU_modrm & 7); + else + printk(" /%d, mod=%d rm=%d\n", + (FPU_modrm >> 3) & 7, + (FPU_modrm >> 6) & 3, FPU_modrm & 7); + } + } else { + printk("%04x\n", FPU_CS); + } + + partial_status = status_word(); + +#ifdef DEBUGGING + if (partial_status & SW_Backward) + printk("SW: backward compatibility\n"); + if (partial_status & SW_C3) + printk("SW: condition bit 3\n"); + if (partial_status & SW_C2) + printk("SW: condition bit 2\n"); + if (partial_status & SW_C1) + printk("SW: condition bit 1\n"); + if (partial_status & SW_C0) + printk("SW: condition bit 0\n"); + if (partial_status & SW_Summary) + printk("SW: exception summary\n"); + if (partial_status & SW_Stack_Fault) + printk("SW: stack fault\n"); + if (partial_status & SW_Precision) + printk("SW: loss of precision\n"); + if (partial_status & SW_Underflow) + printk("SW: underflow\n"); + if (partial_status & SW_Overflow) + printk("SW: overflow\n"); + if (partial_status & SW_Zero_Div) + printk("SW: divide by zero\n"); + if (partial_status & SW_Denorm_Op) + printk("SW: denormalized operand\n"); + if (partial_status & SW_Invalid) + printk("SW: invalid operation\n"); +#endif /* DEBUGGING */ + + printk(" SW: b=%d st=%d es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", partial_status & 0x8000 ? 1 : 0, /* busy */ + (partial_status & 0x3800) >> 11, /* stack top pointer */ + partial_status & 0x80 ? 1 : 0, /* Error summary status */ + partial_status & 0x40 ? 1 : 0, /* Stack flag */ + partial_status & SW_C3 ? 1 : 0, partial_status & SW_C2 ? 1 : 0, /* cc */ + partial_status & SW_C1 ? 1 : 0, partial_status & SW_C0 ? 1 : 0, /* cc */ + partial_status & SW_Precision ? 1 : 0, + partial_status & SW_Underflow ? 1 : 0, + partial_status & SW_Overflow ? 1 : 0, + partial_status & SW_Zero_Div ? 1 : 0, + partial_status & SW_Denorm_Op ? 1 : 0, + partial_status & SW_Invalid ? 1 : 0); + + printk(" CW: ic=%d rc=%d%d pc=%d%d iem=%d ef=%d%d%d%d%d%d\n", + control_word & 0x1000 ? 1 : 0, + (control_word & 0x800) >> 11, (control_word & 0x400) >> 10, + (control_word & 0x200) >> 9, (control_word & 0x100) >> 8, + control_word & 0x80 ? 1 : 0, + control_word & SW_Precision ? 1 : 0, + control_word & SW_Underflow ? 1 : 0, + control_word & SW_Overflow ? 1 : 0, + control_word & SW_Zero_Div ? 1 : 0, + control_word & SW_Denorm_Op ? 1 : 0, + control_word & SW_Invalid ? 1 : 0); + + for (i = 0; i < 8; i++) { + FPU_REG *r = &st(i); + u_char tagi = FPU_gettagi(i); + switch (tagi) { + case TAG_Empty: + continue; + break; + case TAG_Zero: + case TAG_Special: + tagi = FPU_Special(r); + case TAG_Valid: + printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, + getsign(r) ? '-' : '+', + (long)(r->sigh >> 16), + (long)(r->sigh & 0xFFFF), + (long)(r->sigl >> 16), + (long)(r->sigl & 0xFFFF), + exponent(r) - EXP_BIAS + 1); + break; + default: + printk("Whoops! Error in errors.c: tag%d is %d ", i, + tagi); + continue; + break; + } + printk("%s\n", tag_desc[(int)(unsigned)tagi]); + } + + RE_ENTRANT_CHECK_ON; + +} + +static struct { + int type; + const char *name; +} exception_names[] = { + { + EX_StackOver, "stack overflow"}, { + EX_StackUnder, "stack underflow"}, { + EX_Precision, "loss of precision"}, { + EX_Underflow, "underflow"}, { + EX_Overflow, "overflow"}, { + EX_ZeroDiv, "divide by zero"}, { + EX_Denormal, "denormalized operand"}, { + EX_Invalid, "invalid operation"}, { + EX_INTERNAL, "INTERNAL BUG in " FPU_VERSION}, { + 0, NULL} +}; + +/* + EX_INTERNAL is always given with a code which indicates where the + error was detected. + + Internal error types: + 0x14 in fpu_etc.c + 0x1nn in a *.c file: + 0x101 in reg_add_sub.c + 0x102 in reg_mul.c + 0x104 in poly_atan.c + 0x105 in reg_mul.c + 0x107 in fpu_trig.c + 0x108 in reg_compare.c + 0x109 in reg_compare.c + 0x110 in reg_add_sub.c + 0x111 in fpe_entry.c + 0x112 in fpu_trig.c + 0x113 in errors.c + 0x115 in fpu_trig.c + 0x116 in fpu_trig.c + 0x117 in fpu_trig.c + 0x118 in fpu_trig.c + 0x119 in fpu_trig.c + 0x120 in poly_atan.c + 0x121 in reg_compare.c + 0x122 in reg_compare.c + 0x123 in reg_compare.c + 0x125 in fpu_trig.c + 0x126 in fpu_entry.c + 0x127 in poly_2xm1.c + 0x128 in fpu_entry.c + 0x129 in fpu_entry.c + 0x130 in get_address.c + 0x131 in get_address.c + 0x132 in get_address.c + 0x133 in get_address.c + 0x140 in load_store.c + 0x141 in load_store.c + 0x150 in poly_sin.c + 0x151 in poly_sin.c + 0x160 in reg_ld_str.c + 0x161 in reg_ld_str.c + 0x162 in reg_ld_str.c + 0x163 in reg_ld_str.c + 0x164 in reg_ld_str.c + 0x170 in fpu_tags.c + 0x171 in fpu_tags.c + 0x172 in fpu_tags.c + 0x180 in reg_convert.c + 0x2nn in an *.S file: + 0x201 in reg_u_add.S + 0x202 in reg_u_div.S + 0x203 in reg_u_div.S + 0x204 in reg_u_div.S + 0x205 in reg_u_mul.S + 0x206 in reg_u_sub.S + 0x207 in wm_sqrt.S + 0x208 in reg_div.S + 0x209 in reg_u_sub.S + 0x210 in reg_u_sub.S + 0x211 in reg_u_sub.S + 0x212 in reg_u_sub.S + 0x213 in wm_sqrt.S + 0x214 in wm_sqrt.S + 0x215 in wm_sqrt.S + 0x220 in reg_norm.S + 0x221 in reg_norm.S + 0x230 in reg_round.S + 0x231 in reg_round.S + 0x232 in reg_round.S + 0x233 in reg_round.S + 0x234 in reg_round.S + 0x235 in reg_round.S + 0x236 in reg_round.S + 0x240 in div_Xsig.S + 0x241 in div_Xsig.S + 0x242 in div_Xsig.S + */ + +asmlinkage __visible void FPU_exception(int n) +{ + int i, int_type; + + int_type = 0; /* Needed only to stop compiler warnings */ + if (n & EX_INTERNAL) { + int_type = n - EX_INTERNAL; + n = EX_INTERNAL; + /* Set lots of exception bits! */ + partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward); + } else { + /* Extract only the bits which we use to set the status word */ + n &= (SW_Exc_Mask); + /* Set the corresponding exception bit */ + partial_status |= n; + /* Set summary bits iff exception isn't masked */ + if (partial_status & ~control_word & CW_Exceptions) + partial_status |= (SW_Summary | SW_Backward); + if (n & (SW_Stack_Fault | EX_Precision)) { + if (!(n & SW_C1)) + /* This bit distinguishes over- from underflow for a stack fault, + and roundup from round-down for precision loss. */ + partial_status &= ~SW_C1; + } + } + + RE_ENTRANT_CHECK_OFF; + if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) { + /* Get a name string for error reporting */ + for (i = 0; exception_names[i].type; i++) + if ((exception_names[i].type & n) == + exception_names[i].type) + break; + + if (exception_names[i].type) { +#ifdef PRINT_MESSAGES + printk("FP Exception: %s!\n", exception_names[i].name); +#endif /* PRINT_MESSAGES */ + } else + printk("FPU emulator: Unknown Exception: 0x%04x!\n", n); + + if (n == EX_INTERNAL) { + printk("FPU emulator: Internal error type 0x%04x\n", + int_type); + FPU_printall(); + } +#ifdef PRINT_MESSAGES + else + FPU_printall(); +#endif /* PRINT_MESSAGES */ + + /* + * The 80486 generates an interrupt on the next non-control FPU + * instruction. So we need some means of flagging it. + * We use the ES (Error Summary) bit for this. + */ + } + RE_ENTRANT_CHECK_ON; + +#ifdef __DEBUG__ + math_abort(FPU_info, SIGFPE); +#endif /* __DEBUG__ */ + +} + +/* Real operation attempted on a NaN. */ +/* Returns < 0 if the exception is unmasked */ +int real_1op_NaN(FPU_REG *a) +{ + int signalling, isNaN; + + isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000); + + /* The default result for the case of two "equal" NaNs (signs may + differ) is chosen to reproduce 80486 behaviour */ + signalling = isNaN && !(a->sigh & 0x40000000); + + if (!signalling) { + if (!isNaN) { /* pseudo-NaN, or other unsupported? */ + if (control_word & CW_Invalid) { + /* Masked response */ + reg_copy(&CONST_QNaN, a); + } + EXCEPTION(EX_Invalid); + return (!(control_word & CW_Invalid) ? FPU_Exception : + 0) | TAG_Special; + } + return TAG_Special; + } + + if (control_word & CW_Invalid) { + /* The masked response */ + if (!(a->sigh & 0x80000000)) { /* pseudo-NaN ? */ + reg_copy(&CONST_QNaN, a); + } + /* ensure a Quiet NaN */ + a->sigh |= 0x40000000; + } + + EXCEPTION(EX_Invalid); + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; +} + +/* Real operation attempted on two operands, one a NaN. */ +/* Returns < 0 if the exception is unmasked */ +int real_2op_NaN(FPU_REG const *b, u_char tagb, + int deststnr, FPU_REG const *defaultNaN) +{ + FPU_REG *dest = &st(deststnr); + FPU_REG const *a = dest; + u_char taga = FPU_gettagi(deststnr); + FPU_REG const *x; + int signalling, unsupported; + + if (taga == TAG_Special) + taga = FPU_Special(a); + if (tagb == TAG_Special) + tagb = FPU_Special(b); + + /* TW_NaN is also used for unsupported data types. */ + unsupported = ((taga == TW_NaN) + && !((exponent(a) == EXP_OVER) + && (a->sigh & 0x80000000))) + || ((tagb == TW_NaN) + && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000))); + if (unsupported) { + if (control_word & CW_Invalid) { + /* Masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); + } + EXCEPTION(EX_Invalid); + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | + TAG_Special; + } + + if (taga == TW_NaN) { + x = a; + if (tagb == TW_NaN) { + signalling = !(a->sigh & b->sigh & 0x40000000); + if (significand(b) > significand(a)) + x = b; + else if (significand(b) == significand(a)) { + /* The default result for the case of two "equal" NaNs (signs may + differ) is chosen to reproduce 80486 behaviour */ + x = defaultNaN; + } + } else { + /* return the quiet version of the NaN in a */ + signalling = !(a->sigh & 0x40000000); + } + } else +#ifdef PARANOID + if (tagb == TW_NaN) +#endif /* PARANOID */ + { + signalling = !(b->sigh & 0x40000000); + x = b; + } +#ifdef PARANOID + else { + signalling = 0; + EXCEPTION(EX_INTERNAL | 0x113); + x = &CONST_QNaN; + } +#endif /* PARANOID */ + + if ((!signalling) || (control_word & CW_Invalid)) { + if (!x) + x = b; + + if (!(x->sigh & 0x80000000)) /* pseudo-NaN ? */ + x = &CONST_QNaN; + + FPU_copy_to_regi(x, TAG_Special, deststnr); + + if (!signalling) + return TAG_Special; + + /* ensure a Quiet NaN */ + dest->sigh |= 0x40000000; + } + + EXCEPTION(EX_Invalid); + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; +} + +/* Invalid arith operation on Valid registers */ +/* Returns < 0 if the exception is unmasked */ +asmlinkage __visible int arith_invalid(int deststnr) +{ + + EXCEPTION(EX_Invalid); + + if (control_word & CW_Invalid) { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); + } + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid; + +} + +/* Divide a finite number by zero */ +asmlinkage __visible int FPU_divide_by_zero(int deststnr, u_char sign) +{ + FPU_REG *dest = &st(deststnr); + int tag = TAG_Valid; + + if (control_word & CW_ZeroDiv) { + /* The masked response */ + FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr); + setsign(dest, sign); + tag = TAG_Special; + } + + EXCEPTION(EX_ZeroDiv); + + return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag; + +} + +/* This may be called often, so keep it lean */ +int set_precision_flag(int flags) +{ + if (control_word & CW_Precision) { + partial_status &= ~(SW_C1 & flags); + partial_status |= flags; /* The masked response */ + return 0; + } else { + EXCEPTION(flags); + return 1; + } +} + +/* This may be called often, so keep it lean */ +asmlinkage __visible void set_precision_flag_up(void) +{ + if (control_word & CW_Precision) + partial_status |= (SW_Precision | SW_C1); /* The masked response */ + else + EXCEPTION(EX_Precision | SW_C1); +} + +/* This may be called often, so keep it lean */ +asmlinkage __visible void set_precision_flag_down(void) +{ + if (control_word & CW_Precision) { /* The masked response */ + partial_status &= ~SW_C1; + partial_status |= SW_Precision; + } else + EXCEPTION(EX_Precision); +} + +asmlinkage __visible int denormal_operand(void) +{ + if (control_word & CW_Denormal) { /* The masked response */ + partial_status |= SW_Denorm_Op; + return TAG_Special; + } else { + EXCEPTION(EX_Denormal); + return TAG_Special | FPU_Exception; + } +} + +asmlinkage __visible int arith_overflow(FPU_REG *dest) +{ + int tag = TAG_Valid; + + if (control_word & CW_Overflow) { + /* The masked response */ +/* ###### The response here depends upon the rounding mode */ + reg_copy(&CONST_INF, dest); + tag = TAG_Special; + } else { + /* Subtract the magic number from the exponent */ + addexponent(dest, (-3 * (1 << 13))); + } + + EXCEPTION(EX_Overflow); + if (control_word & CW_Overflow) { + /* The overflow exception is masked. */ + /* By definition, precision is lost. + The roundup bit (C1) is also set because we have + "rounded" upwards to Infinity. */ + EXCEPTION(EX_Precision | SW_C1); + return tag; + } + + return tag; + +} + +asmlinkage __visible int arith_underflow(FPU_REG *dest) +{ + int tag = TAG_Valid; + + if (control_word & CW_Underflow) { + /* The masked response */ + if (exponent16(dest) <= EXP_UNDER - 63) { + reg_copy(&CONST_Z, dest); + partial_status &= ~SW_C1; /* Round down. */ + tag = TAG_Zero; + } else { + stdexp(dest); + } + } else { + /* Add the magic number to the exponent. */ + addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias); + } + + EXCEPTION(EX_Underflow); + if (control_word & CW_Underflow) { + /* The underflow exception is masked. */ + EXCEPTION(EX_Precision); + return tag; + } + + return tag; + +} + +void FPU_stack_overflow(void) +{ + + if (control_word & CW_Invalid) { + /* The masked response */ + top--; + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + + EXCEPTION(EX_StackOver); + + return; + +} + +void FPU_stack_underflow(void) +{ + + if (control_word & CW_Invalid) { + /* The masked response */ + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + +void FPU_stack_underflow_i(int i) +{ + + if (control_word & CW_Invalid) { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + +void FPU_stack_underflow_pop(int i) +{ + + if (control_word & CW_Invalid) { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); + FPU_pop(); + } + + EXCEPTION(EX_StackUnder); + + return; + +} diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h new file mode 100644 index 000000000..75230b977 --- /dev/null +++ b/arch/x86/math-emu/exception.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | exception.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _EXCEPTION_H_ +#define _EXCEPTION_H_ + +#ifdef __ASSEMBLY__ +#define Const_(x) $##x +#else +#define Const_(x) x +#endif + +#ifndef SW_C1 +#include "fpu_emu.h" +#endif /* SW_C1 */ + +#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */ +#define EX_ErrorSummary Const_(0x0080) /* Error summary status */ +/* Special exceptions: */ +#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */ +#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */ +#define EX_StackUnder Const_(0x0041) /* stack underflow */ +/* Exception flags: */ +#define EX_Precision Const_(0x0020) /* loss of precision */ +#define EX_Underflow Const_(0x0010) /* underflow */ +#define EX_Overflow Const_(0x0008) /* overflow */ +#define EX_ZeroDiv Const_(0x0004) /* divide by zero */ +#define EX_Denormal Const_(0x0002) /* denormalized operand */ +#define EX_Invalid Const_(0x0001) /* invalid operation */ + +#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) +#define PRECISION_LOST_DOWN Const_(EX_Precision) + +#ifndef __ASSEMBLY__ + +#ifdef DEBUG +#define EXCEPTION(x) { printk("exception in %s at line %d\n", \ + __FILE__, __LINE__); FPU_exception(x); } +#else +#define EXCEPTION(x) FPU_exception(x) +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _EXCEPTION_H_ */ diff --git a/arch/x86/math-emu/fpu_arith.c b/arch/x86/math-emu/fpu_arith.c new file mode 100644 index 000000000..09006dc47 --- /dev/null +++ b/arch/x86/math-emu/fpu_arith.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | fpu_arith.c | + | | + | Code to implement the FPU register/register arithmetic instructions | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + +void fadd__(void) +{ + /* fadd st,st(i) */ + int i = FPU_rm; + clear_C1(); + FPU_add(&st(i), FPU_gettagi(i), 0, control_word); +} + +void fmul__(void) +{ + /* fmul st,st(i) */ + int i = FPU_rm; + clear_C1(); + FPU_mul(&st(i), FPU_gettagi(i), 0, control_word); +} + +void fsub__(void) +{ + /* fsub st,st(i) */ + clear_C1(); + FPU_sub(0, FPU_rm, control_word); +} + +void fsubr_(void) +{ + /* fsubr st,st(i) */ + clear_C1(); + FPU_sub(REV, FPU_rm, control_word); +} + +void fdiv__(void) +{ + /* fdiv st,st(i) */ + clear_C1(); + FPU_div(0, FPU_rm, control_word); +} + +void fdivr_(void) +{ + /* fdivr st,st(i) */ + clear_C1(); + FPU_div(REV, FPU_rm, control_word); +} + +void fadd_i(void) +{ + /* fadd st(i),st */ + int i = FPU_rm; + clear_C1(); + FPU_add(&st(i), FPU_gettagi(i), i, control_word); +} + +void fmul_i(void) +{ + /* fmul st(i),st */ + clear_C1(); + FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word); +} + +void fsubri(void) +{ + /* fsubr st(i),st */ + clear_C1(); + FPU_sub(DEST_RM, FPU_rm, control_word); +} + +void fsub_i(void) +{ + /* fsub st(i),st */ + clear_C1(); + FPU_sub(REV | DEST_RM, FPU_rm, control_word); +} + +void fdivri(void) +{ + /* fdivr st(i),st */ + clear_C1(); + FPU_div(DEST_RM, FPU_rm, control_word); +} + +void fdiv_i(void) +{ + /* fdiv st(i),st */ + clear_C1(); + FPU_div(REV | DEST_RM, FPU_rm, control_word); +} + +void faddp_(void) +{ + /* faddp st(i),st */ + int i = FPU_rm; + clear_C1(); + if (FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0) + FPU_pop(); +} + +void fmulp_(void) +{ + /* fmulp st(i),st */ + clear_C1(); + if (FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0) + FPU_pop(); +} + +void fsubrp(void) +{ + /* fsubrp st(i),st */ + clear_C1(); + if (FPU_sub(DEST_RM, FPU_rm, control_word) >= 0) + FPU_pop(); +} + +void fsubp_(void) +{ + /* fsubp st(i),st */ + clear_C1(); + if (FPU_sub(REV | DEST_RM, FPU_rm, control_word) >= 0) + FPU_pop(); +} + +void fdivrp(void) +{ + /* fdivrp st(i),st */ + clear_C1(); + if (FPU_div(DEST_RM, FPU_rm, control_word) >= 0) + FPU_pop(); +} + +void fdivp_(void) +{ + /* fdivp st(i),st */ + clear_C1(); + if (FPU_div(REV | DEST_RM, FPU_rm, control_word) >= 0) + FPU_pop(); +} diff --git a/arch/x86/math-emu/fpu_asm.h b/arch/x86/math-emu/fpu_asm.h new file mode 100644 index 000000000..a83353d52 --- /dev/null +++ b/arch/x86/math-emu/fpu_asm.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | fpu_asm.h | + | | + | Copyright (C) 1992,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_ASM_H_ +#define _FPU_ASM_H_ + +#include <linux/linkage.h> + +#define EXCEPTION FPU_exception + +#define PARAM1 8(%ebp) +#define PARAM2 12(%ebp) +#define PARAM3 16(%ebp) +#define PARAM4 20(%ebp) +#define PARAM5 24(%ebp) +#define PARAM6 28(%ebp) +#define PARAM7 32(%ebp) + +#define SIGL_OFFSET 0 +#define EXP(x) 8(x) +#define SIG(x) SIGL_OFFSET##(x) +#define SIGL(x) SIGL_OFFSET##(x) +#define SIGH(x) 4(x) + +#endif /* _FPU_ASM_H_ */ diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c new file mode 100644 index 000000000..034748459 --- /dev/null +++ b/arch/x86/math-emu/fpu_aux.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | fpu_aux.c | + | | + | Code to implement some of the FPU auxiliary instructions. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" + +static void fnop(void) +{ +} + +static void fclex(void) +{ + partial_status &= + ~(SW_Backward | SW_Summary | SW_Stack_Fault | SW_Precision | + SW_Underflow | SW_Overflow | SW_Zero_Div | SW_Denorm_Op | + SW_Invalid); + no_ip_update = 1; +} + +/* Needs to be externally visible */ +void fpstate_init_soft(struct swregs_state *soft) +{ + struct address *oaddr, *iaddr; + memset(soft, 0, sizeof(*soft)); + soft->cwd = 0x037f; + soft->swd = 0; + soft->ftop = 0; /* We don't keep top in the status word internally. */ + soft->twd = 0xffff; + /* The behaviour is different from that detailed in + Section 15.1.6 of the Intel manual */ + oaddr = (struct address *)&soft->foo; + oaddr->offset = 0; + oaddr->selector = 0; + iaddr = (struct address *)&soft->fip; + iaddr->offset = 0; + iaddr->selector = 0; + iaddr->opcode = 0; + soft->no_update = 1; +} + +void finit(void) +{ + fpstate_init_soft(¤t->thread.fpu.state.soft); +} + +/* + * These are nops on the i387.. + */ +#define feni fnop +#define fdisi fnop +#define fsetpm fnop + +static FUNC const finit_table[] = { + feni, fdisi, fclex, finit, + fsetpm, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void finit_(void) +{ + (finit_table[FPU_rm]) (); +} + +static void fstsw_ax(void) +{ + *(short *)&FPU_EAX = status_word(); + no_ip_update = 1; +} + +static FUNC const fstsw_table[] = { + fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal, + FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void fstsw_(void) +{ + (fstsw_table[FPU_rm]) (); +} + +static FUNC const fp_nop_table[] = { + fnop, FPU_illegal, FPU_illegal, FPU_illegal, + FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void fp_nop(void) +{ + (fp_nop_table[FPU_rm]) (); +} + +void fld_i_(void) +{ + FPU_REG *st_new_ptr; + int i; + u_char tag; + + if (STACK_OVERFLOW) { + FPU_stack_overflow(); + return; + } + + /* fld st(i) */ + i = FPU_rm; + if (NOT_EMPTY(i)) { + reg_copy(&st(i), st_new_ptr); + tag = FPU_gettagi(i); + push(); + FPU_settag0(tag); + } else { + if (control_word & CW_Invalid) { + /* The masked response */ + FPU_stack_underflow(); + } else + EXCEPTION(EX_StackUnder); + } + +} + +void fxch_i(void) +{ + /* fxch st(i) */ + FPU_REG t; + int i = FPU_rm; + FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i); + long tag_word = fpu_tag_word; + int regnr = top & 7, regnri = ((regnr + i) & 7); + u_char st0_tag = (tag_word >> (regnr * 2)) & 3; + u_char sti_tag = (tag_word >> (regnri * 2)) & 3; + + if (st0_tag == TAG_Empty) { + if (sti_tag == TAG_Empty) { + FPU_stack_underflow(); + FPU_stack_underflow_i(i); + return; + } + if (control_word & CW_Invalid) { + /* Masked response */ + FPU_copy_to_reg0(sti_ptr, sti_tag); + } + FPU_stack_underflow_i(i); + return; + } + if (sti_tag == TAG_Empty) { + if (control_word & CW_Invalid) { + /* Masked response */ + FPU_copy_to_regi(st0_ptr, st0_tag, i); + } + FPU_stack_underflow(); + return; + } + clear_C1(); + + reg_copy(st0_ptr, &t); + reg_copy(sti_ptr, st0_ptr); + reg_copy(&t, sti_ptr); + + tag_word &= ~(3 << (regnr * 2)) & ~(3 << (regnri * 2)); + tag_word |= (sti_tag << (regnr * 2)) | (st0_tag << (regnri * 2)); + fpu_tag_word = tag_word; +} + +static void fcmovCC(void) +{ + /* fcmovCC st(i) */ + int i = FPU_rm; + FPU_REG *st0_ptr = &st(0); + FPU_REG *sti_ptr = &st(i); + long tag_word = fpu_tag_word; + int regnr = top & 7; + int regnri = (top + i) & 7; + u_char sti_tag = (tag_word >> (regnri * 2)) & 3; + + if (sti_tag == TAG_Empty) { + FPU_stack_underflow(); + clear_C1(); + return; + } + reg_copy(sti_ptr, st0_ptr); + tag_word &= ~(3 << (regnr * 2)); + tag_word |= (sti_tag << (regnr * 2)); + fpu_tag_word = tag_word; +} + +void fcmovb(void) +{ + if (FPU_EFLAGS & X86_EFLAGS_CF) + fcmovCC(); +} + +void fcmove(void) +{ + if (FPU_EFLAGS & X86_EFLAGS_ZF) + fcmovCC(); +} + +void fcmovbe(void) +{ + if (FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF)) + fcmovCC(); +} + +void fcmovu(void) +{ + if (FPU_EFLAGS & X86_EFLAGS_PF) + fcmovCC(); +} + +void fcmovnb(void) +{ + if (!(FPU_EFLAGS & X86_EFLAGS_CF)) + fcmovCC(); +} + +void fcmovne(void) +{ + if (!(FPU_EFLAGS & X86_EFLAGS_ZF)) + fcmovCC(); +} + +void fcmovnbe(void) +{ + if (!(FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF))) + fcmovCC(); +} + +void fcmovnu(void) +{ + if (!(FPU_EFLAGS & X86_EFLAGS_PF)) + fcmovCC(); +} + +void ffree_(void) +{ + /* ffree st(i) */ + FPU_settagi(FPU_rm, TAG_Empty); +} + +void ffreep(void) +{ + /* ffree st(i) + pop - unofficial code */ + FPU_settagi(FPU_rm, TAG_Empty); + FPU_pop(); +} + +void fst_i_(void) +{ + /* fst st(i) */ + FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); +} + +void fstp_i(void) +{ + /* fstp st(i) */ + FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); + FPU_pop(); +} diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h new file mode 100644 index 000000000..0c122226c --- /dev/null +++ b/arch/x86/math-emu/fpu_emu.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | fpu_emu.h | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_EMU_H_ +#define _FPU_EMU_H_ + +/* + * Define PECULIAR_486 to get a closer approximation to 80486 behaviour, + * rather than behaviour which appears to be cleaner. + * This is a matter of opinion: for all I know, the 80486 may simply + * be complying with the IEEE spec. Maybe one day I'll get to see the + * spec... + */ +#define PECULIAR_486 + +#ifdef __ASSEMBLY__ +#include "fpu_asm.h" +#define Const(x) $##x +#else +#define Const(x) x +#endif + +#define EXP_BIAS Const(0) +#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */ +#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */ +#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but + still a 16 bit nr. */ +#define EXP_Infinity EXP_OVER +#define EXP_NaN EXP_OVER + +#define EXTENDED_Ebias Const(0x3fff) +#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ + +#define SIGN_POS Const(0) +#define SIGN_NEG Const(0x80) + +#define SIGN_Positive Const(0) +#define SIGN_Negative Const(0x8000) + +/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */ +/* The following fold to 2 (Special) in the Tag Word */ +#define TW_Denormal Const(4) /* De-normal */ +#define TW_Infinity Const(5) /* + or - infinity */ +#define TW_NaN Const(6) /* Not a Number */ +#define TW_Unsupported Const(7) /* Not supported by an 80486 */ + +#define TAG_Valid Const(0) /* valid */ +#define TAG_Zero Const(1) /* zero */ +#define TAG_Special Const(2) /* De-normal, + or - infinity, + or Not a Number */ +#define TAG_Empty Const(3) /* empty */ +#define TAG_Error Const(0x80) /* probably need to abort */ + +#define LOADED_DATA Const(10101) /* Special st() number to identify + loaded data (not on stack). */ + +/* A few flags (must be >= 0x10). */ +#define REV 0x10 +#define DEST_RM 0x20 +#define LOADED 0x40 + +#define FPU_Exception Const(0x80000000) /* Added to tag returns. */ + +#ifndef __ASSEMBLY__ + +#include "fpu_system.h" + +#include <uapi/asm/sigcontext.h> /* for struct _fpstate */ +#include <asm/math_emu.h> +#include <linux/linkage.h> + +/* +#define RE_ENTRANT_CHECKING + */ + +#ifdef RE_ENTRANT_CHECKING +extern u_char emulating; +# define RE_ENTRANT_CHECK_OFF emulating = 0 +# define RE_ENTRANT_CHECK_ON emulating = 1 +#else +# define RE_ENTRANT_CHECK_OFF +# define RE_ENTRANT_CHECK_ON +#endif /* RE_ENTRANT_CHECKING */ + +#define FWAIT_OPCODE 0x9b +#define OP_SIZE_PREFIX 0x66 +#define ADDR_SIZE_PREFIX 0x67 +#define PREFIX_CS 0x2e +#define PREFIX_DS 0x3e +#define PREFIX_ES 0x26 +#define PREFIX_SS 0x36 +#define PREFIX_FS 0x64 +#define PREFIX_GS 0x65 +#define PREFIX_REPE 0xf3 +#define PREFIX_REPNE 0xf2 +#define PREFIX_LOCK 0xf0 +#define PREFIX_CS_ 1 +#define PREFIX_DS_ 2 +#define PREFIX_ES_ 3 +#define PREFIX_FS_ 4 +#define PREFIX_GS_ 5 +#define PREFIX_SS_ 6 +#define PREFIX_DEFAULT 7 + +struct address { + unsigned int offset; + unsigned int selector:16; + unsigned int opcode:11; + unsigned int empty:5; +}; +struct fpu__reg { + unsigned sigl; + unsigned sigh; + short exp; +}; + +typedef void (*FUNC) (void); +typedef struct fpu__reg FPU_REG; +typedef void (*FUNC_ST0) (FPU_REG *st0_ptr, u_char st0_tag); +typedef struct { + u_char address_size, operand_size, segment; +} overrides; +/* This structure is 32 bits: */ +typedef struct { + overrides override; + u_char default_mode; +} fpu_addr_modes; +/* PROTECTED has a restricted meaning in the emulator; it is used + to signal that the emulator needs to do special things to ensure + that protection is respected in a segmented model. */ +#define PROTECTED 4 +#define SIXTEEN 1 /* We rely upon this being 1 (true) */ +#define VM86 SIXTEEN +#define PM16 (SIXTEEN | PROTECTED) +#define SEG32 PROTECTED +extern u_char const data_sizes_16[32]; + +#define register_base ((u_char *) registers ) +#define fpu_register(x) ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) ) +#define st(x) ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) ) + +#define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr)) +#define NOT_EMPTY(i) (!FPU_empty_i(i)) + +#define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty) + +#define poppop() { FPU_pop(); FPU_pop(); } + +/* push() does not affect the tags */ +#define push() { top--; } + +#define signbyte(a) (((u_char *)(a))[9]) +#define getsign(a) (signbyte(a) & 0x80) +#define setsign(a,b) { if ((b) != 0) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; } +#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \ + else signbyte(b) &= 0x7f; } +#define changesign(a) { signbyte(a) ^= 0x80; } +#define setpositive(a) { signbyte(a) &= 0x7f; } +#define setnegative(a) { signbyte(a) |= 0x80; } +#define signpositive(a) ( (signbyte(a) & 0x80) == 0 ) +#define signnegative(a) (signbyte(a) & 0x80) + +static inline void reg_copy(FPU_REG const *x, FPU_REG *y) +{ + *(short *)&(y->exp) = *(const short *)&(x->exp); + *(long long *)&(y->sigl) = *(const long long *)&(x->sigl); +} + +#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias) +#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \ + ((y) + EXTENDED_Ebias) & 0x7fff; } +#define exponent16(x) (*(short *)&((x)->exp)) +#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (u16)(y); } +#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); } +#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; } + +#define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER) + +#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] ) + +/*----- Prototypes for functions written in assembler -----*/ +/* extern void reg_move(FPU_REG *a, FPU_REG *b); */ + +asmlinkage int FPU_normalize(FPU_REG *x); +asmlinkage int FPU_normalize_nuo(FPU_REG *x); +asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG * answ, unsigned int control_w, u_char sign, + int expa, int expb); +asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG * answ, unsigned int control_w, u_char sign, + int expon); +asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG * answ, unsigned int control_w, u_char sign); +asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG * answ, unsigned int control_w, u_char sign, + int expa, int expb); +asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2, + unsigned int control_w, u_char sign); +asmlinkage unsigned FPU_shrx(void *l, unsigned x); +asmlinkage unsigned FPU_shrxs(void *v, unsigned x); +asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y); +asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, + unsigned int control_w, u_char sign); + +#ifndef MAKING_PROTO +#include "fpu_proto.h" +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _FPU_EMU_H_ */ diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c new file mode 100644 index 000000000..9e2ba7e66 --- /dev/null +++ b/arch/x86/math-emu/fpu_entry.c @@ -0,0 +1,729 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | fpu_entry.c | + | | + | The entry functions for wm-FPU-emu | + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | See the files "README" and "COPYING" for further copyright and warranty | + | information. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | math_emulate(), restore_i387_soft() and save_i387_soft() are the only | + | entry points for wm-FPU-emu. | + +---------------------------------------------------------------------------*/ + +#include <linux/signal.h> +#include <linux/regset.h> + +#include <linux/uaccess.h> +#include <asm/traps.h> +#include <asm/user.h> +#include <asm/fpu/internal.h> + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "exception.h" +#include "control_w.h" +#include "status_w.h" + +#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ + +/* fcmovCC and f(u)comi(p) are enabled if CPUID(1).EDX(15) "cmov" is set */ + +/* WARNING: "u" entries are not documented by Intel in their 80486 manual + and may not work on FPU clones or later Intel FPUs. + Changes to support them provided by Linus Torvalds. */ + +static FUNC const st_instr_table[64] = { +/* Opcode: d8 d9 da db */ +/* dc dd de df */ +/* c0..7 */ fadd__, fld_i_, fcmovb, fcmovnb, +/* c0..7 */ fadd_i, ffree_, faddp_, ffreep,/*u*/ +/* c8..f */ fmul__, fxch_i, fcmove, fcmovne, +/* c8..f */ fmul_i, fxch_i,/*u*/ fmulp_, fxch_i,/*u*/ +/* d0..7 */ fcom_st, fp_nop, fcmovbe, fcmovnbe, +/* d0..7 */ fcom_st,/*u*/ fst_i_, fcompst,/*u*/ fstp_i,/*u*/ +/* d8..f */ fcompst, fstp_i,/*u*/ fcmovu, fcmovnu, +/* d8..f */ fcompst,/*u*/ fstp_i, fcompp, fstp_i,/*u*/ +/* e0..7 */ fsub__, FPU_etc, __BAD__, finit_, +/* e0..7 */ fsubri, fucom_, fsubrp, fstsw_, +/* e8..f */ fsubr_, fconst, fucompp, fucomi_, +/* e8..f */ fsub_i, fucomp, fsubp_, fucomip, +/* f0..7 */ fdiv__, FPU_triga, __BAD__, fcomi_, +/* f0..7 */ fdivri, __BAD__, fdivrp, fcomip, +/* f8..f */ fdivr_, FPU_trigb, __BAD__, __BAD__, +/* f8..f */ fdiv_i, __BAD__, fdivp_, __BAD__, +}; + +#define _NONE_ 0 /* Take no special action */ +#define _REG0_ 1 /* Need to check for not empty st(0) */ +#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ +#define _REGi_ 0 /* Uses st(rm) */ +#define _PUSH_ 3 /* Need to check for space to push onto stack */ +#define _null_ 4 /* Function illegal or not implemented */ +#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */ +#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */ +#define _REGIc 0 /* Compare st(0) and st(rm) */ +#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ + +static u_char const type_table[64] = { +/* Opcode: d8 d9 da db dc dd de df */ +/* c0..7 */ _REGI_, _NONE_, _REGIn, _REGIn, _REGIi, _REGi_, _REGIp, _REGi_, +/* c8..f */ _REGI_, _REGIn, _REGIn, _REGIn, _REGIi, _REGI_, _REGIp, _REGI_, +/* d0..7 */ _REGIc, _NONE_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_, +/* d8..f */ _REGIc, _REG0_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_, +/* e0..7 */ _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, +/* e8..f */ _REGI_, _NONE_, _REGIc, _REGIc, _REGIi, _REGIc, _REGIp, _REGIc, +/* f0..7 */ _REGI_, _NONE_, _null_, _REGIc, _REGIi, _null_, _REGIp, _REGIc, +/* f8..f */ _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, +}; + +#ifdef RE_ENTRANT_CHECKING +u_char emulating = 0; +#endif /* RE_ENTRANT_CHECKING */ + +static int valid_prefix(u_char *Byte, u_char __user ** fpu_eip, + overrides * override); + +void math_emulate(struct math_emu_info *info) +{ + u_char FPU_modrm, byte1; + unsigned short code; + fpu_addr_modes addr_modes; + int unmasked; + FPU_REG loaded_data; + FPU_REG *st0_ptr; + u_char loaded_tag, st0_tag; + void __user *data_address; + struct address data_sel_off; + struct address entry_sel_off; + unsigned long code_base = 0; + unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ + struct desc_struct code_descriptor; + struct fpu *fpu = ¤t->thread.fpu; + + fpu__initialize(fpu); + +#ifdef RE_ENTRANT_CHECKING + if (emulating) { + printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); + } + RE_ENTRANT_CHECK_ON; +#endif /* RE_ENTRANT_CHECKING */ + + FPU_info = info; + + FPU_ORIG_EIP = FPU_EIP; + + if ((FPU_EFLAGS & 0x00020000) != 0) { + /* Virtual 8086 mode */ + addr_modes.default_mode = VM86; + FPU_EIP += code_base = FPU_CS << 4; + code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */ + } else if (FPU_CS == __USER_CS && FPU_DS == __USER_DS) { + addr_modes.default_mode = 0; + } else if (FPU_CS == __KERNEL_CS) { + printk("math_emulate: %04x:%08lx\n", FPU_CS, FPU_EIP); + panic("Math emulation needed in kernel"); + } else { + + if ((FPU_CS & 4) != 4) { /* Must be in the LDT */ + /* Can only handle segmented addressing via the LDT + for now, and it must be 16 bit */ + printk("FPU emulator: Unsupported addressing mode\n"); + math_abort(FPU_info, SIGILL); + } + + code_descriptor = FPU_get_ldt_descriptor(FPU_CS); + if (code_descriptor.d) { + /* The above test may be wrong, the book is not clear */ + /* Segmented 32 bit protected mode */ + addr_modes.default_mode = SEG32; + } else { + /* 16 bit protected mode */ + addr_modes.default_mode = PM16; + } + FPU_EIP += code_base = seg_get_base(&code_descriptor); + code_limit = seg_get_limit(&code_descriptor) + 1; + code_limit *= seg_get_granularity(&code_descriptor); + code_limit += code_base - 1; + if (code_limit < code_base) + code_limit = 0xffffffff; + } + + FPU_lookahead = !(FPU_EFLAGS & X86_EFLAGS_TF); + + if (!valid_prefix(&byte1, (u_char __user **) & FPU_EIP, + &addr_modes.override)) { + RE_ENTRANT_CHECK_OFF; + printk + ("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n" + "FPU emulator: self-modifying code! (emulation impossible)\n", + byte1); + RE_ENTRANT_CHECK_ON; + EXCEPTION(EX_INTERNAL | 0x126); + math_abort(FPU_info, SIGILL); + } + + do_another_FPU_instruction: + + no_ip_update = 0; + + FPU_EIP++; /* We have fetched the prefix and first code bytes. */ + + if (addr_modes.default_mode) { + /* This checks for the minimum instruction bytes. + We also need to check any extra (address mode) code access. */ + if (FPU_EIP > code_limit) + math_abort(FPU_info, SIGSEGV); + } + + if ((byte1 & 0xf8) != 0xd8) { + if (byte1 == FWAIT_OPCODE) { + if (partial_status & SW_Summary) + goto do_the_FPU_interrupt; + else + goto FPU_fwait_done; + } +#ifdef PARANOID + EXCEPTION(EX_INTERNAL | 0x128); + math_abort(FPU_info, SIGILL); +#endif /* PARANOID */ + } + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP); + RE_ENTRANT_CHECK_ON; + FPU_EIP++; + + if (partial_status & SW_Summary) { + /* Ignore the error for now if the current instruction is a no-wait + control instruction */ + /* The 80486 manual contradicts itself on this topic, + but a real 80486 uses the following instructions: + fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex. + */ + code = (FPU_modrm << 8) | byte1; + if (!((((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */ + (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv, + fnstsw */ + ((code & 0xc000) != 0xc000))))) { + /* + * We need to simulate the action of the kernel to FPU + * interrupts here. + */ + do_the_FPU_interrupt: + + FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ + + RE_ENTRANT_CHECK_OFF; + current->thread.trap_nr = X86_TRAP_MF; + current->thread.error_code = 0; + send_sig(SIGFPE, current, 1); + return; + } + } + + entry_sel_off.offset = FPU_ORIG_EIP; + entry_sel_off.selector = FPU_CS; + entry_sel_off.opcode = (byte1 << 8) | FPU_modrm; + entry_sel_off.empty = 0; + + FPU_rm = FPU_modrm & 7; + + if (FPU_modrm < 0300) { + /* All of these instructions use the mod/rm byte to get a data address */ + + if ((addr_modes.default_mode & SIXTEEN) + ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX)) + data_address = + FPU_get_address_16(FPU_modrm, &FPU_EIP, + &data_sel_off, addr_modes); + else + data_address = + FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off, + addr_modes); + + if (addr_modes.default_mode) { + if (FPU_EIP - 1 > code_limit) + math_abort(FPU_info, SIGSEGV); + } + + if (!(byte1 & 1)) { + unsigned short status1 = partial_status; + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + + /* Stack underflow has priority */ + if (NOT_EMPTY_ST0) { + if (addr_modes.default_mode & PROTECTED) { + /* This table works for 16 and 32 bit protected mode */ + if (access_limit < + data_sizes_16[(byte1 >> 1) & 3]) + math_abort(FPU_info, SIGSEGV); + } + + unmasked = 0; /* Do this here to stop compiler warnings. */ + switch ((byte1 >> 1) & 3) { + case 0: + unmasked = + FPU_load_single((float __user *) + data_address, + &loaded_data); + loaded_tag = unmasked & 0xff; + unmasked &= ~0xff; + break; + case 1: + loaded_tag = + FPU_load_int32((long __user *) + data_address, + &loaded_data); + break; + case 2: + unmasked = + FPU_load_double((double __user *) + data_address, + &loaded_data); + loaded_tag = unmasked & 0xff; + unmasked &= ~0xff; + break; + case 3: + default: /* Used here to suppress gcc warnings. */ + loaded_tag = + FPU_load_int16((short __user *) + data_address, + &loaded_data); + break; + } + + /* No more access to user memory, it is safe + to use static data now */ + + /* NaN operands have the next priority. */ + /* We have to delay looking at st(0) until after + loading the data, because that data might contain an SNaN */ + if (((st0_tag == TAG_Special) && isNaN(st0_ptr)) + || ((loaded_tag == TAG_Special) + && isNaN(&loaded_data))) { + /* Restore the status word; we might have loaded a + denormal. */ + partial_status = status1; + if ((FPU_modrm & 0x30) == 0x10) { + /* fcom or fcomp */ + EXCEPTION(EX_Invalid); + setcc(SW_C3 | SW_C2 | SW_C0); + if ((FPU_modrm & 0x08) + && (control_word & + CW_Invalid)) + FPU_pop(); /* fcomp, masked, so we pop. */ + } else { + if (loaded_tag == TAG_Special) + loaded_tag = + FPU_Special + (&loaded_data); +#ifdef PECULIAR_486 + /* This is not really needed, but gives behaviour + identical to an 80486 */ + if ((FPU_modrm & 0x28) == 0x20) + /* fdiv or fsub */ + real_2op_NaN + (&loaded_data, + loaded_tag, 0, + &loaded_data); + else +#endif /* PECULIAR_486 */ + /* fadd, fdivr, fmul, or fsubr */ + real_2op_NaN + (&loaded_data, + loaded_tag, 0, + st0_ptr); + } + goto reg_mem_instr_done; + } + + if (unmasked && !((FPU_modrm & 0x30) == 0x10)) { + /* Is not a comparison instruction. */ + if ((FPU_modrm & 0x38) == 0x38) { + /* fdivr */ + if ((st0_tag == TAG_Zero) && + ((loaded_tag == TAG_Valid) + || (loaded_tag == + TAG_Special + && + isdenormal + (&loaded_data)))) { + if (FPU_divide_by_zero + (0, + getsign + (&loaded_data)) + < 0) { + /* We use the fact here that the unmasked + exception in the loaded data was for a + denormal operand */ + /* Restore the state of the denormal op bit */ + partial_status + &= + ~SW_Denorm_Op; + partial_status + |= + status1 & + SW_Denorm_Op; + } else + setsign(st0_ptr, + getsign + (&loaded_data)); + } + } + goto reg_mem_instr_done; + } + + switch ((FPU_modrm >> 3) & 7) { + case 0: /* fadd */ + clear_C1(); + FPU_add(&loaded_data, loaded_tag, 0, + control_word); + break; + case 1: /* fmul */ + clear_C1(); + FPU_mul(&loaded_data, loaded_tag, 0, + control_word); + break; + case 2: /* fcom */ + FPU_compare_st_data(&loaded_data, + loaded_tag); + break; + case 3: /* fcomp */ + if (!FPU_compare_st_data + (&loaded_data, loaded_tag) + && !unmasked) + FPU_pop(); + break; + case 4: /* fsub */ + clear_C1(); + FPU_sub(LOADED | loaded_tag, + (int)&loaded_data, + control_word); + break; + case 5: /* fsubr */ + clear_C1(); + FPU_sub(REV | LOADED | loaded_tag, + (int)&loaded_data, + control_word); + break; + case 6: /* fdiv */ + clear_C1(); + FPU_div(LOADED | loaded_tag, + (int)&loaded_data, + control_word); + break; + case 7: /* fdivr */ + clear_C1(); + if (st0_tag == TAG_Zero) + partial_status = status1; /* Undo any denorm tag, + zero-divide has priority. */ + FPU_div(REV | LOADED | loaded_tag, + (int)&loaded_data, + control_word); + break; + } + } else { + if ((FPU_modrm & 0x30) == 0x10) { + /* The instruction is fcom or fcomp */ + EXCEPTION(EX_StackUnder); + setcc(SW_C3 | SW_C2 | SW_C0); + if ((FPU_modrm & 0x08) + && (control_word & CW_Invalid)) + FPU_pop(); /* fcomp */ + } else + FPU_stack_underflow(); + } + reg_mem_instr_done: + operand_address = data_sel_off; + } else { + if (!(no_ip_update = + FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) + >> 1, addr_modes, data_address))) { + operand_address = data_sel_off; + } + } + + } else { + /* None of these instructions access user memory */ + u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7); + +#ifdef PECULIAR_486 + /* This is supposed to be undefined, but a real 80486 seems + to do this: */ + operand_address.offset = 0; + operand_address.selector = FPU_DS; +#endif /* PECULIAR_486 */ + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + switch (type_table[(int)instr_index]) { + case _NONE_: /* also _REGIc: _REGIn */ + break; + case _REG0_: + if (!NOT_EMPTY_ST0) { + FPU_stack_underflow(); + goto FPU_instruction_done; + } + break; + case _REGIi: + if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) { + FPU_stack_underflow_i(FPU_rm); + goto FPU_instruction_done; + } + break; + case _REGIp: + if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) { + FPU_stack_underflow_pop(FPU_rm); + goto FPU_instruction_done; + } + break; + case _REGI_: + if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) { + FPU_stack_underflow(); + goto FPU_instruction_done; + } + break; + case _PUSH_: /* Only used by the fld st(i) instruction */ + break; + case _null_: + FPU_illegal(); + goto FPU_instruction_done; + default: + EXCEPTION(EX_INTERNAL | 0x111); + goto FPU_instruction_done; + } + (*st_instr_table[(int)instr_index]) (); + + FPU_instruction_done: + ; + } + + if (!no_ip_update) + instruction_address = entry_sel_off; + + FPU_fwait_done: + +#ifdef DEBUG + RE_ENTRANT_CHECK_OFF; + FPU_printall(); + RE_ENTRANT_CHECK_ON; +#endif /* DEBUG */ + + if (FPU_lookahead && !need_resched()) { + FPU_ORIG_EIP = FPU_EIP - code_base; + if (valid_prefix(&byte1, (u_char __user **) & FPU_EIP, + &addr_modes.override)) + goto do_another_FPU_instruction; + } + + if (addr_modes.default_mode) + FPU_EIP -= code_base; + + RE_ENTRANT_CHECK_OFF; +} + +/* Support for prefix bytes is not yet complete. To properly handle + all prefix bytes, further changes are needed in the emulator code + which accesses user address space. Access to separate segments is + important for msdos emulation. */ +static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, + overrides * override) +{ + u_char byte; + u_char __user *ip = *fpu_eip; + + *override = (overrides) { + 0, 0, PREFIX_DEFAULT}; /* defaults */ + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(byte, ip); + RE_ENTRANT_CHECK_ON; + + while (1) { + switch (byte) { + case ADDR_SIZE_PREFIX: + override->address_size = ADDR_SIZE_PREFIX; + goto do_next_byte; + + case OP_SIZE_PREFIX: + override->operand_size = OP_SIZE_PREFIX; + goto do_next_byte; + + case PREFIX_CS: + override->segment = PREFIX_CS_; + goto do_next_byte; + case PREFIX_ES: + override->segment = PREFIX_ES_; + goto do_next_byte; + case PREFIX_SS: + override->segment = PREFIX_SS_; + goto do_next_byte; + case PREFIX_FS: + override->segment = PREFIX_FS_; + goto do_next_byte; + case PREFIX_GS: + override->segment = PREFIX_GS_; + goto do_next_byte; + case PREFIX_DS: + override->segment = PREFIX_DS_; + goto do_next_byte; + +/* lock is not a valid prefix for FPU instructions, + let the cpu handle it to generate a SIGILL. */ +/* case PREFIX_LOCK: */ + + /* rep.. prefixes have no meaning for FPU instructions */ + case PREFIX_REPE: + case PREFIX_REPNE: + + do_next_byte: + ip++; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(byte, ip); + RE_ENTRANT_CHECK_ON; + break; + case FWAIT_OPCODE: + *Byte = byte; + return 1; + default: + if ((byte & 0xf8) == 0xd8) { + *Byte = byte; + *fpu_eip = ip; + return 1; + } else { + /* Not a valid sequence of prefix bytes followed by + an FPU instruction. */ + *Byte = byte; /* Needed for error message. */ + return 0; + } + } + } +} + +void math_abort(struct math_emu_info *info, unsigned int signal) +{ + FPU_EIP = FPU_ORIG_EIP; + current->thread.trap_nr = X86_TRAP_MF; + current->thread.error_code = 0; + send_sig(signal, current, 1); + RE_ENTRANT_CHECK_OFF; + __asm__("movl %0,%%esp ; ret": :"g"(((long)info) - 4)); +#ifdef PARANOID + printk("ERROR: wm-FPU-emu math_abort failed!\n"); +#endif /* PARANOID */ +} + +#define S387 ((struct swregs_state *)s387) +#define sstatus_word() \ + ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) + +int fpregs_soft_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct swregs_state *s387 = &target->thread.fpu.state.soft; + void *space = s387->st_space; + int ret; + int offset, other, i, tags, regnr, tag, newtop; + + RE_ENTRANT_CHECK_OFF; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0, + offsetof(struct swregs_state, st_space)); + RE_ENTRANT_CHECK_ON; + + if (ret) + return ret; + + S387->ftop = (S387->swd >> SW_Top_Shift) & 7; + offset = (S387->ftop & 7) * 10; + other = 80 - offset; + + RE_ENTRANT_CHECK_OFF; + + /* Copy all registers in stack order. */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + space + offset, 0, other); + if (!ret && offset) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + space, 0, offset); + + RE_ENTRANT_CHECK_ON; + + /* The tags may need to be corrected now. */ + tags = S387->twd; + newtop = S387->ftop; + for (i = 0; i < 8; i++) { + regnr = (i + newtop) & 7; + if (((tags >> ((regnr & 7) * 2)) & 3) != TAG_Empty) { + /* The loaded data over-rides all other cases. */ + tag = + FPU_tagof((FPU_REG *) ((u_char *) S387->st_space + + 10 * regnr)); + tags &= ~(3 << (regnr * 2)); + tags |= (tag & 3) << (regnr * 2); + } + } + S387->twd = tags; + + return ret; +} + +int fpregs_soft_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct swregs_state *s387 = &target->thread.fpu.state.soft; + const void *space = s387->st_space; + int ret; + int offset = (S387->ftop & 7) * 10, other = 80 - offset; + + RE_ENTRANT_CHECK_OFF; + +#ifdef PECULIAR_486 + S387->cwd &= ~0xe080; + /* An 80486 sets nearly all of the reserved bits to 1. */ + S387->cwd |= 0xffff0040; + S387->swd = sstatus_word() | 0xffff0000; + S387->twd |= 0xffff0000; + S387->fcs &= ~0xf8000000; + S387->fos |= 0xffff0000; +#endif /* PECULIAR_486 */ + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0, + offsetof(struct swregs_state, st_space)); + + /* Copy all registers in stack order. */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + space + offset, 0, other); + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + space, 0, offset); + + RE_ENTRANT_CHECK_ON; + + return ret; +} diff --git a/arch/x86/math-emu/fpu_etc.c b/arch/x86/math-emu/fpu_etc.c new file mode 100644 index 000000000..1b118fd93 --- /dev/null +++ b/arch/x86/math-emu/fpu_etc.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | fpu_etc.c | + | | + | Implement a few FPU instructions. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "reg_constant.h" + +static void fchs(FPU_REG *st0_ptr, u_char st0tag) +{ + if (st0tag ^ TAG_Empty) { + signbyte(st0_ptr) ^= SIGN_NEG; + clear_C1(); + } else + FPU_stack_underflow(); +} + +static void fabs(FPU_REG *st0_ptr, u_char st0tag) +{ + if (st0tag ^ TAG_Empty) { + setpositive(st0_ptr); + clear_C1(); + } else + FPU_stack_underflow(); +} + +static void ftst_(FPU_REG *st0_ptr, u_char st0tag) +{ + switch (st0tag) { + case TAG_Zero: + setcc(SW_C3); + break; + case TAG_Valid: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + break; + case TAG_Special: + switch (FPU_Special(st0_ptr)) { + case TW_Denormal: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + if (denormal_operand() < 0) { +#ifdef PECULIAR_486 + /* This is weird! */ + if (getsign(st0_ptr) == SIGN_POS) + setcc(SW_C3); +#endif /* PECULIAR_486 */ + return; + } + break; + case TW_NaN: + setcc(SW_C0 | SW_C2 | SW_C3); /* Operand is not comparable */ + EXCEPTION(EX_Invalid); + break; + case TW_Infinity: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + break; + default: + setcc(SW_C0 | SW_C2 | SW_C3); /* Operand is not comparable */ + EXCEPTION(EX_INTERNAL | 0x14); + break; + } + break; + case TAG_Empty: + setcc(SW_C0 | SW_C2 | SW_C3); + EXCEPTION(EX_StackUnder); + break; + } +} + +static void fxam(FPU_REG *st0_ptr, u_char st0tag) +{ + int c = 0; + switch (st0tag) { + case TAG_Empty: + c = SW_C3 | SW_C0; + break; + case TAG_Zero: + c = SW_C3; + break; + case TAG_Valid: + c = SW_C2; + break; + case TAG_Special: + switch (FPU_Special(st0_ptr)) { + case TW_Denormal: + c = SW_C2 | SW_C3; /* Denormal */ + break; + case TW_NaN: + /* We also use NaN for unsupported types. */ + if ((st0_ptr->sigh & 0x80000000) + && (exponent(st0_ptr) == EXP_OVER)) + c = SW_C0; + break; + case TW_Infinity: + c = SW_C2 | SW_C0; + break; + } + } + if (getsign(st0_ptr) == SIGN_NEG) + c |= SW_C1; + setcc(c); +} + +static FUNC_ST0 const fp_etc_table[] = { + fchs, fabs, (FUNC_ST0) FPU_illegal, (FUNC_ST0) FPU_illegal, + ftst_, fxam, (FUNC_ST0) FPU_illegal, (FUNC_ST0) FPU_illegal +}; + +void FPU_etc(void) +{ + (fp_etc_table[FPU_rm]) (&st(0), FPU_gettag0()); +} diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h new file mode 100644 index 000000000..70d35c200 --- /dev/null +++ b/arch/x86/math-emu/fpu_proto.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _FPU_PROTO_H +#define _FPU_PROTO_H + +/* errors.c */ +extern void FPU_illegal(void); +extern void FPU_printall(void); +asmlinkage void FPU_exception(int n); +extern int real_1op_NaN(FPU_REG *a); +extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr, + FPU_REG const *defaultNaN); +asmlinkage int arith_invalid(int deststnr); +asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign); +extern int set_precision_flag(int flags); +asmlinkage void set_precision_flag_up(void); +asmlinkage void set_precision_flag_down(void); +asmlinkage int denormal_operand(void); +asmlinkage int arith_overflow(FPU_REG *dest); +asmlinkage int arith_underflow(FPU_REG *dest); +extern void FPU_stack_overflow(void); +extern void FPU_stack_underflow(void); +extern void FPU_stack_underflow_i(int i); +extern void FPU_stack_underflow_pop(int i); +/* fpu_arith.c */ +extern void fadd__(void); +extern void fmul__(void); +extern void fsub__(void); +extern void fsubr_(void); +extern void fdiv__(void); +extern void fdivr_(void); +extern void fadd_i(void); +extern void fmul_i(void); +extern void fsubri(void); +extern void fsub_i(void); +extern void fdivri(void); +extern void fdiv_i(void); +extern void faddp_(void); +extern void fmulp_(void); +extern void fsubrp(void); +extern void fsubp_(void); +extern void fdivrp(void); +extern void fdivp_(void); +/* fpu_aux.c */ +extern void finit(void); +extern void finit_(void); +extern void fstsw_(void); +extern void fp_nop(void); +extern void fld_i_(void); +extern void fxch_i(void); +extern void fcmovb(void); +extern void fcmove(void); +extern void fcmovbe(void); +extern void fcmovu(void); +extern void fcmovnb(void); +extern void fcmovne(void); +extern void fcmovnbe(void); +extern void fcmovnu(void); +extern void ffree_(void); +extern void ffreep(void); +extern void fst_i_(void); +extern void fstp_i(void); +/* fpu_entry.c */ +extern void math_emulate(struct math_emu_info *info); +extern void math_abort(struct math_emu_info *info, unsigned int signal); +/* fpu_etc.c */ +extern void FPU_etc(void); +/* fpu_tags.c */ +extern int FPU_gettag0(void); +extern int FPU_gettagi(int stnr); +extern int FPU_gettag(int regnr); +extern void FPU_settag0(int tag); +extern void FPU_settagi(int stnr, int tag); +extern void FPU_settag(int regnr, int tag); +extern int FPU_Special(FPU_REG const *ptr); +extern int isNaN(FPU_REG const *ptr); +extern void FPU_pop(void); +extern int FPU_empty_i(int stnr); +extern int FPU_stackoverflow(FPU_REG ** st_new_ptr); +extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr); +extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag); +extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag); +/* fpu_trig.c */ +extern void FPU_triga(void); +extern void FPU_trigb(void); +/* get_address.c */ +extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, + fpu_addr_modes addr_modes); +extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, + fpu_addr_modes addr_modes); +/* load_store.c */ +extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes, + void __user * data_address); +/* poly_2xm1.c */ +extern int poly_2xm1(u_char sign, FPU_REG * arg, FPU_REG *result); +/* poly_atan.c */ +extern void poly_atan(FPU_REG * st0_ptr, u_char st0_tag, FPU_REG *st1_ptr, + u_char st1_tag); +/* poly_l2.c */ +extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign); +extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1, + FPU_REG * d); +/* poly_sin.c */ +extern void poly_sine(FPU_REG *st0_ptr); +extern void poly_cos(FPU_REG *st0_ptr); +/* poly_tan.c */ +extern void poly_tan(FPU_REG *st0_ptr); +/* reg_add_sub.c */ +extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w); +extern int FPU_sub(int flags, int rm, int control_w); +/* reg_compare.c */ +extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag); +extern void fcom_st(void); +extern void fcompst(void); +extern void fcompp(void); +extern void fucom_(void); +extern void fucomp(void); +extern void fucompp(void); +extern void fcomi_(void); +extern void fcomip(void); +extern void fucomi_(void); +extern void fucomip(void); +/* reg_constant.c */ +extern void fconst(void); +/* reg_ld_str.c */ +extern int FPU_load_extended(long double __user *s, int stnr); +extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data); +extern int FPU_load_single(float __user *single, FPU_REG *loaded_data); +extern int FPU_load_int64(long long __user *_s); +extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data); +extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data); +extern int FPU_load_bcd(u_char __user *s); +extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, + long double __user * d); +extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, + double __user * dfloat); +extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, + float __user * single); +extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, + long long __user * d); +extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d); +extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d); +extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d); +extern int FPU_round_to_int(FPU_REG *r, u_char tag); +extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s); +extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address); +extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d); +extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address); +extern int FPU_tagof(FPU_REG *ptr); +/* reg_mul.c */ +extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w); + +extern int FPU_div(int flags, int regrm, int control_w); +/* reg_convert.c */ +extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x); +#endif /* _FPU_PROTO_H */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h new file mode 100644 index 000000000..c8b1b31ed --- /dev/null +++ b/arch/x86/math-emu/fpu_system.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | fpu_system.h | + | | + | Copyright (C) 1992,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_SYSTEM_H +#define _FPU_SYSTEM_H + +/* system dependent definitions */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +#include <asm/desc.h> +#include <asm/mmu_context.h> + +static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg) +{ + static struct desc_struct zero_desc; + struct desc_struct ret = zero_desc; + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + seg >>= 3; + mutex_lock(¤t->mm->context.lock); + if (current->mm->context.ldt && seg < current->mm->context.ldt->nr_entries) + ret = current->mm->context.ldt->entries[seg]; + mutex_unlock(¤t->mm->context.lock); +#endif + return ret; +} + +#define SEG_TYPE_WRITABLE (1U << 1) +#define SEG_TYPE_EXPANDS_DOWN (1U << 2) +#define SEG_TYPE_EXECUTE (1U << 3) +#define SEG_TYPE_EXPAND_MASK (SEG_TYPE_EXPANDS_DOWN | SEG_TYPE_EXECUTE) +#define SEG_TYPE_EXECUTE_MASK (SEG_TYPE_WRITABLE | SEG_TYPE_EXECUTE) + +static inline unsigned long seg_get_base(struct desc_struct *d) +{ + unsigned long base = (unsigned long)d->base2 << 24; + + return base | ((unsigned long)d->base1 << 16) | d->base0; +} + +static inline unsigned long seg_get_limit(struct desc_struct *d) +{ + return ((unsigned long)d->limit1 << 16) | d->limit0; +} + +static inline unsigned long seg_get_granularity(struct desc_struct *d) +{ + return d->g ? 4096 : 1; +} + +static inline bool seg_expands_down(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXPAND_MASK) == SEG_TYPE_EXPANDS_DOWN; +} + +static inline bool seg_execute_only(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_EXECUTE; +} + +static inline bool seg_writable(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE; +} + +#define I387 (¤t->thread.fpu.state) +#define FPU_info (I387->soft.info) + +#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->regs->ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->regs->ds)) +#define FPU_EAX (FPU_info->regs->ax) +#define FPU_EFLAGS (FPU_info->regs->flags) +#define FPU_EIP (FPU_info->regs->ip) +#define FPU_ORIG_EIP (FPU_info->___orig_eip) + +#define FPU_lookahead (I387->soft.lookahead) + +/* nz if ip_offset and cs_selector are not to be set for the current + instruction. */ +#define no_ip_update (*(u_char *)&(I387->soft.no_update)) +#define FPU_rm (*(u_char *)&(I387->soft.rm)) + +/* Number of bytes of data which can be legally accessed by the current + instruction. This only needs to hold a number <= 108, so a byte will do. */ +#define access_limit (*(u_char *)&(I387->soft.alimit)) + +#define partial_status (I387->soft.swd) +#define control_word (I387->soft.cwd) +#define fpu_tag_word (I387->soft.twd) +#define registers (I387->soft.st_space) +#define top (I387->soft.ftop) + +#define instruction_address (*(struct address *)&I387->soft.fip) +#define operand_address (*(struct address *)&I387->soft.foo) + +#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ + math_abort(FPU_info,SIGSEGV) +#define FPU_abort math_abort(FPU_info, SIGSEGV) + +#undef FPU_IGNORE_CODE_SEGV +#ifdef FPU_IGNORE_CODE_SEGV +/* access_ok() is very expensive, and causes the emulator to run + about 20% slower if applied to the code. Anyway, errors due to bad + code addresses should be much rarer than errors due to bad data + addresses. */ +#define FPU_code_access_ok(z) +#else +/* A simpler test than access_ok() can probably be done for + FPU_code_access_ok() because the only possible error is to step + past the upper boundary of a legal code area. */ +#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z) +#endif + +#define FPU_get_user(x,y) get_user((x),(y)) +#define FPU_put_user(x,y) put_user((x),(y)) + +#endif diff --git a/arch/x86/math-emu/fpu_tags.c b/arch/x86/math-emu/fpu_tags.c new file mode 100644 index 000000000..bff95d4e7 --- /dev/null +++ b/arch/x86/math-emu/fpu_tags.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | fpu_tags.c | + | | + | Set FPU register tags. | + | | + | Copyright (C) 1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" +#include "fpu_system.h" +#include "exception.h" + +void FPU_pop(void) +{ + fpu_tag_word |= 3 << ((top & 7) * 2); + top++; +} + +int FPU_gettag0(void) +{ + return (fpu_tag_word >> ((top & 7) * 2)) & 3; +} + +int FPU_gettagi(int stnr) +{ + return (fpu_tag_word >> (((top + stnr) & 7) * 2)) & 3; +} + +int FPU_gettag(int regnr) +{ + return (fpu_tag_word >> ((regnr & 7) * 2)) & 3; +} + +void FPU_settag0(int tag) +{ + int regnr = top; + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr * 2)); + fpu_tag_word |= (tag & 3) << (regnr * 2); +} + +void FPU_settagi(int stnr, int tag) +{ + int regnr = stnr + top; + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr * 2)); + fpu_tag_word |= (tag & 3) << (regnr * 2); +} + +void FPU_settag(int regnr, int tag) +{ + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr * 2)); + fpu_tag_word |= (tag & 3) << (regnr * 2); +} + +int FPU_Special(FPU_REG const *ptr) +{ + int exp = exponent(ptr); + + if (exp == EXP_BIAS + EXP_UNDER) + return TW_Denormal; + else if (exp != EXP_BIAS + EXP_OVER) + return TW_NaN; + else if ((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) + return TW_Infinity; + return TW_NaN; +} + +int isNaN(FPU_REG const *ptr) +{ + return ((exponent(ptr) == EXP_BIAS + EXP_OVER) + && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0))); +} + +int FPU_empty_i(int stnr) +{ + int regnr = (top + stnr) & 7; + + return ((fpu_tag_word >> (regnr * 2)) & 3) == TAG_Empty; +} + +int FPU_stackoverflow(FPU_REG ** st_new_ptr) +{ + *st_new_ptr = &st(-1); + + return ((fpu_tag_word >> (((top - 1) & 7) * 2)) & 3) != TAG_Empty; +} + +void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr) +{ + reg_copy(r, &st(stnr)); + FPU_settagi(stnr, tag); +} + +void FPU_copy_to_reg1(FPU_REG const *r, u_char tag) +{ + reg_copy(r, &st(1)); + FPU_settagi(1, tag); +} + +void FPU_copy_to_reg0(FPU_REG const *r, u_char tag) +{ + int regnr = top; + regnr &= 7; + + reg_copy(r, &st(0)); + + fpu_tag_word &= ~(3 << (regnr * 2)); + fpu_tag_word |= (tag & 3) << (regnr * 2); +} diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c new file mode 100644 index 000000000..783c509f9 --- /dev/null +++ b/arch/x86/math-emu/fpu_trig.c @@ -0,0 +1,1644 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | fpu_trig.c | + | | + | Implementation of the FPU "transcendental" functions. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" +#include "reg_constant.h" + +static void rem_kernel(unsigned long long st0, unsigned long long *y, + unsigned long long st1, unsigned long long q, int n); + +#define BETTER_THAN_486 + +#define FCOS 4 + +/* Used only by fptan, fsin, fcos, and fsincos. */ +/* This routine produces very accurate results, similar to + using a value of pi with more than 128 bits precision. */ +/* Limited measurements show no results worse than 64 bit precision + except for the results for arguments close to 2^63, where the + precision of the result sometimes degrades to about 63.9 bits */ +static int trig_arg(FPU_REG *st0_ptr, int even) +{ + FPU_REG tmp; + u_char tmptag; + unsigned long long q; + int old_cw = control_word, saved_status = partial_status; + int tag, st0_tag = TAG_Valid; + + if (exponent(st0_ptr) >= 63) { + partial_status |= SW_C2; /* Reduction incomplete. */ + return -1; + } + + control_word &= ~CW_RC; + control_word |= RC_CHOP; + + setpositive(st0_ptr); + tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f, + SIGN_POS); + + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow + to 2^64 */ + q = significand(&tmp); + if (q) { + rem_kernel(significand(st0_ptr), + &significand(&tmp), + significand(&CONST_PI2), + q, exponent(st0_ptr) - exponent(&CONST_PI2)); + setexponent16(&tmp, exponent(&CONST_PI2)); + st0_tag = FPU_normalize(&tmp); + FPU_copy_to_reg0(&tmp, st0_tag); + } + + if ((even && !(q & 1)) || (!even && (q & 1))) { + st0_tag = + FPU_sub(REV | LOADED | TAG_Valid, (int)&CONST_PI2, + FULL_PRECISION); + +#ifdef BETTER_THAN_486 + /* So far, the results are exact but based upon a 64 bit + precision approximation to pi/2. The technique used + now is equivalent to using an approximation to pi/2 which + is accurate to about 128 bits. */ + if ((exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) + || (q > 1)) { + /* This code gives the effect of having pi/2 to better than + 128 bits precision. */ + + significand(&tmp) = q + 1; + setexponent16(&tmp, 63); + FPU_normalize(&tmp); + tmptag = + FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, + FULL_PRECISION, SIGN_POS, + exponent(&CONST_PI2extra) + + exponent(&tmp)); + setsign(&tmp, getsign(&CONST_PI2extra)); + st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION); + if (signnegative(st0_ptr)) { + /* CONST_PI2extra is negative, so the result of the addition + can be negative. This means that the argument is actually + in a different quadrant. The correction is always < pi/2, + so it can't overflow into yet another quadrant. */ + setpositive(st0_ptr); + q++; + } + } +#endif /* BETTER_THAN_486 */ + } +#ifdef BETTER_THAN_486 + else { + /* So far, the results are exact but based upon a 64 bit + precision approximation to pi/2. The technique used + now is equivalent to using an approximation to pi/2 which + is accurate to about 128 bits. */ + if (((q > 0) + && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64)) + || (q > 1)) { + /* This code gives the effect of having p/2 to better than + 128 bits precision. */ + + significand(&tmp) = q; + setexponent16(&tmp, 63); + FPU_normalize(&tmp); /* This must return TAG_Valid */ + tmptag = + FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, + FULL_PRECISION, SIGN_POS, + exponent(&CONST_PI2extra) + + exponent(&tmp)); + setsign(&tmp, getsign(&CONST_PI2extra)); + st0_tag = FPU_sub(LOADED | (tmptag & 0x0f), (int)&tmp, + FULL_PRECISION); + if ((exponent(st0_ptr) == exponent(&CONST_PI2)) && + ((st0_ptr->sigh > CONST_PI2.sigh) + || ((st0_ptr->sigh == CONST_PI2.sigh) + && (st0_ptr->sigl > CONST_PI2.sigl)))) { + /* CONST_PI2extra is negative, so the result of the + subtraction can be larger than pi/2. This means + that the argument is actually in a different quadrant. + The correction is always < pi/2, so it can't overflow + into yet another quadrant. */ + st0_tag = + FPU_sub(REV | LOADED | TAG_Valid, + (int)&CONST_PI2, FULL_PRECISION); + q++; + } + } + } +#endif /* BETTER_THAN_486 */ + + FPU_settag0(st0_tag); + control_word = old_cw; + partial_status = saved_status & ~SW_C2; /* Reduction complete. */ + + return (q & 3) | even; +} + +/* Convert a long to register */ +static void convert_l2reg(long const *arg, int deststnr) +{ + int tag; + long num = *arg; + u_char sign; + FPU_REG *dest = &st(deststnr); + + if (num == 0) { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + return; + } + + if (num > 0) { + sign = SIGN_POS; + } else { + num = -num; + sign = SIGN_NEG; + } + + dest->sigh = num; + dest->sigl = 0; + setexponent16(dest, 31); + tag = FPU_normalize(dest); + FPU_settagi(deststnr, tag); + setsign(dest, sign); + return; +} + +static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag) +{ + if (st0_tag == TAG_Empty) + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + else if (st0_tag == TW_NaN) + real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */ +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x0112); +#endif /* PARANOID */ +} + +static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag) +{ + int isNaN; + + switch (st0_tag) { + case TW_NaN: + isNaN = (exponent(st0_ptr) == EXP_OVER) + && (st0_ptr->sigh & 0x80000000); + if (isNaN && !(st0_ptr->sigh & 0x40000000)) { /* Signaling ? */ + EXCEPTION(EX_Invalid); + if (control_word & CW_Invalid) { + /* The masked response */ + /* Convert to a QNaN */ + st0_ptr->sigh |= 0x40000000; + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + } + } else if (isNaN) { + /* A QNaN */ + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + } else { + /* pseudoNaN or other unsupported */ + EXCEPTION(EX_Invalid); + if (control_word & CW_Invalid) { + /* The masked response */ + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + push(); + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + } + break; /* return with a NaN in st(0) */ +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL | 0x0112); +#endif /* PARANOID */ + } +} + +/*---------------------------------------------------------------------------*/ + +static void f2xm1(FPU_REG *st0_ptr, u_char tag) +{ + FPU_REG a; + + clear_C1(); + + if (tag == TAG_Valid) { + /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */ + if (exponent(st0_ptr) < 0) { + denormal_arg: + + FPU_to_exp16(st0_ptr, &a); + + /* poly_2xm1(x) requires 0 < st(0) < 1. */ + poly_2xm1(getsign(st0_ptr), &a, st0_ptr); + } + set_precision_flag_up(); /* 80486 appears to always do this */ + return; + } + + if (tag == TAG_Zero) + return; + + if (tag == TAG_Special) + tag = FPU_Special(st0_ptr); + + switch (tag) { + case TW_Denormal: + if (denormal_operand() < 0) + return; + goto denormal_arg; + case TW_Infinity: + if (signnegative(st0_ptr)) { + /* -infinity gives -1 (p16-10) */ + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setnegative(st0_ptr); + } + return; + default: + single_arg_error(st0_ptr, tag); + } +} + +static void fptan(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + int q; + u_char arg_sign = getsign(st0_ptr); + + /* Stack underflow has higher priority */ + if (st0_tag == TAG_Empty) { + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + if (control_word & CW_Invalid) { + st_new_ptr = &st(-1); + push(); + FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ + } + return; + } + + if (STACK_OVERFLOW) { + FPU_stack_overflow(); + return; + } + + if (st0_tag == TAG_Valid) { + if (exponent(st0_ptr) > -40) { + if ((q = trig_arg(st0_ptr, 0)) == -1) { + /* Operand is out of range */ + return; + } + + poly_tan(st0_ptr); + setsign(st0_ptr, (q & 1) ^ (arg_sign != 0)); + set_precision_flag_up(); /* We do not really know if up or down */ + } else { + /* For a small arg, the result == the argument */ + /* Underflow may happen */ + + denormal_arg: + + FPU_to_exp16(st0_ptr, st0_ptr); + + st0_tag = + FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); + FPU_settag0(st0_tag); + } + push(); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + return; + } + + if (st0_tag == TAG_Zero) { + push(); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setcc(0); + return; + } + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + + if (st0_tag == TW_Denormal) { + if (denormal_operand() < 0) + return; + + goto denormal_arg; + } + + if (st0_tag == TW_Infinity) { + /* The 80486 treats infinity as an invalid operand */ + if (arith_invalid(0) >= 0) { + st_new_ptr = &st(-1); + push(); + arith_invalid(0); + } + return; + } + + single_arg_2_error(st0_ptr, st0_tag); +} + +static void fxtract(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + u_char sign; + register FPU_REG *st1_ptr = st0_ptr; /* anticipate */ + + if (STACK_OVERFLOW) { + FPU_stack_overflow(); + return; + } + + clear_C1(); + + if (st0_tag == TAG_Valid) { + long e; + + push(); + sign = getsign(st1_ptr); + reg_copy(st1_ptr, st_new_ptr); + setexponent16(st_new_ptr, exponent(st_new_ptr)); + + denormal_arg: + + e = exponent16(st_new_ptr); + convert_l2reg(&e, 1); + setexponentpos(st_new_ptr, 0); + setsign(st_new_ptr, sign); + FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */ + return; + } else if (st0_tag == TAG_Zero) { + sign = getsign(st0_ptr); + + if (FPU_divide_by_zero(0, SIGN_NEG) < 0) + return; + + push(); + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(st_new_ptr, sign); + return; + } + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + + if (st0_tag == TW_Denormal) { + if (denormal_operand() < 0) + return; + + push(); + sign = getsign(st1_ptr); + FPU_to_exp16(st1_ptr, st_new_ptr); + goto denormal_arg; + } else if (st0_tag == TW_Infinity) { + sign = getsign(st0_ptr); + setpositive(st0_ptr); + push(); + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + setsign(st_new_ptr, sign); + return; + } else if (st0_tag == TW_NaN) { + if (real_1op_NaN(st0_ptr) < 0) + return; + + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + return; + } else if (st0_tag == TAG_Empty) { + /* Is this the correct behaviour? */ + if (control_word & EX_Invalid) { + FPU_stack_underflow(); + push(); + FPU_stack_underflow(); + } else + EXCEPTION(EX_StackUnder); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x119); +#endif /* PARANOID */ +} + +static void fdecstp(void) +{ + clear_C1(); + top--; +} + +static void fincstp(void) +{ + clear_C1(); + top++; +} + +static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag) +{ + int expon; + + clear_C1(); + + if (st0_tag == TAG_Valid) { + u_char tag; + + if (signnegative(st0_ptr)) { + arith_invalid(0); /* sqrt(negative) is invalid */ + return; + } + + /* make st(0) in [1.0 .. 4.0) */ + expon = exponent(st0_ptr); + + denormal_arg: + + setexponent16(st0_ptr, (expon & 1)); + + /* Do the computation, the sign of the result will be positive. */ + tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS); + addexponent(st0_ptr, expon >> 1); + FPU_settag0(tag); + return; + } + + if (st0_tag == TAG_Zero) + return; + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + + if (st0_tag == TW_Infinity) { + if (signnegative(st0_ptr)) + arith_invalid(0); /* sqrt(-Infinity) is invalid */ + return; + } else if (st0_tag == TW_Denormal) { + if (signnegative(st0_ptr)) { + arith_invalid(0); /* sqrt(negative) is invalid */ + return; + } + + if (denormal_operand() < 0) + return; + + FPU_to_exp16(st0_ptr, st0_ptr); + + expon = exponent16(st0_ptr); + + goto denormal_arg; + } + + single_arg_error(st0_ptr, st0_tag); + +} + +static void frndint_(FPU_REG *st0_ptr, u_char st0_tag) +{ + int flags, tag; + + if (st0_tag == TAG_Valid) { + u_char sign; + + denormal_arg: + + sign = getsign(st0_ptr); + + if (exponent(st0_ptr) > 63) + return; + + if (st0_tag == TW_Denormal) { + if (denormal_operand() < 0) + return; + } + + /* Fortunately, this can't overflow to 2^64 */ + if ((flags = FPU_round_to_int(st0_ptr, st0_tag))) + set_precision_flag(flags); + + setexponent16(st0_ptr, 63); + tag = FPU_normalize(st0_ptr); + setsign(st0_ptr, sign); + FPU_settag0(tag); + return; + } + + if (st0_tag == TAG_Zero) + return; + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + + if (st0_tag == TW_Denormal) + goto denormal_arg; + else if (st0_tag == TW_Infinity) + return; + else + single_arg_error(st0_ptr, st0_tag); +} + +static int fsin(FPU_REG *st0_ptr, u_char tag) +{ + u_char arg_sign = getsign(st0_ptr); + + if (tag == TAG_Valid) { + int q; + + if (exponent(st0_ptr) > -40) { + if ((q = trig_arg(st0_ptr, 0)) == -1) { + /* Operand is out of range */ + return 1; + } + + poly_sine(st0_ptr); + + if (q & 2) + changesign(st0_ptr); + + setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign); + + /* We do not really know if up or down */ + set_precision_flag_up(); + return 0; + } else { + /* For a small arg, the result == the argument */ + set_precision_flag_up(); /* Must be up. */ + return 0; + } + } + + if (tag == TAG_Zero) { + setcc(0); + return 0; + } + + if (tag == TAG_Special) + tag = FPU_Special(st0_ptr); + + if (tag == TW_Denormal) { + if (denormal_operand() < 0) + return 1; + + /* For a small arg, the result == the argument */ + /* Underflow may happen */ + FPU_to_exp16(st0_ptr, st0_ptr); + + tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); + + FPU_settag0(tag); + + return 0; + } else if (tag == TW_Infinity) { + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(0); + return 1; + } else { + single_arg_error(st0_ptr, tag); + return 1; + } +} + +static int f_cos(FPU_REG *st0_ptr, u_char tag) +{ + u_char st0_sign; + + st0_sign = getsign(st0_ptr); + + if (tag == TAG_Valid) { + int q; + + if (exponent(st0_ptr) > -40) { + if ((exponent(st0_ptr) < 0) + || ((exponent(st0_ptr) == 0) + && (significand(st0_ptr) <= + 0xc90fdaa22168c234LL))) { + poly_cos(st0_ptr); + + /* We do not really know if up or down */ + set_precision_flag_down(); + + return 0; + } else if ((q = trig_arg(st0_ptr, FCOS)) != -1) { + poly_sine(st0_ptr); + + if ((q + 1) & 2) + changesign(st0_ptr); + + /* We do not really know if up or down */ + set_precision_flag_down(); + + return 0; + } else { + /* Operand is out of range */ + return 1; + } + } else { + denormal_arg: + + setcc(0); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); +#ifdef PECULIAR_486 + set_precision_flag_down(); /* 80486 appears to do this. */ +#else + set_precision_flag_up(); /* Must be up. */ +#endif /* PECULIAR_486 */ + return 0; + } + } else if (tag == TAG_Zero) { + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setcc(0); + return 0; + } + + if (tag == TAG_Special) + tag = FPU_Special(st0_ptr); + + if (tag == TW_Denormal) { + if (denormal_operand() < 0) + return 1; + + goto denormal_arg; + } else if (tag == TW_Infinity) { + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(0); + return 1; + } else { + single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */ + return 1; + } +} + +static void fcos(FPU_REG *st0_ptr, u_char st0_tag) +{ + f_cos(st0_ptr, st0_tag); +} + +static void fsincos(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + FPU_REG arg; + u_char tag; + + /* Stack underflow has higher priority */ + if (st0_tag == TAG_Empty) { + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + if (control_word & CW_Invalid) { + st_new_ptr = &st(-1); + push(); + FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ + } + return; + } + + if (STACK_OVERFLOW) { + FPU_stack_overflow(); + return; + } + + if (st0_tag == TAG_Special) + tag = FPU_Special(st0_ptr); + else + tag = st0_tag; + + if (tag == TW_NaN) { + single_arg_2_error(st0_ptr, TW_NaN); + return; + } else if (tag == TW_Infinity) { + /* The 80486 treats infinity as an invalid operand */ + if (arith_invalid(0) >= 0) { + /* Masked response */ + push(); + arith_invalid(0); + } + return; + } + + reg_copy(st0_ptr, &arg); + if (!fsin(st0_ptr, st0_tag)) { + push(); + FPU_copy_to_reg0(&arg, st0_tag); + f_cos(&st(0), st0_tag); + } else { + /* An error, so restore st(0) */ + FPU_copy_to_reg0(&arg, st0_tag); + } +} + +/*---------------------------------------------------------------------------*/ +/* The following all require two arguments: st(0) and st(1) */ + +/* A lean, mean kernel for the fprem instructions. This relies upon + the division and rounding to an integer in do_fprem giving an + exact result. Because of this, rem_kernel() needs to deal only with + the least significant 64 bits, the more significant bits of the + result must be zero. + */ +static void rem_kernel(unsigned long long st0, unsigned long long *y, + unsigned long long st1, unsigned long long q, int n) +{ + int dummy; + unsigned long long x; + + x = st0 << n; + + /* Do the required multiplication and subtraction in the one operation */ + + /* lsw x -= lsw st1 * lsw q */ + asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1":"=m" + (((unsigned *)&x)[0]), "=m"(((unsigned *)&x)[1]), + "=a"(dummy) + :"2"(((unsigned *)&st1)[0]), "m"(((unsigned *)&q)[0]) + :"%dx"); + /* msw x -= msw st1 * lsw q */ + asm volatile ("mull %3; subl %%eax,%0":"=m" (((unsigned *)&x)[1]), + "=a"(dummy) + :"1"(((unsigned *)&st1)[1]), "m"(((unsigned *)&q)[0]) + :"%dx"); + /* msw x -= lsw st1 * msw q */ + asm volatile ("mull %3; subl %%eax,%0":"=m" (((unsigned *)&x)[1]), + "=a"(dummy) + :"1"(((unsigned *)&st1)[0]), "m"(((unsigned *)&q)[1]) + :"%dx"); + + *y = x; +} + +/* Remainder of st(0) / st(1) */ +/* This routine produces exact results, i.e. there is never any + rounding or truncation, etc of the result. */ +static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + + if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) { + FPU_REG tmp, st0, st1; + u_char st0_sign, st1_sign; + u_char tmptag; + int tag; + int old_cw; + int expdif; + long long q; + unsigned short saved_status; + int cc; + + fprem_valid: + /* Convert registers for internal use. */ + st0_sign = FPU_to_exp16(st0_ptr, &st0); + st1_sign = FPU_to_exp16(st1_ptr, &st1); + expdif = exponent16(&st0) - exponent16(&st1); + + old_cw = control_word; + cc = 0; + + /* We want the status following the denorm tests, but don't want + the status changed by the arithmetic operations. */ + saved_status = partial_status; + control_word &= ~CW_RC; + control_word |= RC_CHOP; + + if (expdif < 64) { + /* This should be the most common case */ + + if (expdif > -2) { + u_char sign = st0_sign ^ st1_sign; + tag = FPU_u_div(&st0, &st1, &tmp, + PR_64_BITS | RC_CHOP | 0x3f, + sign); + setsign(&tmp, sign); + + if (exponent(&tmp) >= 0) { + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't + overflow to 2^64 */ + q = significand(&tmp); + + rem_kernel(significand(&st0), + &significand(&tmp), + significand(&st1), + q, expdif); + + setexponent16(&tmp, exponent16(&st1)); + } else { + reg_copy(&st0, &tmp); + q = 0; + } + + if ((round == RC_RND) + && (tmp.sigh & 0xc0000000)) { + /* We may need to subtract st(1) once more, + to get a result <= 1/2 of st(1). */ + unsigned long long x; + expdif = + exponent16(&st1) - exponent16(&tmp); + if (expdif <= 1) { + if (expdif == 0) + x = significand(&st1) - + significand(&tmp); + else /* expdif is 1 */ + x = (significand(&st1) + << 1) - + significand(&tmp); + if ((x < significand(&tmp)) || + /* or equi-distant (from 0 & st(1)) and q is odd */ + ((x == significand(&tmp)) + && (q & 1))) { + st0_sign = !st0_sign; + significand(&tmp) = x; + q++; + } + } + } + + if (q & 4) + cc |= SW_C0; + if (q & 2) + cc |= SW_C3; + if (q & 1) + cc |= SW_C1; + } else { + control_word = old_cw; + setcc(0); + return; + } + } else { + /* There is a large exponent difference ( >= 64 ) */ + /* To make much sense, the code in this section should + be done at high precision. */ + int exp_1, N; + u_char sign; + + /* prevent overflow here */ + /* N is 'a number between 32 and 63' (p26-113) */ + reg_copy(&st0, &tmp); + tmptag = st0_tag; + N = (expdif & 0x0000001f) + 32; /* This choice gives results + identical to an AMD 486 */ + setexponent16(&tmp, N); + exp_1 = exponent16(&st1); + setexponent16(&st1, 0); + expdif -= N; + + sign = getsign(&tmp) ^ st1_sign; + tag = + FPU_u_div(&tmp, &st1, &tmp, + PR_64_BITS | RC_CHOP | 0x3f, sign); + setsign(&tmp, sign); + + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't + overflow to 2^64 */ + + rem_kernel(significand(&st0), + &significand(&tmp), + significand(&st1), + significand(&tmp), exponent(&tmp) + ); + setexponent16(&tmp, exp_1 + expdif); + + /* It is possible for the operation to be complete here. + What does the IEEE standard say? The Intel 80486 manual + implies that the operation will never be completed at this + point, and the behaviour of a real 80486 confirms this. + */ + if (!(tmp.sigh | tmp.sigl)) { + /* The result is zero */ + control_word = old_cw; + partial_status = saved_status; + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(&st0, st0_sign); +#ifdef PECULIAR_486 + setcc(SW_C2); +#else + setcc(0); +#endif /* PECULIAR_486 */ + return; + } + cc = SW_C2; + } + + control_word = old_cw; + partial_status = saved_status; + tag = FPU_normalize_nuo(&tmp); + reg_copy(&tmp, st0_ptr); + + /* The only condition to be looked for is underflow, + and it can occur here only if underflow is unmasked. */ + if ((exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero) + && !(control_word & CW_Underflow)) { + setcc(cc); + tag = arith_underflow(st0_ptr); + setsign(st0_ptr, st0_sign); + FPU_settag0(tag); + return; + } else if ((exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero)) { + stdexp(st0_ptr); + setsign(st0_ptr, st0_sign); + } else { + tag = + FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign); + } + FPU_settag0(tag); + setcc(cc); + + return; + } + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + if (st1_tag == TAG_Special) + st1_tag = FPU_Special(st1_ptr); + + if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) { + if (denormal_operand() < 0) + return; + goto fprem_valid; + } else if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) { + FPU_stack_underflow(); + return; + } else if (st0_tag == TAG_Zero) { + if (st1_tag == TAG_Valid) { + setcc(0); + return; + } else if (st1_tag == TW_Denormal) { + if (denormal_operand() < 0) + return; + setcc(0); + return; + } else if (st1_tag == TAG_Zero) { + arith_invalid(0); + return; + } /* fprem(?,0) always invalid */ + else if (st1_tag == TW_Infinity) { + setcc(0); + return; + } + } else if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) { + if (st1_tag == TAG_Zero) { + arith_invalid(0); /* fprem(Valid,Zero) is invalid */ + return; + } else if (st1_tag != TW_NaN) { + if (((st0_tag == TW_Denormal) + || (st1_tag == TW_Denormal)) + && (denormal_operand() < 0)) + return; + + if (st1_tag == TW_Infinity) { + /* fprem(Valid,Infinity) is o.k. */ + setcc(0); + return; + } + } + } else if (st0_tag == TW_Infinity) { + if (st1_tag != TW_NaN) { + arith_invalid(0); /* fprem(Infinity,?) is invalid */ + return; + } + } + + /* One of the registers must contain a NaN if we got here. */ + +#ifdef PARANOID + if ((st0_tag != TW_NaN) && (st1_tag != TW_NaN)) + EXCEPTION(EX_INTERNAL | 0x118); +#endif /* PARANOID */ + + real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr); + +} + +/* ST(1) <- ST(1) * log ST; pop ST */ +static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1), exponent; + u_char st1_tag = FPU_gettagi(1); + u_char sign; + int e, tag; + + clear_C1(); + + if ((st0_tag == TAG_Valid) && (st1_tag == TAG_Valid)) { + both_valid: + /* Both regs are Valid or Denormal */ + if (signpositive(st0_ptr)) { + if (st0_tag == TW_Denormal) + FPU_to_exp16(st0_ptr, st0_ptr); + else + /* Convert st(0) for internal use. */ + setexponent16(st0_ptr, exponent(st0_ptr)); + + if ((st0_ptr->sigh == 0x80000000) + && (st0_ptr->sigl == 0)) { + /* Special case. The result can be precise. */ + u_char esign; + e = exponent16(st0_ptr); + if (e >= 0) { + exponent.sigh = e; + esign = SIGN_POS; + } else { + exponent.sigh = -e; + esign = SIGN_NEG; + } + exponent.sigl = 0; + setexponent16(&exponent, 31); + tag = FPU_normalize_nuo(&exponent); + stdexp(&exponent); + setsign(&exponent, esign); + tag = + FPU_mul(&exponent, tag, 1, FULL_PRECISION); + if (tag >= 0) + FPU_settagi(1, tag); + } else { + /* The usual case */ + sign = getsign(st1_ptr); + if (st1_tag == TW_Denormal) + FPU_to_exp16(st1_ptr, st1_ptr); + else + /* Convert st(1) for internal use. */ + setexponent16(st1_ptr, + exponent(st1_ptr)); + poly_l2(st0_ptr, st1_ptr, sign); + } + } else { + /* negative */ + if (arith_invalid(1) < 0) + return; + } + + FPU_pop(); + + return; + } + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + if (st1_tag == TAG_Special) + st1_tag = FPU_Special(st1_ptr); + + if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) { + FPU_stack_underflow_pop(1); + return; + } else if ((st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal)) { + if (st0_tag == TAG_Zero) { + if (st1_tag == TAG_Zero) { + /* Both args zero is invalid */ + if (arith_invalid(1) < 0) + return; + } else { + u_char sign; + sign = getsign(st1_ptr) ^ SIGN_NEG; + if (FPU_divide_by_zero(1, sign) < 0) + return; + + setsign(st1_ptr, sign); + } + } else if (st1_tag == TAG_Zero) { + /* st(1) contains zero, st(0) valid <> 0 */ + /* Zero is the valid answer */ + sign = getsign(st1_ptr); + + if (signnegative(st0_ptr)) { + /* log(negative) */ + if (arith_invalid(1) < 0) + return; + } else if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + else { + if (exponent(st0_ptr) < 0) + sign ^= SIGN_NEG; + + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + setsign(st1_ptr, sign); + } + } else { + /* One or both operands are denormals. */ + if (denormal_operand() < 0) + return; + goto both_valid; + } + } else if ((st0_tag == TW_NaN) || (st1_tag == TW_NaN)) { + if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0) + return; + } + /* One or both arg must be an infinity */ + else if (st0_tag == TW_Infinity) { + if ((signnegative(st0_ptr)) || (st1_tag == TAG_Zero)) { + /* log(-infinity) or 0*log(infinity) */ + if (arith_invalid(1) < 0) + return; + } else { + u_char sign = getsign(st1_ptr); + + if ((st1_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + + FPU_copy_to_reg1(&CONST_INF, TAG_Special); + setsign(st1_ptr, sign); + } + } + /* st(1) must be infinity here */ + else if (((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) + && (signpositive(st0_ptr))) { + if (exponent(st0_ptr) >= 0) { + if ((exponent(st0_ptr) == 0) && + (st0_ptr->sigh == 0x80000000) && + (st0_ptr->sigl == 0)) { + /* st(0) holds 1.0 */ + /* infinity*log(1) */ + if (arith_invalid(1) < 0) + return; + } + /* else st(0) is positive and > 1.0 */ + } else { + /* st(0) is positive and < 1.0 */ + + if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + + changesign(st1_ptr); + } + } else { + /* st(0) must be zero or negative */ + if (st0_tag == TAG_Zero) { + /* This should be invalid, but a real 80486 is happy with it. */ + +#ifndef PECULIAR_486 + sign = getsign(st1_ptr); + if (FPU_divide_by_zero(1, sign) < 0) + return; +#endif /* PECULIAR_486 */ + + changesign(st1_ptr); + } else if (arith_invalid(1) < 0) /* log(negative) */ + return; + } + + FPU_pop(); +} + +static void fpatan(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + int tag; + + clear_C1(); + if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) { + valid_atan: + + poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag); + + FPU_pop(); + + return; + } + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + if (st1_tag == TAG_Special) + st1_tag = FPU_Special(st1_ptr); + + if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) { + if (denormal_operand() < 0) + return; + + goto valid_atan; + } else if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) { + FPU_stack_underflow_pop(1); + return; + } else if ((st0_tag == TW_NaN) || (st1_tag == TW_NaN)) { + if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0) + FPU_pop(); + return; + } else if ((st0_tag == TW_Infinity) || (st1_tag == TW_Infinity)) { + u_char sign = getsign(st1_ptr); + if (st0_tag == TW_Infinity) { + if (st1_tag == TW_Infinity) { + if (signpositive(st0_ptr)) { + FPU_copy_to_reg1(&CONST_PI4, TAG_Valid); + } else { + setpositive(st1_ptr); + tag = + FPU_u_add(&CONST_PI4, &CONST_PI2, + st1_ptr, FULL_PRECISION, + SIGN_POS, + exponent(&CONST_PI4), + exponent(&CONST_PI2)); + if (tag >= 0) + FPU_settagi(1, tag); + } + } else { + if ((st1_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + + if (signpositive(st0_ptr)) { + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + setsign(st1_ptr, sign); /* An 80486 preserves the sign */ + FPU_pop(); + return; + } else { + FPU_copy_to_reg1(&CONST_PI, TAG_Valid); + } + } + } else { + /* st(1) is infinity, st(0) not infinity */ + if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + + FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); + } + setsign(st1_ptr, sign); + } else if (st1_tag == TAG_Zero) { + /* st(0) must be valid or zero */ + u_char sign = getsign(st1_ptr); + + if ((st0_tag == TW_Denormal) && (denormal_operand() < 0)) + return; + + if (signpositive(st0_ptr)) { + /* An 80486 preserves the sign */ + FPU_pop(); + return; + } + + FPU_copy_to_reg1(&CONST_PI, TAG_Valid); + setsign(st1_ptr, sign); + } else if (st0_tag == TAG_Zero) { + /* st(1) must be TAG_Valid here */ + u_char sign = getsign(st1_ptr); + + if ((st1_tag == TW_Denormal) && (denormal_operand() < 0)) + return; + + FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); + setsign(st1_ptr, sign); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x125); +#endif /* PARANOID */ + + FPU_pop(); + set_precision_flag_up(); /* We do not really know if up or down */ +} + +static void fprem(FPU_REG *st0_ptr, u_char st0_tag) +{ + do_fprem(st0_ptr, st0_tag, RC_CHOP); +} + +static void fprem1(FPU_REG *st0_ptr, u_char st0_tag) +{ + do_fprem(st0_ptr, st0_tag, RC_RND); +} + +static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag) +{ + u_char sign, sign1; + FPU_REG *st1_ptr = &st(1), a, b; + u_char st1_tag = FPU_gettagi(1); + + clear_C1(); + if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) { + valid_yl2xp1: + + sign = getsign(st0_ptr); + sign1 = getsign(st1_ptr); + + FPU_to_exp16(st0_ptr, &a); + FPU_to_exp16(st1_ptr, &b); + + if (poly_l2p1(sign, sign1, &a, &b, st1_ptr)) + return; + + FPU_pop(); + return; + } + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + if (st1_tag == TAG_Special) + st1_tag = FPU_Special(st1_ptr); + + if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) { + if (denormal_operand() < 0) + return; + + goto valid_yl2xp1; + } else if ((st0_tag == TAG_Empty) | (st1_tag == TAG_Empty)) { + FPU_stack_underflow_pop(1); + return; + } else if (st0_tag == TAG_Zero) { + switch (st1_tag) { + case TW_Denormal: + if (denormal_operand() < 0) + return; + + case TAG_Zero: + case TAG_Valid: + setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr)); + FPU_copy_to_reg1(st0_ptr, st0_tag); + break; + + case TW_Infinity: + /* Infinity*log(1) */ + if (arith_invalid(1) < 0) + return; + break; + + case TW_NaN: + if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0) + return; + break; + + default: +#ifdef PARANOID + EXCEPTION(EX_INTERNAL | 0x116); + return; +#endif /* PARANOID */ + break; + } + } else if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) { + switch (st1_tag) { + case TAG_Zero: + if (signnegative(st0_ptr)) { + if (exponent(st0_ptr) >= 0) { + /* st(0) holds <= -1.0 */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if (arith_invalid(1) < 0) + return; +#endif /* PECULIAR_486 */ + } else if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + else + changesign(st1_ptr); + } else if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + break; + + case TW_Infinity: + if (signnegative(st0_ptr)) { + if ((exponent(st0_ptr) >= 0) && + !((st0_ptr->sigh == 0x80000000) && + (st0_ptr->sigl == 0))) { + /* st(0) holds < -1.0 */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if (arith_invalid(1) < 0) + return; +#endif /* PECULIAR_486 */ + } else if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + else + changesign(st1_ptr); + } else if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + break; + + case TW_NaN: + if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0) + return; + } + + } else if (st0_tag == TW_NaN) { + if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0) + return; + } else if (st0_tag == TW_Infinity) { + if (st1_tag == TW_NaN) { + if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0) + return; + } else if (signnegative(st0_ptr)) { +#ifndef PECULIAR_486 + /* This should have higher priority than denormals, but... */ + if (arith_invalid(1) < 0) /* log(-infinity) */ + return; +#endif /* PECULIAR_486 */ + if ((st1_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; +#ifdef PECULIAR_486 + /* Denormal operands actually get higher priority */ + if (arith_invalid(1) < 0) /* log(-infinity) */ + return; +#endif /* PECULIAR_486 */ + } else if (st1_tag == TAG_Zero) { + /* log(infinity) */ + if (arith_invalid(1) < 0) + return; + } + + /* st(1) must be valid here. */ + + else if ((st1_tag == TW_Denormal) && (denormal_operand() < 0)) + return; + + /* The Manual says that log(Infinity) is invalid, but a real + 80486 sensibly says that it is o.k. */ + else { + u_char sign = getsign(st1_ptr); + FPU_copy_to_reg1(&CONST_INF, TAG_Special); + setsign(st1_ptr, sign); + } + } +#ifdef PARANOID + else { + EXCEPTION(EX_INTERNAL | 0x117); + return; + } +#endif /* PARANOID */ + + FPU_pop(); + return; + +} + +static void fscale(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + int old_cw = control_word; + u_char sign = getsign(st0_ptr); + + clear_C1(); + if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) { + long scale; + FPU_REG tmp; + + /* Convert register for internal use. */ + setexponent16(st0_ptr, exponent(st0_ptr)); + + valid_scale: + + if (exponent(st1_ptr) > 30) { + /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */ + + if (signpositive(st1_ptr)) { + EXCEPTION(EX_Overflow); + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + } else { + EXCEPTION(EX_Underflow); + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + } + setsign(st0_ptr, sign); + return; + } + + control_word &= ~CW_RC; + control_word |= RC_CHOP; + reg_copy(st1_ptr, &tmp); + FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */ + control_word = old_cw; + scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl; + scale += exponent16(st0_ptr); + + setexponent16(st0_ptr, scale); + + /* Use FPU_round() to properly detect under/overflow etc */ + FPU_round(st0_ptr, 0, 0, control_word, sign); + + return; + } + + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + if (st1_tag == TAG_Special) + st1_tag = FPU_Special(st1_ptr); + + if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) { + switch (st1_tag) { + case TAG_Valid: + /* st(0) must be a denormal */ + if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + + FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */ + goto valid_scale; + + case TAG_Zero: + if (st0_tag == TW_Denormal) + denormal_operand(); + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if ((st0_tag == TW_Denormal) + && (denormal_operand() < 0)) + return; + + if (signpositive(st1_ptr)) + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + else + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(st0_ptr, sign); + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } else if (st0_tag == TAG_Zero) { + switch (st1_tag) { + case TAG_Valid: + case TAG_Zero: + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if (signpositive(st1_ptr)) + arith_invalid(0); /* Zero scaled by +Infinity */ + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } else if (st0_tag == TW_Infinity) { + switch (st1_tag) { + case TAG_Valid: + case TAG_Zero: + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if (signnegative(st1_ptr)) + arith_invalid(0); /* Infinity scaled by -Infinity */ + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } else if (st0_tag == TW_NaN) { + if (st1_tag != TAG_Empty) { + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } +#ifdef PARANOID + if (!((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty))) { + EXCEPTION(EX_INTERNAL | 0x115); + return; + } +#endif + + /* At least one of st(0), st(1) must be empty */ + FPU_stack_underflow(); + +} + +/*---------------------------------------------------------------------------*/ + +static FUNC_ST0 const trig_table_a[] = { + f2xm1, fyl2x, fptan, fpatan, + fxtract, fprem1, (FUNC_ST0) fdecstp, (FUNC_ST0) fincstp +}; + +void FPU_triga(void) +{ + (trig_table_a[FPU_rm]) (&st(0), FPU_gettag0()); +} + +static FUNC_ST0 const trig_table_b[] = { + fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0) fsin, fcos +}; + +void FPU_trigb(void) +{ + (trig_table_b[FPU_rm]) (&st(0), FPU_gettag0()); +} diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c new file mode 100644 index 000000000..b82ca14ba --- /dev/null +++ b/arch/x86/math-emu/get_address.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | get_address.c | + | | + | Get the effective address from an FPU instruction. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include <linux/stddef.h> + +#include <linux/uaccess.h> +#include <asm/vm86.h> + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" + +#define FPU_WRITE_BIT 0x10 + +static int reg_offset[] = { + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di) +}; + +#define REG_(x) (*(long *)(reg_offset[(x)] + (u_char *)FPU_info->regs)) + +static int reg_offset_vm86[] = { + offsetof(struct pt_regs, cs), + offsetof(struct kernel_vm86_regs, ds), + offsetof(struct kernel_vm86_regs, es), + offsetof(struct kernel_vm86_regs, fs), + offsetof(struct kernel_vm86_regs, gs), + offsetof(struct pt_regs, ss), + offsetof(struct kernel_vm86_regs, ds) +}; + +#define VM86_REG_(x) (*(unsigned short *) \ + (reg_offset_vm86[((unsigned)x)] + (u_char *)FPU_info->regs)) + +static int reg_offset_pm[] = { + offsetof(struct pt_regs, cs), + offsetof(struct pt_regs, ds), + offsetof(struct pt_regs, es), + offsetof(struct pt_regs, fs), + offsetof(struct pt_regs, ds), /* dummy, not saved on stack */ + offsetof(struct pt_regs, ss), + offsetof(struct pt_regs, ds) +}; + +#define PM_REG_(x) (*(unsigned short *) \ + (reg_offset_pm[((unsigned)x)] + (u_char *)FPU_info->regs)) + +/* Decode the SIB byte. This function assumes mod != 0 */ +static int sib(int mod, unsigned long *fpu_eip) +{ + u_char ss, index, base; + long offset; + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */ + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + ss = base >> 6; + index = (base >> 3) & 7; + base &= 7; + + if ((mod == 0) && (base == 5)) + offset = 0; /* No base register */ + else + offset = REG_(base); + + if (index == 4) { + /* No index register */ + /* A non-zero ss is illegal */ + if (ss) + EXCEPTION(EX_Invalid); + } else { + offset += (REG_(index)) << ss; + } + + if (mod == 1) { + /* 8 bit signed displacement */ + long displacement; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(displacement, (signed char __user *)(*fpu_eip)); + offset += displacement; + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + } else if (mod == 2 || base == 5) { /* The second condition also has mod==0 */ + /* 32 bit displacement */ + long displacement; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(displacement, (long __user *)(*fpu_eip)); + offset += displacement; + RE_ENTRANT_CHECK_ON; + (*fpu_eip) += 4; + } + + return offset; +} + +static unsigned long vm86_segment(u_char segment, struct address *addr) +{ + segment--; +#ifdef PARANOID + if (segment > PREFIX_SS_) { + EXCEPTION(EX_INTERNAL | 0x130); + math_abort(FPU_info, SIGSEGV); + } +#endif /* PARANOID */ + addr->selector = VM86_REG_(segment); + return (unsigned long)VM86_REG_(segment) << 4; +} + +/* This should work for 16 and 32 bit protected mode. */ +static long pm_address(u_char FPU_modrm, u_char segment, + struct address *addr, long offset) +{ + struct desc_struct descriptor; + unsigned long base_address, limit, address, seg_top; + + segment--; + +#ifdef PARANOID + /* segment is unsigned, so this also detects if segment was 0: */ + if (segment > PREFIX_SS_) { + EXCEPTION(EX_INTERNAL | 0x132); + math_abort(FPU_info, SIGSEGV); + } +#endif /* PARANOID */ + + switch (segment) { + case PREFIX_GS_ - 1: + /* user gs handling can be lazy, use special accessors */ + addr->selector = get_user_gs(FPU_info->regs); + break; + default: + addr->selector = PM_REG_(segment); + } + + descriptor = FPU_get_ldt_descriptor(addr->selector); + base_address = seg_get_base(&descriptor); + address = base_address + offset; + limit = seg_get_limit(&descriptor) + 1; + limit *= seg_get_granularity(&descriptor); + limit += base_address - 1; + if (limit < base_address) + limit = 0xffffffff; + + if (seg_expands_down(&descriptor)) { + if (descriptor.g) { + seg_top = 0xffffffff; + } else { + seg_top = base_address + (1 << 20); + if (seg_top < base_address) + seg_top = 0xffffffff; + } + access_limit = + (address <= limit) || (address >= seg_top) ? 0 : + ((seg_top - address) >= 255 ? 255 : seg_top - address); + } else { + access_limit = + (address > limit) || (address < base_address) ? 0 : + ((limit - address) >= 254 ? 255 : limit - address + 1); + } + if (seg_execute_only(&descriptor) || + (!seg_writable(&descriptor) && (FPU_modrm & FPU_WRITE_BIT))) { + access_limit = 0; + } + return address; +} + +/* + MOD R/M byte: MOD == 3 has a special use for the FPU + SIB byte used iff R/M = 100b + + 7 6 5 4 3 2 1 0 + ..... ......... ......... + MOD OPCODE(2) R/M + + SIB byte + + 7 6 5 4 3 2 1 0 + ..... ......... ......... + SS INDEX BASE + +*/ + +void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, fpu_addr_modes addr_modes) +{ + u_char mod; + unsigned rm = FPU_modrm & 7; + long *cpu_reg_ptr; + int address = 0; /* Initialized just to stop compiler warnings. */ + + /* Memory accessed via the cs selector is write protected + in `non-segmented' 32 bit protected mode. */ + if (!addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) + && (addr_modes.override.segment == PREFIX_CS_)) { + math_abort(FPU_info, SIGSEGV); + } + + addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ + + mod = (FPU_modrm >> 6) & 3; + + if (rm == 4 && mod != 3) { + address = sib(mod, fpu_eip); + } else { + cpu_reg_ptr = ®_(rm); + switch (mod) { + case 0: + if (rm == 5) { + /* Special case: disp32 */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(address, + (unsigned long __user + *)(*fpu_eip)); + (*fpu_eip) += 4; + RE_ENTRANT_CHECK_ON; + addr->offset = address; + return (void __user *)address; + } else { + address = *cpu_reg_ptr; /* Just return the contents + of the cpu register */ + addr->offset = address; + return (void __user *)address; + } + case 1: + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(address, (signed char __user *)(*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + break; + case 2: + /* 32 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(address, (long __user *)(*fpu_eip)); + (*fpu_eip) += 4; + RE_ENTRANT_CHECK_ON; + break; + case 3: + /* Not legal for the FPU */ + EXCEPTION(EX_Invalid); + } + address += *cpu_reg_ptr; + } + + addr->offset = address; + + switch (addr_modes.default_mode) { + case 0: + break; + case VM86: + address += vm86_segment(addr_modes.override.segment, addr); + break; + case PM16: + case SEG32: + address = pm_address(FPU_modrm, addr_modes.override.segment, + addr, address); + break; + default: + EXCEPTION(EX_INTERNAL | 0x133); + } + + return (void __user *)address; +} + +void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, fpu_addr_modes addr_modes) +{ + u_char mod; + unsigned rm = FPU_modrm & 7; + int address = 0; /* Default used for mod == 0 */ + + /* Memory accessed via the cs selector is write protected + in `non-segmented' 32 bit protected mode. */ + if (!addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) + && (addr_modes.override.segment == PREFIX_CS_)) { + math_abort(FPU_info, SIGSEGV); + } + + addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ + + mod = (FPU_modrm >> 6) & 3; + + switch (mod) { + case 0: + if (rm == 6) { + /* Special case: disp16 */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(2); + FPU_get_user(address, + (unsigned short __user *)(*fpu_eip)); + (*fpu_eip) += 2; + RE_ENTRANT_CHECK_ON; + goto add_segment; + } + break; + case 1: + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(address, (signed char __user *)(*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + break; + case 2: + /* 16 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(2); + FPU_get_user(address, (unsigned short __user *)(*fpu_eip)); + (*fpu_eip) += 2; + RE_ENTRANT_CHECK_ON; + break; + case 3: + /* Not legal for the FPU */ + EXCEPTION(EX_Invalid); + break; + } + switch (rm) { + case 0: + address += FPU_info->regs->bx + FPU_info->regs->si; + break; + case 1: + address += FPU_info->regs->bx + FPU_info->regs->di; + break; + case 2: + address += FPU_info->regs->bp + FPU_info->regs->si; + if (addr_modes.override.segment == PREFIX_DEFAULT) + addr_modes.override.segment = PREFIX_SS_; + break; + case 3: + address += FPU_info->regs->bp + FPU_info->regs->di; + if (addr_modes.override.segment == PREFIX_DEFAULT) + addr_modes.override.segment = PREFIX_SS_; + break; + case 4: + address += FPU_info->regs->si; + break; + case 5: + address += FPU_info->regs->di; + break; + case 6: + address += FPU_info->regs->bp; + if (addr_modes.override.segment == PREFIX_DEFAULT) + addr_modes.override.segment = PREFIX_SS_; + break; + case 7: + address += FPU_info->regs->bx; + break; + } + + add_segment: + address &= 0xffff; + + addr->offset = address; + + switch (addr_modes.default_mode) { + case 0: + break; + case VM86: + address += vm86_segment(addr_modes.override.segment, addr); + break; + case PM16: + case SEG32: + address = pm_address(FPU_modrm, addr_modes.override.segment, + addr, address); + break; + default: + EXCEPTION(EX_INTERNAL | 0x131); + } + + return (void __user *)address; +} diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c new file mode 100644 index 000000000..f821a9cd7 --- /dev/null +++ b/arch/x86/math-emu/load_store.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | load_store.c | + | | + | This file contains most of the code to interpret the FPU instructions | + | which load and store from user memory. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include <linux/uaccess.h> + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" + +#define _NONE_ 0 /* st0_ptr etc not needed */ +#define _REG0_ 1 /* Will be storing st(0) */ +#define _PUSH_ 3 /* Need to check for space to push onto stack */ +#define _null_ 4 /* Function illegal or not implemented */ + +#define pop_0() { FPU_settag0(TAG_Empty); top++; } + +/* index is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */ +static u_char const type_table[32] = { + _PUSH_, _PUSH_, _PUSH_, _PUSH_, /* /0: d9:fld f32, db:fild m32, dd:fld f64, df:fild m16 */ + _null_, _REG0_, _REG0_, _REG0_, /* /1: d9:undef, db,dd,df:fisttp m32/64/16 */ + _REG0_, _REG0_, _REG0_, _REG0_, /* /2: d9:fst f32, db:fist m32, dd:fst f64, df:fist m16 */ + _REG0_, _REG0_, _REG0_, _REG0_, /* /3: d9:fstp f32, db:fistp m32, dd:fstp f64, df:fistp m16 */ + _NONE_, _null_, _NONE_, _PUSH_, + _NONE_, _PUSH_, _null_, _PUSH_, + _NONE_, _null_, _NONE_, _REG0_, + _NONE_, _REG0_, _NONE_, _REG0_ +}; + +u_char const data_sizes_16[32] = { + 4, 4, 8, 2, + 0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */ + 4, 4, 8, 2, + 4, 4, 8, 2, + 14, 0, 94, 10, 2, 10, 0, 8, + 14, 0, 94, 10, 2, 10, 2, 8 +}; + +static u_char const data_sizes_32[32] = { + 4, 4, 8, 2, + 0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */ + 4, 4, 8, 2, + 4, 4, 8, 2, + 28, 0, 108, 10, 2, 10, 0, 8, + 28, 0, 108, 10, 2, 10, 2, 8 +}; + +int FPU_load_store(u_char type, fpu_addr_modes addr_modes, + void __user * data_address) +{ + FPU_REG loaded_data; + FPU_REG *st0_ptr; + u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */ + u_char loaded_tag; + int sv_cw; + + st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ + + if (addr_modes.default_mode & PROTECTED) { + if (addr_modes.default_mode == SEG32) { + if (access_limit < data_sizes_32[type]) + math_abort(FPU_info, SIGSEGV); + } else if (addr_modes.default_mode == PM16) { + if (access_limit < data_sizes_16[type]) + math_abort(FPU_info, SIGSEGV); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x140); +#endif /* PARANOID */ + } + + switch (type_table[type]) { + case _NONE_: + break; + case _REG0_: + st0_ptr = &st(0); /* Some of these instructions pop after + storing */ + st0_tag = FPU_gettag0(); + break; + case _PUSH_: + { + if (FPU_gettagi(-1) != TAG_Empty) { + FPU_stack_overflow(); + return 0; + } + top--; + st0_ptr = &st(0); + } + break; + case _null_: + FPU_illegal(); + return 0; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL | 0x141); + return 0; +#endif /* PARANOID */ + } + + switch (type) { + /* type is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */ + case 000: /* fld m32real (d9 /0) */ + clear_C1(); + loaded_tag = + FPU_load_single((float __user *)data_address, &loaded_data); + if ((loaded_tag == TAG_Special) + && isNaN(&loaded_data) + && (real_1op_NaN(&loaded_data) < 0)) { + top++; + break; + } + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 001: /* fild m32int (db /0) */ + clear_C1(); + loaded_tag = + FPU_load_int32((long __user *)data_address, &loaded_data); + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 002: /* fld m64real (dd /0) */ + clear_C1(); + loaded_tag = + FPU_load_double((double __user *)data_address, + &loaded_data); + if ((loaded_tag == TAG_Special) + && isNaN(&loaded_data) + && (real_1op_NaN(&loaded_data) < 0)) { + top++; + break; + } + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 003: /* fild m16int (df /0) */ + clear_C1(); + loaded_tag = + FPU_load_int16((short __user *)data_address, &loaded_data); + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + /* case 004: undefined (d9 /1) */ + /* fisttp are enabled if CPUID(1).ECX(0) "sse3" is set */ + case 005: /* fisttp m32int (db /1) */ + clear_C1(); + sv_cw = control_word; + control_word |= RC_CHOP; + if (FPU_store_int32 + (st0_ptr, st0_tag, (long __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + control_word = sv_cw; + break; + case 006: /* fisttp m64int (dd /1) */ + clear_C1(); + sv_cw = control_word; + control_word |= RC_CHOP; + if (FPU_store_int64 + (st0_ptr, st0_tag, (long long __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + control_word = sv_cw; + break; + case 007: /* fisttp m16int (df /1) */ + clear_C1(); + sv_cw = control_word; + control_word |= RC_CHOP; + if (FPU_store_int16 + (st0_ptr, st0_tag, (short __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + control_word = sv_cw; + break; + case 010: /* fst m32real */ + clear_C1(); + FPU_store_single(st0_ptr, st0_tag, + (float __user *)data_address); + break; + case 011: /* fist m32int */ + clear_C1(); + FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address); + break; + case 012: /* fst m64real */ + clear_C1(); + FPU_store_double(st0_ptr, st0_tag, + (double __user *)data_address); + break; + case 013: /* fist m16int */ + clear_C1(); + FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address); + break; + case 014: /* fstp m32real */ + clear_C1(); + if (FPU_store_single + (st0_ptr, st0_tag, (float __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 015: /* fistp m32int */ + clear_C1(); + if (FPU_store_int32 + (st0_ptr, st0_tag, (long __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 016: /* fstp m64real */ + clear_C1(); + if (FPU_store_double + (st0_ptr, st0_tag, (double __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 017: /* fistp m16int */ + clear_C1(); + if (FPU_store_int16 + (st0_ptr, st0_tag, (short __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 020: /* fldenv m14/28byte */ + fldenv(addr_modes, (u_char __user *) data_address); + /* Ensure that the values just loaded are not changed by + fix-up operations. */ + return 1; + case 022: /* frstor m94/108byte */ + frstor(addr_modes, (u_char __user *) data_address); + /* Ensure that the values just loaded are not changed by + fix-up operations. */ + return 1; + case 023: /* fbld m80dec */ + clear_C1(); + loaded_tag = FPU_load_bcd((u_char __user *) data_address); + FPU_settag0(loaded_tag); + break; + case 024: /* fldcw */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, data_address, 2); + FPU_get_user(control_word, + (unsigned short __user *)data_address); + RE_ENTRANT_CHECK_ON; + if (partial_status & ~control_word & CW_Exceptions) + partial_status |= (SW_Summary | SW_Backward); + else + partial_status &= ~(SW_Summary | SW_Backward); +#ifdef PECULIAR_486 + control_word |= 0x40; /* An 80486 appears to always set this bit */ +#endif /* PECULIAR_486 */ + return 1; + case 025: /* fld m80real */ + clear_C1(); + loaded_tag = + FPU_load_extended((long double __user *)data_address, 0); + FPU_settag0(loaded_tag); + break; + case 027: /* fild m64int */ + clear_C1(); + loaded_tag = FPU_load_int64((long long __user *)data_address); + if (loaded_tag == TAG_Error) + return 0; + FPU_settag0(loaded_tag); + break; + case 030: /* fstenv m14/28byte */ + fstenv(addr_modes, (u_char __user *) data_address); + return 1; + case 032: /* fsave */ + fsave(addr_modes, (u_char __user *) data_address); + return 1; + case 033: /* fbstp m80dec */ + clear_C1(); + if (FPU_store_bcd + (st0_ptr, st0_tag, (u_char __user *) data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 034: /* fstcw m16int */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, data_address, 2); + FPU_put_user(control_word, + (unsigned short __user *)data_address); + RE_ENTRANT_CHECK_ON; + return 1; + case 035: /* fstp m80real */ + clear_C1(); + if (FPU_store_extended + (st0_ptr, st0_tag, (long double __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 036: /* fstsw m2byte */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, data_address, 2); + FPU_put_user(status_word(), + (unsigned short __user *)data_address); + RE_ENTRANT_CHECK_ON; + return 1; + case 037: /* fistp m64int */ + clear_C1(); + if (FPU_store_int64 + (st0_ptr, st0_tag, (long long __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + } + return 0; +} diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S new file mode 100644 index 000000000..3e489122a --- /dev/null +++ b/arch/x86/math-emu/mul_Xsig.S @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | mul_Xsig.S | + | | + | Multiply a 12 byte fixed point number by another fixed point number. | + | | + | Copyright (C) 1992,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void mul32_Xsig(Xsig *x, unsigned b) | + | | + | void mul64_Xsig(Xsig *x, unsigned long long *b) | + | | + | void mul_Xsig_Xsig(Xsig *x, unsigned *b) | + | | + | The result is neither rounded nor normalized, and the ls bit or so may | + | be wrong. | + | | + +---------------------------------------------------------------------------*/ + .file "mul_Xsig.S" + + +#include "fpu_emu.h" + +.text +ENTRY(mul32_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull %ecx /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull %ecx /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull %ecx /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%eax + movl %eax,(%esi) + movl -8(%ebp),%eax + movl %eax,4(%esi) + movl -4(%ebp),%eax + movl %eax,8(%esi) + + popl %esi + leave + ret +ENDPROC(mul32_Xsig) + + +ENTRY(mul64_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull 4(%ecx) /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 4(%ecx) /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 4(%ecx) /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%eax + movl %eax,(%esi) + movl -8(%ebp),%eax + movl %eax,4(%esi) + movl -4(%ebp),%eax + movl %eax,8(%esi) + + popl %esi + leave + ret +ENDPROC(mul64_Xsig) + + + +ENTRY(mul_Xsig_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull 8(%ecx) /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 4(%ecx) /* midl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 8(%ecx) /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 4(%ecx) /* midl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 8(%ecx) /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%edx + movl %edx,(%esi) + movl -8(%ebp),%edx + movl %edx,4(%esi) + movl -4(%ebp),%edx + movl %edx,8(%esi) + + popl %esi + leave + ret +ENDPROC(mul_Xsig_Xsig) diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h new file mode 100644 index 000000000..fc1c887ca --- /dev/null +++ b/arch/x86/math-emu/poly.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | poly.h | + | | + | Header file for the FPU-emu poly*.c source files. | + | | + | Copyright (C) 1994,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | Declarations and definitions for functions operating on Xsig (12-byte | + | extended-significand) quantities. | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _POLY_H +#define _POLY_H + +/* This 12-byte structure is used to improve the accuracy of computation + of transcendental functions. + Intended to be used to get results better than 8-byte computation + allows. 9-byte would probably be sufficient. + */ +typedef struct { + unsigned long lsw; + unsigned long midw; + unsigned long msw; +} Xsig; + +asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, + unsigned long long *result); +asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, + const unsigned long long terms[], const int n); + +asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); +asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); +asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); + +asmlinkage void shr_Xsig(Xsig *, const int n); +asmlinkage int round_Xsig(Xsig *); +asmlinkage int norm_Xsig(Xsig *); +asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); + +/* Macro to extract the most significant 32 bits from a long long */ +#define LL_MSW(x) (((unsigned long *)&x)[1]) + +/* Macro to initialize an Xsig struct */ +#define MK_XSIG(a,b,c) { c, b, a } + +/* Macro to access the 8 ms bytes of an Xsig as a long long */ +#define XSIG_LL(x) (*(unsigned long long *)&x.midw) + +/* + Need to run gcc with optimizations on to get these to + actually be in-line. + */ + +/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result. + The answer is the ms word of the product. */ +/* Some versions of gcc make it difficult to stop eax from being clobbered. + Merely specifying that it is used doesn't work... + */ +static inline unsigned long mul_32_32(const unsigned long arg1, + const unsigned long arg2) +{ + int retval; + asm volatile ("mull %2; movl %%edx,%%eax":"=a" (retval) + :"0"(arg1), "g"(arg2) + :"dx"); + return retval; +} + +/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ +static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) +{ + asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" + "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" + "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" + "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n":"=g" + (*dest):"g"(dest), "g"(x2) + :"ax", "si", "di"); +} + +/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ +/* Note: the constraints in the asm statement didn't always work properly + with gcc 2.5.8. Changing from using edi to using ecx got around the + problem, but keep fingers crossed! */ +static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) +{ + asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" + "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" + "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" + "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" + "jnc 0f;\n" + "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" + "movl %4,%%ecx; incl (%%ecx)\n" + "movl $1,%%eax; jmp 1f;\n" + "0: xorl %%eax,%%eax;\n" "1:\n":"=g" (*exp), "=g"(*dest) + :"g"(dest), "g"(x2), "g"(exp) + :"cx", "si", "ax"); +} + +/* Negate (subtract from 1.0) the 12 byte Xsig */ +/* This is faster in a loop on my 386 than using the "neg" instruction. */ +static inline void negate_Xsig(Xsig *x) +{ + asm volatile ("movl %1,%%esi;\n" + "xorl %%ecx,%%ecx;\n" + "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" + "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" + "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n":"=g" + (*x):"g"(x):"si", "ax", "cx"); +} + +#endif /* _POLY_H */ diff --git a/arch/x86/math-emu/poly_2xm1.c b/arch/x86/math-emu/poly_2xm1.c new file mode 100644 index 000000000..aa33006ba --- /dev/null +++ b/arch/x86/math-emu/poly_2xm1.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | poly_2xm1.c | + | | + | Function to compute 2^x-1 by a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + +#define HIPOWER 11 +static const unsigned long long lterms[HIPOWER] = { + 0x0000000000000000LL, /* This term done separately as 12 bytes */ + 0xf5fdeffc162c7543LL, + 0x1c6b08d704a0bfa6LL, + 0x0276556df749cc21LL, + 0x002bb0ffcf14f6b8LL, + 0x0002861225ef751cLL, + 0x00001ffcbfcd5422LL, + 0x00000162c005d5f1LL, + 0x0000000da96ccb1bLL, + 0x0000000078d1b897LL, + 0x000000000422b029LL +}; + +static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194); + +/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0, + These numbers are 2^(1/4), 2^(1/2), and 2^(3/4) + */ +static const Xsig shiftterm0 = MK_XSIG(0, 0, 0); +static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318); +static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3); +static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9); + +static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1, + &shiftterm2, &shiftterm3 +}; + +/*--- poly_2xm1() -----------------------------------------------------------+ + | Requires st(0) which is TAG_Valid and < 1. | + +---------------------------------------------------------------------------*/ +int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result) +{ + long int exponent, shift; + unsigned long long Xll; + Xsig accumulator, Denom, argSignif; + u_char tag; + + exponent = exponent16(arg); + +#ifdef PARANOID + if (exponent >= 0) { /* Don't want a |number| >= 1.0 */ + /* Number negative, too large, or not Valid. */ + EXCEPTION(EX_INTERNAL | 0x127); + return 1; + } +#endif /* PARANOID */ + + argSignif.lsw = 0; + XSIG_LL(argSignif) = Xll = significand(arg); + + if (exponent == -1) { + shift = (argSignif.msw & 0x40000000) ? 3 : 2; + /* subtract 0.5 or 0.75 */ + exponent -= 2; + XSIG_LL(argSignif) <<= 2; + Xll <<= 2; + } else if (exponent == -2) { + shift = 1; + /* subtract 0.25 */ + exponent--; + XSIG_LL(argSignif) <<= 1; + Xll <<= 1; + } else + shift = 0; + + if (exponent < -2) { + /* Shift the argument right by the required places. */ + if (FPU_shrx(&Xll, -2 - exponent) >= 0x80000000U) + Xll++; /* round up */ + } + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER - 1); + mul_Xsig_Xsig(&accumulator, &argSignif); + shr_Xsig(&accumulator, 3); + + mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */ + add_two_Xsig(&accumulator, &argSignif, &exponent); + + if (shift) { + /* The argument is large, use the identity: + f(x+a) = f(a) * (f(x) + 1) - 1; + */ + shr_Xsig(&accumulator, -exponent); + accumulator.msw |= 0x80000000; /* add 1.0 */ + mul_Xsig_Xsig(&accumulator, shiftterm[shift]); + accumulator.msw &= 0x3fffffff; /* subtract 1.0 */ + exponent = 1; + } + + if (sign != SIGN_POS) { + /* The argument is negative, use the identity: + f(-x) = -f(x) / (1 + f(x)) + */ + Denom.lsw = accumulator.lsw; + XSIG_LL(Denom) = XSIG_LL(accumulator); + if (exponent < 0) + shr_Xsig(&Denom, -exponent); + else if (exponent > 0) { + /* exponent must be 1 here */ + XSIG_LL(Denom) <<= 1; + if (Denom.lsw & 0x80000000) + XSIG_LL(Denom) |= 1; + (Denom.lsw) <<= 1; + } + Denom.msw |= 0x80000000; /* add 1.0 */ + div_Xsig(&accumulator, &Denom, &accumulator); + } + + /* Convert to 64 bit signed-compatible */ + exponent += round_Xsig(&accumulator); + + result = &st(0); + significand(result) = XSIG_LL(accumulator); + setexponent16(result, exponent); + + tag = FPU_round(result, 1, 0, FULL_PRECISION, sign); + + setsign(result, sign); + FPU_settag0(tag); + + return 0; + +} diff --git a/arch/x86/math-emu/poly_atan.c b/arch/x86/math-emu/poly_atan.c new file mode 100644 index 000000000..7e7412c5a --- /dev/null +++ b/arch/x86/math-emu/poly_atan.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | poly_atan.c | + | | + | Compute the arctan of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "status_w.h" +#include "control_w.h" +#include "poly.h" + +#define HIPOWERon 6 /* odd poly, negative terms */ +static const unsigned long long oddnegterms[HIPOWERon] = { + 0x0000000000000000LL, /* Dummy (not for - 1.0) */ + 0x015328437f756467LL, + 0x0005dda27b73dec6LL, + 0x0000226bf2bfb91aLL, + 0x000000ccc439c5f7LL, + 0x0000000355438407LL +}; + +#define HIPOWERop 6 /* odd poly, positive terms */ +static const unsigned long long oddplterms[HIPOWERop] = { +/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */ + 0x0db55a71875c9ac2LL, + 0x0029fce2d67880b0LL, + 0x0000dfd3908b4596LL, + 0x00000550fd61dab4LL, + 0x0000001c9422b3f9LL, + 0x000000003e3301e1LL +}; + +static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL; + +static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa); + +static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b); + +/*--- poly_atan() -----------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, + FPU_REG *st1_ptr, u_char st1_tag) +{ + u_char transformed, inverted, sign1, sign2; + int exponent; + long int dummy_exp; + Xsig accumulator, Numer, Denom, accumulatore, argSignif, argSq, argSqSq; + u_char tag; + + sign1 = getsign(st0_ptr); + sign2 = getsign(st1_ptr); + if (st0_tag == TAG_Valid) { + exponent = exponent(st0_ptr); + } else { + /* This gives non-compatible stack contents... */ + FPU_to_exp16(st0_ptr, st0_ptr); + exponent = exponent16(st0_ptr); + } + if (st1_tag == TAG_Valid) { + exponent -= exponent(st1_ptr); + } else { + /* This gives non-compatible stack contents... */ + FPU_to_exp16(st1_ptr, st1_ptr); + exponent -= exponent16(st1_ptr); + } + + if ((exponent < 0) || ((exponent == 0) && + ((st0_ptr->sigh < st1_ptr->sigh) || + ((st0_ptr->sigh == st1_ptr->sigh) && + (st0_ptr->sigl < st1_ptr->sigl))))) { + inverted = 1; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = significand(st0_ptr); + XSIG_LL(Denom) = significand(st1_ptr); + } else { + inverted = 0; + exponent = -exponent; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = significand(st1_ptr); + XSIG_LL(Denom) = significand(st0_ptr); + } + div_Xsig(&Numer, &Denom, &argSignif); + exponent += norm_Xsig(&argSignif); + + if ((exponent >= -1) + || ((exponent == -2) && (argSignif.msw > 0xd413ccd0))) { + /* The argument is greater than sqrt(2)-1 (=0.414213562...) */ + /* Convert the argument by an identity for atan */ + transformed = 1; + + if (exponent >= 0) { +#ifdef PARANOID + if (!((exponent == 0) && + (argSignif.lsw == 0) && (argSignif.midw == 0) && + (argSignif.msw == 0x80000000))) { + EXCEPTION(EX_INTERNAL | 0x104); /* There must be a logic error */ + return; + } +#endif /* PARANOID */ + argSignif.msw = 0; /* Make the transformed arg -> 0.0 */ + } else { + Numer.lsw = Denom.lsw = argSignif.lsw; + XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif); + + if (exponent < -1) + shr_Xsig(&Numer, -1 - exponent); + negate_Xsig(&Numer); + + shr_Xsig(&Denom, -exponent); + Denom.msw |= 0x80000000; + + div_Xsig(&Numer, &Denom, &argSignif); + + exponent = -1 + norm_Xsig(&argSignif); + } + } else { + transformed = 0; + } + + argSq.lsw = argSignif.lsw; + argSq.midw = argSignif.midw; + argSq.msw = argSignif.msw; + mul_Xsig_Xsig(&argSq, &argSq); + + argSqSq.lsw = argSq.lsw; + argSqSq.midw = argSq.midw; + argSqSq.msw = argSq.msw; + mul_Xsig_Xsig(&argSqSq, &argSqSq); + + accumulatore.lsw = argSq.lsw; + XSIG_LL(accumulatore) = XSIG_LL(argSq); + + shr_Xsig(&argSq, 2 * (-1 - exponent - 1)); + shr_Xsig(&argSqSq, 4 * (-1 - exponent - 1)); + + /* Now have argSq etc with binary point at the left + .1xxxxxxxx */ + + /* Do the basic fixed point polynomial evaluation */ + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), + oddplterms, HIPOWERop - 1); + mul64_Xsig(&accumulator, &XSIG_LL(argSq)); + negate_Xsig(&accumulator); + polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, + HIPOWERon - 1); + negate_Xsig(&accumulator); + add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp); + + mul64_Xsig(&accumulatore, &denomterm); + shr_Xsig(&accumulatore, 1 + 2 * (-1 - exponent)); + accumulatore.msw |= 0x80000000; + + div_Xsig(&accumulator, &accumulatore, &accumulator); + + mul_Xsig_Xsig(&accumulator, &argSignif); + mul_Xsig_Xsig(&accumulator, &argSq); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &argSignif); + + if (transformed) { + /* compute pi/4 - accumulator */ + shr_Xsig(&accumulator, -1 - exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = -1; + } + + if (inverted) { + /* compute pi/2 - accumulator */ + shr_Xsig(&accumulator, -exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = 0; + } + + if (sign1) { + /* compute pi - accumulator */ + shr_Xsig(&accumulator, 1 - exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = 1; + } + + exponent += round_Xsig(&accumulator); + + significand(st1_ptr) = XSIG_LL(accumulator); + setexponent16(st1_ptr, exponent); + + tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2); + FPU_settagi(1, tag); + + set_precision_flag_up(); /* We do not really know if up or down, + use this as the default. */ + +} diff --git a/arch/x86/math-emu/poly_l2.c b/arch/x86/math-emu/poly_l2.c new file mode 100644 index 000000000..98b6949bb --- /dev/null +++ b/arch/x86/math-emu/poly_l2.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | poly_l2.c | + | | + | Compute the base 2 log of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + +static void log2_kernel(FPU_REG const *arg, u_char argsign, + Xsig * accum_result, long int *expon); + +/*--- poly_l2() -------------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + +---------------------------------------------------------------------------*/ +void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign) +{ + long int exponent, expon, expon_expon; + Xsig accumulator, expon_accum, yaccum; + u_char sign, argsign; + FPU_REG x; + int tag; + + exponent = exponent16(st0_ptr); + + /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */ + if (st0_ptr->sigh > (unsigned)0xb504f334) { + /* Treat as sqrt(2)/2 < st0_ptr < 1 */ + significand(&x) = -significand(st0_ptr); + setexponent16(&x, -1); + exponent++; + argsign = SIGN_NEG; + } else { + /* Treat as 1 <= st0_ptr < sqrt(2) */ + x.sigh = st0_ptr->sigh - 0x80000000; + x.sigl = st0_ptr->sigl; + setexponent16(&x, 0); + argsign = SIGN_POS; + } + tag = FPU_normalize_nuo(&x); + + if (tag == TAG_Zero) { + expon = 0; + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + } else { + log2_kernel(&x, argsign, &accumulator, &expon); + } + + if (exponent < 0) { + sign = SIGN_NEG; + exponent = -exponent; + } else + sign = SIGN_POS; + expon_accum.msw = exponent; + expon_accum.midw = expon_accum.lsw = 0; + if (exponent) { + expon_expon = 31 + norm_Xsig(&expon_accum); + shr_Xsig(&accumulator, expon_expon - expon); + + if (sign ^ argsign) + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &expon_accum); + } else { + expon_expon = expon; + sign = argsign; + } + + yaccum.lsw = 0; + XSIG_LL(yaccum) = significand(st1_ptr); + mul_Xsig_Xsig(&accumulator, &yaccum); + + expon_expon += round_Xsig(&accumulator); + + if (accumulator.msw == 0) { + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + return; + } + + significand(st1_ptr) = XSIG_LL(accumulator); + setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1); + + tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign); + FPU_settagi(1, tag); + + set_precision_flag_up(); /* 80486 appears to always do this */ + + return; + +} + +/*--- poly_l2p1() -----------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + | log2(x+1) | + +---------------------------------------------------------------------------*/ +int poly_l2p1(u_char sign0, u_char sign1, + FPU_REG * st0_ptr, FPU_REG * st1_ptr, FPU_REG * dest) +{ + u_char tag; + long int exponent; + Xsig accumulator, yaccum; + + if (exponent16(st0_ptr) < 0) { + log2_kernel(st0_ptr, sign0, &accumulator, &exponent); + + yaccum.lsw = 0; + XSIG_LL(yaccum) = significand(st1_ptr); + mul_Xsig_Xsig(&accumulator, &yaccum); + + exponent += round_Xsig(&accumulator); + + exponent += exponent16(st1_ptr) + 1; + if (exponent < EXP_WAY_UNDER) + exponent = EXP_WAY_UNDER; + + significand(dest) = XSIG_LL(accumulator); + setexponent16(dest, exponent); + + tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1); + FPU_settagi(1, tag); + + if (tag == TAG_Valid) + set_precision_flag_up(); /* 80486 appears to always do this */ + } else { + /* The magnitude of st0_ptr is far too large. */ + + if (sign0 != SIGN_POS) { + /* Trying to get the log of a negative number. */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if (arith_invalid(1) < 0) + return 1; +#endif /* PECULIAR_486 */ + } + + /* 80486 appears to do this */ + if (sign0 == SIGN_NEG) + set_precision_flag_down(); + else + set_precision_flag_up(); + } + + if (exponent(dest) <= EXP_UNDER) + EXCEPTION(EX_Underflow); + + return 0; + +} + +#undef HIPOWER +#define HIPOWER 10 +static const unsigned long long logterms[HIPOWER] = { + 0x2a8eca5705fc2ef0LL, + 0xf6384ee1d01febceLL, + 0x093bb62877cdf642LL, + 0x006985d8a9ec439bLL, + 0x0005212c4f55a9c8LL, + 0x00004326a16927f0LL, + 0x0000038d1d80a0e7LL, + 0x0000003141cc80c6LL, + 0x00000002b1668c9fLL, + 0x000000002c7a46aaLL +}; + +static const unsigned long leadterm = 0xb8000000; + +/*--- log2_kernel() ---------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + | log2(x+1) | + +---------------------------------------------------------------------------*/ +static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result, + long int *expon) +{ + long int exponent, adj; + unsigned long long Xsq; + Xsig accumulator, Numer, Denom, argSignif, arg_signif; + + exponent = exponent16(arg); + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg); + if (argsign == SIGN_POS) { + shr_Xsig(&Denom, 2 - (1 + exponent)); + Denom.msw |= 0x80000000; + div_Xsig(&Numer, &Denom, &argSignif); + } else { + shr_Xsig(&Denom, 1 - (1 + exponent)); + negate_Xsig(&Denom); + if (Denom.msw & 0x80000000) { + div_Xsig(&Numer, &Denom, &argSignif); + exponent++; + } else { + /* Denom must be 1.0 */ + argSignif.lsw = Numer.lsw; + argSignif.midw = Numer.midw; + argSignif.msw = Numer.msw; + } + } + +#ifndef PECULIAR_486 + /* Should check here that |local_arg| is within the valid range */ + if (exponent >= -2) { + if ((exponent > -2) || (argSignif.msw > (unsigned)0xafb0ccc0)) { + /* The argument is too large */ + } + } +#endif /* PECULIAR_486 */ + + arg_signif.lsw = argSignif.lsw; + XSIG_LL(arg_signif) = XSIG_LL(argSignif); + adj = norm_Xsig(&argSignif); + accumulator.lsw = argSignif.lsw; + XSIG_LL(accumulator) = XSIG_LL(argSignif); + mul_Xsig_Xsig(&accumulator, &accumulator); + shr_Xsig(&accumulator, 2 * (-1 - (1 + exponent + adj))); + Xsq = XSIG_LL(accumulator); + if (accumulator.lsw & 0x80000000) + Xsq++; + + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + /* Do the basic fixed point polynomial evaluation */ + polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER - 1); + + mul_Xsig_Xsig(&accumulator, &argSignif); + shr_Xsig(&accumulator, 6 - adj); + + mul32_Xsig(&arg_signif, leadterm); + add_two_Xsig(&accumulator, &arg_signif, &exponent); + + *expon = exponent + 1; + accum_result->lsw = accumulator.lsw; + accum_result->midw = accumulator.midw; + accum_result->msw = accumulator.msw; + +} diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c new file mode 100644 index 000000000..c192fba51 --- /dev/null +++ b/arch/x86/math-emu/poly_sin.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | poly_sin.c | + | | + | Computation of an approximation of the sin function and the cosine | + | function by a polynomial. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + +#define N_COEFF_P 4 +#define N_COEFF_N 4 + +static const unsigned long long pos_terms_l[N_COEFF_P] = { + 0xaaaaaaaaaaaaaaabLL, + 0x00d00d00d00cf906LL, + 0x000006b99159a8bbLL, + 0x000000000d7392e6LL +}; + +static const unsigned long long neg_terms_l[N_COEFF_N] = { + 0x2222222222222167LL, + 0x0002e3bc74aab624LL, + 0x0000000b09229062LL, + 0x00000000000c7973LL +}; + +#define N_COEFF_PH 4 +#define N_COEFF_NH 4 +static const unsigned long long pos_terms_h[N_COEFF_PH] = { + 0x0000000000000000LL, + 0x05b05b05b05b0406LL, + 0x000049f93edd91a9LL, + 0x00000000c9c9ed62LL +}; + +static const unsigned long long neg_terms_h[N_COEFF_NH] = { + 0xaaaaaaaaaaaaaa98LL, + 0x001a01a01a019064LL, + 0x0000008f76c68a77LL, + 0x0000000000d58f5eLL +}; + +/*--- poly_sine() -----------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_sine(FPU_REG *st0_ptr) +{ + int exponent, echange; + Xsig accumulator, argSqrd, argTo4; + unsigned long fix_up, adj; + unsigned long long fixed_arg; + FPU_REG result; + + exponent = exponent(st0_ptr); + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + + /* Split into two ranges, for arguments below and above 1.0 */ + /* The boundary between upper and lower is approx 0.88309101259 */ + if ((exponent < -1) + || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) { + /* The argument is <= 0.88309101259 */ + + argSqrd.msw = st0_ptr->sigh; + argSqrd.midw = st0_ptr->sigl; + argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &significand(st0_ptr)); + shr_Xsig(&argSqrd, 2 * (-1 - exponent)); + argTo4.msw = argSqrd.msw; + argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, + N_COEFF_N - 1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, + N_COEFF_P - 1); + + shr_Xsig(&accumulator, 2); /* Divide by four */ + accumulator.msw |= 0x80000000; /* Add 1.0 */ + + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + + /* Divide by four, FPU_REG compatible, etc */ + exponent = 3 * exponent; + + /* The minimum exponent difference is 3 */ + shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); + + negate_Xsig(&accumulator); + XSIG_LL(accumulator) += significand(st0_ptr); + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, exponent(st0_ptr) + echange); + } else { + /* The argument is > 0.88309101259 */ + /* We use sin(st(0)) = cos(pi/2-st(0)) */ + + fixed_arg = significand(st0_ptr); + + if (exponent == 0) { + /* The argument is >= 1.0 */ + + /* Put the binary point at the left. */ + fixed_arg <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + fixed_arg = 0x921fb54442d18469LL - fixed_arg; + /* There is a special case which arises due to rounding, to fix here. */ + if (fixed_arg == 0xffffffffffffffffLL) + fixed_arg = 0; + + XSIG_LL(argSqrd) = fixed_arg; + argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &fixed_arg); + + XSIG_LL(argTo4) = XSIG_LL(argSqrd); + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, + N_COEFF_NH - 1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, + N_COEFF_PH - 1); + negate_Xsig(&accumulator); + + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + + add_Xsig_Xsig(&accumulator, &argSqrd); + + shr_Xsig(&accumulator, 1); + + accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ + negate_Xsig(&accumulator); + + /* The basic computation is complete. Now fix the answer to + compensate for the error due to the approximation used for + pi/2 + */ + + /* This has an exponent of -65 */ + fix_up = 0x898cc517; + /* The fix-up needs to be improved for larger args */ + if (argSqrd.msw & 0xffc00000) { + /* Get about 32 bit precision in these: */ + fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; + } + fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); + + adj = accumulator.lsw; /* temp save */ + accumulator.lsw -= fix_up; + if (accumulator.lsw > adj) + XSIG_LL(accumulator)--; + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, echange - 1); + } + + significand(&result) = XSIG_LL(accumulator); + setsign(&result, getsign(st0_ptr)); + FPU_copy_to_reg0(&result, TAG_Valid); + +#ifdef PARANOID + if ((exponent(&result) >= 0) + && (significand(&result) > 0x8000000000000000LL)) { + EXCEPTION(EX_INTERNAL | 0x150); + } +#endif /* PARANOID */ + +} + +/*--- poly_cos() ------------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_cos(FPU_REG *st0_ptr) +{ + FPU_REG result; + long int exponent, exp2, echange; + Xsig accumulator, argSqrd, fix_up, argTo4; + unsigned long long fixed_arg; + +#ifdef PARANOID + if ((exponent(st0_ptr) > 0) + || ((exponent(st0_ptr) == 0) + && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) { + EXCEPTION(EX_Invalid); + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + return; + } +#endif /* PARANOID */ + + exponent = exponent(st0_ptr); + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + + if ((exponent < -1) + || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) { + /* arg is < 0.687705 */ + + argSqrd.msw = st0_ptr->sigh; + argSqrd.midw = st0_ptr->sigl; + argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &significand(st0_ptr)); + + if (exponent < -1) { + /* shift the argument right by the required places */ + shr_Xsig(&argSqrd, 2 * (-1 - exponent)); + } + + argTo4.msw = argSqrd.msw; + argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, + N_COEFF_NH - 1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, + N_COEFF_PH - 1); + negate_Xsig(&accumulator); + + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + shr_Xsig(&accumulator, -2 * (1 + exponent)); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + + add_Xsig_Xsig(&accumulator, &argSqrd); + + shr_Xsig(&accumulator, 1); + + /* It doesn't matter if accumulator is all zero here, the + following code will work ok */ + negate_Xsig(&accumulator); + + if (accumulator.lsw & 0x80000000) + XSIG_LL(accumulator)++; + if (accumulator.msw == 0) { + /* The result is 1.0 */ + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + return; + } else { + significand(&result) = XSIG_LL(accumulator); + + /* will be a valid positive nr with expon = -1 */ + setexponentpos(&result, -1); + } + } else { + fixed_arg = significand(st0_ptr); + + if (exponent == 0) { + /* The argument is >= 1.0 */ + + /* Put the binary point at the left. */ + fixed_arg <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + fixed_arg = 0x921fb54442d18469LL - fixed_arg; + /* There is a special case which arises due to rounding, to fix here. */ + if (fixed_arg == 0xffffffffffffffffLL) + fixed_arg = 0; + + exponent = -1; + exp2 = -1; + + /* A shift is needed here only for a narrow range of arguments, + i.e. for fixed_arg approx 2^-32, but we pick up more... */ + if (!(LL_MSW(fixed_arg) & 0xffff0000)) { + fixed_arg <<= 16; + exponent -= 16; + exp2 -= 16; + } + + XSIG_LL(argSqrd) = fixed_arg; + argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &fixed_arg); + + if (exponent < -1) { + /* shift the argument right by the required places */ + shr_Xsig(&argSqrd, 2 * (-1 - exponent)); + } + + argTo4.msw = argSqrd.msw; + argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, + N_COEFF_N - 1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, + N_COEFF_P - 1); + + shr_Xsig(&accumulator, 2); /* Divide by four */ + accumulator.msw |= 0x80000000; /* Add 1.0 */ + + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + + /* Divide by four, FPU_REG compatible, etc */ + exponent = 3 * exponent; + + /* The minimum exponent difference is 3 */ + shr_Xsig(&accumulator, exp2 - exponent); + + negate_Xsig(&accumulator); + XSIG_LL(accumulator) += fixed_arg; + + /* The basic computation is complete. Now fix the answer to + compensate for the error due to the approximation used for + pi/2 + */ + + /* This has an exponent of -65 */ + XSIG_LL(fix_up) = 0x898cc51701b839a2ll; + fix_up.lsw = 0; + + /* The fix-up needs to be improved for larger args */ + if (argSqrd.msw & 0xffc00000) { + /* Get about 32 bit precision in these: */ + fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; + fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; + } + + exp2 += norm_Xsig(&accumulator); + shr_Xsig(&accumulator, 1); /* Prevent overflow */ + exp2++; + shr_Xsig(&fix_up, 65 + exp2); + + add_Xsig_Xsig(&accumulator, &fix_up); + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, exp2 + echange); + significand(&result) = XSIG_LL(accumulator); + } + + FPU_copy_to_reg0(&result, TAG_Valid); + +#ifdef PARANOID + if ((exponent(&result) >= 0) + && (significand(&result) > 0x8000000000000000LL)) { + EXCEPTION(EX_INTERNAL | 0x151); + } +#endif /* PARANOID */ + +} diff --git a/arch/x86/math-emu/poly_tan.c b/arch/x86/math-emu/poly_tan.c new file mode 100644 index 000000000..1f5b1d712 --- /dev/null +++ b/arch/x86/math-emu/poly_tan.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | poly_tan.c | + | | + | Compute the tan of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + +#define HiPOWERop 3 /* odd poly, positive terms */ +static const unsigned long long oddplterm[HiPOWERop] = { + 0x0000000000000000LL, + 0x0051a1cf08fca228LL, + 0x0000000071284ff7LL +}; + +#define HiPOWERon 2 /* odd poly, negative terms */ +static const unsigned long long oddnegterm[HiPOWERon] = { + 0x1291a9a184244e80LL, + 0x0000583245819c21LL +}; + +#define HiPOWERep 2 /* even poly, positive terms */ +static const unsigned long long evenplterm[HiPOWERep] = { + 0x0e848884b539e888LL, + 0x00003c7f18b887daLL +}; + +#define HiPOWERen 2 /* even poly, negative terms */ +static const unsigned long long evennegterm[HiPOWERen] = { + 0xf1f0200fd51569ccLL, + 0x003afb46105c4432LL +}; + +static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL; + +/*--- poly_tan() ------------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_tan(FPU_REG *st0_ptr) +{ + long int exponent; + int invert; + Xsig argSq, argSqSq, accumulatoro, accumulatore, accum, + argSignif, fix_up; + unsigned long adj; + + exponent = exponent(st0_ptr); + +#ifdef PARANOID + if (signnegative(st0_ptr)) { /* Can't hack a number < 0.0 */ + arith_invalid(0); + return; + } /* Need a positive number */ +#endif /* PARANOID */ + + /* Split the problem into two domains, smaller and larger than pi/4 */ + if ((exponent == 0) + || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2))) { + /* The argument is greater than (approx) pi/4 */ + invert = 1; + accum.lsw = 0; + XSIG_LL(accum) = significand(st0_ptr); + + if (exponent == 0) { + /* The argument is >= 1.0 */ + /* Put the binary point at the left. */ + XSIG_LL(accum) <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum); + /* This is a special case which arises due to rounding. */ + if (XSIG_LL(accum) == 0xffffffffffffffffLL) { + FPU_settag0(TAG_Valid); + significand(st0_ptr) = 0x8a51e04daabda360LL; + setexponent16(st0_ptr, + (0x41 + EXTENDED_Ebias) | SIGN_Negative); + return; + } + + argSignif.lsw = accum.lsw; + XSIG_LL(argSignif) = XSIG_LL(accum); + exponent = -1 + norm_Xsig(&argSignif); + } else { + invert = 0; + argSignif.lsw = 0; + XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr); + + if (exponent < -1) { + /* shift the argument right by the required places */ + if (FPU_shrx(&XSIG_LL(accum), -1 - exponent) >= + 0x80000000U) + XSIG_LL(accum)++; /* round up */ + } + } + + XSIG_LL(argSq) = XSIG_LL(accum); + argSq.lsw = accum.lsw; + mul_Xsig_Xsig(&argSq, &argSq); + XSIG_LL(argSqSq) = XSIG_LL(argSq); + argSqSq.lsw = argSq.lsw; + mul_Xsig_Xsig(&argSqSq, &argSqSq); + + /* Compute the negative terms for the numerator polynomial */ + accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0; + polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, + HiPOWERon - 1); + mul_Xsig_Xsig(&accumulatoro, &argSq); + negate_Xsig(&accumulatoro); + /* Add the positive terms */ + polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, + HiPOWERop - 1); + + /* Compute the positive terms for the denominator polynomial */ + accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0; + polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, + HiPOWERep - 1); + mul_Xsig_Xsig(&accumulatore, &argSq); + negate_Xsig(&accumulatore); + /* Add the negative terms */ + polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, + HiPOWERen - 1); + /* Multiply by arg^2 */ + mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); + mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); + /* de-normalize and divide by 2 */ + shr_Xsig(&accumulatore, -2 * (1 + exponent) + 1); + negate_Xsig(&accumulatore); /* This does 1 - accumulator */ + + /* Now find the ratio. */ + if (accumulatore.msw == 0) { + /* accumulatoro must contain 1.0 here, (actually, 0) but it + really doesn't matter what value we use because it will + have negligible effect in later calculations + */ + XSIG_LL(accum) = 0x8000000000000000LL; + accum.lsw = 0; + } else { + div_Xsig(&accumulatoro, &accumulatore, &accum); + } + + /* Multiply by 1/3 * arg^3 */ + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &twothirds); + shr_Xsig(&accum, -2 * (exponent + 1)); + + /* tan(arg) = arg + accum */ + add_two_Xsig(&accum, &argSignif, &exponent); + + if (invert) { + /* We now have the value of tan(pi_2 - arg) where pi_2 is an + approximation for pi/2 + */ + /* The next step is to fix the answer to compensate for the + error due to the approximation used for pi/2 + */ + + /* This is (approx) delta, the error in our approx for pi/2 + (see above). It has an exponent of -65 + */ + XSIG_LL(fix_up) = 0x898cc51701b839a2LL; + fix_up.lsw = 0; + + if (exponent == 0) + adj = 0xffffffff; /* We want approx 1.0 here, but + this is close enough. */ + else if (exponent > -30) { + adj = accum.msw >> -(exponent + 1); /* tan */ + adj = mul_32_32(adj, adj); /* tan^2 */ + } else + adj = 0; + adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */ + + fix_up.msw += adj; + if (!(fix_up.msw & 0x80000000)) { /* did fix_up overflow ? */ + /* Yes, we need to add an msb */ + shr_Xsig(&fix_up, 1); + fix_up.msw |= 0x80000000; + shr_Xsig(&fix_up, 64 + exponent); + } else + shr_Xsig(&fix_up, 65 + exponent); + + add_two_Xsig(&accum, &fix_up, &exponent); + + /* accum now contains tan(pi/2 - arg). + Use tan(arg) = 1.0 / tan(pi/2 - arg) + */ + accumulatoro.lsw = accumulatoro.midw = 0; + accumulatoro.msw = 0x80000000; + div_Xsig(&accumulatoro, &accum, &accum); + exponent = -exponent - 1; + } + + /* Transfer the result */ + round_Xsig(&accum); + FPU_settag0(TAG_Valid); + significand(st0_ptr) = XSIG_LL(accum); + setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */ + +} diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S new file mode 100644 index 000000000..604f0b2d1 --- /dev/null +++ b/arch/x86/math-emu/polynom_Xsig.S @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | polynomial_Xsig.S | + | | + | Fixed point arithmetic polynomial evaluation. | + | | + | Copyright (C) 1992,1993,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void polynomial_Xsig(Xsig *accum, unsigned long long x, | + | unsigned long long terms[], int n) | + | | + | Computes: | + | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | + | and adds the result to the 12 byte Xsig. | + | The terms[] are each 8 bytes, but all computation is performed to 12 byte | + | precision. | + | | + | This function must be used carefully: most overflow of intermediate | + | results is controlled, but overflow of the result is not. | + | | + +---------------------------------------------------------------------------*/ + .file "polynomial_Xsig.S" + +#include "fpu_emu.h" + + +#define TERM_SIZE $8 +#define SUM_MS -20(%ebp) /* sum ms long */ +#define SUM_MIDDLE -24(%ebp) /* sum middle long */ +#define SUM_LS -28(%ebp) /* sum ls long */ +#define ACCUM_MS -4(%ebp) /* accum ms long */ +#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ +#define ACCUM_LS -12(%ebp) /* accum ls long */ +#define OVERFLOWED -16(%ebp) /* addition overflow flag */ + +.text +ENTRY(polynomial_Xsig) + pushl %ebp + movl %esp,%ebp + subl $32,%esp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM2,%esi /* x */ + movl PARAM3,%edi /* terms */ + + movl TERM_SIZE,%eax + mull PARAM4 /* n */ + addl %eax,%edi + + movl 4(%edi),%edx /* terms[n] */ + movl %edx,SUM_MS + movl (%edi),%edx /* terms[n] */ + movl %edx,SUM_MIDDLE + xor %eax,%eax + movl %eax,SUM_LS + movb %al,OVERFLOWED + + subl TERM_SIZE,%edi + decl PARAM4 + js L_accum_done + +L_accum_loop: + xor %eax,%eax + movl %eax,ACCUM_MS + movl %eax,ACCUM_MIDDLE + + movl SUM_MIDDLE,%eax + mull (%esi) /* x ls long */ + movl %edx,ACCUM_LS + + movl SUM_MIDDLE,%eax + mull 4(%esi) /* x ms long */ + addl %eax,ACCUM_LS + adcl %edx,ACCUM_MIDDLE + adcl $0,ACCUM_MS + + movl SUM_MS,%eax + mull (%esi) /* x ls long */ + addl %eax,ACCUM_LS + adcl %edx,ACCUM_MIDDLE + adcl $0,ACCUM_MS + + movl SUM_MS,%eax + mull 4(%esi) /* x ms long */ + addl %eax,ACCUM_MIDDLE + adcl %edx,ACCUM_MS + + testb $0xff,OVERFLOWED + jz L_no_overflow + + movl (%esi),%eax + addl %eax,ACCUM_MIDDLE + movl 4(%esi),%eax + adcl %eax,ACCUM_MS /* This could overflow too */ + +L_no_overflow: + +/* + * Now put the sum of next term and the accumulator + * into the sum register + */ + movl ACCUM_LS,%eax + addl (%edi),%eax /* term ls long */ + movl %eax,SUM_LS + movl ACCUM_MIDDLE,%eax + adcl (%edi),%eax /* term ls long */ + movl %eax,SUM_MIDDLE + movl ACCUM_MS,%eax + adcl 4(%edi),%eax /* term ms long */ + movl %eax,SUM_MS + sbbb %al,%al + movb %al,OVERFLOWED /* Used in the next iteration */ + + subl TERM_SIZE,%edi + decl PARAM4 + jns L_accum_loop + +L_accum_done: + movl PARAM1,%edi /* accum */ + movl SUM_LS,%eax + addl %eax,(%edi) + movl SUM_MIDDLE,%eax + adcl %eax,4(%edi) + movl SUM_MS,%eax + adcl %eax,8(%edi) + + popl %ebx + popl %edi + popl %esi + leave + ret +ENDPROC(polynomial_Xsig) diff --git a/arch/x86/math-emu/reg_add_sub.c b/arch/x86/math-emu/reg_add_sub.c new file mode 100644 index 000000000..29451dd07 --- /dev/null +++ b/arch/x86/math-emu/reg_add_sub.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | reg_add_sub.c | + | | + | Functions to add or subtract two registers and put the result in a third. | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | For each function, the destination may be any FPU_REG, including one of | + | the source FPU_REGs. | + | Each function returns 0 if the answer is o.k., otherwise a non-zero | + | value is returned, indicating either an exception condition or an | + | internal error. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "fpu_system.h" + +static +int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, + FPU_REG const *b, u_char tagb, u_char signb, + FPU_REG * dest, int deststnr, int control_w); + +/* + Operates on st(0) and st(n), or on st(0) and temporary data. + The destination must be one of the source st(x). + */ +int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w) +{ + FPU_REG *a = &st(0); + FPU_REG *dest = &st(deststnr); + u_char signb = getsign(b); + u_char taga = FPU_gettag0(); + u_char signa = getsign(a); + u_char saved_sign = getsign(dest); + int diff, tag, expa, expb; + + if (!(taga | tagb)) { + expa = exponent(a); + expb = exponent(b); + + valid_add: + /* Both registers are valid */ + if (!(signa ^ signb)) { + /* signs are the same */ + tag = + FPU_u_add(a, b, dest, control_w, signa, expa, expb); + } else { + /* The signs are different, so do a subtraction */ + diff = expa - expb; + if (!diff) { + diff = a->sigh - b->sigh; /* This works only if the ms bits + are identical. */ + if (!diff) { + diff = a->sigl > b->sigl; + if (!diff) + diff = -(a->sigl < b->sigl); + } + } + + if (diff > 0) { + tag = + FPU_u_sub(a, b, dest, control_w, signa, + expa, expb); + } else if (diff < 0) { + tag = + FPU_u_sub(b, a, dest, control_w, signb, + expb, expa); + } else { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + /* sign depends upon rounding mode */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + return TAG_Zero; + } + } + + if (tag < 0) { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if (taga == TAG_Special) + taga = FPU_Special(a); + if (tagb == TAG_Special) + tagb = FPU_Special(b); + + if (((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal))) { + FPU_REG x, y; + + if (denormal_operand() < 0) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + a = &x; + b = &y; + expa = exponent16(a); + expb = exponent16(b); + goto valid_add; + } + + if ((taga == TW_NaN) || (tagb == TW_NaN)) { + if (deststnr == 0) + return real_2op_NaN(b, tagb, deststnr, a); + else + return real_2op_NaN(a, taga, deststnr, a); + } + + return add_sub_specials(a, taga, signa, b, tagb, signb, + dest, deststnr, control_w); +} + +/* Subtract b from a. (a-b) -> dest */ +int FPU_sub(int flags, int rm, int control_w) +{ + FPU_REG const *a, *b; + FPU_REG *dest; + u_char taga, tagb, signa, signb, saved_sign, sign; + int diff, tag = 0, expa, expb, deststnr; + + a = &st(0); + taga = FPU_gettag0(); + + deststnr = 0; + if (flags & LOADED) { + b = (FPU_REG *) rm; + tagb = flags & 0x0f; + } else { + b = &st(rm); + tagb = FPU_gettagi(rm); + + if (flags & DEST_RM) + deststnr = rm; + } + + signa = getsign(a); + signb = getsign(b); + + if (flags & REV) { + signa ^= SIGN_NEG; + signb ^= SIGN_NEG; + } + + dest = &st(deststnr); + saved_sign = getsign(dest); + + if (!(taga | tagb)) { + expa = exponent(a); + expb = exponent(b); + + valid_subtract: + /* Both registers are valid */ + + diff = expa - expb; + + if (!diff) { + diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ + if (!diff) { + diff = a->sigl > b->sigl; + if (!diff) + diff = -(a->sigl < b->sigl); + } + } + + switch ((((int)signa) * 2 + signb) / SIGN_NEG) { + case 0: /* P - P */ + case 3: /* N - N */ + if (diff > 0) { + /* |a| > |b| */ + tag = + FPU_u_sub(a, b, dest, control_w, signa, + expa, expb); + } else if (diff == 0) { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + + /* sign depends upon rounding mode */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + return TAG_Zero; + } else { + sign = signa ^ SIGN_NEG; + tag = + FPU_u_sub(b, a, dest, control_w, sign, expb, + expa); + } + break; + case 1: /* P - N */ + tag = + FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, + expb); + break; + case 2: /* N - P */ + tag = + FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, + expb); + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL | 0x111); + return -1; +#endif + } + if (tag < 0) { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if (taga == TAG_Special) + taga = FPU_Special(a); + if (tagb == TAG_Special) + tagb = FPU_Special(b); + + if (((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal))) { + FPU_REG x, y; + + if (denormal_operand() < 0) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + a = &x; + b = &y; + expa = exponent16(a); + expb = exponent16(b); + + goto valid_subtract; + } + + if ((taga == TW_NaN) || (tagb == TW_NaN)) { + FPU_REG const *d1, *d2; + if (flags & REV) { + d1 = b; + d2 = a; + } else { + d1 = a; + d2 = b; + } + if (flags & LOADED) + return real_2op_NaN(b, tagb, deststnr, d1); + if (flags & DEST_RM) + return real_2op_NaN(a, taga, deststnr, d2); + else + return real_2op_NaN(b, tagb, deststnr, d2); + } + + return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG, + dest, deststnr, control_w); +} + +static +int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, + FPU_REG const *b, u_char tagb, u_char signb, + FPU_REG * dest, int deststnr, int control_w) +{ + if (((taga == TW_Denormal) || (tagb == TW_Denormal)) + && (denormal_operand() < 0)) + return FPU_Exception; + + if (taga == TAG_Zero) { + if (tagb == TAG_Zero) { + /* Both are zero, result will be zero. */ + u_char different_signs = signa ^ signb; + + FPU_copy_to_regi(a, TAG_Zero, deststnr); + if (different_signs) { + /* Signs are different. */ + /* Sign of answer depends upon rounding mode. */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + } else + setsign(dest, signa); /* signa may differ from the sign of a. */ + return TAG_Zero; + } else { + reg_copy(b, dest); + if ((tagb == TW_Denormal) && (b->sigh & 0x80000000)) { + /* A pseudoDenormal, convert it. */ + addexponent(dest, 1); + tagb = TAG_Valid; + } else if (tagb > TAG_Empty) + tagb = TAG_Special; + setsign(dest, signb); /* signb may differ from the sign of b. */ + FPU_settagi(deststnr, tagb); + return tagb; + } + } else if (tagb == TAG_Zero) { + reg_copy(a, dest); + if ((taga == TW_Denormal) && (a->sigh & 0x80000000)) { + /* A pseudoDenormal */ + addexponent(dest, 1); + taga = TAG_Valid; + } else if (taga > TAG_Empty) + taga = TAG_Special; + setsign(dest, signa); /* signa may differ from the sign of a. */ + FPU_settagi(deststnr, taga); + return taga; + } else if (taga == TW_Infinity) { + if ((tagb != TW_Infinity) || (signa == signb)) { + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, signa); /* signa may differ from the sign of a. */ + return taga; + } + /* Infinity-Infinity is undefined. */ + return arith_invalid(deststnr); + } else if (tagb == TW_Infinity) { + FPU_copy_to_regi(b, TAG_Special, deststnr); + setsign(dest, signb); /* signb may differ from the sign of b. */ + return tagb; + } +#ifdef PARANOID + EXCEPTION(EX_INTERNAL | 0x101); +#endif + + return FPU_Exception; +} diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c new file mode 100644 index 000000000..eacb5128f --- /dev/null +++ b/arch/x86/math-emu/reg_compare.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | reg_compare.c | + | | + | Compare two floating point registers | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | compare() is the core FPU_REG comparison function | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + +static int compare(FPU_REG const *b, int tagb) +{ + int diff, exp0, expb; + u_char st0_tag; + FPU_REG *st0_ptr; + FPU_REG x, y; + u_char st0_sign, signb = getsign(b); + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + st0_sign = getsign(st0_ptr); + + if (tagb == TAG_Special) + tagb = FPU_Special(b); + if (st0_tag == TAG_Special) + st0_tag = FPU_Special(st0_ptr); + + if (((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal)) + || ((tagb != TAG_Valid) && (tagb != TW_Denormal))) { + if (st0_tag == TAG_Zero) { + if (tagb == TAG_Zero) + return COMP_A_eq_B; + if (tagb == TAG_Valid) + return ((signb == + SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); + if (tagb == TW_Denormal) + return ((signb == + SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | COMP_Denormal; + } else if (tagb == TAG_Zero) { + if (st0_tag == TAG_Valid) + return ((st0_sign == + SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + if (st0_tag == TW_Denormal) + return ((st0_sign == + SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | COMP_Denormal; + } + + if (st0_tag == TW_Infinity) { + if ((tagb == TAG_Valid) || (tagb == TAG_Zero)) + return ((st0_sign == + SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + else if (tagb == TW_Denormal) + return ((st0_sign == + SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | COMP_Denormal; + else if (tagb == TW_Infinity) { + /* The 80486 book says that infinities can be equal! */ + return (st0_sign == signb) ? COMP_A_eq_B : + ((st0_sign == + SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + } + /* Fall through to the NaN code */ + } else if (tagb == TW_Infinity) { + if ((st0_tag == TAG_Valid) || (st0_tag == TAG_Zero)) + return ((signb == + SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); + if (st0_tag == TW_Denormal) + return ((signb == + SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | COMP_Denormal; + /* Fall through to the NaN code */ + } + + /* The only possibility now should be that one of the arguments + is a NaN */ + if ((st0_tag == TW_NaN) || (tagb == TW_NaN)) { + int signalling = 0, unsupported = 0; + if (st0_tag == TW_NaN) { + signalling = + (st0_ptr->sigh & 0xc0000000) == 0x80000000; + unsupported = !((exponent(st0_ptr) == EXP_OVER) + && (st0_ptr-> + sigh & 0x80000000)); + } + if (tagb == TW_NaN) { + signalling |= + (b->sigh & 0xc0000000) == 0x80000000; + unsupported |= !((exponent(b) == EXP_OVER) + && (b->sigh & 0x80000000)); + } + if (signalling || unsupported) + return COMP_No_Comp | COMP_SNaN | COMP_NaN; + else + /* Neither is a signaling NaN */ + return COMP_No_Comp | COMP_NaN; + } + + EXCEPTION(EX_Invalid); + } + + if (st0_sign != signb) { + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + + if ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) { + FPU_to_exp16(st0_ptr, &x); + FPU_to_exp16(b, &y); + st0_ptr = &x; + b = &y; + exp0 = exponent16(st0_ptr); + expb = exponent16(b); + } else { + exp0 = exponent(st0_ptr); + expb = exponent(b); + } + +#ifdef PARANOID + if (!(st0_ptr->sigh & 0x80000000)) + EXCEPTION(EX_Invalid); + if (!(b->sigh & 0x80000000)) + EXCEPTION(EX_Invalid); +#endif /* PARANOID */ + + diff = exp0 - expb; + if (diff == 0) { + diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are + identical */ + if (diff == 0) { + diff = st0_ptr->sigl > b->sigl; + if (diff == 0) + diff = -(st0_ptr->sigl < b->sigl); + } + } + + if (diff > 0) { + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + if (diff < 0) { + return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + + return COMP_A_eq_B + | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + +} + +/* This function requires that st(0) is not empty */ +int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag) +{ + int f, c; + + c = compare(loaded_data, loaded_tag); + + if (c & COMP_NaN) { + EXCEPTION(EX_Invalid); + f = SW_C3 | SW_C2 | SW_C0; + } else + switch (c & 7) { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; + default: +#ifdef PARANOID + EXCEPTION(EX_INTERNAL | 0x121); +#endif /* PARANOID */ + f = SW_C3 | SW_C2 | SW_C0; + break; + } + setcc(f); + if (c & COMP_Denormal) { + return denormal_operand() < 0; + } + return 0; +} + +static int compare_st_st(int nr) +{ + int f, c; + FPU_REG *st_ptr; + + if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) { + setcc(SW_C3 | SW_C2 | SW_C0); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) { + setcc(SW_C3 | SW_C2 | SW_C0); + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } else + switch (c & 7) { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; + default: +#ifdef PARANOID + EXCEPTION(EX_INTERNAL | 0x122); +#endif /* PARANOID */ + f = SW_C3 | SW_C2 | SW_C0; + break; + } + setcc(f); + if (c & COMP_Denormal) { + return denormal_operand() < 0; + } + return 0; +} + +static int compare_i_st_st(int nr) +{ + int f, c; + FPU_REG *st_ptr; + + if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) { + FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + partial_status &= ~SW_C0; + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) { + FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + + switch (c & 7) { + case COMP_A_lt_B: + f = X86_EFLAGS_CF; + break; + case COMP_A_eq_B: + f = X86_EFLAGS_ZF; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF; + break; + default: +#ifdef PARANOID + EXCEPTION(EX_INTERNAL | 0x122); +#endif /* PARANOID */ + f = 0; + break; + } + FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f; + if (c & COMP_Denormal) { + return denormal_operand() < 0; + } + return 0; +} + +static int compare_u_st_st(int nr) +{ + int f = 0, c; + FPU_REG *st_ptr; + + if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) { + setcc(SW_C3 | SW_C2 | SW_C0); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) { + setcc(SW_C3 | SW_C2 | SW_C0); + if (c & COMP_SNaN) { /* This is the only difference between + un-ordered and ordinary comparisons */ + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + return 0; + } else + switch (c & 7) { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL | 0x123); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif /* PARANOID */ + } + setcc(f); + if (c & COMP_Denormal) { + return denormal_operand() < 0; + } + return 0; +} + +static int compare_ui_st_st(int nr) +{ + int f = 0, c; + FPU_REG *st_ptr; + + if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) { + FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + partial_status &= ~SW_C0; + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) { + FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); + if (c & COMP_SNaN) { /* This is the only difference between + un-ordered and ordinary comparisons */ + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + return 0; + } + + switch (c & 7) { + case COMP_A_lt_B: + f = X86_EFLAGS_CF; + break; + case COMP_A_eq_B: + f = X86_EFLAGS_ZF; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL | 0x123); + f = 0; + break; +#endif /* PARANOID */ + } + FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f; + if (c & COMP_Denormal) { + return denormal_operand() < 0; + } + return 0; +} + +/*---------------------------------------------------------------------------*/ + +void fcom_st(void) +{ + /* fcom st(i) */ + compare_st_st(FPU_rm); +} + +void fcompst(void) +{ + /* fcomp st(i) */ + if (!compare_st_st(FPU_rm)) + FPU_pop(); +} + +void fcompp(void) +{ + /* fcompp */ + if (FPU_rm != 1) { + FPU_illegal(); + return; + } + if (!compare_st_st(1)) + poppop(); +} + +void fucom_(void) +{ + /* fucom st(i) */ + compare_u_st_st(FPU_rm); + +} + +void fucomp(void) +{ + /* fucomp st(i) */ + if (!compare_u_st_st(FPU_rm)) + FPU_pop(); +} + +void fucompp(void) +{ + /* fucompp */ + if (FPU_rm == 1) { + if (!compare_u_st_st(1)) + poppop(); + } else + FPU_illegal(); +} + +/* P6+ compare-to-EFLAGS ops */ + +void fcomi_(void) +{ + /* fcomi st(i) */ + compare_i_st_st(FPU_rm); +} + +void fcomip(void) +{ + /* fcomip st(i) */ + if (!compare_i_st_st(FPU_rm)) + FPU_pop(); +} + +void fucomi_(void) +{ + /* fucomi st(i) */ + compare_ui_st_st(FPU_rm); +} + +void fucomip(void) +{ + /* fucomip st(i) */ + if (!compare_ui_st_st(FPU_rm)) + FPU_pop(); +} diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c new file mode 100644 index 000000000..742619e94 --- /dev/null +++ b/arch/x86/math-emu/reg_constant.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | reg_constant.c | + | | + | All of the constant FPU_REGs | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "reg_constant.h" +#include "control_w.h" + +#define MAKE_REG(s, e, l, h) { l, h, \ + (u16)((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } + +FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); +#if 0 +FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000); +FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000); +#endif /* 0 */ +static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b); +static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29); +FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2); +FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2); +FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2); +static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84); +static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7); + +/* Extra bits to take pi/2 to more than 128 bits precision. */ +FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, + 0xfc8f8cbb, 0xece675d1); + +/* Only the sign (and tag) is used in internal zeroes */ +FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); + +/* Only the sign and significand (and tag) are used in internal NaNs */ +/* The 80486 never generates one of these +FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); + */ +/* This is the real indefinite QNaN */ +FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); + +/* Only the sign (and tag) is used in internal infinities */ +FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); + +static void fld_const(FPU_REG const * c, int adj, u_char tag) +{ + FPU_REG *st_new_ptr; + + if (STACK_OVERFLOW) { + FPU_stack_overflow(); + return; + } + push(); + reg_copy(c, st_new_ptr); + st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to + borrow or carry. */ + FPU_settag0(tag); + clear_C1(); +} + +/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP + (and not one of RC_RND or RC_UP). + */ +#define DOWN_OR_CHOP(x) (x & RC_DOWN) + +static void fld1(int rc) +{ + fld_const(&CONST_1, 0, TAG_Valid); +} + +static void fldl2t(int rc) +{ + fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid); +} + +static void fldl2e(int rc) +{ + fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldpi(int rc) +{ + fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldlg2(int rc) +{ + fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldln2(int rc) +{ + fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldz(int rc) +{ + fld_const(&CONST_Z, 0, TAG_Zero); +} + +typedef void (*FUNC_RC) (int); + +static FUNC_RC constants_table[] = { + fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC) FPU_illegal +}; + +void fconst(void) +{ + (constants_table[FPU_rm]) (control_word & CW_RC); +} diff --git a/arch/x86/math-emu/reg_constant.h b/arch/x86/math-emu/reg_constant.h new file mode 100644 index 000000000..f2fdd344d --- /dev/null +++ b/arch/x86/math-emu/reg_constant.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | reg_constant.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _REG_CONSTANT_H_ +#define _REG_CONSTANT_H_ + +#include "fpu_emu.h" + +extern FPU_REG const CONST_1; +extern FPU_REG const CONST_PI; +extern FPU_REG const CONST_PI2; +extern FPU_REG const CONST_PI2extra; +extern FPU_REG const CONST_PI4; +extern FPU_REG const CONST_Z; +extern FPU_REG const CONST_PINF; +extern FPU_REG const CONST_INF; +extern FPU_REG const CONST_MINF; +extern FPU_REG const CONST_QNaN; + +#endif /* _REG_CONSTANT_H_ */ diff --git a/arch/x86/math-emu/reg_convert.c b/arch/x86/math-emu/reg_convert.c new file mode 100644 index 000000000..251180623 --- /dev/null +++ b/arch/x86/math-emu/reg_convert.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | reg_convert.c | + | | + | Convert register representation. | + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + +int FPU_to_exp16(FPU_REG const *a, FPU_REG *x) +{ + int sign = getsign(a); + + *(long long *)&(x->sigl) = *(const long long *)&(a->sigl); + + /* Set up the exponent as a 16 bit quantity. */ + setexponent16(x, exponent(a)); + + if (exponent16(x) == EXP_UNDER) { + /* The number is a de-normal or pseudodenormal. */ + /* We only deal with the significand and exponent. */ + + if (x->sigh & 0x80000000) { + /* Is a pseudodenormal. */ + /* This is non-80486 behaviour because the number + loses its 'denormal' identity. */ + addexponent(x, 1); + } else { + /* Is a denormal. */ + addexponent(x, 1); + FPU_normalize_nuo(x); + } + } + + if (!(x->sigh & 0x80000000)) { + EXCEPTION(EX_INTERNAL | 0x180); + } + + return sign; +} diff --git a/arch/x86/math-emu/reg_divide.c b/arch/x86/math-emu/reg_divide.c new file mode 100644 index 000000000..08c2f6de0 --- /dev/null +++ b/arch/x86/math-emu/reg_divide.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | reg_divide.c | + | | + | Divide one FPU_REG by another and put the result in a destination FPU_REG.| + | | + | Copyright (C) 1996 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The destination may be any FPU_REG, including one of the source FPU_REGs. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" + +/* + Divide one register by another and put the result into a third register. + */ +int FPU_div(int flags, int rm, int control_w) +{ + FPU_REG x, y; + FPU_REG const *a, *b, *st0_ptr, *st_ptr; + FPU_REG *dest; + u_char taga, tagb, signa, signb, sign, saved_sign; + int tag, deststnr; + + if (flags & DEST_RM) + deststnr = rm; + else + deststnr = 0; + + if (flags & REV) { + b = &st(0); + st0_ptr = b; + tagb = FPU_gettag0(); + if (flags & LOADED) { + a = (FPU_REG *) rm; + taga = flags & 0x0f; + } else { + a = &st(rm); + st_ptr = a; + taga = FPU_gettagi(rm); + } + } else { + a = &st(0); + st0_ptr = a; + taga = FPU_gettag0(); + if (flags & LOADED) { + b = (FPU_REG *) rm; + tagb = flags & 0x0f; + } else { + b = &st(rm); + st_ptr = b; + tagb = FPU_gettagi(rm); + } + } + + signa = getsign(a); + signb = getsign(b); + + sign = signa ^ signb; + + dest = &st(deststnr); + saved_sign = getsign(dest); + + if (!(taga | tagb)) { + /* Both regs Valid, this should be the most common case. */ + reg_copy(a, &x); + reg_copy(b, &y); + setpositive(&x); + setpositive(&y); + tag = FPU_u_div(&x, &y, dest, control_w, sign); + + if (tag < 0) + return tag; + + FPU_settagi(deststnr, tag); + return tag; + } + + if (taga == TAG_Special) + taga = FPU_Special(a); + if (tagb == TAG_Special) + tagb = FPU_Special(b); + + if (((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal))) { + if (denormal_operand() < 0) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + tag = FPU_u_div(&x, &y, dest, control_w, sign); + if (tag < 0) + return tag; + + FPU_settagi(deststnr, tag); + return tag; + } else if ((taga <= TW_Denormal) && (tagb <= TW_Denormal)) { + if (tagb != TAG_Zero) { + /* Want to find Zero/Valid */ + if (tagb == TW_Denormal) { + if (denormal_operand() < 0) + return FPU_Exception; + } + + /* The result is zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + setsign(dest, sign); + return TAG_Zero; + } + /* We have an exception condition, either 0/0 or Valid/Zero. */ + if (taga == TAG_Zero) { + /* 0/0 */ + return arith_invalid(deststnr); + } + /* Valid/Zero */ + return FPU_divide_by_zero(deststnr, sign); + } + /* Must have infinities, NaNs, etc */ + else if ((taga == TW_NaN) || (tagb == TW_NaN)) { + if (flags & LOADED) + return real_2op_NaN((FPU_REG *) rm, flags & 0x0f, 0, + st0_ptr); + + if (flags & DEST_RM) { + int tag; + tag = FPU_gettag0(); + if (tag == TAG_Special) + tag = FPU_Special(st0_ptr); + return real_2op_NaN(st0_ptr, tag, rm, + (flags & REV) ? st0_ptr : &st(rm)); + } else { + int tag; + tag = FPU_gettagi(rm); + if (tag == TAG_Special) + tag = FPU_Special(&st(rm)); + return real_2op_NaN(&st(rm), tag, 0, + (flags & REV) ? st0_ptr : &st(rm)); + } + } else if (taga == TW_Infinity) { + if (tagb == TW_Infinity) { + /* infinity/infinity */ + return arith_invalid(deststnr); + } else { + /* tagb must be Valid or Zero */ + if ((tagb == TW_Denormal) && (denormal_operand() < 0)) + return FPU_Exception; + + /* Infinity divided by Zero or Valid does + not raise and exception, but returns Infinity */ + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, sign); + return taga; + } + } else if (tagb == TW_Infinity) { + if ((taga == TW_Denormal) && (denormal_operand() < 0)) + return FPU_Exception; + + /* The result is zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + setsign(dest, sign); + return TAG_Zero; + } +#ifdef PARANOID + else { + EXCEPTION(EX_INTERNAL | 0x102); + return FPU_Exception; + } +#endif /* PARANOID */ + + return 0; +} diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c new file mode 100644 index 000000000..d40ff4549 --- /dev/null +++ b/arch/x86/math-emu/reg_ld_str.c @@ -0,0 +1,1220 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | reg_ld_str.c | + | | + | All of the functions which transfer data between user memory and FPU_REGs.| + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +#include <linux/uaccess.h> + +#include "fpu_system.h" +#include "exception.h" +#include "reg_constant.h" +#include "control_w.h" +#include "status_w.h" + +#define DOUBLE_Emax 1023 /* largest valid exponent */ +#define DOUBLE_Ebias 1023 +#define DOUBLE_Emin (-1022) /* smallest valid exponent */ + +#define SINGLE_Emax 127 /* largest valid exponent */ +#define SINGLE_Ebias 127 +#define SINGLE_Emin (-126) /* smallest valid exponent */ + +static u_char normalize_no_excep(FPU_REG *r, int exp, int sign) +{ + u_char tag; + + setexponent16(r, exp); + + tag = FPU_normalize_nuo(r); + stdexp(r); + if (sign) + setnegative(r); + + return tag; +} + +int FPU_tagof(FPU_REG *ptr) +{ + int exp; + + exp = exponent16(ptr) & 0x7fff; + if (exp == 0) { + if (!(ptr->sigh | ptr->sigl)) { + return TAG_Zero; + } + /* The number is a de-normal or pseudodenormal. */ + return TAG_Special; + } + + if (exp == 0x7fff) { + /* Is an Infinity, a NaN, or an unsupported data type. */ + return TAG_Special; + } + + if (!(ptr->sigh & 0x80000000)) { + /* Unsupported data type. */ + /* Valid numbers have the ms bit set to 1. */ + /* Unnormal. */ + return TAG_Special; + } + + return TAG_Valid; +} + +/* Get a long double from user memory */ +int FPU_load_extended(long double __user *s, int stnr) +{ + FPU_REG *sti_ptr = &st(stnr); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 10); + __copy_from_user(sti_ptr, s, 10); + RE_ENTRANT_CHECK_ON; + + return FPU_tagof(sti_ptr); +} + +/* Get a double from user memory */ +int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data) +{ + int exp, tag, negative; + unsigned m64, l64; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, dfloat, 8); + FPU_get_user(m64, 1 + (unsigned long __user *)dfloat); + FPU_get_user(l64, (unsigned long __user *)dfloat); + RE_ENTRANT_CHECK_ON; + + negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive; + exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias; + m64 &= 0xfffff; + if (exp > DOUBLE_Emax + EXTENDED_Ebias) { + /* Infinity or NaN */ + if ((m64 == 0) && (l64 == 0)) { + /* +- infinity */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000000; + exp = EXP_Infinity + EXTENDED_Ebias; + tag = TAG_Special; + } else { + /* Must be a signaling or quiet NaN */ + exp = EXP_NaN + EXTENDED_Ebias; + loaded_data->sigh = (m64 << 11) | 0x80000000; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + tag = TAG_Special; /* The calling function must look for NaNs */ + } + } else if (exp < DOUBLE_Emin + EXTENDED_Ebias) { + /* Zero or de-normal */ + if ((m64 == 0) && (l64 == 0)) { + /* Zero */ + reg_copy(&CONST_Z, loaded_data); + exp = 0; + tag = TAG_Zero; + } else { + /* De-normal */ + loaded_data->sigh = m64 << 11; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + + return normalize_no_excep(loaded_data, DOUBLE_Emin, + negative) + | (denormal_operand() < 0 ? FPU_Exception : 0); + } + } else { + loaded_data->sigh = (m64 << 11) | 0x80000000; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + + tag = TAG_Valid; + } + + setexponent16(loaded_data, exp | negative); + + return tag; +} + +/* Get a float from user memory */ +int FPU_load_single(float __user *single, FPU_REG *loaded_data) +{ + unsigned m32; + int exp, tag, negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, single, 4); + FPU_get_user(m32, (unsigned long __user *)single); + RE_ENTRANT_CHECK_ON; + + negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive; + + if (!(m32 & 0x7fffffff)) { + /* Zero */ + reg_copy(&CONST_Z, loaded_data); + addexponent(loaded_data, negative); + return TAG_Zero; + } + exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias; + m32 = (m32 & 0x7fffff) << 8; + if (exp < SINGLE_Emin + EXTENDED_Ebias) { + /* De-normals */ + loaded_data->sigh = m32; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, SINGLE_Emin, negative) + | (denormal_operand() < 0 ? FPU_Exception : 0); + } else if (exp > SINGLE_Emax + EXTENDED_Ebias) { + /* Infinity or NaN */ + if (m32 == 0) { + /* +- infinity */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000000; + exp = EXP_Infinity + EXTENDED_Ebias; + tag = TAG_Special; + } else { + /* Must be a signaling or quiet NaN */ + exp = EXP_NaN + EXTENDED_Ebias; + loaded_data->sigh = m32 | 0x80000000; + loaded_data->sigl = 0; + tag = TAG_Special; /* The calling function must look for NaNs */ + } + } else { + loaded_data->sigh = m32 | 0x80000000; + loaded_data->sigl = 0; + tag = TAG_Valid; + } + + setexponent16(loaded_data, exp | negative); /* Set the sign. */ + + return tag; +} + +/* Get a long long from user memory */ +int FPU_load_int64(long long __user *_s) +{ + long long s; + int sign; + FPU_REG *st0_ptr = &st(0); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 8); + if (copy_from_user(&s, _s, 8)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + if (s == 0) { + reg_copy(&CONST_Z, st0_ptr); + return TAG_Zero; + } + + if (s > 0) + sign = SIGN_Positive; + else { + s = -s; + sign = SIGN_Negative; + } + + significand(st0_ptr) = s; + + return normalize_no_excep(st0_ptr, 63, sign); +} + +/* Get a long from user memory */ +int FPU_load_int32(long __user *_s, FPU_REG *loaded_data) +{ + long s; + int negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 4); + FPU_get_user(s, _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) { + reg_copy(&CONST_Z, loaded_data); + return TAG_Zero; + } + + if (s > 0) + negative = SIGN_Positive; + else { + s = -s; + negative = SIGN_Negative; + } + + loaded_data->sigh = s; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, 31, negative); +} + +/* Get a short from user memory */ +int FPU_load_int16(short __user *_s, FPU_REG *loaded_data) +{ + int s, negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 2); + /* Cast as short to get the sign extended. */ + FPU_get_user(s, _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) { + reg_copy(&CONST_Z, loaded_data); + return TAG_Zero; + } + + if (s > 0) + negative = SIGN_Positive; + else { + s = -s; + negative = SIGN_Negative; + } + + loaded_data->sigh = s << 16; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, 15, negative); +} + +/* Get a packed bcd array from user memory */ +int FPU_load_bcd(u_char __user *s) +{ + FPU_REG *st0_ptr = &st(0); + int pos; + u_char bcd; + long long l = 0; + int sign; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 10); + RE_ENTRANT_CHECK_ON; + for (pos = 8; pos >= 0; pos--) { + l *= 10; + RE_ENTRANT_CHECK_OFF; + FPU_get_user(bcd, s + pos); + RE_ENTRANT_CHECK_ON; + l += bcd >> 4; + l *= 10; + l += bcd & 0x0f; + } + + RE_ENTRANT_CHECK_OFF; + FPU_get_user(sign, s + 9); + sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive; + RE_ENTRANT_CHECK_ON; + + if (l == 0) { + reg_copy(&CONST_Z, st0_ptr); + addexponent(st0_ptr, sign); /* Set the sign. */ + return TAG_Zero; + } else { + significand(st0_ptr) = l; + return normalize_no_excep(st0_ptr, 63, sign); + } +} + +/*===========================================================================*/ + +/* Put a long double into user memory */ +int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, + long double __user * d) +{ + /* + The only exception raised by an attempt to store to an + extended format is the Invalid Stack exception, i.e. + attempting to store from an empty register. + */ + + if (st0_tag != TAG_Empty) { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 10); + + FPU_put_user(st0_ptr->sigl, (unsigned long __user *)d); + FPU_put_user(st0_ptr->sigh, + (unsigned long __user *)((u_char __user *) d + 4)); + FPU_put_user(exponent16(st0_ptr), + (unsigned short __user *)((u_char __user *) d + + 8)); + RE_ENTRANT_CHECK_ON; + + return 1; + } + + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if (control_word & CW_Invalid) { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 10); + FPU_put_user(0, (unsigned long __user *)d); + FPU_put_user(0xc0000000, 1 + (unsigned long __user *)d); + FPU_put_user(0xffff, 4 + (short __user *)d); + RE_ENTRANT_CHECK_ON; + return 1; + } else + return 0; + +} + +/* Put a double into user memory */ +int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat) +{ + unsigned long l[2]; + unsigned long increment = 0; /* avoid gcc warnings */ + int precision_loss; + int exp; + FPU_REG tmp; + + l[0] = 0; + l[1] = 0; + if (st0_tag == TAG_Valid) { + reg_copy(st0_ptr, &tmp); + exp = exponent(&tmp); + + if (exp < DOUBLE_Emin) { /* It may be a denormal */ + addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */ +denormal_arg: + if ((precision_loss = FPU_round_to_int(&tmp, st0_tag))) { +#ifdef PECULIAR_486 + /* Did it round to a non-denormal ? */ + /* This behaviour might be regarded as peculiar, it appears + that the 80486 rounds to the dest precision, then + converts to decide underflow. */ + if (! + ((tmp.sigh == 0x00100000) && (tmp.sigl == 0) + && (st0_ptr->sigl & 0x000007ff))) +#endif /* PECULIAR_486 */ + { + EXCEPTION(EX_Underflow); + /* This is a special case: see sec 16.2.5.1 of + the 80486 book */ + if (!(control_word & CW_Underflow)) + return 0; + } + EXCEPTION(precision_loss); + if (!(control_word & CW_Precision)) + return 0; + } + l[0] = tmp.sigl; + l[1] = tmp.sigh; + } else { + if (tmp.sigl & 0x000007ff) { + precision_loss = 1; + switch (control_word & CW_RC) { + case RC_RND: + /* Rounding can get a little messy.. */ + increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */ + ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */ + break; + case RC_DOWN: /* towards -infinity */ + increment = + signpositive(&tmp) ? 0 : tmp. + sigl & 0x7ff; + break; + case RC_UP: /* towards +infinity */ + increment = + signpositive(&tmp) ? tmp. + sigl & 0x7ff : 0; + break; + case RC_CHOP: + increment = 0; + break; + } + + /* Truncate the mantissa */ + tmp.sigl &= 0xfffff800; + + if (increment) { + if (tmp.sigl >= 0xfffff800) { + /* the sigl part overflows */ + if (tmp.sigh == 0xffffffff) { + /* The sigh part overflows */ + tmp.sigh = 0x80000000; + exp++; + if (exp >= EXP_OVER) + goto overflow; + } else { + tmp.sigh++; + } + tmp.sigl = 0x00000000; + } else { + /* We only need to increment sigl */ + tmp.sigl += 0x00000800; + } + } + } else + precision_loss = 0; + + l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21); + l[1] = ((tmp.sigh >> 11) & 0xfffff); + + if (exp > DOUBLE_Emax) { + overflow: + EXCEPTION(EX_Overflow); + if (!(control_word & CW_Overflow)) + return 0; + set_precision_flag_up(); + if (!(control_word & CW_Precision)) + return 0; + + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + /* Overflow to infinity */ + l[1] = 0x7ff00000; /* Set to + INF */ + } else { + if (precision_loss) { + if (increment) + set_precision_flag_up(); + else + set_precision_flag_down(); + } + /* Add the exponent */ + l[1] |= (((exp + DOUBLE_Ebias) & 0x7ff) << 20); + } + } + } else if (st0_tag == TAG_Zero) { + /* Number is zero */ + } else if (st0_tag == TAG_Special) { + st0_tag = FPU_Special(st0_ptr); + if (st0_tag == TW_Denormal) { + /* A denormal will always underflow. */ +#ifndef PECULIAR_486 + /* An 80486 is supposed to be able to generate + a denormal exception here, but... */ + /* Underflow has priority. */ + if (control_word & CW_Underflow) + denormal_operand(); +#endif /* PECULIAR_486 */ + reg_copy(st0_ptr, &tmp); + goto denormal_arg; + } else if (st0_tag == TW_Infinity) { + l[1] = 0x7ff00000; + } else if (st0_tag == TW_NaN) { + /* Is it really a NaN ? */ + if ((exponent(st0_ptr) == EXP_OVER) + && (st0_ptr->sigh & 0x80000000)) { + /* See if we can get a valid NaN from the FPU_REG */ + l[0] = + (st0_ptr->sigl >> 11) | (st0_ptr-> + sigh << 21); + l[1] = ((st0_ptr->sigh >> 11) & 0xfffff); + if (!(st0_ptr->sigh & 0x40000000)) { + /* It is a signalling NaN */ + EXCEPTION(EX_Invalid); + if (!(control_word & CW_Invalid)) + return 0; + l[1] |= (0x40000000 >> 11); + } + l[1] |= 0x7ff00000; + } else { + /* It is an unsupported data type */ + EXCEPTION(EX_Invalid); + if (!(control_word & CW_Invalid)) + return 0; + l[1] = 0xfff80000; + } + } + } else if (st0_tag == TAG_Empty) { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if (control_word & CW_Invalid) { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, dfloat, 8); + FPU_put_user(0, (unsigned long __user *)dfloat); + FPU_put_user(0xfff80000, + 1 + (unsigned long __user *)dfloat); + RE_ENTRANT_CHECK_ON; + return 1; + } else + return 0; + } + if (getsign(st0_ptr)) + l[1] |= 0x80000000; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, dfloat, 8); + FPU_put_user(l[0], (unsigned long __user *)dfloat); + FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat); + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/* Put a float into user memory */ +int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single) +{ + long templ = 0; + unsigned long increment = 0; /* avoid gcc warnings */ + int precision_loss; + int exp; + FPU_REG tmp; + + if (st0_tag == TAG_Valid) { + + reg_copy(st0_ptr, &tmp); + exp = exponent(&tmp); + + if (exp < SINGLE_Emin) { + addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */ + + denormal_arg: + + if ((precision_loss = FPU_round_to_int(&tmp, st0_tag))) { +#ifdef PECULIAR_486 + /* Did it round to a non-denormal ? */ + /* This behaviour might be regarded as peculiar, it appears + that the 80486 rounds to the dest precision, then + converts to decide underflow. */ + if (!((tmp.sigl == 0x00800000) && + ((st0_ptr->sigh & 0x000000ff) + || st0_ptr->sigl))) +#endif /* PECULIAR_486 */ + { + EXCEPTION(EX_Underflow); + /* This is a special case: see sec 16.2.5.1 of + the 80486 book */ + if (!(control_word & CW_Underflow)) + return 0; + } + EXCEPTION(precision_loss); + if (!(control_word & CW_Precision)) + return 0; + } + templ = tmp.sigl; + } else { + if (tmp.sigl | (tmp.sigh & 0x000000ff)) { + unsigned long sigh = tmp.sigh; + unsigned long sigl = tmp.sigl; + + precision_loss = 1; + switch (control_word & CW_RC) { + case RC_RND: + increment = ((sigh & 0xff) > 0x80) /* more than half */ + ||(((sigh & 0xff) == 0x80) && sigl) /* more than half */ + ||((sigh & 0x180) == 0x180); /* round to even */ + break; + case RC_DOWN: /* towards -infinity */ + increment = signpositive(&tmp) + ? 0 : (sigl | (sigh & 0xff)); + break; + case RC_UP: /* towards +infinity */ + increment = signpositive(&tmp) + ? (sigl | (sigh & 0xff)) : 0; + break; + case RC_CHOP: + increment = 0; + break; + } + + /* Truncate part of the mantissa */ + tmp.sigl = 0; + + if (increment) { + if (sigh >= 0xffffff00) { + /* The sigh part overflows */ + tmp.sigh = 0x80000000; + exp++; + if (exp >= EXP_OVER) + goto overflow; + } else { + tmp.sigh &= 0xffffff00; + tmp.sigh += 0x100; + } + } else { + tmp.sigh &= 0xffffff00; /* Finish the truncation */ + } + } else + precision_loss = 0; + + templ = (tmp.sigh >> 8) & 0x007fffff; + + if (exp > SINGLE_Emax) { + overflow: + EXCEPTION(EX_Overflow); + if (!(control_word & CW_Overflow)) + return 0; + set_precision_flag_up(); + if (!(control_word & CW_Precision)) + return 0; + + /* This is a special case: see sec 16.2.5.1 of the 80486 book. */ + /* Masked response is overflow to infinity. */ + templ = 0x7f800000; + } else { + if (precision_loss) { + if (increment) + set_precision_flag_up(); + else + set_precision_flag_down(); + } + /* Add the exponent */ + templ |= ((exp + SINGLE_Ebias) & 0xff) << 23; + } + } + } else if (st0_tag == TAG_Zero) { + templ = 0; + } else if (st0_tag == TAG_Special) { + st0_tag = FPU_Special(st0_ptr); + if (st0_tag == TW_Denormal) { + reg_copy(st0_ptr, &tmp); + + /* A denormal will always underflow. */ +#ifndef PECULIAR_486 + /* An 80486 is supposed to be able to generate + a denormal exception here, but... */ + /* Underflow has priority. */ + if (control_word & CW_Underflow) + denormal_operand(); +#endif /* PECULIAR_486 */ + goto denormal_arg; + } else if (st0_tag == TW_Infinity) { + templ = 0x7f800000; + } else if (st0_tag == TW_NaN) { + /* Is it really a NaN ? */ + if ((exponent(st0_ptr) == EXP_OVER) + && (st0_ptr->sigh & 0x80000000)) { + /* See if we can get a valid NaN from the FPU_REG */ + templ = st0_ptr->sigh >> 8; + if (!(st0_ptr->sigh & 0x40000000)) { + /* It is a signalling NaN */ + EXCEPTION(EX_Invalid); + if (!(control_word & CW_Invalid)) + return 0; + templ |= (0x40000000 >> 8); + } + templ |= 0x7f800000; + } else { + /* It is an unsupported data type */ + EXCEPTION(EX_Invalid); + if (!(control_word & CW_Invalid)) + return 0; + templ = 0xffc00000; + } + } +#ifdef PARANOID + else { + EXCEPTION(EX_INTERNAL | 0x164); + return 0; + } +#endif + } else if (st0_tag == TAG_Empty) { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if (control_word & EX_Invalid) { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, single, 4); + FPU_put_user(0xffc00000, + (unsigned long __user *)single); + RE_ENTRANT_CHECK_ON; + return 1; + } else + return 0; + } +#ifdef PARANOID + else { + EXCEPTION(EX_INTERNAL | 0x163); + return 0; + } +#endif + if (getsign(st0_ptr)) + templ |= 0x80000000; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, single, 4); + FPU_put_user(templ, (unsigned long __user *)single); + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/* Put a long long into user memory */ +int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d) +{ + FPU_REG t; + long long tll; + int precision_loss; + + if (st0_tag == TAG_Empty) { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } else if (st0_tag == TAG_Special) { + st0_tag = FPU_Special(st0_ptr); + if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + ((long *)&tll)[0] = t.sigl; + ((long *)&tll)[1] = t.sigh; + if ((precision_loss == 1) || + ((t.sigh & 0x80000000) && + !((t.sigh == 0x80000000) && (t.sigl == 0) && signnegative(&t)))) { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if (control_word & EX_Invalid) { + /* Produce something like QNaN "indefinite" */ + tll = 0x8000000000000000LL; + } else + return 0; + } else { + if (precision_loss) + set_precision_flag(precision_loss); + if (signnegative(&t)) + tll = -tll; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 8); + if (copy_to_user(d, &tll, 8)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/* Put a long into user memory */ +int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d) +{ + FPU_REG t; + int precision_loss; + + if (st0_tag == TAG_Empty) { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } else if (st0_tag == TAG_Special) { + st0_tag = FPU_Special(st0_ptr); + if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + if (t.sigh || + ((t.sigl & 0x80000000) && + !((t.sigl == 0x80000000) && signnegative(&t)))) { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if (control_word & EX_Invalid) { + /* Produce something like QNaN "indefinite" */ + t.sigl = 0x80000000; + } else + return 0; + } else { + if (precision_loss) + set_precision_flag(precision_loss); + if (signnegative(&t)) + t.sigl = -(long)t.sigl; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 4); + FPU_put_user(t.sigl, (unsigned long __user *)d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/* Put a short into user memory */ +int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d) +{ + FPU_REG t; + int precision_loss; + + if (st0_tag == TAG_Empty) { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } else if (st0_tag == TAG_Special) { + st0_tag = FPU_Special(st0_ptr); + if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + if (t.sigh || + ((t.sigl & 0xffff8000) && + !((t.sigl == 0x8000) && signnegative(&t)))) { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if (control_word & EX_Invalid) { + /* Produce something like QNaN "indefinite" */ + t.sigl = 0x8000; + } else + return 0; + } else { + if (precision_loss) + set_precision_flag(precision_loss); + if (signnegative(&t)) + t.sigl = -t.sigl; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 2); + FPU_put_user((short)t.sigl, d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/* Put a packed bcd array into user memory */ +int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d) +{ + FPU_REG t; + unsigned long long ll; + u_char b; + int i, precision_loss; + u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0; + + if (st0_tag == TAG_Empty) { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } else if (st0_tag == TAG_Special) { + st0_tag = FPU_Special(st0_ptr); + if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + ll = significand(&t); + + /* Check for overflow, by comparing with 999999999999999999 decimal. */ + if ((t.sigh > 0x0de0b6b3) || + ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff))) { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if (control_word & CW_Invalid) { + /* Produce the QNaN "indefinite" */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 10); + for (i = 0; i < 7; i++) + FPU_put_user(0, d + i); /* These bytes "undefined" */ + FPU_put_user(0xc0, d + 7); /* This byte "undefined" */ + FPU_put_user(0xff, d + 8); + FPU_put_user(0xff, d + 9); + RE_ENTRANT_CHECK_ON; + return 1; + } else + return 0; + } else if (precision_loss) { + /* Precision loss doesn't stop the data transfer */ + set_precision_flag(precision_loss); + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 10); + RE_ENTRANT_CHECK_ON; + for (i = 0; i < 9; i++) { + b = FPU_div_small(&ll, 10); + b |= (FPU_div_small(&ll, 10)) << 4; + RE_ENTRANT_CHECK_OFF; + FPU_put_user(b, d + i); + RE_ENTRANT_CHECK_ON; + } + RE_ENTRANT_CHECK_OFF; + FPU_put_user(sign, d + 9); + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/*===========================================================================*/ + +/* r gets mangled such that sig is int, sign: + it is NOT normalized */ +/* The return value (in eax) is zero if the result is exact, + if bits are changed due to rounding, truncation, etc, then + a non-zero value is returned */ +/* Overflow is signalled by a non-zero return value (in eax). + In the case of overflow, the returned significand always has the + largest possible value */ +int FPU_round_to_int(FPU_REG *r, u_char tag) +{ + u_char very_big; + unsigned eax; + + if (tag == TAG_Zero) { + /* Make sure that zero is returned */ + significand(r) = 0; + return 0; /* o.k. */ + } + + if (exponent(r) > 63) { + r->sigl = r->sigh = ~0; /* The largest representable number */ + return 1; /* overflow */ + } + + eax = FPU_shrxs(&r->sigl, 63 - exponent(r)); + very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ +#define half_or_more (eax & 0x80000000) +#define frac_part (eax) +#define more_than_half ((eax & 0x80000001) == 0x80000001) + switch (control_word & CW_RC) { + case RC_RND: + if (more_than_half /* nearest */ + || (half_or_more && (r->sigl & 1))) { /* odd -> even */ + if (very_big) + return 1; /* overflow */ + significand(r)++; + return PRECISION_LOST_UP; + } + break; + case RC_DOWN: + if (frac_part && getsign(r)) { + if (very_big) + return 1; /* overflow */ + significand(r)++; + return PRECISION_LOST_UP; + } + break; + case RC_UP: + if (frac_part && !getsign(r)) { + if (very_big) + return 1; /* overflow */ + significand(r)++; + return PRECISION_LOST_UP; + } + break; + case RC_CHOP: + break; + } + + return eax ? PRECISION_LOST_DOWN : 0; + +} + +/*===========================================================================*/ + +u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s) +{ + unsigned short tag_word = 0; + u_char tag; + int i; + + if ((addr_modes.default_mode == VM86) || + ((addr_modes.default_mode == PM16) + ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX))) { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 0x0e); + FPU_get_user(control_word, (unsigned short __user *)s); + FPU_get_user(partial_status, (unsigned short __user *)(s + 2)); + FPU_get_user(tag_word, (unsigned short __user *)(s + 4)); + FPU_get_user(instruction_address.offset, + (unsigned short __user *)(s + 6)); + FPU_get_user(instruction_address.selector, + (unsigned short __user *)(s + 8)); + FPU_get_user(operand_address.offset, + (unsigned short __user *)(s + 0x0a)); + FPU_get_user(operand_address.selector, + (unsigned short __user *)(s + 0x0c)); + RE_ENTRANT_CHECK_ON; + s += 0x0e; + if (addr_modes.default_mode == VM86) { + instruction_address.offset + += (instruction_address.selector & 0xf000) << 4; + operand_address.offset += + (operand_address.selector & 0xf000) << 4; + } + } else { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 0x1c); + FPU_get_user(control_word, (unsigned short __user *)s); + FPU_get_user(partial_status, (unsigned short __user *)(s + 4)); + FPU_get_user(tag_word, (unsigned short __user *)(s + 8)); + FPU_get_user(instruction_address.offset, + (unsigned long __user *)(s + 0x0c)); + FPU_get_user(instruction_address.selector, + (unsigned short __user *)(s + 0x10)); + FPU_get_user(instruction_address.opcode, + (unsigned short __user *)(s + 0x12)); + FPU_get_user(operand_address.offset, + (unsigned long __user *)(s + 0x14)); + FPU_get_user(operand_address.selector, + (unsigned long __user *)(s + 0x18)); + RE_ENTRANT_CHECK_ON; + s += 0x1c; + } + +#ifdef PECULIAR_486 + control_word &= ~0xe080; +#endif /* PECULIAR_486 */ + + top = (partial_status >> SW_Top_Shift) & 7; + + if (partial_status & ~control_word & CW_Exceptions) + partial_status |= (SW_Summary | SW_Backward); + else + partial_status &= ~(SW_Summary | SW_Backward); + + for (i = 0; i < 8; i++) { + tag = tag_word & 3; + tag_word >>= 2; + + if (tag == TAG_Empty) + /* New tag is empty. Accept it */ + FPU_settag(i, TAG_Empty); + else if (FPU_gettag(i) == TAG_Empty) { + /* Old tag is empty and new tag is not empty. New tag is determined + by old reg contents */ + if (exponent(&fpu_register(i)) == -EXTENDED_Ebias) { + if (! + (fpu_register(i).sigl | fpu_register(i). + sigh)) + FPU_settag(i, TAG_Zero); + else + FPU_settag(i, TAG_Special); + } else if (exponent(&fpu_register(i)) == + 0x7fff - EXTENDED_Ebias) { + FPU_settag(i, TAG_Special); + } else if (fpu_register(i).sigh & 0x80000000) + FPU_settag(i, TAG_Valid); + else + FPU_settag(i, TAG_Special); /* An Un-normal */ + } + /* Else old tag is not empty and new tag is not empty. Old tag + remains correct */ + } + + return s; +} + +void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) +{ + int i, regnr; + u_char __user *s = fldenv(addr_modes, data_address); + int offset = (top & 7) * 10, other = 80 - offset; + + /* Copy all registers in stack order. */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 80); + __copy_from_user(register_base + offset, s, other); + if (offset) + __copy_from_user(register_base, s + other, offset); + RE_ENTRANT_CHECK_ON; + + for (i = 0; i < 8; i++) { + regnr = (i + top) & 7; + if (FPU_gettag(regnr) != TAG_Empty) + /* The loaded data over-rides all other cases. */ + FPU_settag(regnr, FPU_tagof(&st(i))); + } + +} + +u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) +{ + if ((addr_modes.default_mode == VM86) || + ((addr_modes.default_mode == PM16) + ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX))) { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 14); +#ifdef PECULIAR_486 + FPU_put_user(control_word & ~0xe080, (unsigned long __user *)d); +#else + FPU_put_user(control_word, (unsigned short __user *)d); +#endif /* PECULIAR_486 */ + FPU_put_user(status_word(), (unsigned short __user *)(d + 2)); + FPU_put_user(fpu_tag_word, (unsigned short __user *)(d + 4)); + FPU_put_user(instruction_address.offset, + (unsigned short __user *)(d + 6)); + FPU_put_user(operand_address.offset, + (unsigned short __user *)(d + 0x0a)); + if (addr_modes.default_mode == VM86) { + FPU_put_user((instruction_address. + offset & 0xf0000) >> 4, + (unsigned short __user *)(d + 8)); + FPU_put_user((operand_address.offset & 0xf0000) >> 4, + (unsigned short __user *)(d + 0x0c)); + } else { + FPU_put_user(instruction_address.selector, + (unsigned short __user *)(d + 8)); + FPU_put_user(operand_address.selector, + (unsigned short __user *)(d + 0x0c)); + } + RE_ENTRANT_CHECK_ON; + d += 0x0e; + } else { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 7 * 4); +#ifdef PECULIAR_486 + control_word &= ~0xe080; + /* An 80486 sets nearly all of the reserved bits to 1. */ + control_word |= 0xffff0040; + partial_status = status_word() | 0xffff0000; + fpu_tag_word |= 0xffff0000; + I387->soft.fcs &= ~0xf8000000; + I387->soft.fos |= 0xffff0000; +#endif /* PECULIAR_486 */ + if (__copy_to_user(d, &control_word, 7 * 4)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + d += 0x1c; + } + + control_word |= CW_Exceptions; + partial_status &= ~(SW_Summary | SW_Backward); + + return d; +} + +void fsave(fpu_addr_modes addr_modes, u_char __user *data_address) +{ + u_char __user *d; + int offset = (top & 7) * 10, other = 80 - offset; + + d = fstenv(addr_modes, data_address); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 80); + + /* Copy all registers in stack order. */ + if (__copy_to_user(d, register_base + offset, other)) + FPU_abort; + if (offset) + if (__copy_to_user(d + other, register_base, offset)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + finit(); +} + +/*===========================================================================*/ diff --git a/arch/x86/math-emu/reg_mul.c b/arch/x86/math-emu/reg_mul.c new file mode 100644 index 000000000..d69618572 --- /dev/null +++ b/arch/x86/math-emu/reg_mul.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 +/*---------------------------------------------------------------------------+ + | reg_mul.c | + | | + | Multiply one FPU_REG by another, put the result in a destination FPU_REG. | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Returns the tag of the result if no exceptions or errors occurred. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The destination may be any FPU_REG, including one of the source FPU_REGs. | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" +#include "exception.h" +#include "reg_constant.h" +#include "fpu_system.h" + +/* + Multiply two registers to give a register result. + The sources are st(deststnr) and (b,tagb,signb). + The destination is st(deststnr). + */ +/* This routine must be called with non-empty source registers */ +int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w) +{ + FPU_REG *a = &st(deststnr); + FPU_REG *dest = a; + u_char taga = FPU_gettagi(deststnr); + u_char saved_sign = getsign(dest); + u_char sign = (getsign(a) ^ getsign(b)); + int tag; + + if (!(taga | tagb)) { + /* Both regs Valid, this should be the most common case. */ + + tag = + FPU_u_mul(a, b, dest, control_w, sign, + exponent(a) + exponent(b)); + if (tag < 0) { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if (taga == TAG_Special) + taga = FPU_Special(a); + if (tagb == TAG_Special) + tagb = FPU_Special(b); + + if (((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal))) { + FPU_REG x, y; + if (denormal_operand() < 0) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + tag = FPU_u_mul(&x, &y, dest, control_w, sign, + exponent16(&x) + exponent16(&y)); + if (tag < 0) { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } else if ((taga <= TW_Denormal) && (tagb <= TW_Denormal)) { + if (((tagb == TW_Denormal) || (taga == TW_Denormal)) + && (denormal_operand() < 0)) + return FPU_Exception; + + /* Must have either both arguments == zero, or + one valid and the other zero. + The result is therefore zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + /* The 80486 book says that the answer is +0, but a real + 80486 behaves this way. + IEEE-754 apparently says it should be this way. */ + setsign(dest, sign); + return TAG_Zero; + } + /* Must have infinities, NaNs, etc */ + else if ((taga == TW_NaN) || (tagb == TW_NaN)) { + return real_2op_NaN(b, tagb, deststnr, &st(0)); + } else if (((taga == TW_Infinity) && (tagb == TAG_Zero)) + || ((tagb == TW_Infinity) && (taga == TAG_Zero))) { + return arith_invalid(deststnr); /* Zero*Infinity is invalid */ + } else if (((taga == TW_Denormal) || (tagb == TW_Denormal)) + && (denormal_operand() < 0)) { + return FPU_Exception; + } else if (taga == TW_Infinity) { + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, sign); + return TAG_Special; + } else if (tagb == TW_Infinity) { + FPU_copy_to_regi(b, TAG_Special, deststnr); + setsign(dest, sign); + return TAG_Special; + } +#ifdef PARANOID + else { + EXCEPTION(EX_INTERNAL | 0x102); + return FPU_Exception; + } +#endif /* PARANOID */ + + return 0; +} diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S new file mode 100644 index 000000000..7f6b4392a --- /dev/null +++ b/arch/x86/math-emu/reg_norm.S @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | reg_norm.S | + | | + | Copyright (C) 1992,1993,1994,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | Normalize the value in a FPU_REG. | + | | + | Call from C as: | + | int FPU_normalize(FPU_REG *n) | + | | + | int FPU_normalize_nuo(FPU_REG *n) | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + + +.text +ENTRY(FPU_normalize) + pushl %ebp + movl %esp,%ebp + pushl %ebx + + movl PARAM1,%ebx + + movl SIGH(%ebx),%edx + movl SIGL(%ebx),%eax + + orl %edx,%edx /* ms bits */ + js L_done /* Already normalized */ + jnz L_shift_1 /* Shift left 1 - 31 bits */ + + orl %eax,%eax + jz L_zero /* The contents are zero */ + + movl %eax,%edx + xorl %eax,%eax + subw $32,EXP(%ebx) /* This can cause an underflow */ + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%eax,%edx + shl %cl,%eax + subw %cx,EXP(%ebx) /* This can cause an underflow */ + + movl %edx,SIGH(%ebx) + movl %eax,SIGL(%ebx) + +L_done: + cmpw EXP_OVER,EXP(%ebx) + jge L_overflow + + cmpw EXP_UNDER,EXP(%ebx) + jle L_underflow + +L_exit_valid: + movl TAG_Valid,%eax + + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + andw $0x7fff,EXP(%ebx) + +L_exit: + popl %ebx + leave + ret + + +L_zero: + movw $0,EXP(%ebx) + movl TAG_Zero,%eax + jmp L_exit + +L_underflow: + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + push %ebx + call arith_underflow + pop %ebx + jmp L_exit + +L_overflow: + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + push %ebx + call arith_overflow + pop %ebx + jmp L_exit +ENDPROC(FPU_normalize) + + + +/* Normalise without reporting underflow or overflow */ +ENTRY(FPU_normalize_nuo) + pushl %ebp + movl %esp,%ebp + pushl %ebx + + movl PARAM1,%ebx + + movl SIGH(%ebx),%edx + movl SIGL(%ebx),%eax + + orl %edx,%edx /* ms bits */ + js L_exit_nuo_valid /* Already normalized */ + jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */ + + orl %eax,%eax + jz L_exit_nuo_zero /* The contents are zero */ + + movl %eax,%edx + xorl %eax,%eax + subw $32,EXP(%ebx) /* This can cause an underflow */ + +/* We need to shift left by 1 - 31 bits */ +L_nuo_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%eax,%edx + shl %cl,%eax + subw %cx,EXP(%ebx) /* This can cause an underflow */ + + movl %edx,SIGH(%ebx) + movl %eax,SIGL(%ebx) + +L_exit_nuo_valid: + movl TAG_Valid,%eax + + popl %ebx + leave + ret + +L_exit_nuo_zero: + movl TAG_Zero,%eax + movw EXP_UNDER,EXP(%ebx) + + popl %ebx + leave + ret +ENDPROC(FPU_normalize_nuo) diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S new file mode 100644 index 000000000..04563421e --- /dev/null +++ b/arch/x86/math-emu/reg_round.S @@ -0,0 +1,711 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "reg_round.S" +/*---------------------------------------------------------------------------+ + | reg_round.S | + | | + | Rounding/truncation/etc for FPU basic arithmetic functions. | + | | + | Copyright (C) 1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | This code has four possible entry points. | + | The following must be entered by a jmp instruction: | + | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | + | | + | The FPU_round entry point is intended to be used by C code. | + | From C, call as: | + | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + | For correct "up" and "down" rounding, the argument must have the correct | + | sign. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Four entry points. | + | | + | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | + | %eax:%ebx 64 bit significand | + | %edx 32 bit extension of the significand | + | %edi pointer to an FPU_REG for the result to be stored | + | stack calling function must have set up a C stack frame and | + | pushed %esi, %edi, and %ebx | + | | + | Needed just for the fpu_reg_round_sqrt entry point: | + | %cx A control word in the same format as the FPU control word. | + | Otherwise, PARAM4 must give such a value. | + | | + | | + | The significand and its extension are assumed to be exact in the | + | following sense: | + | If the significand by itself is the exact result then the significand | + | extension (%edx) must contain 0, otherwise the significand extension | + | must be non-zero. | + | If the significand extension is non-zero then the significand is | + | smaller than the magnitude of the correct exact result by an amount | + | greater than zero and less than one ls bit of the significand. | + | The significand extension is only required to have three possible | + | non-zero values: | + | less than 0x80000000 <=> the significand is less than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | + | smaller than the magnitude of the true | + | exact result. | + | greater than 0x80000000 <=> the significand is more than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The code in this module has become quite complex, but it should handle | + | all of the FPU flags which are set at this stage of the basic arithmetic | + | computations. | + | There are a few rare cases where the results are not set identically to | + | a real FPU. These require a bit more thought because at this stage the | + | results of the code here appear to be more consistent... | + | This may be changed in a future version. | + +---------------------------------------------------------------------------*/ + + +#include "fpu_emu.h" +#include "exception.h" +#include "control_w.h" + +/* Flags for FPU_bits_lost */ +#define LOST_DOWN $1 +#define LOST_UP $2 + +/* Flags for FPU_denormal */ +#define DENORMAL $1 +#define UNMASKED_UNDERFLOW $2 + + +#ifndef NON_REENTRANT_FPU +/* Make the code re-entrant by putting + local storage on the stack: */ +#define FPU_bits_lost (%esp) +#define FPU_denormal 1(%esp) + +#else +/* Not re-entrant, so we can gain speed by putting + local storage in a static area: */ +.data + .align 4,0 +FPU_bits_lost: + .byte 0 +FPU_denormal: + .byte 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +.globl fpu_reg_round +.globl fpu_Arith_exit + +/* Entry point when called from C */ +ENTRY(FPU_round) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%edi + movl SIGH(%edi),%eax + movl SIGL(%edi),%ebx + movl PARAM2,%edx + +fpu_reg_round: /* Normal entry point */ + movl PARAM4,%ecx + +#ifndef NON_REENTRANT_FPU + pushl %ebx /* adjust the stack pointer */ +#endif /* NON_REENTRANT_FPU */ + +#ifdef PARANOID +/* Cannot use this here yet */ +/* orl %eax,%eax */ +/* jns L_entry_bugged */ +#endif /* PARANOID */ + + cmpw EXP_UNDER,EXP(%edi) + jle L_Make_denorm /* The number is a de-normal */ + + movb $0,FPU_denormal /* 0 -> not a de-normal */ + +Denorm_done: + movb $0,FPU_bits_lost /* No bits yet lost in rounding */ + + movl %ecx,%esi + andl CW_PC,%ecx + cmpl PR_64_BITS,%ecx + je LRound_To_64 + + cmpl PR_53_BITS,%ecx + je LRound_To_53 + + cmpl PR_24_BITS,%ecx + je LRound_To_24 + +#ifdef PECULIAR_486 +/* With the precision control bits set to 01 "(reserved)", a real 80486 + behaves as if the precision control bits were set to 11 "64 bits" */ + cmpl PR_RESERVED_BITS,%ecx + je LRound_To_64 +#ifdef PARANOID + jmp L_bugged_denorm_486 +#endif /* PARANOID */ +#else +#ifdef PARANOID + jmp L_bugged_denorm /* There is no bug, just a bad control word */ +#endif /* PARANOID */ +#endif /* PECULIAR_486 */ + + +/* Round etc to 24 bit precision */ +LRound_To_24: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_24 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_24 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_24 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_24 + +#ifdef PARANOID + jmp L_bugged_round24 +#endif /* PARANOID */ + +LUp_24: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_24 /* If negative then up==truncate */ + + jmp LCheck_24_round_up + +LDown_24: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_24 /* If positive then down==truncate */ + +LCheck_24_round_up: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jnz LDo_24_round_up + jmp L_Re_normalise + +LRound_nearest_24: + /* Do rounding of the 24th bit if needed (nearest or even) */ + movl %eax,%ecx + andl $0x000000ff,%ecx + cmpl $0x00000080,%ecx + jc LCheck_truncate_24 /* less than half, no increment needed */ + + jne LGreater_Half_24 /* greater than half, increment needed */ + + /* Possibly half, we need to check the ls bits */ + orl %ebx,%ebx + jnz LGreater_Half_24 /* greater than half, increment needed */ + + orl %edx,%edx + jnz LGreater_Half_24 /* greater than half, increment needed */ + + /* Exactly half, increment only if 24th bit is 1 (round to even) */ + testl $0x00000100,%eax + jz LDo_truncate_24 + +LGreater_Half_24: /* Rounding: increment at the 24th bit */ +LDo_24_round_up: + andl $0xffffff00,%eax /* Truncate to 24 bits */ + xorl %ebx,%ebx + movb LOST_UP,FPU_bits_lost + addl $0x00000100,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_24: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jz L_Re_normalise /* No truncation needed */ + +LDo_truncate_24: + andl $0xffffff00,%eax /* Truncate to 24 bits */ + xorl %ebx,%ebx + movb LOST_DOWN,FPU_bits_lost + jmp L_Re_normalise + + +/* Round etc to 53 bit precision */ +LRound_To_53: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_53 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_53 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_53 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_53 + +#ifdef PARANOID + jmp L_bugged_round53 +#endif /* PARANOID */ + +LUp_53: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_53 /* If negative then up==truncate */ + + jmp LCheck_53_round_up + +LDown_53: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_53 /* If positive then down==truncate */ + +LCheck_53_round_up: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jnz LDo_53_round_up + jmp L_Re_normalise + +LRound_nearest_53: + /* Do rounding of the 53rd bit if needed (nearest or even) */ + movl %ebx,%ecx + andl $0x000007ff,%ecx + cmpl $0x00000400,%ecx + jc LCheck_truncate_53 /* less than half, no increment needed */ + + jnz LGreater_Half_53 /* greater than half, increment needed */ + + /* Possibly half, we need to check the ls bits */ + orl %edx,%edx + jnz LGreater_Half_53 /* greater than half, increment needed */ + + /* Exactly half, increment only if 53rd bit is 1 (round to even) */ + testl $0x00000800,%ebx + jz LTruncate_53 + +LGreater_Half_53: /* Rounding: increment at the 53rd bit */ +LDo_53_round_up: + movb LOST_UP,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits */ + addl $0x00000800,%ebx + adcl $0,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_53: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jz L_Re_normalise + +LTruncate_53: + movb LOST_DOWN,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits */ + jmp L_Re_normalise + + +/* Round etc to 64 bit precision */ +LRound_To_64: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_64 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_64 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_64 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_64 + +#ifdef PARANOID + jmp L_bugged_round64 +#endif /* PARANOID */ + +LUp_64: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_64 /* If negative then up==truncate */ + + orl %edx,%edx + jnz LDo_64_round_up + jmp L_Re_normalise + +LDown_64: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_64 /* If positive then down==truncate */ + + orl %edx,%edx + jnz LDo_64_round_up + jmp L_Re_normalise + +LRound_nearest_64: + cmpl $0x80000000,%edx + jc LCheck_truncate_64 + + jne LDo_64_round_up + + /* Now test for round-to-even */ + testb $1,%bl + jz LCheck_truncate_64 + +LDo_64_round_up: + movb LOST_UP,FPU_bits_lost + addl $1,%ebx + adcl $0,%eax + +LCheck_Round_Overflow: + jnc L_Re_normalise + + /* Overflow, adjust the result (significand to 1.0) */ + rcrl $1,%eax + rcrl $1,%ebx + incw EXP(%edi) + jmp L_Re_normalise + +LCheck_truncate_64: + orl %edx,%edx + jz L_Re_normalise + +LTruncate_64: + movb LOST_DOWN,FPU_bits_lost + +L_Re_normalise: + testb $0xff,FPU_denormal + jnz Normalise_result + +L_Normalised: + movl TAG_Valid,%edx + +L_deNormalised: + cmpb LOST_UP,FPU_bits_lost + je L_precision_lost_up + + cmpb LOST_DOWN,FPU_bits_lost + je L_precision_lost_down + +L_no_precision_loss: + /* store the result */ + +L_Store_significand: + movl %eax,SIGH(%edi) + movl %ebx,SIGL(%edi) + + cmpw EXP_OVER,EXP(%edi) + jge L_overflow + + movl %edx,%eax + + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%edi) + andw $0x7fff,EXP(%edi) + +fpu_reg_round_signed_special_exit: + + cmpb SIGN_POS,PARAM5 + je fpu_reg_round_special_exit + + orw $0x8000,EXP(%edi) /* Negative sign for the result. */ + +fpu_reg_round_special_exit: + +#ifndef NON_REENTRANT_FPU + popl %ebx /* adjust the stack pointer */ +#endif /* NON_REENTRANT_FPU */ + +fpu_Arith_exit: + popl %ebx + popl %edi + popl %esi + leave + ret + + +/* + * Set the FPU status flags to represent precision loss due to + * round-up. + */ +L_precision_lost_up: + push %edx + push %eax + call set_precision_flag_up + popl %eax + popl %edx + jmp L_no_precision_loss + +/* + * Set the FPU status flags to represent precision loss due to + * truncation. + */ +L_precision_lost_down: + push %edx + push %eax + call set_precision_flag_down + popl %eax + popl %edx + jmp L_no_precision_loss + + +/* + * The number is a denormal (which might get rounded up to a normal) + * Shift the number right the required number of bits, which will + * have to be undone later... + */ +L_Make_denorm: + /* The action to be taken depends upon whether the underflow + exception is masked */ + testb CW_Underflow,%cl /* Underflow mask. */ + jz Unmasked_underflow /* Do not make a denormal. */ + + movb DENORMAL,FPU_denormal + + pushl %ecx /* Save */ + movw EXP_UNDER+1,%cx + subw EXP(%edi),%cx + + cmpw $64,%cx /* shrd only works for 0..31 bits */ + jnc Denorm_shift_more_than_63 + + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc Denorm_shift_more_than_32 + +/* + * We got here without jumps by assuming that the most common requirement + * is for a small de-normalising shift. + * Shift by [1..31] bits + */ + addw %cx,EXP(%edi) + orl %edx,%edx /* extension */ + setne %ch /* Save whether %edx is non-zero */ + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orb %ch,%dl + popl %ecx + jmp Denorm_done + +/* Shift by [32..63] bits */ +Denorm_shift_more_than_32: + addw %cx,EXP(%edi) + subb $32,%cl + orl %edx,%edx + setne %ch + orb %ch,%bl + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %edx,%edx /* test these 32 bits */ + setne %cl + orb %ch,%bl + orb %cl,%bl + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + popl %ecx + jmp Denorm_done + +/* Shift by [64..) bits */ +Denorm_shift_more_than_63: + cmpw $64,%cx + jne Denorm_shift_more_than_64 + +/* Exactly 64 bit shift */ + addw %cx,EXP(%edi) + xorl %ecx,%ecx + orl %edx,%edx + setne %cl + orl %ebx,%ebx + setne %ch + orb %ch,%cl + orb %cl,%al + movl %eax,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp Denorm_done + +Denorm_shift_more_than_64: + movw EXP_UNDER+1,EXP(%edi) +/* This is easy, %eax must be non-zero, so.. */ + movl $1,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp Denorm_done + + +Unmasked_underflow: + movb UNMASKED_UNDERFLOW,FPU_denormal + jmp Denorm_done + + +/* Undo the de-normalisation. */ +Normalise_result: + cmpb UNMASKED_UNDERFLOW,FPU_denormal + je Signal_underflow + +/* The number must be a denormal if we got here. */ +#ifdef PARANOID + /* But check it... just in case. */ + cmpw EXP_UNDER+1,EXP(%edi) + jne L_norm_bugged +#endif /* PARANOID */ + +#ifdef PECULIAR_486 + /* + * This implements a special feature of 80486 behaviour. + * Underflow will be signalled even if the number is + * not a denormal after rounding. + * This difference occurs only for masked underflow, and not + * in the unmasked case. + * Actual 80486 behaviour differs from this in some circumstances. + */ + orl %eax,%eax /* ms bits */ + js LPseudoDenormal /* Will be masked underflow */ +#else + orl %eax,%eax /* ms bits */ + js L_Normalised /* No longer a denormal */ +#endif /* PECULIAR_486 */ + + jnz LDenormal_adj_exponent + + orl %ebx,%ebx + jz L_underflow_to_zero /* The contents are zero */ + +LDenormal_adj_exponent: + decw EXP(%edi) + +LPseudoDenormal: + testb $0xff,FPU_bits_lost /* bits lost == underflow */ + movl TAG_Special,%edx + jz L_deNormalised + + /* There must be a masked underflow */ + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + movl TAG_Special,%edx + jmp L_deNormalised + + +/* + * The operations resulted in a number too small to represent. + * Masked response. + */ +L_underflow_to_zero: + push %eax + call set_precision_flag_down + popl %eax + + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + +/* Reduce the exponent to EXP_UNDER */ + movw EXP_UNDER,EXP(%edi) + movl TAG_Zero,%edx + jmp L_Store_significand + + +/* The operations resulted in a number too large to represent. */ +L_overflow: + addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */ + push %edi + call arith_overflow + pop %edi + jmp fpu_reg_round_signed_special_exit + + +Signal_underflow: + /* The number may have been changed to a non-denormal */ + /* by the rounding operations. */ + cmpw EXP_UNDER,EXP(%edi) + jle Do_unmasked_underflow + + jmp L_Normalised + +Do_unmasked_underflow: + /* Increase the exponent by the magic number */ + addw $(3*(1<<13)),EXP(%edi) + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + jmp L_Normalised + + +#ifdef PARANOID +#ifdef PECULIAR_486 +L_bugged_denorm_486: + pushl EX_INTERNAL|0x236 + call EXCEPTION + popl %ebx + jmp L_exception_exit +#else +L_bugged_denorm: + pushl EX_INTERNAL|0x230 + call EXCEPTION + popl %ebx + jmp L_exception_exit +#endif /* PECULIAR_486 */ + +L_bugged_round24: + pushl EX_INTERNAL|0x231 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_bugged_round53: + pushl EX_INTERNAL|0x232 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_bugged_round64: + pushl EX_INTERNAL|0x233 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_norm_bugged: + pushl EX_INTERNAL|0x234 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_entry_bugged: + pushl EX_INTERNAL|0x235 + call EXCEPTION + popl %ebx +L_exception_exit: + mov $-1,%eax + jmp fpu_reg_round_special_exit +#endif /* PARANOID */ + +ENDPROC(FPU_round) diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S new file mode 100644 index 000000000..50fe9f8c8 --- /dev/null +++ b/arch/x86/math-emu/reg_u_add.S @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "reg_u_add.S" +/*---------------------------------------------------------------------------+ + | reg_u_add.S | + | | + | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the | + | result in a destination FPU_REG. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Call from C as: | + | int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | + | int control_w) | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/* + | Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ). + | Takes two valid reg f.p. numbers (TAG_Valid), which are + | treated as unsigned numbers, + | and returns their sum as a TAG_Valid or TAG_Special f.p. number. + | The returned number is normalized. + | Basic checks are performed if PARANOID is defined. + */ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + +.text +ENTRY(FPU_u_add) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* source 1 */ + movl PARAM2,%edi /* source 2 */ + + movl PARAM6,%ecx + movl %ecx,%edx + subl PARAM7,%ecx /* exp1 - exp2 */ + jge L_arg1_larger + + /* num1 is smaller */ + movl SIGL(%esi),%ebx + movl SIGH(%esi),%eax + + movl %edi,%esi + movl PARAM7,%edx + negw %cx + jmp L_accum_loaded + +L_arg1_larger: + /* num1 has larger or equal exponent */ + movl SIGL(%edi),%ebx + movl SIGH(%edi),%eax + +L_accum_loaded: + movl PARAM3,%edi /* destination */ + movw %dx,EXP(%edi) /* Copy exponent to destination */ + + xorl %edx,%edx /* clear the extension */ + +#ifdef PARANOID + testl $0x80000000,%eax + je L_bugged + + testl $0x80000000,SIGH(%esi) + je L_bugged +#endif /* PARANOID */ + +/* The number to be shifted is in %eax:%ebx:%edx */ + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + jmp L_shift_done + +L_more_than_31: + cmpw $64,%cx + jnc L_more_than_63 + + subb $32,%cl + jz L_exactly_32 + + shrd %cl,%eax,%edx + shr %cl,%eax + orl %ebx,%ebx + jz L_more_31_no_low /* none of the lowest bits is set */ + + orl $1,%edx /* record the fact in the extension */ + +L_more_31_no_low: + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_exactly_32: + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_more_than_63: + cmpw $65,%cx + jnc L_more_than_64 + + movl %eax,%edx + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_64: + movl $1,%edx /* The shifted nr always at least one '1' */ + +L_more_63_no_low: + xorl %ebx,%ebx + xorl %eax,%eax + +L_shift_done: + /* Now do the addition */ + addl SIGL(%esi),%ebx + adcl SIGH(%esi),%eax + jnc L_round_the_result + + /* Overflow, adjust the result */ + rcrl $1,%eax + rcrl $1,%ebx + rcrl $1,%edx + jnc L_no_bit_lost + + orl $1,%edx + +L_no_bit_lost: + incw EXP(%edi) + +L_round_the_result: + jmp fpu_reg_round /* Round the result */ + + + +#ifdef PARANOID +/* If we ever get here then we have problems! */ +L_bugged: + pushl EX_INTERNAL|0x201 + call EXCEPTION + pop %ebx + movl $-1,%eax + jmp L_exit + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret +#endif /* PARANOID */ +ENDPROC(FPU_u_add) diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S new file mode 100644 index 000000000..94d545e11 --- /dev/null +++ b/arch/x86/math-emu/reg_u_div.S @@ -0,0 +1,474 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "reg_u_div.S" +/*---------------------------------------------------------------------------+ + | reg_u_div.S | + | | + | Divide one FPU_REG by another and put the result in a destination FPU_REG.| + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Call from C as: | + | int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, | + | unsigned int control_word, char *sign) | + | | + | Does not compute the destination exponent, but does adjust it. | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + + +/* #define dSIGL(x) (x) */ +/* #define dSIGH(x) 4(x) */ + + +#ifndef NON_REENTRANT_FPU +/* + Local storage on the stack: + Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + Overflow flag: ovfl_flag + */ +#define FPU_accum_3 -4(%ebp) +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) +#define FPU_result_1 -20(%ebp) +#define FPU_result_2 -24(%ebp) +#define FPU_ovfl_flag -28(%ebp) + +#else +.data +/* + Local storage in a static area: + Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + Overflow flag: ovfl_flag + */ + .align 4,0 +FPU_accum_3: + .long 0 +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 +FPU_result_1: + .long 0 +FPU_result_2: + .long 0 +FPU_ovfl_flag: + .byte 0 +#endif /* NON_REENTRANT_FPU */ + +#define REGA PARAM1 +#define REGB PARAM2 +#define DEST PARAM3 + +.text +ENTRY(FPU_u_div) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl REGA,%esi + movl REGB,%ebx + movl DEST,%edi + + movswl EXP(%esi),%edx + movswl EXP(%ebx),%eax + subl %eax,%edx + addl EXP_BIAS,%edx + + /* A denormal and a large number can cause an exponent underflow */ + cmpl EXP_WAY_UNDER,%edx + jg xExp_not_underflow + + /* Set to a really low value allow correct handling */ + movl EXP_WAY_UNDER,%edx + +xExp_not_underflow: + + movw %dx,EXP(%edi) + +#ifdef PARANOID +/* testl $0x80000000, SIGH(%esi) // Dividend */ +/* je L_bugged */ + testl $0x80000000, SIGH(%ebx) /* Divisor */ + je L_bugged +#endif /* PARANOID */ + +/* Check if the divisor can be treated as having just 32 bits */ + cmpl $0,SIGL(%ebx) + jnz L_Full_Division /* Can't do a quick divide */ + +/* We should be able to zip through the division here */ + movl SIGH(%ebx),%ecx /* The divisor */ + movl SIGH(%esi),%edx /* Dividend */ + movl SIGL(%esi),%eax /* Dividend */ + + cmpl %ecx,%edx + setaeb FPU_ovfl_flag /* Keep a record */ + jb L_no_adjust + + subl %ecx,%edx /* Prevent the overflow */ + +L_no_adjust: + /* Divide the 64 bit number by the 32 bit denominator */ + divl %ecx + movl %eax,FPU_result_2 + + /* Work on the remainder of the first division */ + xorl %eax,%eax + divl %ecx + movl %eax,FPU_result_1 + + /* Work on the remainder of the 64 bit division */ + xorl %eax,%eax + divl %ecx + + testb $255,FPU_ovfl_flag /* was the num > denom ? */ + je L_no_overflow + + /* Do the shifting here */ + /* increase the exponent */ + incw EXP(%edi) + + /* shift the mantissa right one bit */ + stc /* To set the ms bit */ + rcrl FPU_result_2 + rcrl FPU_result_1 + rcrl %eax + +L_no_overflow: + jmp LRound_precision /* Do the rounding as required */ + + +/*---------------------------------------------------------------------------+ + | Divide: Return arg1/arg2 to arg3. | + | | + | This routine does not use the exponents of arg1 and arg2, but does | + | adjust the exponent of arg3. | + | | + | The maximum returned value is (ignoring exponents) | + | .ffffffff ffffffff | + | ------------------ = 1.ffffffff fffffffe | + | .80000000 00000000 | + | and the minimum is | + | .80000000 00000000 | + | ------------------ = .80000000 00000001 (rounded) | + | .ffffffff ffffffff | + | | + +---------------------------------------------------------------------------*/ + + +L_Full_Division: + /* Save extended dividend in local register */ + movl SIGL(%esi),%eax + movl %eax,FPU_accum_2 + movl SIGH(%esi),%eax + movl %eax,FPU_accum_3 + xorl %eax,%eax + movl %eax,FPU_accum_1 /* zero the extension */ + movl %eax,FPU_accum_0 /* zero the extension */ + + movl SIGL(%esi),%eax /* Get the current num */ + movl SIGH(%esi),%edx + +/*----------------------------------------------------------------------*/ +/* Initialization done. + Do the first 32 bits. */ + + movb $0,FPU_ovfl_flag + cmpl SIGH(%ebx),%edx /* Test for imminent overflow */ + jb LLess_than_1 + ja LGreater_than_1 + + cmpl SIGL(%ebx),%eax + jb LLess_than_1 + +LGreater_than_1: +/* The dividend is greater or equal, would cause overflow */ + setaeb FPU_ovfl_flag /* Keep a record */ + + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx /* Prevent the overflow */ + movl %eax,FPU_accum_2 + movl %edx,FPU_accum_3 + +LLess_than_1: +/* At this point, we have a dividend < divisor, with a record of + adjustment in FPU_ovfl_flag */ + + /* We will divide by a number which is too large */ + movl SIGH(%ebx),%ecx + addl $1,%ecx + jnc LFirst_div_not_1 + + /* here we need to divide by 100000000h, + i.e., no division at all.. */ + mov %edx,%eax + jmp LFirst_div_done + +LFirst_div_not_1: + divl %ecx /* Divide the numerator by the augmented + denom ms dw */ + +LFirst_div_done: + movl %eax,FPU_result_2 /* Put the result in the answer */ + + mull SIGH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_2 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_3 + + movl FPU_result_2,%eax /* Get the result back */ + mull SIGL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + sbbl $0,FPU_accum_3 + je LDo_2nd_32_bits /* Must check for non-zero result here */ + +#ifdef PARANOID + jb L_bugged_1 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + incl FPU_result_2 /* Correct the answer */ + + movl SIGL(%ebx),%eax + movl SIGH(%ebx),%edx + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + sbbl $0,FPU_accum_3 + jne L_bugged_1 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* Half of the main problem is done, there is just a reduced numerator + to handle now. + Work with the second 32 bits, FPU_accum_0 not used from now on */ +LDo_2nd_32_bits: + movl FPU_accum_2,%edx /* get the reduced num */ + movl FPU_accum_1,%eax + + /* need to check for possible subsequent overflow */ + cmpl SIGH(%ebx),%edx + jb LDo_2nd_div + ja LPrevent_2nd_overflow + + cmpl SIGL(%ebx),%eax + jb LDo_2nd_div + +LPrevent_2nd_overflow: +/* The numerator is greater or equal, would cause overflow */ + /* prevent overflow */ + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx + movl %edx,FPU_accum_2 + movl %eax,FPU_accum_1 + + incl FPU_result_2 /* Reflect the subtraction in the answer */ + +#ifdef PARANOID + je L_bugged_2 /* Can't bump the result to 1.0 */ +#endif /* PARANOID */ + +LDo_2nd_div: + cmpl $0,%ecx /* augmented denom msw */ + jnz LSecond_div_not_1 + + /* %ecx == 0, we are dividing by 1.0 */ + mov %edx,%eax + jmp LSecond_div_done + +LSecond_div_not_1: + divl %ecx /* Divide the numerator by the denom ms dw */ + +LSecond_div_done: + movl %eax,FPU_result_1 /* Put the result in the answer */ + + mull SIGH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + movl FPU_result_1,%eax /* Get the result back */ + mull SIGL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + jz LDo_3rd_32_bits + +#ifdef PARANOID + cmpl $1,FPU_accum_2 + jne L_bugged_2 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + movl SIGL(%ebx),%eax + movl SIGH(%ebx),%edx + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 + jne L_bugged_2 +#endif /* PARANOID */ + + addl $1,FPU_result_1 /* Correct the answer */ + adcl $0,FPU_result_2 + +#ifdef PARANOID + jc L_bugged_2 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* The division is essentially finished here, we just need to perform + tidying operations. + Deal with the 3rd 32 bits */ +LDo_3rd_32_bits: + movl FPU_accum_1,%edx /* get the reduced num */ + movl FPU_accum_0,%eax + + /* need to check for possible subsequent overflow */ + cmpl SIGH(%ebx),%edx /* denom */ + jb LRound_prep + ja LPrevent_3rd_overflow + + cmpl SIGL(%ebx),%eax /* denom */ + jb LRound_prep + +LPrevent_3rd_overflow: + /* prevent overflow */ + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx + movl %edx,FPU_accum_1 + movl %eax,FPU_accum_0 + + addl $1,FPU_result_1 /* Reflect the subtraction in the answer */ + adcl $0,FPU_result_2 + jne LRound_prep + jnc LRound_prep + + /* This is a tricky spot, there is an overflow of the answer */ + movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */ + +LRound_prep: +/* + * Prepare for rounding. + * To test for rounding, we just need to compare 2*accum with the + * denom. + */ + movl FPU_accum_0,%ecx + movl FPU_accum_1,%edx + movl %ecx,%eax + orl %edx,%eax + jz LRound_ovfl /* The accumulator contains zero. */ + + /* Multiply by 2 */ + clc + rcll $1,%ecx + rcll $1,%edx + jc LRound_large /* No need to compare, denom smaller */ + + subl SIGL(%ebx),%ecx + sbbl SIGH(%ebx),%edx + jnc LRound_not_small + + movl $0x70000000,%eax /* Denom was larger */ + jmp LRound_ovfl + +LRound_not_small: + jnz LRound_large + + movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */ + jmp LRound_ovfl + +LRound_large: + movl $0xff000000,%eax /* Denom was smaller */ + +LRound_ovfl: +/* We are now ready to deal with rounding, but first we must get + the bits properly aligned */ + testb $255,FPU_ovfl_flag /* was the num > denom ? */ + je LRound_precision + + incw EXP(%edi) + + /* shift the mantissa right one bit */ + stc /* Will set the ms bit */ + rcrl FPU_result_2 + rcrl FPU_result_1 + rcrl %eax + +/* Round the result as required */ +LRound_precision: + decw EXP(%edi) /* binary point between 1st & 2nd bits */ + + movl %eax,%edx + movl FPU_result_1,%ebx + movl FPU_result_2,%eax + jmp fpu_reg_round + + +#ifdef PARANOID +/* The logic is wrong if we got here */ +L_bugged: + pushl EX_INTERNAL|0x202 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_1: + pushl EX_INTERNAL|0x203 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_2: + pushl EX_INTERNAL|0x204 + call EXCEPTION + pop %ebx + jmp L_exit + +L_exit: + movl $-1,%eax + popl %ebx + popl %edi + popl %esi + + leave + ret +#endif /* PARANOID */ + +ENDPROC(FPU_u_div) diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S new file mode 100644 index 000000000..21cde47fb --- /dev/null +++ b/arch/x86/math-emu/reg_u_mul.S @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "reg_u_mul.S" +/*---------------------------------------------------------------------------+ + | reg_u_mul.S | + | | + | Core multiplication routine | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Basic multiplication routine. | + | Does not check the resulting exponent for overflow/underflow | + | | + | FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); | + | | + | Internal working is at approx 128 bits. | + | Result is rounded to nearest 53 or 64 bits, using "nearest or even". | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + + + +#ifndef NON_REENTRANT_FPU +/* Local storage on the stack: */ +#define FPU_accum_0 -4(%ebp) /* ms word */ +#define FPU_accum_1 -8(%ebp) + +#else +/* Local storage in a static area: */ +.data + .align 4,0 +FPU_accum_0: + .long 0 +FPU_accum_1: + .long 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(FPU_u_mul) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $8,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + movl PARAM2,%edi + +#ifdef PARANOID + testl $0x80000000,SIGH(%esi) + jz L_bugged + testl $0x80000000,SIGH(%edi) + jz L_bugged +#endif /* PARANOID */ + + xorl %ecx,%ecx + xorl %ebx,%ebx + + movl SIGL(%esi),%eax + mull SIGL(%edi) + movl %eax,FPU_accum_0 + movl %edx,FPU_accum_1 + + movl SIGL(%esi),%eax + mull SIGH(%edi) + addl %eax,FPU_accum_1 + adcl %edx,%ebx +/* adcl $0,%ecx // overflow here is not possible */ + + movl SIGH(%esi),%eax + mull SIGL(%edi) + addl %eax,FPU_accum_1 + adcl %edx,%ebx + adcl $0,%ecx + + movl SIGH(%esi),%eax + mull SIGH(%edi) + addl %eax,%ebx + adcl %edx,%ecx + + /* Get the sum of the exponents. */ + movl PARAM6,%eax + subl EXP_BIAS-1,%eax + + /* Two denormals can cause an exponent underflow */ + cmpl EXP_WAY_UNDER,%eax + jg Exp_not_underflow + + /* Set to a really low value allow correct handling */ + movl EXP_WAY_UNDER,%eax + +Exp_not_underflow: + +/* Have now finished with the sources */ + movl PARAM3,%edi /* Point to the destination */ + movw %ax,EXP(%edi) + +/* Now make sure that the result is normalized */ + testl $0x80000000,%ecx + jnz LResult_Normalised + + /* Normalize by shifting left one bit */ + shll $1,FPU_accum_0 + rcll $1,FPU_accum_1 + rcll $1,%ebx + rcll $1,%ecx + decw EXP(%edi) + +LResult_Normalised: + movl FPU_accum_0,%eax + movl FPU_accum_1,%edx + orl %eax,%eax + jz L_extent_zero + + orl $1,%edx + +L_extent_zero: + movl %ecx,%eax + jmp fpu_reg_round + + +#ifdef PARANOID +L_bugged: + pushl EX_INTERNAL|0x205 + call EXCEPTION + pop %ebx + jmp L_exit + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret +#endif /* PARANOID */ + +ENDPROC(FPU_u_mul) diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S new file mode 100644 index 000000000..f05dea7de --- /dev/null +++ b/arch/x86/math-emu/reg_u_sub.S @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "reg_u_sub.S" +/*---------------------------------------------------------------------------+ + | reg_u_sub.S | + | | + | Core floating point subtraction routine. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Call from C as: | + | int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | + | int control_w) | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/* + | Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ). + | Takes two valid reg f.p. numbers (TAG_Valid), which are + | treated as unsigned numbers, + | and returns their difference as a TAG_Valid or TAG_Zero f.p. + | number. + | The first number (arg1) must be the larger. + | The returned number is normalized. + | Basic checks are performed if PARANOID is defined. + */ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + +.text +ENTRY(FPU_u_sub) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* source 1 */ + movl PARAM2,%edi /* source 2 */ + + movl PARAM6,%ecx + subl PARAM7,%ecx /* exp1 - exp2 */ + +#ifdef PARANOID + /* source 2 is always smaller than source 1 */ + js L_bugged_1 + + testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */ + je L_bugged_2 + + testl $0x80000000,SIGH(%esi) + je L_bugged_2 +#endif /* PARANOID */ + +/*--------------------------------------+ + | Form a register holding the | + | smaller number | + +--------------------------------------*/ + movl SIGH(%edi),%eax /* register ms word */ + movl SIGL(%edi),%ebx /* register ls word */ + + movl PARAM3,%edi /* destination */ + movl PARAM6,%edx + movw %dx,EXP(%edi) /* Copy exponent to destination */ + + xorl %edx,%edx /* register extension */ + +/*--------------------------------------+ + | Shift the temporary register | + | right the required number of | + | places. | + +--------------------------------------*/ + + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + jmp L_shift_done + +L_more_than_31: + cmpw $64,%cx + jnc L_more_than_63 + + subb $32,%cl + jz L_exactly_32 + + shrd %cl,%eax,%edx + shr %cl,%eax + orl %ebx,%ebx + jz L_more_31_no_low /* none of the lowest bits is set */ + + orl $1,%edx /* record the fact in the extension */ + +L_more_31_no_low: + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_exactly_32: + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_more_than_63: + cmpw $65,%cx + jnc L_more_than_64 + + /* Shift right by 64 bits */ + movl %eax,%edx + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_64: + jne L_more_than_65 + + /* Shift right by 65 bits */ + /* Carry is clear if we get here */ + movl %eax,%edx + rcrl %edx + jnc L_shift_65_nc + + orl $1,%edx + jmp L_more_63_no_low + +L_shift_65_nc: + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_65: + movl $1,%edx /* The shifted nr always at least one '1' */ + +L_more_63_no_low: + xorl %ebx,%ebx + xorl %eax,%eax + +L_shift_done: +L_subtr: +/*------------------------------+ + | Do the subtraction | + +------------------------------*/ + xorl %ecx,%ecx + subl %edx,%ecx + movl %ecx,%edx + movl SIGL(%esi),%ecx + sbbl %ebx,%ecx + movl %ecx,%ebx + movl SIGH(%esi),%ecx + sbbl %eax,%ecx + movl %ecx,%eax + +#ifdef PARANOID + /* We can never get a borrow */ + jc L_bugged +#endif /* PARANOID */ + +/*--------------------------------------+ + | Normalize the result | + +--------------------------------------*/ + testl $0x80000000,%eax + jnz L_round /* no shifting needed */ + + orl %eax,%eax + jnz L_shift_1 /* shift left 1 - 31 bits */ + + orl %ebx,%ebx + jnz L_shift_32 /* shift left 32 - 63 bits */ + +/* + * A rare case, the only one which is non-zero if we got here + * is: 1000000 .... 0000 + * -0111111 .... 1111 1 + * -------------------- + * 0000000 .... 0000 1 + */ + + cmpl $0x80000000,%edx + jnz L_must_be_zero + + /* Shift left 64 bits */ + subw $64,EXP(%edi) + xchg %edx,%eax + jmp fpu_reg_round + +L_must_be_zero: +#ifdef PARANOID + orl %edx,%edx + jnz L_bugged_3 +#endif /* PARANOID */ + + /* The result is zero */ + movw $0,EXP(%edi) /* exponent */ + movl $0,SIGL(%edi) + movl $0,SIGH(%edi) + movl TAG_Zero,%eax + jmp L_exit + +L_shift_32: + movl %ebx,%eax + movl %edx,%ebx + movl $0,%edx + subw $32,EXP(%edi) /* Can get underflow here */ + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %eax,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%ebx,%eax + shld %cl,%edx,%ebx + shl %cl,%edx + subw %cx,EXP(%edi) /* Can get underflow here */ + +L_round: + jmp fpu_reg_round /* Round the result */ + + +#ifdef PARANOID +L_bugged_1: + pushl EX_INTERNAL|0x206 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_2: + pushl EX_INTERNAL|0x209 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_3: + pushl EX_INTERNAL|0x210 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_4: + pushl EX_INTERNAL|0x211 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged: + pushl EX_INTERNAL|0x212 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_error_exit: + movl $-1,%eax + +#endif /* PARANOID */ + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret +ENDPROC(FPU_u_sub) diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S new file mode 100644 index 000000000..226a51e99 --- /dev/null +++ b/arch/x86/math-emu/round_Xsig.S @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | round_Xsig.S | + | | + | Copyright (C) 1992,1993,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Normalize and round a 12 byte quantity. | + | Call from C as: | + | int round_Xsig(Xsig *n) | + | | + | Normalize a 12 byte quantity. | + | Call from C as: | + | int norm_Xsig(Xsig *n) | + | | + | Each function returns the size of the shift (nr of bits). | + | | + +---------------------------------------------------------------------------*/ + .file "round_Xsig.S" + +#include "fpu_emu.h" + + +.text +ENTRY(round_Xsig) + pushl %ebp + movl %esp,%ebp + pushl %ebx /* Reserve some space */ + pushl %ebx + pushl %esi + + movl PARAM1,%esi + + movl 8(%esi),%edx + movl 4(%esi),%ebx + movl (%esi),%eax + + movl $0,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_round /* Already normalized */ + jnz L_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + movl $-32,-4(%ebp) + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + subl %ecx,-4(%ebp) + shld %cl,%ebx,%edx + shld %cl,%eax,%ebx + shl %cl,%eax + +L_round: + testl $0x80000000,%eax + jz L_exit + + addl $1,%ebx + adcl $0,%edx + jnz L_exit + + movl $0x80000000,%edx + incl -4(%ebp) + +L_exit: + movl %edx,8(%esi) + movl %ebx,4(%esi) + movl %eax,(%esi) + + movl -4(%ebp),%eax + + popl %esi + popl %ebx + leave + ret +ENDPROC(round_Xsig) + + + +ENTRY(norm_Xsig) + pushl %ebp + movl %esp,%ebp + pushl %ebx /* Reserve some space */ + pushl %ebx + pushl %esi + + movl PARAM1,%esi + + movl 8(%esi),%edx + movl 4(%esi),%ebx + movl (%esi),%eax + + movl $0,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_n_exit /* Already normalized */ + jnz L_n_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + movl $-32,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_n_exit /* Normalized now */ + jnz L_n_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + addl $-32,-4(%ebp) + jmp L_n_exit /* Might not be normalized, + but shift no more. */ + +/* We need to shift left by 1 - 31 bits */ +L_n_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + subl %ecx,-4(%ebp) + shld %cl,%ebx,%edx + shld %cl,%eax,%ebx + shl %cl,%eax + +L_n_exit: + movl %edx,8(%esi) + movl %ebx,4(%esi) + movl %eax,(%esi) + + movl -4(%ebp),%eax + + popl %esi + popl %ebx + leave + ret +ENDPROC(norm_Xsig) diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S new file mode 100644 index 000000000..96f4779aa --- /dev/null +++ b/arch/x86/math-emu/shr_Xsig.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "shr_Xsig.S" +/*---------------------------------------------------------------------------+ + | shr_Xsig.S | + | | + | 12 byte right shift function | + | | + | Copyright (C) 1992,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void shr_Xsig(Xsig *arg, unsigned nr) | + | | + | Extended shift right function. | + | Fastest for small shifts. | + | Shifts the 12 byte quantity pointed to by the first arg (arg) | + | right by the number of bits specified by the second arg (nr). | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +ENTRY(shr_Xsig) + push %ebp + movl %esp,%ebp + pushl %esi + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + pushl %ebx + movl (%esi),%eax /* lsl */ + movl 4(%esi),%ebx /* midl */ + movl 8(%esi),%edx /* msl */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edx,8(%esi) + popl %ebx + popl %esi + leave + ret + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + movl 4(%esi),%eax /* midl */ + movl 8(%esi),%edx /* msl */ + shrd %cl,%edx,%eax + shr %cl,%edx + movl %eax,(%esi) + movl %edx,4(%esi) + movl $0,8(%esi) + popl %esi + leave + ret + +L_more_than_63: + cmpl $96,%ecx + jnc L_more_than_95 + + subb $64,%cl + movl 8(%esi),%eax /* msl */ + shr %cl,%eax + xorl %edx,%edx + movl %eax,(%esi) + movl %edx,4(%esi) + movl %edx,8(%esi) + popl %esi + leave + ret + +L_more_than_95: + xorl %eax,%eax + movl %eax,(%esi) + movl %eax,4(%esi) + movl %eax,8(%esi) + popl %esi + leave + ret +ENDPROC(shr_Xsig) diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h new file mode 100644 index 000000000..b77bafec9 --- /dev/null +++ b/arch/x86/math-emu/status_w.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*---------------------------------------------------------------------------+ + | status_w.h | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _STATUS_H_ +#define _STATUS_H_ + +#include "fpu_emu.h" /* for definition of PECULIAR_486 */ + +#ifdef __ASSEMBLY__ +#define Const__(x) $##x +#else +#define Const__(x) x +#endif + +#define SW_Backward Const__(0x8000) /* backward compatibility */ +#define SW_C3 Const__(0x4000) /* condition bit 3 */ +#define SW_Top Const__(0x3800) /* top of stack */ +#define SW_Top_Shift Const__(11) /* shift for top of stack bits */ +#define SW_C2 Const__(0x0400) /* condition bit 2 */ +#define SW_C1 Const__(0x0200) /* condition bit 1 */ +#define SW_C0 Const__(0x0100) /* condition bit 0 */ +#define SW_Summary Const__(0x0080) /* exception summary */ +#define SW_Stack_Fault Const__(0x0040) /* stack fault */ +#define SW_Precision Const__(0x0020) /* loss of precision */ +#define SW_Underflow Const__(0x0010) /* underflow */ +#define SW_Overflow Const__(0x0008) /* overflow */ +#define SW_Zero_Div Const__(0x0004) /* divide by zero */ +#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */ +#define SW_Invalid Const__(0x0001) /* invalid operation */ + +#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ + +#ifndef __ASSEMBLY__ + +#define COMP_A_gt_B 1 +#define COMP_A_eq_B 2 +#define COMP_A_lt_B 3 +#define COMP_No_Comp 4 +#define COMP_Denormal 0x20 +#define COMP_NaN 0x40 +#define COMP_SNaN 0x80 + +#define status_word() \ + ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) +static inline void setcc(int cc) +{ + partial_status &= ~(SW_C0 | SW_C1 | SW_C2 | SW_C3); + partial_status |= (cc) & (SW_C0 | SW_C1 | SW_C2 | SW_C3); +} + +#ifdef PECULIAR_486 + /* Default, this conveys no information, but an 80486 does it. */ + /* Clear the SW_C1 bit, "other bits undefined". */ +# define clear_C1() { partial_status &= ~SW_C1; } +# else +# define clear_C1() +#endif /* PECULIAR_486 */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _STATUS_H_ */ diff --git a/arch/x86/math-emu/version.h b/arch/x86/math-emu/version.h new file mode 100644 index 000000000..a0d73a1d2 --- /dev/null +++ b/arch/x86/math-emu/version.h @@ -0,0 +1,12 @@ +/*---------------------------------------------------------------------------+ + | version.h | + | | + | | + | Copyright (C) 1992,1993,1994,1996,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#define FPU_VERSION "wm-FPU-emu version 2.01" diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S new file mode 100644 index 000000000..d588874eb --- /dev/null +++ b/arch/x86/math-emu/wm_shrx.S @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "wm_shrx.S" +/*---------------------------------------------------------------------------+ + | wm_shrx.S | + | | + | 64 bit right shift functions | + | | + | Copyright (C) 1992,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | unsigned FPU_shrx(void *arg1, unsigned arg2) | + | and | + | unsigned FPU_shrxs(void *arg1, unsigned arg2) | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +/*---------------------------------------------------------------------------+ + | unsigned FPU_shrx(void *arg1, unsigned arg2) | + | | + | Extended shift right function. | + | Fastest for small shifts. | + | Shifts the 64 bit quantity pointed to by the first arg (arg1) | + | right by the number of bits specified by the second arg (arg2). | + | Forms a 96 bit quantity from the 64 bit arg and eax: | + | [ 64 bit arg ][ eax ] | + | shift right ---------> | + | The eax register is initialized to 0 before the shifting. | + | Results returned in the 64 bit arg and eax. | + +---------------------------------------------------------------------------*/ + +ENTRY(FPU_shrx) + push %ebp + movl %esp,%ebp + pushl %esi + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + pushl %ebx + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %eax,%eax /* extension */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + movl %ebx,(%esi) + movl %edx,4(%esi) + popl %ebx + popl %esi + leave + ret + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + movl (%esi),%eax /* lsl */ + movl 4(%esi),%edx /* msl */ + shrd %cl,%edx,%eax + shr %cl,%edx + movl %edx,(%esi) + movl $0,4(%esi) + popl %esi + leave + ret + +L_more_than_63: + cmpl $96,%ecx + jnc L_more_than_95 + + subb $64,%cl + movl 4(%esi),%eax /* msl */ + shr %cl,%eax + xorl %edx,%edx + movl %edx,(%esi) + movl %edx,4(%esi) + popl %esi + leave + ret + +L_more_than_95: + xorl %eax,%eax + movl %eax,(%esi) + movl %eax,4(%esi) + popl %esi + leave + ret +ENDPROC(FPU_shrx) + + +/*---------------------------------------------------------------------------+ + | unsigned FPU_shrxs(void *arg1, unsigned arg2) | + | | + | Extended shift right function (optimized for small floating point | + | integers). | + | Shifts the 64 bit quantity pointed to by the first arg (arg1) | + | right by the number of bits specified by the second arg (arg2). | + | Forms a 96 bit quantity from the 64 bit arg and eax: | + | [ 64 bit arg ][ eax ] | + | shift right ---------> | + | The eax register is initialized to 0 before the shifting. | + | The lower 8 bits of eax are lost and replaced by a flag which is | + | set (to 0x01) if any bit, apart from the first one, is set in the | + | part which has been shifted out of the arg. | + | Results returned in the 64 bit arg and eax. | + +---------------------------------------------------------------------------*/ +ENTRY(FPU_shrxs) + push %ebp + movl %esp,%ebp + pushl %esi + pushl %ebx + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $64,%ecx /* shrd only works for 0..31 bits */ + jnc Ls_more_than_63 + + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jc Ls_less_than_32 + +/* We got here without jumps by assuming that the most common requirement + is for small integers */ +/* Shift by [32..63] bits */ + subb $32,%cl + movl (%esi),%eax /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %ebx,%ebx + shrd %cl,%eax,%ebx + shrd %cl,%edx,%eax + shr %cl,%edx + orl %ebx,%ebx /* test these 32 bits */ + setne %bl + test $0x7fffffff,%eax /* and 31 bits here */ + setne %bh + orw %bx,%bx /* Any of the 63 bit set ? */ + setne %al + movl %edx,(%esi) + movl $0,4(%esi) + popl %ebx + popl %esi + leave + ret + +/* Shift by [0..31] bits */ +Ls_less_than_32: + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %eax,%eax /* extension */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + test $0x7fffffff,%eax /* only need to look at eax here */ + setne %al + movl %ebx,(%esi) + movl %edx,4(%esi) + popl %ebx + popl %esi + leave + ret + +/* Shift by [64..95] bits */ +Ls_more_than_63: + cmpl $96,%ecx + jnc Ls_more_than_95 + + subb $64,%cl + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%eax /* msl */ + xorl %edx,%edx /* extension */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %ebx,%edx + setne %bl + test $0x7fffffff,%eax /* only need to look at eax here */ + setne %bh + orw %bx,%bx + setne %al + xorl %edx,%edx + movl %edx,(%esi) /* set to zero */ + movl %edx,4(%esi) /* set to zero */ + popl %ebx + popl %esi + leave + ret + +Ls_more_than_95: +/* Shift by [96..inf) bits */ + xorl %eax,%eax + movl (%esi),%ebx + orl 4(%esi),%ebx + setne %al + xorl %ebx,%ebx + movl %ebx,(%esi) + movl %ebx,4(%esi) + popl %ebx + popl %esi + leave + ret +ENDPROC(FPU_shrxs) diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S new file mode 100644 index 000000000..515cdee90 --- /dev/null +++ b/arch/x86/math-emu/wm_sqrt.S @@ -0,0 +1,472 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .file "wm_sqrt.S" +/*---------------------------------------------------------------------------+ + | wm_sqrt.S | + | | + | Fixed point arithmetic square root evaluation. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | Call from C as: | + | int wm_sqrt(FPU_REG *n, unsigned int control_word) | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | wm_sqrt(FPU_REG *n, unsigned int control_word) | + | returns the square root of n in n. | + | | + | Use Newton's method to compute the square root of a number, which must | + | be in the range [1.0 .. 4.0), to 64 bits accuracy. | + | Does not check the sign or tag of the argument. | + | Sets the exponent, but not the sign or tag of the result. | + | | + | The guess is kept in %esi:%edi | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + + +#ifndef NON_REENTRANT_FPU +/* Local storage on the stack: */ +#define FPU_accum_3 -4(%ebp) /* ms word */ +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) + +/* + * The de-normalised argument: + * sq_2 sq_1 sq_0 + * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 + * ^ binary point here + */ +#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ +#define FPU_fsqrt_arg_1 -24(%ebp) +#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ + +#else +/* Local storage in a static area: */ +.data + .align 4,0 +FPU_accum_3: + .long 0 /* ms word */ +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 + +/* The de-normalised argument: + sq_2 sq_1 sq_0 + b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 + ^ binary point here + */ +FPU_fsqrt_arg_2: + .long 0 /* ms word */ +FPU_fsqrt_arg_1: + .long 0 +FPU_fsqrt_arg_0: + .long 0 /* ls word, at most the ms bit is set */ +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(wm_sqrt) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + + movl SIGH(%esi),%eax + movl SIGL(%esi),%ecx + xorl %edx,%edx + +/* We use a rough linear estimate for the first guess.. */ + + cmpw EXP_BIAS,EXP(%esi) + jnz sqrt_arg_ge_2 + + shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ + rcrl $1,%ecx + rcrl $1,%edx + +sqrt_arg_ge_2: +/* From here on, n is never accessed directly again until it is + replaced by the answer. */ + + movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ + movl %ecx,FPU_fsqrt_arg_1 + movl %edx,FPU_fsqrt_arg_0 + +/* Make a linear first estimate */ + shrl $1,%eax + addl $0x40000000,%eax + movl $0xaaaaaaaa,%ecx + mull %ecx + shll %edx /* max result was 7fff... */ + testl $0x80000000,%edx /* but min was 3fff... */ + jnz sqrt_prelim_no_adjust + + movl $0x80000000,%edx /* round up */ + +sqrt_prelim_no_adjust: + movl %edx,%esi /* Our first guess */ + +/* We have now computed (approx) (2 + x) / 3, which forms the basis + for a few iterations of Newton's method */ + + movl FPU_fsqrt_arg_2,%ecx /* ms word */ + +/* + * From our initial estimate, three iterations are enough to get us + * to 30 bits or so. This will then allow two iterations at better + * precision to complete the process. + */ + +/* Compute (g + n/g)/2 at each iteration (g is the guess). */ + shrl %ecx /* Doing this first will prevent a divide */ + /* overflow later. */ + + movl %ecx,%edx /* msw of the arg / 2 */ + divl %esi /* current estimate */ + shrl %esi /* divide by 2 */ + addl %eax,%esi /* the new estimate */ + + movl %ecx,%edx + divl %esi + shrl %esi + addl %eax,%esi + + movl %ecx,%edx + divl %esi + shrl %esi + addl %eax,%esi + +/* + * Now that an estimate accurate to about 30 bits has been obtained (in %esi), + * we improve it to 60 bits or so. + * + * The strategy from now on is to compute new estimates from + * guess := guess + (n - guess^2) / (2 * guess) + */ + +/* First, find the square of the guess */ + movl %esi,%eax + mull %esi +/* guess^2 now in %edx:%eax */ + + movl FPU_fsqrt_arg_1,%ecx + subl %ecx,%eax + movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ + sbbl %ecx,%edx + jnc sqrt_stage_2_positive + +/* Subtraction gives a negative result, + negate the result before division. */ + notl %edx + notl %eax + addl $1,%eax + adcl $0,%edx + + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + jmp sqrt_stage_2_finish + +sqrt_stage_2_positive: + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + notl %ecx + notl %eax + addl $1,%eax + adcl $0,%ecx + +sqrt_stage_2_finish: + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* Form the new estimate in %esi:%edi */ + movl %eax,%edi + addl %ecx,%esi + + jnz sqrt_stage_2_done /* result should be [1..2) */ + +#ifdef PARANOID +/* It should be possible to get here only if the arg is ffff....ffff */ + cmpl $0xffffffff,FPU_fsqrt_arg_1 + jnz sqrt_stage_2_error +#endif /* PARANOID */ + +/* The best rounded result. */ + xorl %eax,%eax + decl %eax + movl %eax,%edi + movl %eax,%esi + movl $0x7fffffff,%eax + jmp sqrt_round_result + +#ifdef PARANOID +sqrt_stage_2_error: + pushl EX_INTERNAL|0x213 + call EXCEPTION +#endif /* PARANOID */ + +sqrt_stage_2_done: + +/* Now the square root has been computed to better than 60 bits. */ + +/* Find the square of the guess. */ + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,FPU_accum_1 + + movl %esi,%eax + mull %esi + movl %edx,FPU_accum_3 + movl %eax,FPU_accum_2 + + movl %edi,%eax + mull %esi + addl %eax,FPU_accum_1 + adcl %edx,FPU_accum_2 + adcl $0,FPU_accum_3 + +/* movl %esi,%eax */ +/* mull %edi */ + addl %eax,FPU_accum_1 + adcl %edx,FPU_accum_2 + adcl $0,FPU_accum_3 + +/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ + + movl FPU_fsqrt_arg_0,%eax /* get normalized n */ + subl %eax,FPU_accum_1 + movl FPU_fsqrt_arg_1,%eax + sbbl %eax,FPU_accum_2 + movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ + sbbl %eax,FPU_accum_3 + jnc sqrt_stage_3_positive + +/* Subtraction gives a negative result, + negate the result before division */ + notl FPU_accum_1 + notl FPU_accum_2 + notl FPU_accum_3 + addl $1,FPU_accum_1 + adcl $0,FPU_accum_2 + +#ifdef PARANOID + adcl $0,FPU_accum_3 /* This must be zero */ + jz sqrt_stage_3_no_error + +sqrt_stage_3_error: + pushl EX_INTERNAL|0x207 + call EXCEPTION + +sqrt_stage_3_no_error: +#endif /* PARANOID */ + + movl FPU_accum_2,%edx + movl FPU_accum_1,%eax + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* prepare to round the result */ + + addl %ecx,%edi + adcl $0,%esi + + jmp sqrt_stage_3_finished + +sqrt_stage_3_positive: + movl FPU_accum_2,%edx + movl FPU_accum_1,%eax + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* prepare to round the result */ + + notl %eax /* Negate the correction term */ + notl %ecx + addl $1,%eax + adcl $0,%ecx /* carry here ==> correction == 0 */ + adcl $0xffffffff,%esi + + addl %ecx,%edi + adcl $0,%esi + +sqrt_stage_3_finished: + +/* + * The result in %esi:%edi:%esi should be good to about 90 bits here, + * and the rounding information here does not have sufficient accuracy + * in a few rare cases. + */ + cmpl $0xffffffe0,%eax + ja sqrt_near_exact_x + + cmpl $0x00000020,%eax + jb sqrt_near_exact + + cmpl $0x7fffffe0,%eax + jb sqrt_round_result + + cmpl $0x80000020,%eax + jb sqrt_get_more_precision + +sqrt_round_result: +/* Set up for rounding operations */ + movl %eax,%edx + movl %esi,%eax + movl %edi,%ebx + movl PARAM1,%edi + movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ + jmp fpu_reg_round + + +sqrt_near_exact_x: +/* First, the estimate must be rounded up. */ + addl $1,%edi + adcl $0,%esi + +sqrt_near_exact: +/* + * This is an easy case because x^1/2 is monotonic. + * We need just find the square of our estimate, compare it + * with the argument, and deduce whether our estimate is + * above, below, or exact. We use the fact that the estimate + * is known to be accurate to about 90 bits. + */ + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,%ebx /* 2nd ls word of square */ + movl %eax,%ecx /* ls word of square */ + + movl %edi,%eax + mull %esi + addl %eax,%ebx + addl %eax,%ebx + +#ifdef PARANOID + cmp $0xffffffb0,%ebx + jb sqrt_near_exact_ok + + cmp $0x00000050,%ebx + ja sqrt_near_exact_ok + + pushl EX_INTERNAL|0x214 + call EXCEPTION + +sqrt_near_exact_ok: +#endif /* PARANOID */ + + or %ebx,%ebx + js sqrt_near_exact_small + + jnz sqrt_near_exact_large + + or %ebx,%edx + jnz sqrt_near_exact_large + +/* Our estimate is exactly the right answer */ + xorl %eax,%eax + jmp sqrt_round_result + +sqrt_near_exact_small: +/* Our estimate is too small */ + movl $0x000000ff,%eax + jmp sqrt_round_result + +sqrt_near_exact_large: +/* Our estimate is too large, we need to decrement it */ + subl $1,%edi + sbbl $0,%esi + movl $0xffffff00,%eax + jmp sqrt_round_result + + +sqrt_get_more_precision: +/* This case is almost the same as the above, except we start + with an extra bit of precision in the estimate. */ + stc /* The extra bit. */ + rcll $1,%edi /* Shift the estimate left one bit */ + rcll $1,%esi + + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,%ebx /* 2nd ls word of square */ + movl %eax,%ecx /* ls word of square */ + + movl %edi,%eax + mull %esi + addl %eax,%ebx + addl %eax,%ebx + +/* Put our estimate back to its original value */ + stc /* The ms bit. */ + rcrl $1,%esi /* Shift the estimate left one bit */ + rcrl $1,%edi + +#ifdef PARANOID + cmp $0xffffff60,%ebx + jb sqrt_more_prec_ok + + cmp $0x000000a0,%ebx + ja sqrt_more_prec_ok + + pushl EX_INTERNAL|0x215 + call EXCEPTION + +sqrt_more_prec_ok: +#endif /* PARANOID */ + + or %ebx,%ebx + js sqrt_more_prec_small + + jnz sqrt_more_prec_large + + or %ebx,%ecx + jnz sqrt_more_prec_large + +/* Our estimate is exactly the right answer */ + movl $0x80000000,%eax + jmp sqrt_round_result + +sqrt_more_prec_small: +/* Our estimate is too small */ + movl $0x800000ff,%eax + jmp sqrt_round_result + +sqrt_more_prec_large: +/* Our estimate is too large */ + movl $0x7fffff00,%eax + jmp sqrt_round_result +ENDPROC(wm_sqrt) |