; $Id: powcore.asm $ ;; @file ; IPRT - No-CRT common pow code - AMD64 & X86. ; ; ; Copyright (C) 2006-2023 Oracle and/or its affiliates. ; ; This file is part of VirtualBox base platform packages, as ; available from https://www.virtualbox.org. ; ; This program is free software; you can redistribute it and/or ; modify it under the terms of the GNU General Public License ; as published by the Free Software Foundation, in version 3 of the ; License. ; ; This program is distributed in the hope that it will be useful, but ; WITHOUT ANY WARRANTY; without even the implied warranty of ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ; General Public License for more details. ; ; You should have received a copy of the GNU General Public License ; along with this program; if not, see . ; ; The contents of this file may alternatively be used under the terms ; of the Common Development and Distribution License Version 1.0 ; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included ; in the VirtualBox distribution, in which case the provisions of the ; CDDL are applicable instead of those of the GPL. ; ; You may elect to license modified versions of this file under the ; terms and conditions of either the GPL or the CDDL or both. ; ; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 ; %define RT_ASM_WITH_SEH64 %include "iprt/asmdefs.mac" %include "iprt/x86.mac" BEGINCODE extern NAME(RT_NOCRT(feraiseexcept)) ;; ; Call feraiseexcept(%1) %macro CALL_feraiseexcept_WITH 1 %ifdef RT_ARCH_X86 mov dword [xSP], X86_FSW_IE %elifdef ASM_CALL64_GCC mov edi, X86_FSW_IE %elifdef ASM_CALL64_MSC mov ecx, X86_FSW_IE %else %error calling conv. %endif call NAME(RT_NOCRT(feraiseexcept)) %endmacro ;; ; Compute the st1 to the power of st0. ; ; @returns st(0) = result ; eax = what's being returned: ; 0 - Just a value. ; 1 - The rBase value. Caller may take steps to ensure it's exactly the same. ; 2 - The rExp value. Caller may take steps to ensure it's exactly the same. ; @param rBase/st1 The base. ; @param rExp/st0 The exponent ; @param fFxamBase/dx The status flags after fxam(rBase). ; @param enmType/ebx The original parameter and return types: ; 0 - 32-bit / float ; 1 - 64-bit / double ; 2 - 80-bit / long double ; BEGINPROC rtNoCrtMathPowCore push xBP SEH64_PUSH_xBP mov xBP, xSP SEH64_SET_FRAME_xBP 0 sub xSP, 30h SEH64_ALLOCATE_STACK 30h SEH64_END_PROLOGUE ; ; Weed out special values, starting with the exponent. ; fxam fnstsw ax mov cx, ax ; cx=fxam(exp) and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) je .exp_finite cmp ax, X86_FSW_C3 ; Zero je .exp_zero cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals je .exp_finite cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. je .exp_inf jmp .exp_nan .exp_finite: ; ; Detect special base values. ; mov ax, dx ; ax=fxam(base) and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) je .base_finite cmp ax, X86_FSW_C3 ; Zero je .base_zero cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals je .base_finite cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. je .base_inf jmp .base_nan .base_finite: ; ; 1 in the base is also special. ; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0 ; fld1 fcomip st0, st2 je .return_base_value ; ; Check if the exponent is an integer value we can handle in a 64-bit ; GRP as that is simpler to handle accurately. ; ; In 64-bit integer range? fld tword [.s_r80MaxInt xWrtRIP] fcomip st0, st1 jb .not_integer_exp fld tword [.s_r80MinInt xWrtRIP] fcomip st0, st1 ja .not_integer_exp ; Convert it to integer. fld st0 ; -> st0=exp; st1=exp; st2=base fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction). fild qword [xBP - 8] ; Load it again for comparison. fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop. jne .not_integer_exp ; ; ; Ok, we've got an integer exponent value in that fits into a 64-bit. ; We'll multiply the base exponention bit by exponention bit, applying ; it as a factor for bits that are set. ; ; .integer_exp: ; Load the integer value into edx:exx / rdx and ditch the floating point exponent. mov xDX, [xBP - 8] %ifdef RT_ARCH_X86 mov eax, [xBP - 8 + 4] %endif ffreep st0 ; -> st0=base; ; Load a 1 onto the stack, we'll need it below as well as for converting ; a negative exponent to a positive one. fld1 ; -> st0=1.0; st1=base; ; If the exponent is negative, negate it and change base to 1/base. or xDX, xDX jns .integer_exp_positive neg xDX %ifdef RT_ARCH_X86 neg eax sbb edx, 0 %endif fdivr st1, st0 ; -> st0=1.0; st1=1/base .integer_exp_positive: ; ; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for ; the multiplication factor corresponding to the current exponent bit ; and st1 as the result. ; fxch ; -> st0=base; st1=1.0; .integer_exp_loop: %ifdef RT_ARCH_X86 shrd eax, edx, 1 %else shr rdx, 1 %endif jnc .integer_exp_loop_advance fmul st1, st0 .integer_exp_loop_advance: ; Check if we're done. %ifdef RT_ARCH_AMD64 jz .integer_exp_return ; (we will have the flags for the shr rdx above) %else shr edx, 1 ; complete the above shift operation mov ecx, edx ; check if edx:eax is zero. or ecx, eax jz .integer_exp_return %endif ; Calculate the factor for the next bit. fmul st0, st0 jmp .integer_exp_loop .integer_exp_return: ffreep st0 ; drop the factor -> st0=result; no st1. jmp .return_val ; ; ; Non-integer or value was out of range for an int64_t. ; ; The approach here is the same as in exp.asm, only we have to do the ; log2(base) calculation first as it's a parameter and not a constant. ; ; .not_integer_exp: ; First reject negative numbers. We still have the fxam(base) status in dx. test dx, X86_FSW_C1 jnz .base_negative_non_integer_exp ; Swap the items on the stack, so we can process the base first. fxch st0, st1 ; -> st0=base; st1=exponent; ; ; From log2.asm: ; ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both ; sides of zero. We try use it if we can. ; .above_one: ; For both fyl2xp1 and fyl2xp1 we need st1=1.0. fld1 fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent ; Check if the input is within the fyl2xp1 range. fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP] fcomip st0, st1 jbe .cannot_use_fyl2xp1 fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP] fcomip st0, st1 jae .cannot_use_fyl2xp1 ; Do the calculation. .use_fyl2xp1: fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent jmp .done_log2 .cannot_use_fyl2xp1: fyl2x ; -> st0=1.0*log2(base); st1=exponent .done_log2: ; ; From exp.asm: ; ; Convert to power of 2 and it'll be the same as exp2. ; fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp ; ; Split the job in two on the fraction and integer l2base parts. ; fld st0 ; Push a copy of the pow2exp on the stack. frndint ; st0 = (int)pow2exp fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer. fxch ; st0 = fraction, st1 = integer. ; 1. Calculate on the fraction. f2xm1 ; st0 = 2**fraction - 1.0 fld1 faddp ; st0 = 2**fraction ; 2. Apply the integer power of two. fscale ; st0 = result; st1 = integer part of pow2exp. fstp st1 ; st0 = result; no st1. ; ; Return st0. ; .return_val: xor eax, eax .return: leave ret ; ; ; pow() has a lot of defined behavior for special values, which is why ; this is the largest and most difficult part of the code. :-) ; ; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html ; there are 21 error conditions listed in the return value section. ; The code below refers to this by number. ; ; When we get here: ; dx=fxam(base) ; cx=fxam(exponent) ; st1=base ; st0=exponent ; ; ; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN. ; ; The non-integer exponent claim might be wrong, as we only check if it ; fits into a int64_t register. But, I don't see how we can calculate ; it right now. ; .base_negative_non_integer_exp: CALL_feraiseexcept_WITH X86_FSW_IE jmp .return_nan ; ; 7. Exponent = +/-0.0, any base value including NaN: return +1.0 ; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a ; domain error (#IE) occur if base=+/-0. Not implemented. .exp_zero: .return_plus_one: fld1 jmp .return_pop_pop_val ; ; 6. Exponent = whatever and base = 1: Return 1.0 ; 10. Exponent = +/-Inf and base = -1: Return 1.0 ;6+10 => Exponent = +/-Inf and |base| = 1: Return 1.0 ; 11. Exponent = -Inf and |base| < 1: Return +Inf ; 12. Exponent = -Inf and |base| > 1: Return +0 ; 13. Exponent = +Inf and |base| < 1: Return +0 ; 14. Exponent = +Inf and |base| > 1: Return +Inf ; ; Note! Rule 4 would trigger for the same conditions as 11 when base == 0, ; but it's optional to raise div/0 and it's apparently marked as ; obsolete in C23, so not implemented. ; .exp_inf: ; Check if base is NaN or unsupported. and dx, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 ; fxam(base) cmp dx, X86_FSW_C0 jbe .return_base_nan ; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here. ffreep st0 fabs fld1 ; st0=1.0; st1=|rdBase| fcomi st0, st1 je .return_plus_one ; Matches rule 6 + 10 (base is +/-1). ja .exp_inf_base_smaller_than_one .exp_inf_base_larger_than_one: test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign jz .return_plus_inf ; Matches rule 14 (exponent is +Inf). jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf). .exp_inf_base_smaller_than_one: test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf). jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf). ; ; 6. Exponent = whatever and base = 1: Return 1.0 ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. ; .exp_nan: ; Check if base is a number and possible 1. test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals. jz .return_exp_nan fld1 fcomip st0, st2 jne .return_exp_nan jmp .return_plus_one ; ; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0. ; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0. ; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0 ; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0 ; ; Note! Exponent must be finite and non-zero if we get here. ; .base_zero: fldz fcomip st0, st1 jbe .base_zero_plus_exp .base_zero_minus_exp: mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer call .is_exp_odd_integer ; trashes EDX but no ECX. or eax, eax jz .base_zero_minus_exp_not_odd_int ; Matching 4a. .base_zero_minus_exp_odd_int: test cx, X86_FSW_C1 ; base sign jz .raise_de_and_return_plus_inf .raise_de_and_return_minus_inf: CALL_feraiseexcept_WITH X86_FSW_DE jmp .return_minus_inf .raise_de_and_return_plus_inf: CALL_feraiseexcept_WITH X86_FSW_DE jmp .return_plus_inf ; Matching 4b. .base_zero_minus_exp_not_odd_int: CALL_feraiseexcept_WITH X86_FSW_DE jmp .return_plus_inf .base_zero_plus_exp: call .is_exp_odd_integer or eax, eax jnz .return_base_value ; Matching 8 .return_plus_zero: ; Matching 9 fldz jmp .return_pop_pop_val ; ; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0 ; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0 ; 17. base == -Inf and exp > 0 and exp is odd integer: Return -Inf ; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf ; 19. base == +Inf and exp < 0: Return +0 ; 20. base == +Inf and exp > 0: Return +Inf ; ; Note! Exponent must be finite and non-zero if we get here. ; .base_inf: fldz fcomip st0, st1 jbe .base_inf_plus_exp .base_inf_minus_exp: test dx, X86_FSW_C1 jz .return_plus_zero ; Matches 19 (base == +Inf). .base_minus_inf_minus_exp: call .is_exp_odd_integer or eax, eax jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf) .return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf) fldz fchs jmp .return_pop_pop_val .base_inf_plus_exp: test dx, X86_FSW_C1 jz .return_plus_inf ; Matches 20 (base == +Inf). .base_minus_inf_plus_exp: call .is_exp_odd_integer or eax, eax jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf) jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf) ; ; Return the exponent NaN (or whatever) value. ; .return_exp_nan: fld st0 mov eax, 2 ; return param 2 jmp .return_pop_pop_val_with_eax ; ; Return the base NaN (or whatever) value. ; .return_base_nan: .return_base_value: .base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. fld st1 mov eax, 1 ; return param 1 jmp .return_pop_pop_val_with_eax ; ; Pops the two values off the FPU stack and returns NaN. ; .return_nan: fld qword [.s_r64QNan xWrtRIP] jmp .return_pop_pop_val ; ; Pops the two values off the FPU stack and returns +Inf. ; .return_plus_inf: fld qword [.s_r64PlusInf xWrtRIP] jmp .return_pop_pop_val ; ; Pops the two values off the FPU stack and returns -Inf. ; .return_minus_inf: fld qword [.s_r64MinusInf xWrtRIP] jmp .return_pop_pop_val ; ; Return st0, remove st1 and st2. ; .return_pop_pop_val: xor eax, eax .return_pop_pop_val_with_eax: fstp st2 ffreep st0 jmp .return ALIGNCODE(8) .s_r80MaxInt: dt +9223372036854775807.0 ALIGNCODE(8) .s_r80MinInt: dt -9223372036854775807.0 ALIGNCODE(8) ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)). ; These two variables is that range + 1.0, so we can compare directly ; with the input w/o any extra fsub and fabs work. .s_r64AbsFyL2xP1InputMin: dq 0.708 ; -0.292 + 1.0 .s_r64AbsFyL2xP1InputMax: dq 1.292 .s_r64QNan: dq RTFLOAT64U_QNAN_MINUS .s_r64PlusInf: dq RTFLOAT64U_INF_PLUS .s_r64MinusInf: dq RTFLOAT64U_INF_MINUS ;; ; Sub-function that checks if the exponent (st0) is an odd integer or not. ; ; @returns eax = 1 if odd, 0 if even or not integer. ; @uses eax, edx, eflags. ; .is_exp_odd_integer: ; ; Save the FPU enviornment and mask all exceptions. ; fnstenv [xBP - 30h] mov ax, [xBP - 30h + X86FSTENV32P.FCW] or word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL fldcw [xBP - 30h + X86FSTENV32P.FCW] mov [xBP - 30h + X86FSTENV32P.FCW], ax ; ; Convert to 64-bit integer (probably not 100% correct). ; fld st0 ; -> st0=exponent st1=exponent; st2=base; fistp qword [xBP - 10h] fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base; fcomip st0, st1 ; -> st0=exponent; st1=base; jne .is_exp_odd_integer__return_false ; jump if not integer. mov xAX, [xBP - 10h] %ifdef mov edx, [xBP - 10h + 4] %endif ; ; Check the lowest bit if it might be odd. ; This works both for positive and negative numbers. ; test al, 1 jz .is_exp_odd_integer__return_false ; jump if even. ; ; If the result is negative, convert to positive. ; %ifdef RT_ARCH_AMD64 bt rax, 63 %else bt edx, 31 %endif jnc .is_exp_odd_integer__positive %ifdef RT_ARCH_AMD64 neg xAX %else neg edx neg eax sbb edx, 0 %endif .is_exp_odd_integer__positive: ; ; Now find the most significant bit in the value so we can verify that ; the odd bit was part of the mantissa/fraction of the input. ; cmp bl, 3 ; Skip if 80-bit input, as it has a 64-bit mantissa which je .is_exp_odd_integer__return_true ; makes it a 1 bit more precision than out integer reg(s). %ifdef RT_ARCH_AMD64 bsr rax, rax %else bsr edx, edx jnz .is_exp_odd_integer__high_dword_is_zero lea eax, [edx + 20h] jmp .is_exp_odd_integer__first_bit_in_eax .is_exp_odd_integer__high_dword_is_zero: bsr eax, eax .is_exp_odd_integer__first_bit_in_eax: %endif ; ; The limit is 53 for double precision (one implicit bit + 52 bits fraction), ; and 24 for single precision types. ; mov ah, 53 ; RTFLOAT64U_FRACTION_BITS + 1 cmp bl, 0 jne .is_exp_odd_integer__is_double_limit mov ah, 24 ; RTFLOAT32U_FRACTION_BITS + 1 .is_exp_odd_integer__is_double_limit: cmp al, ah jae .is_exp_odd_integer__return_false mov eax, 1 ; Return. .is_exp_odd_integer__return_true: jmp .is_exp_odd_integer__return .is_exp_odd_integer__return_false: xor eax, eax .is_exp_odd_integer__return: ffreep st0 fldenv [xBP - 30h] ret ENDPROC rtNoCrtMathPowCore