From f215e02bf85f68d3a6106c2a1f4f7f063f819064 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:17:27 +0200 Subject: Adding upstream version 7.0.14-dfsg. Signed-off-by: Daniel Baumann --- src/VBox/Runtime/common/math/powcore.asm | 633 +++++++++++++++++++++++++++++++ 1 file changed, 633 insertions(+) create mode 100644 src/VBox/Runtime/common/math/powcore.asm (limited to 'src/VBox/Runtime/common/math/powcore.asm') diff --git a/src/VBox/Runtime/common/math/powcore.asm b/src/VBox/Runtime/common/math/powcore.asm new file mode 100644 index 00000000..e37af891 --- /dev/null +++ b/src/VBox/Runtime/common/math/powcore.asm @@ -0,0 +1,633 @@ +; $Id: powcore.asm $ +;; @file +; IPRT - No-CRT common pow code - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see . +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +extern NAME(RT_NOCRT(feraiseexcept)) + +;; +; Call feraiseexcept(%1) +%macro CALL_feraiseexcept_WITH 1 + %ifdef RT_ARCH_X86 + mov dword [xSP], X86_FSW_IE + %elifdef ASM_CALL64_GCC + mov edi, X86_FSW_IE + %elifdef ASM_CALL64_MSC + mov ecx, X86_FSW_IE + %else + %error calling conv. + %endif + call NAME(RT_NOCRT(feraiseexcept)) +%endmacro + + +;; +; Compute the st1 to the power of st0. +; +; @returns st(0) = result +; eax = what's being returned: +; 0 - Just a value. +; 1 - The rBase value. Caller may take steps to ensure it's exactly the same. +; 2 - The rExp value. Caller may take steps to ensure it's exactly the same. +; @param rBase/st1 The base. +; @param rExp/st0 The exponent +; @param fFxamBase/dx The status flags after fxam(rBase). +; @param enmType/ebx The original parameter and return types: +; 0 - 32-bit / float +; 1 - 64-bit / double +; 2 - 80-bit / long double +; +BEGINPROC rtNoCrtMathPowCore + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 30h + SEH64_ALLOCATE_STACK 30h + SEH64_END_PROLOGUE + + ; + ; Weed out special values, starting with the exponent. + ; + fxam + fnstsw ax + mov cx, ax ; cx=fxam(exp) + + and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 + cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) + je .exp_finite + cmp ax, X86_FSW_C3 ; Zero + je .exp_zero + cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals + je .exp_finite + cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. + je .exp_inf + jmp .exp_nan + +.exp_finite: + ; + ; Detect special base values. + ; + mov ax, dx ; ax=fxam(base) + and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 + cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) + je .base_finite + cmp ax, X86_FSW_C3 ; Zero + je .base_zero + cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals + je .base_finite + cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. + je .base_inf + jmp .base_nan + +.base_finite: + ; + ; 1 in the base is also special. + ; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0 + ; + fld1 + fcomip st0, st2 + je .return_base_value + + ; + ; Check if the exponent is an integer value we can handle in a 64-bit + ; GRP as that is simpler to handle accurately. + ; + ; In 64-bit integer range? + fld tword [.s_r80MaxInt xWrtRIP] + fcomip st0, st1 + jb .not_integer_exp + + fld tword [.s_r80MinInt xWrtRIP] + fcomip st0, st1 + ja .not_integer_exp + + ; Convert it to integer. + fld st0 ; -> st0=exp; st1=exp; st2=base + fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction). + + fild qword [xBP - 8] ; Load it again for comparison. + fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop. + jne .not_integer_exp + + + ; + ; + ; Ok, we've got an integer exponent value in that fits into a 64-bit. + ; We'll multiply the base exponention bit by exponention bit, applying + ; it as a factor for bits that are set. + ; + ; +.integer_exp: + ; Load the integer value into edx:exx / rdx and ditch the floating point exponent. + mov xDX, [xBP - 8] +%ifdef RT_ARCH_X86 + mov eax, [xBP - 8 + 4] +%endif + ffreep st0 ; -> st0=base; + + ; Load a 1 onto the stack, we'll need it below as well as for converting + ; a negative exponent to a positive one. + fld1 ; -> st0=1.0; st1=base; + + ; If the exponent is negative, negate it and change base to 1/base. + or xDX, xDX + jns .integer_exp_positive + neg xDX +%ifdef RT_ARCH_X86 + neg eax + sbb edx, 0 +%endif + fdivr st1, st0 ; -> st0=1.0; st1=1/base +.integer_exp_positive: + + ; + ; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for + ; the multiplication factor corresponding to the current exponent bit + ; and st1 as the result. + ; + fxch ; -> st0=base; st1=1.0; +.integer_exp_loop: +%ifdef RT_ARCH_X86 + shrd eax, edx, 1 +%else + shr rdx, 1 +%endif + jnc .integer_exp_loop_advance + fmul st1, st0 + +.integer_exp_loop_advance: + ; Check if we're done. +%ifdef RT_ARCH_AMD64 + jz .integer_exp_return ; (we will have the flags for the shr rdx above) +%else + shr edx, 1 ; complete the above shift operation + + mov ecx, edx ; check if edx:eax is zero. + or ecx, eax + jz .integer_exp_return +%endif + ; Calculate the factor for the next bit. + fmul st0, st0 + jmp .integer_exp_loop + +.integer_exp_return: + ffreep st0 ; drop the factor -> st0=result; no st1. + jmp .return_val + + + ; + ; + ; Non-integer or value was out of range for an int64_t. + ; + ; The approach here is the same as in exp.asm, only we have to do the + ; log2(base) calculation first as it's a parameter and not a constant. + ; + ; +.not_integer_exp: + + ; First reject negative numbers. We still have the fxam(base) status in dx. + test dx, X86_FSW_C1 + jnz .base_negative_non_integer_exp + + ; Swap the items on the stack, so we can process the base first. + fxch st0, st1 ; -> st0=base; st1=exponent; + + ; + ; From log2.asm: + ; + ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a + ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both + ; sides of zero. We try use it if we can. + ; +.above_one: + ; For both fyl2xp1 and fyl2xp1 we need st1=1.0. + fld1 + fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent + + ; Check if the input is within the fyl2xp1 range. + fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP] + fcomip st0, st1 + jbe .cannot_use_fyl2xp1 + + fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP] + fcomip st0, st1 + jae .cannot_use_fyl2xp1 + + ; Do the calculation. +.use_fyl2xp1: + fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent + fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent + jmp .done_log2 + +.cannot_use_fyl2xp1: + fyl2x ; -> st0=1.0*log2(base); st1=exponent +.done_log2: + + ; + ; From exp.asm: + ; + ; Convert to power of 2 and it'll be the same as exp2. + ; + fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp + + ; + ; Split the job in two on the fraction and integer l2base parts. + ; + fld st0 ; Push a copy of the pow2exp on the stack. + frndint ; st0 = (int)pow2exp + fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer. + fxch ; st0 = fraction, st1 = integer. + + ; 1. Calculate on the fraction. + f2xm1 ; st0 = 2**fraction - 1.0 + fld1 + faddp ; st0 = 2**fraction + + ; 2. Apply the integer power of two. + fscale ; st0 = result; st1 = integer part of pow2exp. + fstp st1 ; st0 = result; no st1. + + ; + ; Return st0. + ; +.return_val: + xor eax, eax +.return: + leave + ret + + + ; + ; + ; pow() has a lot of defined behavior for special values, which is why + ; this is the largest and most difficult part of the code. :-) + ; + ; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html + ; there are 21 error conditions listed in the return value section. + ; The code below refers to this by number. + ; + ; When we get here: + ; dx=fxam(base) + ; cx=fxam(exponent) + ; st1=base + ; st0=exponent + ; + + ; + ; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN. + ; + ; The non-integer exponent claim might be wrong, as we only check if it + ; fits into a int64_t register. But, I don't see how we can calculate + ; it right now. + ; +.base_negative_non_integer_exp: + CALL_feraiseexcept_WITH X86_FSW_IE + jmp .return_nan + + ; + ; 7. Exponent = +/-0.0, any base value including NaN: return +1.0 + ; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a + ; domain error (#IE) occur if base=+/-0. Not implemented. +.exp_zero: +.return_plus_one: + fld1 + jmp .return_pop_pop_val + + ; + ; 6. Exponent = whatever and base = 1: Return 1.0 + ; 10. Exponent = +/-Inf and base = -1: Return 1.0 + ;6+10 => Exponent = +/-Inf and |base| = 1: Return 1.0 + ; 11. Exponent = -Inf and |base| < 1: Return +Inf + ; 12. Exponent = -Inf and |base| > 1: Return +0 + ; 13. Exponent = +Inf and |base| < 1: Return +0 + ; 14. Exponent = +Inf and |base| > 1: Return +Inf + ; + ; Note! Rule 4 would trigger for the same conditions as 11 when base == 0, + ; but it's optional to raise div/0 and it's apparently marked as + ; obsolete in C23, so not implemented. + ; +.exp_inf: + ; Check if base is NaN or unsupported. + and dx, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 ; fxam(base) + cmp dx, X86_FSW_C0 + jbe .return_base_nan + + ; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here. + ffreep st0 + fabs + fld1 ; st0=1.0; st1=|rdBase| + fcomi st0, st1 + je .return_plus_one ; Matches rule 6 + 10 (base is +/-1). + ja .exp_inf_base_smaller_than_one +.exp_inf_base_larger_than_one: + test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign + jz .return_plus_inf ; Matches rule 14 (exponent is +Inf). + jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf). + +.exp_inf_base_smaller_than_one: + test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign + jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf). + jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf). + + ; + ; 6. Exponent = whatever and base = 1: Return 1.0 + ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. + ; +.exp_nan: + ; Check if base is a number and possible 1. + test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals. + jz .return_exp_nan + fld1 + fcomip st0, st2 + jne .return_exp_nan + jmp .return_plus_one + + ; + ; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0. + ; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0. + ; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0 + ; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0 + ; + ; Note! Exponent must be finite and non-zero if we get here. + ; +.base_zero: + fldz + fcomip st0, st1 + jbe .base_zero_plus_exp +.base_zero_minus_exp: + mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer + call .is_exp_odd_integer ; trashes EDX but no ECX. + or eax, eax + jz .base_zero_minus_exp_not_odd_int + + ; Matching 4a. +.base_zero_minus_exp_odd_int: + test cx, X86_FSW_C1 ; base sign + jz .raise_de_and_return_plus_inf +.raise_de_and_return_minus_inf: + CALL_feraiseexcept_WITH X86_FSW_DE + jmp .return_minus_inf +.raise_de_and_return_plus_inf: + CALL_feraiseexcept_WITH X86_FSW_DE + jmp .return_plus_inf + + ; Matching 4b. +.base_zero_minus_exp_not_odd_int: + CALL_feraiseexcept_WITH X86_FSW_DE + jmp .return_plus_inf + +.base_zero_plus_exp: + call .is_exp_odd_integer + or eax, eax + jnz .return_base_value ; Matching 8 +.return_plus_zero: ; Matching 9 + fldz + jmp .return_pop_pop_val + + ; + ; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0 + ; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0 + ; 17. base == -Inf and exp > 0 and exp is odd integer: Return -Inf + ; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf + ; 19. base == +Inf and exp < 0: Return +0 + ; 20. base == +Inf and exp > 0: Return +Inf + ; + ; Note! Exponent must be finite and non-zero if we get here. + ; +.base_inf: + fldz + fcomip st0, st1 + jbe .base_inf_plus_exp +.base_inf_minus_exp: + test dx, X86_FSW_C1 + jz .return_plus_zero ; Matches 19 (base == +Inf). +.base_minus_inf_minus_exp: + call .is_exp_odd_integer + or eax, eax + jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf) +.return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf) + fldz + fchs + jmp .return_pop_pop_val + +.base_inf_plus_exp: + test dx, X86_FSW_C1 + jz .return_plus_inf ; Matches 20 (base == +Inf). +.base_minus_inf_plus_exp: + call .is_exp_odd_integer + or eax, eax + jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf) + jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf) + + ; + ; Return the exponent NaN (or whatever) value. + ; +.return_exp_nan: + fld st0 + mov eax, 2 ; return param 2 + jmp .return_pop_pop_val_with_eax + + ; + ; Return the base NaN (or whatever) value. + ; +.return_base_nan: +.return_base_value: +.base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. + fld st1 + mov eax, 1 ; return param 1 + jmp .return_pop_pop_val_with_eax + + ; + ; Pops the two values off the FPU stack and returns NaN. + ; +.return_nan: + fld qword [.s_r64QNan xWrtRIP] + jmp .return_pop_pop_val + + ; + ; Pops the two values off the FPU stack and returns +Inf. + ; +.return_plus_inf: + fld qword [.s_r64PlusInf xWrtRIP] + jmp .return_pop_pop_val + + ; + ; Pops the two values off the FPU stack and returns -Inf. + ; +.return_minus_inf: + fld qword [.s_r64MinusInf xWrtRIP] + jmp .return_pop_pop_val + + ; + ; Return st0, remove st1 and st2. + ; +.return_pop_pop_val: + xor eax, eax +.return_pop_pop_val_with_eax: + fstp st2 + ffreep st0 + jmp .return + + +ALIGNCODE(8) +.s_r80MaxInt: + dt +9223372036854775807.0 + +ALIGNCODE(8) +.s_r80MinInt: + dt -9223372036854775807.0 + +ALIGNCODE(8) + ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)). + ; These two variables is that range + 1.0, so we can compare directly + ; with the input w/o any extra fsub and fabs work. +.s_r64AbsFyL2xP1InputMin: + dq 0.708 ; -0.292 + 1.0 +.s_r64AbsFyL2xP1InputMax: + dq 1.292 + +.s_r64QNan: + dq RTFLOAT64U_QNAN_MINUS +.s_r64PlusInf: + dq RTFLOAT64U_INF_PLUS +.s_r64MinusInf: + dq RTFLOAT64U_INF_MINUS + + ;; + ; Sub-function that checks if the exponent (st0) is an odd integer or not. + ; + ; @returns eax = 1 if odd, 0 if even or not integer. + ; @uses eax, edx, eflags. + ; +.is_exp_odd_integer: + ; + ; Save the FPU enviornment and mask all exceptions. + ; + fnstenv [xBP - 30h] + mov ax, [xBP - 30h + X86FSTENV32P.FCW] + or word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL + fldcw [xBP - 30h + X86FSTENV32P.FCW] + mov [xBP - 30h + X86FSTENV32P.FCW], ax + + ; + ; Convert to 64-bit integer (probably not 100% correct). + ; + fld st0 ; -> st0=exponent st1=exponent; st2=base; + fistp qword [xBP - 10h] + fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base; + fcomip st0, st1 ; -> st0=exponent; st1=base; + jne .is_exp_odd_integer__return_false ; jump if not integer. + mov xAX, [xBP - 10h] +%ifdef + mov edx, [xBP - 10h + 4] +%endif + + ; + ; Check the lowest bit if it might be odd. + ; This works both for positive and negative numbers. + ; + test al, 1 + jz .is_exp_odd_integer__return_false ; jump if even. + + ; + ; If the result is negative, convert to positive. + ; +%ifdef RT_ARCH_AMD64 + bt rax, 63 +%else + bt edx, 31 +%endif + jnc .is_exp_odd_integer__positive +%ifdef RT_ARCH_AMD64 + neg xAX +%else + neg edx + neg eax + sbb edx, 0 +%endif +.is_exp_odd_integer__positive: + + ; + ; Now find the most significant bit in the value so we can verify that + ; the odd bit was part of the mantissa/fraction of the input. + ; + cmp bl, 3 ; Skip if 80-bit input, as it has a 64-bit mantissa which + je .is_exp_odd_integer__return_true ; makes it a 1 bit more precision than out integer reg(s). + +%ifdef RT_ARCH_AMD64 + bsr rax, rax +%else + bsr edx, edx + jnz .is_exp_odd_integer__high_dword_is_zero + lea eax, [edx + 20h] + jmp .is_exp_odd_integer__first_bit_in_eax +.is_exp_odd_integer__high_dword_is_zero: + bsr eax, eax +.is_exp_odd_integer__first_bit_in_eax: +%endif + ; + ; The limit is 53 for double precision (one implicit bit + 52 bits fraction), + ; and 24 for single precision types. + ; + mov ah, 53 ; RTFLOAT64U_FRACTION_BITS + 1 + cmp bl, 0 + jne .is_exp_odd_integer__is_double_limit + mov ah, 24 ; RTFLOAT32U_FRACTION_BITS + 1 +.is_exp_odd_integer__is_double_limit: + + cmp al, ah + jae .is_exp_odd_integer__return_false + mov eax, 1 + + ; Return. +.is_exp_odd_integer__return_true: + jmp .is_exp_odd_integer__return +.is_exp_odd_integer__return_false: + xor eax, eax +.is_exp_odd_integer__return: + ffreep st0 + fldenv [xBP - 30h] + ret + +ENDPROC rtNoCrtMathPowCore + -- cgit v1.2.3