diff options
Diffstat (limited to 'src/VBox/Runtime/common/math')
59 files changed, 7746 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/math/Makefile.kup b/src/VBox/Runtime/common/math/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Runtime/common/math/Makefile.kup diff --git a/src/VBox/Runtime/common/math/RTUInt128MulByU64.asm b/src/VBox/Runtime/common/math/RTUInt128MulByU64.asm new file mode 100644 index 00000000..5055330d --- /dev/null +++ b/src/VBox/Runtime/common/math/RTUInt128MulByU64.asm @@ -0,0 +1,81 @@ +; $Id: RTUInt128MulByU64.asm $ +;; @file +; IPRT - RTUInt128MulByU64 - AMD64 implementation. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "internal/bignum.mac" + + +BEGINCODE + +;; +; Multiplies a 128-bit number with a 64-bit one. +; +; @returns puResult. 
+; @param puResult x86:[ebp + 8] gcc:rdi msc:rcx +; @param puValue1 x86:[ebp + 12] gcc:rsi msc:rdx +; @param uValue2 x86:[ebp + 16] gcc:rdx msc:r8 +; +BEGINPROC_EXPORTED RTUInt128MulByU64 +; SEH64_SET_FRAME_xSP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define puResult rdi + %define puValue1 rsi + %define uValue2 r8 + mov r8, rdx ; MUL overwrites rdx, so keep uValue2 in r8. + %else + %define puResult rcx + %define puValue1 r9 + %define uValue2 r8 + mov r9, rdx ; MUL overwrites rdx, so keep puValue1 in r9. + %endif + + ; puValue1->s.Lo * uValue2 + mov rax, [puValue1] + mul uValue2 + mov [puResult], rax ; Store the low half of the result. + mov r11, rdx ; Keep the upper half of this product; it is added into the high half below. + + ; puValue1->s.Hi * uValue2 + mov rax, [puValue1 + 8] + mul uValue2 ; rdx (the bits above 128) is not used, i.e. the result is truncated to 128 bits. + add r11, rax ; Calc the high half of the result. + mov [puResult + 8], r11 ; Store the high half of the result. + + mov rax, puResult ; Return puResult. + +;%elifdef RT_ARCH_X86 +%else + %error "unsupported arch" +%endif + + ret +ENDPROC RTUInt128MulByU64 + diff --git a/src/VBox/Runtime/common/math/amd64/Makefile.kup b/src/VBox/Runtime/common/math/amd64/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Runtime/common/math/amd64/Makefile.kup diff --git a/src/VBox/Runtime/common/math/bignum-amd64-x86.asm b/src/VBox/Runtime/common/math/bignum-amd64-x86.asm new file mode 100644 index 00000000..1ce399bd --- /dev/null +++ b/src/VBox/Runtime/common/math/bignum-amd64-x86.asm @@ -0,0 +1,881 @@ +; $Id: bignum-amd64-x86.asm $ +;; @file +; IPRT - Big Integer Numbers, AMD64 and X86 Assembly Workers +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + + +;********************************************************************************************************************************* +;* Header Files * +;********************************************************************************************************************************* +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "internal/bignum.mac" + + +;********************************************************************************************************************************* +;* Defined Constants And Macros * +;********************************************************************************************************************************* +%ifdef RT_ARCH_AMD64 + %macro sahf 0 + %error "SAHF not supported on ancient AMD64" + %endmacro + %macro lahf 0 + %error "LAHF not supported on ancient AMD64" + %endmacro +%endif + + +BEGINCODE + +;; +; Subtracts a number (pauSubtrahend) from a larger number (pauMinuend) and +; stores the result in pauResult. +; +; All three numbers are zero padded such that a borrow can be carried one (or +; two for 64-bit) elements beyond the end of the largest number. +; +; @returns nothing. 
+; @param pauResult x86:[ebp + 8] gcc:rdi msc:rcx +; @param pauMinuend x86:[ebp + 12] gcc:rsi msc:rdx +; @param pauSubtrahend x86:[ebp + 16] gcc:rdx msc:r8 +; @param cUsed x86:[ebp + 20] gcc:rcx msc:r9 +; +BEGINPROC rtBigNumMagnitudeSubAssemblyWorker + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define pauResult rdi + %define pauMinuend rsi + %define pauSubtrahend rdx + %define cUsed ecx + %else + %define pauResult rcx + %define pauMinuend rdx + %define pauSubtrahend r8 + %define cUsed r9d + %endif + xor r11d, r11d ; index register. + + %if RTBIGNUM_ELEMENT_SIZE == 4 + add cUsed, 1 ; cUsed = RT_ALIGN(cUsed, 2) / 2 + shr cUsed, 1 + %endif + cmp cUsed, 8 ; Skip the big loop if small number. + jb .small_job + + mov r10d, cUsed + shr r10d, 3 ; r10d = number of full 8-element rounds. + clc ; No borrow going into the first round. +.big_loop: + mov rax, [pauMinuend + r11] + sbb rax, [pauSubtrahend + r11] + mov [pauResult + r11], rax + mov rax, [pauMinuend + r11 + 8] + sbb rax, [pauSubtrahend + r11 + 8] + mov [pauResult + r11 + 8], rax + mov rax, [pauMinuend + r11 + 16] + sbb rax, [pauSubtrahend + r11 + 16] + mov [pauResult + r11 + 16], rax + mov rax, [pauMinuend + r11 + 24] + sbb rax, [pauSubtrahend + r11 + 24] + mov [pauResult + r11 + 24], rax + mov rax, [pauMinuend + r11 + 32] + sbb rax, [pauSubtrahend + r11 + 32] + mov [pauResult + r11 + 32], rax + mov rax, [pauMinuend + r11 + 40] + sbb rax, [pauSubtrahend + r11 + 40] + mov [pauResult + r11 + 40], rax + mov rax, [pauMinuend + r11 + 48] + sbb rax, [pauSubtrahend + r11 + 48] + mov [pauResult + r11 + 48], rax + mov rax, [pauMinuend + r11 + 56] + sbb rax, [pauSubtrahend + r11 + 56] + mov [pauResult + r11 + 56], rax + lea r11, [r11 + 64] + dec r10d ; Does not change CF. + jnz .big_loop + + %if 0 ; Ancient AMD64 CPUs do not have lahf/sahf, thus the mess in the %else. + lahf ; Save CF + and cUsed, 7 ; Up to seven odd rounds. + jz .done + sahf ; Restore CF. + jmp .small_loop ; Skip the clc at .small_job, CF holds the pending borrow. 
+ %else + jnc .no_carry + and cUsed, 7 ; Up to seven odd rounds. + jz .done + stc ; The 'and' cleared CF, set the pending borrow again. + jmp .small_loop ; Skip the clc at .small_job, CF holds the pending borrow. +.no_carry: + and cUsed, 7 ; Up to seven odd rounds. + jz .done + %endif +.small_job: + clc +.small_loop: + mov rax, [pauMinuend + r11] + sbb rax, [pauSubtrahend + r11] + mov [pauResult + r11], rax + lea r11, [r11 + 8] + dec cUsed ; does not change CF. + jnz .small_loop + %ifdef RT_STRICT + jnc .done + int3 + %endif +.done: + +%elifdef RT_ARCH_X86 + push edi + push esi + push ebx + + mov edi, [ebp + 08h] ; pauResult + %define pauResult edi + mov ecx, [ebp + 0ch] ; pauMinuend + %define pauMinuend ecx + mov edx, [ebp + 10h] ; pauSubtrahend + %define pauSubtrahend edx + mov esi, [ebp + 14h] ; cUsed + %define cUsed esi + + xor ebx, ebx ; index register. + + cmp cUsed, 8 ; Skip the big loop if small number. + jb .small_job + + shr cUsed, 3 + clc +.big_loop: + mov eax, [pauMinuend + ebx] + sbb eax, [pauSubtrahend + ebx] + mov [pauResult + ebx], eax + mov eax, [pauMinuend + ebx + 4] + sbb eax, [pauSubtrahend + ebx + 4] + mov [pauResult + ebx + 4], eax + mov eax, [pauMinuend + ebx + 8] + sbb eax, [pauSubtrahend + ebx + 8] + mov [pauResult + ebx + 8], eax + mov eax, [pauMinuend + ebx + 12] + sbb eax, [pauSubtrahend + ebx + 12] + mov [pauResult + ebx + 12], eax + mov eax, [pauMinuend + ebx + 16] + sbb eax, [pauSubtrahend + ebx + 16] + mov [pauResult + ebx + 16], eax + mov eax, [pauMinuend + ebx + 20] + sbb eax, [pauSubtrahend + ebx + 20] + mov [pauResult + ebx + 20], eax + mov eax, [pauMinuend + ebx + 24] + sbb eax, [pauSubtrahend + ebx + 24] + mov [pauResult + ebx + 24], eax + mov eax, [pauMinuend + ebx + 28] + sbb eax, [pauSubtrahend + ebx + 28] + mov [pauResult + ebx + 28], eax + lea ebx, [ebx + 32] + dec cUsed ; Does not change CF. + jnz .big_loop + + lahf ; Save CF + mov cUsed, [ebp + 14h] ; Reload the original cUsed; up to seven odd rounds. + and cUsed, 7 + jz .done + sahf ; Restore CF. + jmp .small_loop ; Skip the clc at .small_job, CF holds the pending borrow. 
+ +.small_job: + clc +.small_loop: + mov eax, [pauMinuend + ebx] + sbb eax, [pauSubtrahend + ebx] + mov [pauResult + ebx], eax + lea ebx, [ebx + 4] + dec cUsed ; Does not change CF + jnz .small_loop + %ifdef RT_STRICT + jnc .done + int3 + %endif +.done: + + pop ebx + pop esi + pop edi +%else + %error "Unsupported arch" +%endif + + leave + ret +%undef pauResult +%undef pauMinuend +%undef pauSubtrahend +%undef cUsed +ENDPROC rtBigNumMagnitudeSubAssemblyWorker + + + +;; +; Subtracts a number (pauSubtrahend) from a larger number (pauResultMinuend), +; storing the result in place, i.e. in pauResultMinuend. +; +; Both numbers are zero padded such that a borrow can be carried one (or +; two for 64-bit) elements beyond the end of the largest number. +; +; @returns nothing. +; @param pauResultMinuend x86:[ebp + 8] gcc:rdi msc:rcx +; @param pauSubtrahend x86:[ebp + 12] gcc:rsi msc:rdx +; @param cUsed x86:[ebp + 16] gcc:rdx msc:r8 +; +BEGINPROC rtBigNumMagnitudeSubThisAssemblyWorker + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define pauResultMinuend rdi + %define pauSubtrahend rsi + %define cUsed edx + %else + %define pauResultMinuend rcx + %define pauSubtrahend rdx + %define cUsed r8d + %endif + xor r11d, r11d ; index register. + + %if RTBIGNUM_ELEMENT_SIZE == 4 + add cUsed, 1 ; cUsed = RT_ALIGN(cUsed, 2) / 2 + shr cUsed, 1 + %endif + cmp cUsed, 8 ; Skip the big loop if small number. 
+ jb .small_job + + mov r10d, cUsed + shr r10d, 3 ; r10d = number of full 8-element rounds. + clc ; No borrow going into the first round. +.big_loop: + mov rax, [pauSubtrahend + r11] + sbb [pauResultMinuend + r11], rax + mov rax, [pauSubtrahend + r11 + 8] + sbb [pauResultMinuend + r11 + 8], rax + mov rax, [pauSubtrahend + r11 + 16] + sbb [pauResultMinuend + r11 + 16], rax + mov rax, [pauSubtrahend + r11 + 24] + sbb [pauResultMinuend + r11 + 24], rax + mov rax, [pauSubtrahend + r11 + 32] + sbb [pauResultMinuend + r11 + 32], rax + mov rax, [pauSubtrahend + r11 + 40] + sbb [pauResultMinuend + r11 + 40], rax + mov rax, [pauSubtrahend + r11 + 48] + sbb [pauResultMinuend + r11 + 48], rax + mov rax, [pauSubtrahend + r11 + 56] + sbb [pauResultMinuend + r11 + 56], rax + lea r11, [r11 + 64] + dec r10d ; Does not change CF. + jnz .big_loop + + %if 0 ; Ancient AMD64 CPUs do not have lahf/sahf, thus the mess in the %else. + lahf ; Save CF + and cUsed, 7 ; Up to seven odd rounds. + jz .done + sahf ; Restore CF. + jmp .small_loop ; Skip the clc at .small_job, CF holds the pending borrow. + %else + jnc .no_carry + and cUsed, 7 ; Up to seven odd rounds. + jz .done + stc ; The 'and' cleared CF, set the pending borrow again. + jmp .small_loop ; Skip the clc at .small_job, CF holds the pending borrow. +.no_carry: + and cUsed, 7 ; Up to seven odd rounds. + jz .done + %endif +.small_job: + clc +.small_loop: + mov rax, [pauSubtrahend + r11] + sbb [pauResultMinuend + r11], rax + lea r11, [r11 + 8] + dec cUsed ; does not change CF. + jnz .small_loop + %ifdef RT_STRICT + jnc .done + int3 + %endif +.done: + +%elifdef RT_ARCH_X86 + push edi + push ebx + + mov edi, [ebp + 08h] ; pauResultMinuend + %define pauResultMinuend edi + mov edx, [ebp + 0ch] ; pauSubtrahend + %define pauSubtrahend edx + mov ecx, [ebp + 10h] ; cUsed + %define cUsed ecx + + xor ebx, ebx ; index register. + + cmp cUsed, 8 ; Skip the big loop if small number. 
+ jb .small_job + + shr cUsed, 3 + clc +.big_loop: + mov eax, [pauSubtrahend + ebx] + sbb [pauResultMinuend + ebx], eax + mov eax, [pauSubtrahend + ebx + 4] + sbb [pauResultMinuend + ebx + 4], eax + mov eax, [pauSubtrahend + ebx + 8] + sbb [pauResultMinuend + ebx + 8], eax + mov eax, [pauSubtrahend + ebx + 12] + sbb [pauResultMinuend + ebx + 12], eax + mov eax, [pauSubtrahend + ebx + 16] + sbb [pauResultMinuend + ebx + 16], eax + mov eax, [pauSubtrahend + ebx + 20] + sbb [pauResultMinuend + ebx + 20], eax + mov eax, [pauSubtrahend + ebx + 24] + sbb [pauResultMinuend + ebx + 24], eax + mov eax, [pauSubtrahend + ebx + 28] + sbb [pauResultMinuend + ebx + 28], eax + lea ebx, [ebx + 32] + dec cUsed ; Does not change CF. + jnz .big_loop + + lahf ; Save CF + mov cUsed, [ebp + 10h] ; Up to seven odd rounds. + and cUsed, 7 + jz .done + sahf ; Restore CF. + jmp .small_loop ; Skip CF=1 (clc). + +.small_job: + clc +.small_loop: + mov eax, [pauSubtrahend + ebx] + sbb [pauResultMinuend + ebx], eax + lea ebx, [ebx + 4] + dec cUsed ; Does not change CF + jnz .small_loop + %ifdef RT_STRICT + jnc .done + int3 + %endif +.done: + + pop ebx + pop edi +%else + %error "Unsupported arch" +%endif + + leave + ret +ENDPROC rtBigNumMagnitudeSubThisAssemblyWorker + + +;; +; Shifts an element array one bit to the left, returning the final carry value. +; +; On 64-bit hosts the array is always zero padded to a multiple of 8 bytes, so +; we can use 64-bit operand sizes even if the element type is 32-bit. +; +; @returns The final carry value. 
+; @param pauElements x86:[ebp + 8] gcc:rdi msc:rcx +; @param cUsed x86:[ebp + 12] gcc:rsi msc:rdx +; @param uCarry x86:[ebp + 16] gcc:rdx msc:r8 +; +BEGINPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define pauElements rdi + %define cUsed esi + %define uCarry edx + %else + %define pauElements rcx + %define cUsed edx + %define uCarry r8d + %endif +%elifdef RT_ARCH_X86 + %define pauElements ecx + mov pauElements, [ebp + 08h] + %define cUsed edx + mov cUsed, [ebp + 0ch] + %define uCarry eax + mov uCarry, [ebp + 10h] +%else + %error "Unsupported arch." +%endif + ; Lots to do? + cmp cUsed, 8 + jae .big_loop_init + + ; Check for empty array. + test cUsed, cUsed + jz .no_elements + jmp .small_loop_init + + ; Big loop - 8 unrolled loop iterations. +.big_loop_init: +%ifdef RT_ARCH_AMD64 + mov r11d, cUsed +%endif + shr cUsed, 3 + test uCarry, uCarry ; clear the carry flag + jz .big_loop + stc +.big_loop: +%if RTBIGNUM_ELEMENT_SIZE == 8 + rcl qword [pauElements], 1 + rcl qword [pauElements + 8], 1 + rcl qword [pauElements + 16], 1 + rcl qword [pauElements + 24], 1 + rcl qword [pauElements + 32], 1 + rcl qword [pauElements + 40], 1 + rcl qword [pauElements + 48], 1 + rcl qword [pauElements + 56], 1 + lea pauElements, [pauElements + 64] +%else + rcl dword [pauElements], 1 + rcl dword [pauElements + 4], 1 + rcl dword [pauElements + 8], 1 + rcl dword [pauElements + 12], 1 + rcl dword [pauElements + 16], 1 + rcl dword [pauElements + 20], 1 + rcl dword [pauElements + 24], 1 + rcl dword [pauElements + 28], 1 + lea pauElements, [pauElements + 32] +%endif + dec cUsed + jnz .big_loop + + ; More to do? + pushf ; save carry flag (uCarry no longer used on x86). +%ifdef RT_ARCH_AMD64 + mov cUsed, r11d +%else + mov cUsed, [ebp + 0ch] +%endif + and cUsed, 7 + jz .restore_cf_and_return ; Jump if we're good and done. + popf ; Restore CF. 
+ jmp .small_loop ; Deal with the odd rounds. +.restore_cf_and_return: + popf + jmp .carry_to_eax + + ; Small loop - One round at a time. +.small_loop_init: + test uCarry, uCarry ; clear the carry flag + jz .small_loop + stc +.small_loop: +%if RTBIGNUM_ELEMENT_SIZE == 8 + rcl qword [pauElements], 1 + lea pauElements, [pauElements + 8] +%else + rcl dword [pauElements], 1 + lea pauElements, [pauElements + 4] +%endif + dec cUsed ; Does not change CF. + jnz .small_loop + + ; Calculate return value. +.carry_to_eax: + mov eax, 0 ; mov leaves the flags alone, so CF is still the final carry here. + jnc .return + inc eax +.return: + leave + ret + +.no_elements: + mov eax, uCarry ; Empty array: the input carry is the output carry. + jmp .return +ENDPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker + + +;; +; Performs a 128-bit by 64-bit division on 64-bit and +; a 64-bit by 32-bit division on 32-bit. +; +; @returns nothing. +; @param puQuotient x86:[ebp + 8] gcc:rdi msc:rcx Double element. +; @param puRemainder x86:[ebp + 12] gcc:rsi msc:rdx Normal element. +; @param uDividendHi x86:[ebp + 16] gcc:rdx msc:r8 +; @param uDividendLo x86:[ebp + 20] gcc:rcx msc:r9 +; @param uDivisor x86:[ebp + 24] gcc:r8 msc:[rbp + 30h] +; +BEGINPROC rtBigNumElement2xDiv2xBy1x + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %if RTBIGNUM_ELEMENT_SIZE == 4 + %error "sorry not implemented yet." + sorry not implemented yet. 
+ %endif + + %define uDividendHi rdx + %define uDividendLo rax + %ifdef ASM_CALL64_GCC + %define uDivisor r8 + %define puQuotient rdi + %define puRemainder rsi + mov rax, rcx ; uDividendLo; uDividendHi is already in rdx. + %else + %define puQuotient rcx + %define puRemainder r11 + %define uDivisor r10 + mov r11, rdx ; puRemainder (rdx is needed for the dividend). + mov r10, [rbp + 30h] ; uDivisor + mov rdx, r8 ; uDividendHi + mov rax, r9 ; uDividendLo + %endif + +%elifdef RT_ARCH_X86 + push edi + push ebx + + %define uDividendHi edx + mov uDividendHi, [ebp + 10h] + %define uDividendLo eax + mov uDividendLo, [ebp + 14h] + %define uDivisor ecx + mov uDivisor, [ebp + 18h] + %define puQuotient edi + mov puQuotient, [ebp + 08h] + %define puRemainder ebx + mov puRemainder, [ebp + 0ch] +%else + %error "Unsupported arch." +%endif + +%ifdef RT_STRICT + ; + ; The divisor shall not be zero. + ; + test uDivisor, uDivisor + jnz .divisor_not_zero + int3 +.divisor_not_zero: +%endif + + ; + ; Avoid division overflow. This will calculate the high part of the quotient. + ; + ; When uDividendHi >= uDivisor, uDividendHi is divided first on its own. That + ; leaves uDividendHi % uDivisor in xDX, which is below uDivisor, so the + ; division at .do_divide cannot overflow. + ; + mov RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], 0 + cmp uDividendHi, uDivisor + jb .do_divide + push xAX ; Save uDividendLo. + mov xAX, xDX + xor edx, edx + div uDivisor + mov RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], xAX + pop xAX ; Restore uDividendLo. + + ; + ; Perform the division and store the result. + ; +.do_divide: + div uDivisor + mov RTBIGNUM_ELEMENT_PRE [puQuotient], xAX + mov RTBIGNUM_ELEMENT_PRE [puRemainder], xDX + + +%ifdef RT_ARCH_X86 + pop ebx + pop edi +%endif + leave + ret +ENDPROC rtBigNumElement2xDiv2xBy1x + + +;; +; Performs the core of long multiplication. +; +; @returns nothing. +; @param pauResult x86:[ebp + 8] gcc:rdi msc:rcx Initialized to zero. 
+; @param pauMultiplier x86:[ebp + 12] gcc:rsi msc:rdx +; @param cMultiplier x86:[ebp + 16] gcc:rdx msc:r8 +; @param pauMultiplicand x86:[ebp + 20] gcc:rcx msc:r9 +; @param cMultiplicand x86:[ebp + 24] gcc:r8 msc:[rbp + 30h] +; +BEGINPROC rtBigNumMagnitudeMultiplyAssemblyWorker + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %if RTBIGNUM_ELEMENT_SIZE == 4 + %error "sorry not implemented yet." + sorry not implemented yet. + %endif + + %ifdef ASM_CALL64_GCC + %define pauResult rdi + %define pauMultiplier rsi + %define cMultiplier r9 + %define pauMultiplicand rcx + %define cMultiplicand r8 + mov r9d, edx ; cMultiplier + mov r8d, r8d ; cMultiplicand - paranoia + %define uMultiplier r10 + %define iMultiplicand r11 + %else + %define pauResult rcx + %define pauMultiplier r11 + %define cMultiplier r8 + %define pauMultiplicand r9 + %define cMultiplicand r10 + mov pauMultiplier, rdx + mov r10d, dword [rbp + 30h] ; cMultiplicand + mov r8d, r8d ; cMultiplier - paranoia + %define uMultiplier r12 + push r12 + %define iMultiplicand r13 + push r13 + %endif + +%elifdef RT_ARCH_X86 + push edi + push esi + push ebx + sub esp, 10h + %define pauResult edi + mov pauResult, [ebp + 08h] + %define pauMultiplier dword [ebp + 0ch] + %define cMultiplier dword [ebp + 10h] + %define pauMultiplicand ecx + mov pauMultiplicand, [ebp + 14h] + %define cMultiplicand dword [ebp + 18h] + %define uMultiplier dword [ebp - 10h] + %define iMultiplicand ebx + +%else + %error "Unsupported arch." +%endif + + ; + ; Check that the multiplicand isn't empty (avoids an extra jump in the inner loop). + ; + cmp cMultiplicand, 0 + je .done + + ; + ; Loop thru each element in the multiplier. 
+ ; + ; while (cMultiplier-- > 0) +.multiplier_loop: + cmp cMultiplier, 0 + jz .done + dec cMultiplier + + ; uMultiplier = *pauMultiplier +%ifdef RT_ARCH_X86 + mov edx, pauMultiplier + mov eax, [edx] + mov uMultiplier, eax +%else + mov uMultiplier, [pauMultiplier] +%endif + ; for (iMultiplicand = 0; iMultiplicand < cMultiplicand; iMultiplicand++) + xor iMultiplicand, iMultiplicand +.multiplicand_loop: + mov xAX, [pauMultiplicand + iMultiplicand * RTBIGNUM_ELEMENT_SIZE] + mul uMultiplier + add [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE], xAX + adc [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE + RTBIGNUM_ELEMENT_SIZE], xDX + jnc .next_multiplicand + lea xDX, [iMultiplicand + 2] +.next_adc: + adc RTBIGNUM_ELEMENT_PRE [pauResult + xDX * RTBIGNUM_ELEMENT_SIZE], 0 + inc xDX + jc .next_adc + +.next_multiplicand: + inc iMultiplicand ; iMultiplicand++ + cmp iMultiplicand, cMultiplicand ; iMultiplicand < cMultiplicand + jb .multiplicand_loop + + ; Advance and loop on multiplier. + add pauMultiplier, RTBIGNUM_ELEMENT_SIZE + add pauResult, RTBIGNUM_ELEMENT_SIZE + jmp .multiplier_loop + +.done: + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %else + pop r13 + pop r12 + %endif +%elifdef RT_ARCH_X86 + add esp, 10h + pop ebx + pop esi + pop edi +%endif + leave + ret +ENDPROC rtBigNumMagnitudeMultiplyAssemblyWorker + +;; +; Assembly implementation of the D4 step of Knuth's division algorithm. +; +; This subtracts Divisor * Qhat from the dividend at the current J index. +; +; @returns true if negative result (unlikely), false if positive. +; @param pauDividendJ x86:[ebp + 8] gcc:rdi msc:rcx Initialized to zero. 
+; @param pauDivisor x86:[ebp + 12] gcc:rsi msc:rdx +; @param cDivisor x86:[ebp + 16] gcc:edx msc:r8d +; @param uQhat x86:[ebp + 20] gcc:rcx msc:r9 +; +BEGINPROC rtBigNumKnuthD4_MulSub + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %if RTBIGNUM_ELEMENT_SIZE == 4 + %error "sorry not implemented yet." + sorry not implemented yet. + %endif + + %ifdef ASM_CALL64_GCC + %define pauDividendJ rdi + %define pauDivisor rsi + %define cDivisor r8 + %define uQhat rcx + mov r8d, edx ; cDivisor (MUL overwrites rdx) + %define uMulCarry r11 + %else + %define pauDividendJ rcx + %define pauDivisor r10 + %define cDivisor r8 + %define uQhat r9 + mov r10, rdx ; pauDivisor (MUL overwrites rdx) + mov r8d, r8d ; cDivisor - paranoia + %define uMulCarry r11 + %endif + +%elifdef RT_ARCH_X86 + push edi + push esi + push ebx + %define pauDividendJ edi + mov pauDividendJ, [ebp + 08h] + %define pauDivisor esi + mov pauDivisor, [ebp + 0ch] + %define cDivisor ecx + mov cDivisor, [ebp + 10h] + %define uQhat dword [ebp + 14h] + %define uMulCarry ebx +%else + %error "Unsupported arch." +%endif + +%ifdef RT_STRICT + ; + ; Some sanity checks. + ; + cmp cDivisor, 0 + jne .cDivisor_not_zero + int3 +.cDivisor_not_zero: +%endif + + ; + ; Initialize the loop. + ; + xor uMulCarry, uMulCarry + + ; + ; do ... while (--cDivisor > 0); + ; +.the_loop: + ; RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]); + mov xAX, uQhat + mul RTBIGNUM_ELEMENT_PRE [pauDivisor] + ; RTUInt128AssignAddU64(&uSub, uMulCarry); + add xAX, uMulCarry + adc xDX, 0 + mov uMulCarry, xDX + ; Subtract uSub.s.Lo+fCarry from pauDividendJ[i] + sub [pauDividendJ], xAX + adc uMulCarry, 0 ; Fold the borrow of the subtraction into the carry for the next element. +%ifdef RT_STRICT + jnc .uMulCarry_did_not_overflow + int3 +.uMulCarry_did_not_overflow: +%endif + + ; Advance. + add pauDividendJ, RTBIGNUM_ELEMENT_SIZE + add pauDivisor, RTBIGNUM_ELEMENT_SIZE + dec cDivisor + jnz .the_loop + + ; + ; Final dividend element (no corresponding divisor element). 
+ ; + sub [pauDividendJ], uMulCarry + sbb eax, eax + and eax, 1 + +.done: +%ifdef RT_ARCH_AMD64 +%elifdef RT_ARCH_X86 + pop ebx + pop esi + pop edi +%endif + leave + ret +ENDPROC rtBigNumKnuthD4_MulSub + diff --git a/src/VBox/Runtime/common/math/bignum.cpp b/src/VBox/Runtime/common/math/bignum.cpp new file mode 100644 index 00000000..7f903012 --- /dev/null +++ b/src/VBox/Runtime/common/math/bignum.cpp @@ -0,0 +1,2867 @@ +/* $Id: bignum.cpp $ */ +/** @file + * IPRT - Big Integer Numbers. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL) only, as it comes in the "COPYING.CDDL" file of the + * VirtualBox OSE distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +/*#ifdef IN_RING3 +# define RTMEM_WRAP_TO_EF_APIS +#endif*/ +#include "internal/iprt.h" +#include <iprt/bignum.h> + +#include <iprt/asm.h> +#include <iprt/asm-math.h> +#include <iprt/err.h> +#include <iprt/mem.h> +#include <iprt/memsafer.h> +#include <iprt/string.h> +#if RTBIGNUM_ELEMENT_BITS == 64 +# include <iprt/uint128.h> +#endif + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Allocation alignment in elements. */ +#ifndef RTMEM_WRAP_TO_EF_APIS +# define RTBIGNUM_ALIGNMENT 4U +#else +# define RTBIGNUM_ALIGNMENT 1U +#endif + +/** The max size (in bytes) of an elements array. */ +#define RTBIGNUM_MAX_SIZE _4M + + +/** Assert the validity of a big number structure pointer in strict builds. */ +#ifdef RT_STRICT +# define RTBIGNUM_ASSERT_VALID(a_pBigNum) \ + do { \ + AssertPtr(a_pBigNum); \ + Assert(!(a_pBigNum)->fCurScrambled); \ + Assert( (a_pBigNum)->cUsed == (a_pBigNum)->cAllocated \ + || ASMMemIsZero(&(a_pBigNum)->pauElements[(a_pBigNum)->cUsed], \ + ((a_pBigNum)->cAllocated - (a_pBigNum)->cUsed) * RTBIGNUM_ELEMENT_SIZE)); \ + } while (0) +#else +# define RTBIGNUM_ASSERT_VALID(a_pBigNum) do {} while (0) +#endif + + +/** Enable assembly optimizations. */ +#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) +# define IPRT_BIGINT_WITH_ASM +#endif + + +/** @def RTBIGNUM_ZERO_ALIGN + * For calculating the rtBigNumEnsureExtraZeroElements argument from cUsed. 
+ * This has to do with 64-bit assembly instruction operating as RTBIGNUMELEMENT + * was 64-bit on some hosts. + */ +#if defined(IPRT_BIGINT_WITH_ASM) && ARCH_BITS == 64 && RTBIGNUM_ELEMENT_SIZE == 4 && defined(RT_LITTLE_ENDIAN) +# define RTBIGNUM_ZERO_ALIGN(a_cUsed) RT_ALIGN_32(a_cUsed, 2) +#elif defined(IPRT_BIGINT_WITH_ASM) +# define RTBIGNUM_ZERO_ALIGN(a_cUsed) (a_cUsed) +#else +# define RTBIGNUM_ZERO_ALIGN(a_cUsed) (a_cUsed) +#endif + +#define RTBIGNUMELEMENT_HALF_MASK ( ((RTBIGNUMELEMENT)1 << (RTBIGNUM_ELEMENT_BITS / 2)) - (RTBIGNUMELEMENT)1) +#define RTBIGNUMELEMENT_LO_HALF(a_uElement) ( (RTBIGNUMELEMENT_HALF_MASK) & (a_uElement) ) +#define RTBIGNUMELEMENT_HI_HALF(a_uElement) ( (a_uElement) >> (RTBIGNUM_ELEMENT_BITS / 2) ) + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** Type the size of two elements. 
*/ +#if RTBIGNUM_ELEMENT_BITS == 64 +typedef RTUINT128U RTBIGNUMELEMENT2X; +#else +typedef RTUINT64U RTBIGNUMELEMENT2X; +#endif + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +DECLINLINE(int) rtBigNumSetUsed(PRTBIGNUM pBigNum, uint32_t cNewUsed); + +#ifdef IPRT_BIGINT_WITH_ASM +/* bignum-amd64-x86.asm: */ +DECLASM(void) rtBigNumMagnitudeSubAssemblyWorker(RTBIGNUMELEMENT *pauResult, RTBIGNUMELEMENT const *pauMinuend, + RTBIGNUMELEMENT const *pauSubtrahend, uint32_t cUsed); +DECLASM(void) rtBigNumMagnitudeSubThisAssemblyWorker(RTBIGNUMELEMENT *pauMinuendResult, RTBIGNUMELEMENT const *pauSubtrahend, + uint32_t cUsed); +DECLASM(RTBIGNUMELEMENT) rtBigNumMagnitudeShiftLeftOneAssemblyWorker(RTBIGNUMELEMENT *pauElements, uint32_t cUsed, + RTBIGNUMELEMENT uCarry); +DECLASM(void) rtBigNumElement2xDiv2xBy1x(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT *puRemainder, + RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo, RTBIGNUMELEMENT uDivisor); +DECLASM(void) rtBigNumMagnitudeMultiplyAssemblyWorker(PRTBIGNUMELEMENT pauResult, + PCRTBIGNUMELEMENT pauMultiplier, uint32_t cMultiplier, + PCRTBIGNUMELEMENT pauMultiplicand, uint32_t cMultiplicand); +#endif + + + + + +/** @name Functions working on one element. + * @{ */ + +DECLINLINE(uint32_t) rtBigNumElementBitCount(RTBIGNUMELEMENT uElement) +{ +#if RTBIGNUM_ELEMENT_SIZE == 8 + if (uElement >> 32) + return ASMBitLastSetU32((uint32_t)(uElement >> 32)) + 32; + return ASMBitLastSetU32((uint32_t)uElement); +#elif RTBIGNUM_ELEMENT_SIZE == 4 + return ASMBitLastSetU32(uElement); +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif +} + + +/** + * Does addition with carry. + * + * This is a candidate for inline assembly on some platforms. 
+ * + * @returns The result (the sum) + * @param uAugend What to add to. + * @param uAddend What to add to it. + * @param pfCarry Where to read the input carry and return the output + * carry. + */ +DECLINLINE(RTBIGNUMELEMENT) rtBigNumElementAddWithCarry(RTBIGNUMELEMENT uAugend, RTBIGNUMELEMENT uAddend, + RTBIGNUMELEMENT *pfCarry) +{ + RTBIGNUMELEMENT uRet = uAugend + uAddend; + if (!*pfCarry) + *pfCarry = uRet < uAugend; + else + { + uRet += 1; + *pfCarry = uRet <= uAugend; + } + return uRet; +} + + +#if !defined(IPRT_BIGINT_WITH_ASM) || defined(RT_STRICT) +/** + * Does subtraction with borrow. + * + * This is a candidate for inline assembly on some platforms. + * + * @returns The result (the difference) + * @param uMinuend What to subtract from. + * @param uSubtrahend What to subtract. + * @param pfBorrow Where to read the input borrow and return the output + * borrow. + */ +DECLINLINE(RTBIGNUMELEMENT) rtBigNumElementSubWithBorrow(RTBIGNUMELEMENT uMinuend, RTBIGNUMELEMENT uSubtrahend, + RTBIGNUMELEMENT *pfBorrow) +{ + RTBIGNUMELEMENT uRet = uMinuend - uSubtrahend - *pfBorrow; + + /* Figure out if we borrowed. */ + *pfBorrow = !*pfBorrow ? uMinuend < uSubtrahend : uMinuend <= uSubtrahend; + return uRet; +} +#endif + +/** @} */ + + + + +/** @name Double element primitives. 
+ * @{ */ + +static int rtBigNumElement2xCopyToMagnitude(RTBIGNUMELEMENT2X const *pValue2x, PRTBIGNUM pDst) +{ + int rc; + if (pValue2x->s.Hi) + { + rc = rtBigNumSetUsed(pDst, 2); + if (RT_SUCCESS(rc)) + { + pDst->pauElements[0] = pValue2x->s.Lo; + pDst->pauElements[1] = pValue2x->s.Hi; + } + } + else if (pValue2x->s.Lo) + { + rc = rtBigNumSetUsed(pDst, 1); + if (RT_SUCCESS(rc)) + pDst->pauElements[0] = pValue2x->s.Lo; + } + else + rc = rtBigNumSetUsed(pDst, 0); + return rc; +} + +static void rtBigNumElement2xDiv(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT2X *puRemainder, + RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo, + RTBIGNUMELEMENT uDivisorHi, RTBIGNUMELEMENT uDivisorLo) +{ + RTBIGNUMELEMENT2X uDividend; + uDividend.s.Lo = uDividendLo; + uDividend.s.Hi = uDividendHi; + + RTBIGNUMELEMENT2X uDivisor; + uDivisor.s.Lo = uDivisorLo; + uDivisor.s.Hi = uDivisorHi; + +#if RTBIGNUM_ELEMENT_BITS == 64 + RTUInt128DivRem(puQuotient, puRemainder, &uDividend, &uDivisor); +#else + puQuotient->u = uDividend.u / uDivisor.u; + puRemainder->u = uDividend.u % uDivisor.u; +#endif +} + +#ifndef IPRT_BIGINT_WITH_ASM +static void rtBigNumElement2xDiv2xBy1x(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT *puRemainder, + RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo, RTBIGNUMELEMENT uDivisor) +{ + RTBIGNUMELEMENT2X uDividend; + uDividend.s.Lo = uDividendLo; + uDividend.s.Hi = uDividendHi; + +# if RTBIGNUM_ELEMENT_BITS == 64 + RTBIGNUMELEMENT2X uRemainder2x; + RTBIGNUMELEMENT2X uDivisor2x; + uDivisor2x.s.Hi = 0; + uDivisor2x.s.Lo = uDivisor; + /** @todo optimize this. 
*/ + RTUInt128DivRem(puQuotient, &uRemainder2x, &uDividend, &uDivisor2x); + *puRemainder = uRemainder2x.s.Lo; +# else + puQuotient->u = uDividend.u / uDivisor; + puRemainder->u = uDividend.u % uDivisor; +# endif +} +#endif + +DECLINLINE(void) rtBigNumElement2xDec(RTBIGNUMELEMENT2X *puValue) +{ +#if RTBIGNUM_ELEMENT_BITS == 64 + if (puValue->s.Lo-- == 0) + puValue->s.Hi--; +#else + puValue->u -= 1; +#endif +} + +#if 0 /* unused */ +DECLINLINE(void) rtBigNumElement2xAdd1x(RTBIGNUMELEMENT2X *puValue, RTBIGNUMELEMENT uAdd) +{ +#if RTBIGNUM_ELEMENT_BITS == 64 + RTUInt128AssignAddU64(puValue, uAdd); +#else + puValue->u += uAdd; +#endif +} +#endif /* unused */ + +/** @} */ + + + + + +/** + * Scrambles a big number if required. + * + * @param pBigNum The big number. + */ +DECLINLINE(void) rtBigNumScramble(PRTBIGNUM pBigNum) +{ + if (pBigNum->fSensitive) + { + AssertReturnVoid(!pBigNum->fCurScrambled); + if (pBigNum->pauElements) + { + int rc = RTMemSaferScramble(pBigNum->pauElements, pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); AssertRC(rc); + pBigNum->fCurScrambled = RT_SUCCESS(rc); + } + else + pBigNum->fCurScrambled = true; + } +} + + +/** + * Unscrambles a big number if required. + * + * @returns IPRT status code. + * @param pBigNum The big number. + */ +DECLINLINE(int) rtBigNumUnscramble(PRTBIGNUM pBigNum) +{ + if (pBigNum->fSensitive) + { + AssertReturn(pBigNum->fCurScrambled, VERR_INTERNAL_ERROR_2); + if (pBigNum->pauElements) + { + int rc = RTMemSaferUnscramble(pBigNum->pauElements, pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); AssertRC(rc); + pBigNum->fCurScrambled = !RT_SUCCESS(rc); + return rc; + } + else + pBigNum->fCurScrambled = false; + } + return VINF_SUCCESS; +} + + +/** + * Getter function for pauElements which extends the array to infinity. + * + * @returns The element value. + * @param pBigNum The big number. + * @param iElement The element index. 
+ */ +DECLINLINE(RTBIGNUMELEMENT) rtBigNumGetElement(PCRTBIGNUM pBigNum, uint32_t iElement) +{ + if (iElement < pBigNum->cUsed) + return pBigNum->pauElements[iElement]; + return 0; +} + + +/** + * Grows the pauElements array so it can fit at least @a cNewUsed entries. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param cNewUsed The new cUsed value. + * @param cMinElements The minimum number of elements. + */ +static int rtBigNumGrow(PRTBIGNUM pBigNum, uint32_t cNewUsed, uint32_t cMinElements) +{ + Assert(cMinElements >= cNewUsed); + uint32_t const cbOld = pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE; + uint32_t const cNew = RT_ALIGN_32(cMinElements, RTBIGNUM_ALIGNMENT); + uint32_t const cbNew = cNew * RTBIGNUM_ELEMENT_SIZE; + Assert(cbNew > cbOld); + if (cbNew <= RTBIGNUM_MAX_SIZE && cbNew > cbOld) + { + void *pvNew; + if (pBigNum->fSensitive) + pvNew = RTMemSaferReallocZ(cbOld, pBigNum->pauElements, cbNew); + else + pvNew = RTMemRealloc(pBigNum->pauElements, cbNew); + if (RT_LIKELY(pvNew)) + { + if (cbNew > cbOld) + RT_BZERO((char *)pvNew + cbOld, cbNew - cbOld); + if (pBigNum->cUsed > cNewUsed) + RT_BZERO((RTBIGNUMELEMENT *)pvNew + cNewUsed, (pBigNum->cUsed - cNewUsed) * RTBIGNUM_ELEMENT_SIZE); + + pBigNum->pauElements = (RTBIGNUMELEMENT *)pvNew; + pBigNum->cUsed = cNewUsed; + pBigNum->cAllocated = cNew; + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; + } + return VERR_OUT_OF_RANGE; +} + + +/** + * Changes the cUsed member, growing the pauElements array if necessary. + * + * Any elements added to the array will be initialized to zero. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param cNewUsed The new cUsed value. 
+ */ +DECLINLINE(int) rtBigNumSetUsed(PRTBIGNUM pBigNum, uint32_t cNewUsed) +{ + if (pBigNum->cAllocated >= cNewUsed) + { + if (pBigNum->cUsed > cNewUsed) + RT_BZERO(&pBigNum->pauElements[cNewUsed], (pBigNum->cUsed - cNewUsed) * RTBIGNUM_ELEMENT_SIZE); +#ifdef RT_STRICT + else if (pBigNum->cUsed != cNewUsed) + Assert(ASMMemIsZero(&pBigNum->pauElements[pBigNum->cUsed], (cNewUsed - pBigNum->cUsed) * RTBIGNUM_ELEMENT_SIZE)); +#endif + pBigNum->cUsed = cNewUsed; + return VINF_SUCCESS; + } + return rtBigNumGrow(pBigNum, cNewUsed, cNewUsed); +} + + +/** + * Extended version of rtBigNumSetUsed that also allow specifying the number of + * zero elements required. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param cNewUsed The new cUsed value. + * @param cMinElements The minimum number of elements allocated. The + * difference between @a cNewUsed and @a cMinElements + * is initialized to zero because all free elements are + * zero. + */ +DECLINLINE(int) rtBigNumSetUsedEx(PRTBIGNUM pBigNum, uint32_t cNewUsed, uint32_t cMinElements) +{ + if (pBigNum->cAllocated >= cMinElements) + { + if (pBigNum->cUsed > cNewUsed) + RT_BZERO(&pBigNum->pauElements[cNewUsed], (pBigNum->cUsed - cNewUsed) * RTBIGNUM_ELEMENT_SIZE); +#ifdef RT_STRICT + else if (pBigNum->cUsed != cNewUsed) + Assert(ASMMemIsZero(&pBigNum->pauElements[pBigNum->cUsed], (cNewUsed - pBigNum->cUsed) * RTBIGNUM_ELEMENT_SIZE)); +#endif + pBigNum->cUsed = cNewUsed; + return VINF_SUCCESS; + } + return rtBigNumGrow(pBigNum, cNewUsed, cMinElements); +} + + +/** + * For ensuring zero padding of pauElements for sub/add with carry assembly + * operations. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param cElements The number of elements that must be in the elements + * array array, where those after pBigNum->cUsed must + * be zero. 
+ */ +DECLINLINE(int) rtBigNumEnsureExtraZeroElements(PRTBIGNUM pBigNum, uint32_t cElements) +{ + if (pBigNum->cAllocated >= cElements) + { + Assert( pBigNum->cAllocated == pBigNum->cUsed + || ASMMemIsZero(&pBigNum->pauElements[pBigNum->cUsed], + (pBigNum->cAllocated - pBigNum->cUsed) * RTBIGNUM_ELEMENT_SIZE)); + return VINF_SUCCESS; + } + return rtBigNumGrow(pBigNum, pBigNum->cUsed, cElements); +} + + +/** + * The slow part of rtBigNumEnsureElementPresent where we need to do actual zero + * extending. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param iElement The element we wish to access. + */ +static int rtBigNumEnsureElementPresentSlow(PRTBIGNUM pBigNum, uint32_t iElement) +{ + uint32_t const cOldUsed = pBigNum->cUsed; + int rc = rtBigNumSetUsed(pBigNum, iElement + 1); + if (RT_SUCCESS(rc)) + { + RT_BZERO(&pBigNum->pauElements[cOldUsed], (iElement + 1 - cOldUsed) * RTBIGNUM_ELEMENT_SIZE); + return VINF_SUCCESS; + } + return rc; +} + + +/** + * Zero extends the element array to make sure a the specified element index is + * accessible. + * + * This is typically used with bit operations and self modifying methods. Any + * new elements added will be initialized to zero. The caller is responsible + * for there not being any trailing zero elements. + * + * The number must be unscrambled. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param iElement The element we wish to access. + */ +DECLINLINE(int) rtBigNumEnsureElementPresent(PRTBIGNUM pBigNum, uint32_t iElement) +{ + if (iElement < pBigNum->cUsed) + return VINF_SUCCESS; + return rtBigNumEnsureElementPresentSlow(pBigNum, iElement); +} + + +/** + * Strips zero elements from the magnitude value. + * + * @param pBigNum The big number to strip. 
+ */ +static void rtBigNumStripTrailingZeros(PRTBIGNUM pBigNum) +{ + uint32_t i = pBigNum->cUsed; + while (i > 0 && pBigNum->pauElements[i - 1] == 0) + i--; + pBigNum->cUsed = i; +} + + +/** + * Initialize the big number to zero. + * + * @returns @a pBigNum + * @param pBigNum The big number. + * @param fFlags The flags. + * @internal + */ +DECLINLINE(PRTBIGNUM) rtBigNumInitZeroInternal(PRTBIGNUM pBigNum, uint32_t fFlags) +{ + RT_ZERO(*pBigNum); + pBigNum->fSensitive = RT_BOOL(fFlags & RTBIGNUMINIT_F_SENSITIVE); + return pBigNum; +} + + +/** + * Initialize the big number to zero from a template variable. + * + * @returns @a pBigNum + * @param pBigNum The big number. + * @param pTemplate The template big number. + * @internal + */ +DECLINLINE(PRTBIGNUM) rtBigNumInitZeroTemplate(PRTBIGNUM pBigNum, PCRTBIGNUM pTemplate) +{ + RT_ZERO(*pBigNum); + pBigNum->fSensitive = pTemplate->fSensitive; + return pBigNum; +} + + +RTDECL(int) RTBigNumInit(PRTBIGNUM pBigNum, uint32_t fFlags, void const *pvRaw, size_t cbRaw) +{ + /* + * Validate input. + */ + AssertPtrReturn(pBigNum, VERR_INVALID_POINTER); + AssertReturn(RT_BOOL(fFlags & RTBIGNUMINIT_F_ENDIAN_BIG) ^ RT_BOOL(fFlags & RTBIGNUMINIT_F_ENDIAN_LITTLE), + VERR_INVALID_PARAMETER); + AssertReturn(RT_BOOL(fFlags & RTBIGNUMINIT_F_UNSIGNED) ^ RT_BOOL(fFlags & RTBIGNUMINIT_F_SIGNED), VERR_INVALID_PARAMETER); + if (cbRaw) + AssertPtrReturn(pvRaw, VERR_INVALID_POINTER); + + /* + * Initalize the big number to zero. + */ + rtBigNumInitZeroInternal(pBigNum, fFlags); + + /* + * Strip the input and figure the sign flag. 
+ */ + uint8_t const *pb = (uint8_t const *)pvRaw; + if (cbRaw) + { + if (fFlags & RTBIGNUMINIT_F_ENDIAN_LITTLE) + { + if (fFlags & RTBIGNUMINIT_F_UNSIGNED) + { + while (cbRaw > 0 && pb[cbRaw - 1] == 0) + cbRaw--; + } + else + { + if (pb[cbRaw - 1] >> 7) + { + pBigNum->fNegative = 1; + while (cbRaw > 1 && pb[cbRaw - 1] == 0xff) + cbRaw--; + } + else + while (cbRaw > 0 && pb[cbRaw - 1] == 0) + cbRaw--; + } + } + else + { + if (fFlags & RTBIGNUMINIT_F_UNSIGNED) + { + while (cbRaw > 0 && *pb == 0) + pb++, cbRaw--; + } + else + { + if (*pb >> 7) + { + pBigNum->fNegative = 1; + while (cbRaw > 1 && *pb == 0xff) + pb++, cbRaw--; + } + else + while (cbRaw > 0 && *pb == 0) + pb++, cbRaw--; + } + } + } + + /* + * Allocate memory for the elements. + */ + size_t cbAligned = RT_ALIGN_Z(cbRaw, RTBIGNUM_ELEMENT_SIZE); + if (RT_UNLIKELY(cbAligned >= RTBIGNUM_MAX_SIZE)) + return VERR_OUT_OF_RANGE; + pBigNum->cUsed = (uint32_t)cbAligned / RTBIGNUM_ELEMENT_SIZE; + if (pBigNum->cUsed) + { + pBigNum->cAllocated = RT_ALIGN_32(pBigNum->cUsed, RTBIGNUM_ALIGNMENT); + if (pBigNum->fSensitive) + pBigNum->pauElements = (RTBIGNUMELEMENT *)RTMemSaferAllocZ(pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + else + pBigNum->pauElements = (RTBIGNUMELEMENT *)RTMemAlloc(pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + if (RT_UNLIKELY(!pBigNum->pauElements)) + return VERR_NO_MEMORY; + + /* + * Initialize the array. + */ + uint32_t i = 0; + if (fFlags & RTBIGNUMINIT_F_ENDIAN_LITTLE) + { + while (cbRaw >= RTBIGNUM_ELEMENT_SIZE) + { +#if RTBIGNUM_ELEMENT_SIZE == 8 + pBigNum->pauElements[i] = RT_MAKE_U64_FROM_U8(pb[0], pb[1], pb[2], pb[3], pb[4], pb[5], pb[6], pb[7]); +#elif RTBIGNUM_ELEMENT_SIZE == 4 + pBigNum->pauElements[i] = RT_MAKE_U32_FROM_U8(pb[0], pb[1], pb[2], pb[3]); +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + i++; + pb += RTBIGNUM_ELEMENT_SIZE; + cbRaw -= RTBIGNUM_ELEMENT_SIZE; + } + + if (cbRaw > 0) + { + RTBIGNUMELEMENT uLast = pBigNum->fNegative ? 
~(RTBIGNUMELEMENT)0 : 0; + switch (cbRaw) + { + default: AssertFailed(); +#if RTBIGNUM_ELEMENT_SIZE == 8 + RT_FALL_THRU(); + case 7: uLast = (uLast << 8) | pb[6]; RT_FALL_THRU(); + case 6: uLast = (uLast << 8) | pb[5]; RT_FALL_THRU(); + case 5: uLast = (uLast << 8) | pb[4]; RT_FALL_THRU(); + case 4: uLast = (uLast << 8) | pb[3]; +#endif + RT_FALL_THRU(); + case 3: uLast = (uLast << 8) | pb[2]; RT_FALL_THRU(); + case 2: uLast = (uLast << 8) | pb[1]; RT_FALL_THRU(); + case 1: uLast = (uLast << 8) | pb[0]; + } + pBigNum->pauElements[i] = uLast; + } + } + else + { + pb += cbRaw; + while (cbRaw >= RTBIGNUM_ELEMENT_SIZE) + { + pb -= RTBIGNUM_ELEMENT_SIZE; +#if RTBIGNUM_ELEMENT_SIZE == 8 + pBigNum->pauElements[i] = RT_MAKE_U64_FROM_U8(pb[7], pb[6], pb[5], pb[4], pb[3], pb[2], pb[1], pb[0]); +#elif RTBIGNUM_ELEMENT_SIZE == 4 + pBigNum->pauElements[i] = RT_MAKE_U32_FROM_U8(pb[3], pb[2], pb[1], pb[0]); +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + i++; + cbRaw -= RTBIGNUM_ELEMENT_SIZE; + } + + if (cbRaw > 0) + { + RTBIGNUMELEMENT uLast = pBigNum->fNegative ? ~(RTBIGNUMELEMENT)0 : 0; + pb -= cbRaw; + switch (cbRaw) + { + default: AssertFailed(); +#if RTBIGNUM_ELEMENT_SIZE == 8 + RT_FALL_THRU(); + case 7: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 6: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 5: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 4: uLast = (uLast << 8) | *pb++; +#endif + RT_FALL_THRU(); + case 3: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 2: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 1: uLast = (uLast << 8) | *pb++; + } + pBigNum->pauElements[i] = uLast; + } + } + + /* + * If negative, negate it so we get a positive magnitude value in pauElements. + */ + if (pBigNum->fNegative) + { + pBigNum->pauElements[0] = 0U - pBigNum->pauElements[0]; + for (i = 1; i < pBigNum->cUsed; i++) + pBigNum->pauElements[i] = 0U - pBigNum->pauElements[i] - 1U; + } + + /* + * Clear unused elements. 
+ */ + if (pBigNum->cUsed != pBigNum->cAllocated) + { + RTBIGNUMELEMENT *puUnused = &pBigNum->pauElements[pBigNum->cUsed]; + AssertCompile(RTBIGNUM_ALIGNMENT <= 4); + switch (pBigNum->cAllocated - pBigNum->cUsed) + { + default: AssertFailed(); RT_FALL_THRU(); + case 3: *puUnused++ = 0; RT_FALL_THRU(); + case 2: *puUnused++ = 0; RT_FALL_THRU(); + case 1: *puUnused++ = 0; + } + } + RTBIGNUM_ASSERT_VALID(pBigNum); + } + + rtBigNumScramble(pBigNum); + return VINF_SUCCESS; +} + + +RTDECL(int) RTBigNumInitZero(PRTBIGNUM pBigNum, uint32_t fFlags) +{ + AssertReturn(!(fFlags & ~RTBIGNUMINIT_F_SENSITIVE), VERR_INVALID_PARAMETER); + AssertPtrReturn(pBigNum, VERR_INVALID_POINTER); + + rtBigNumInitZeroInternal(pBigNum, fFlags); + rtBigNumScramble(pBigNum); + return VINF_SUCCESS; +} + + +/** + * Internal clone function that assumes the caller takes care of scrambling. + * + * @returns IPRT status code. + * @param pBigNum The target number. + * @param pSrc The source number. + */ +static int rtBigNumCloneInternal(PRTBIGNUM pBigNum, PCRTBIGNUM pSrc) +{ + Assert(!pSrc->fCurScrambled); + int rc = VINF_SUCCESS; + + /* + * Copy over the data. + */ + RT_ZERO(*pBigNum); + pBigNum->fNegative = pSrc->fNegative; + pBigNum->fSensitive = pSrc->fSensitive; + pBigNum->cUsed = pSrc->cUsed; + if (pSrc->cUsed) + { + /* Duplicate the element array. 
*/ + pBigNum->cAllocated = RT_ALIGN_32(pBigNum->cUsed, RTBIGNUM_ALIGNMENT); + if (pBigNum->fSensitive) + pBigNum->pauElements = (RTBIGNUMELEMENT *)RTMemSaferAllocZ(pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + else + pBigNum->pauElements = (RTBIGNUMELEMENT *)RTMemAlloc(pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + if (RT_LIKELY(pBigNum->pauElements)) + { + memcpy(pBigNum->pauElements, pSrc->pauElements, pBigNum->cUsed * RTBIGNUM_ELEMENT_SIZE); + if (pBigNum->cUsed != pBigNum->cAllocated) + RT_BZERO(&pBigNum->pauElements[pBigNum->cUsed], (pBigNum->cAllocated - pBigNum->cUsed) * RTBIGNUM_ELEMENT_SIZE); + } + else + { + RT_ZERO(*pBigNum); + rc = VERR_NO_MEMORY; + } + } + return rc; +} + + +RTDECL(int) RTBigNumClone(PRTBIGNUM pBigNum, PCRTBIGNUM pSrc) +{ + int rc = rtBigNumUnscramble((PRTBIGNUM)pSrc); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pSrc); + rc = rtBigNumCloneInternal(pBigNum, pSrc); + if (RT_SUCCESS(rc)) + rtBigNumScramble(pBigNum); + rtBigNumScramble((PRTBIGNUM)pSrc); + } + return rc; +} + + +RTDECL(int) RTBigNumDestroy(PRTBIGNUM pBigNum) +{ + if (pBigNum) + { + if (pBigNum->pauElements) + { + Assert(pBigNum->cAllocated > 0); + if (!pBigNum->fSensitive) + RTMemFree(pBigNum->pauElements); + else + { + RTMemSaferFree(pBigNum->pauElements, pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + RT_ZERO(*pBigNum); + } + pBigNum->pauElements = NULL; + } + } + return VINF_SUCCESS; +} + + +RTDECL(int) RTBigNumAssign(PRTBIGNUM pDst, PCRTBIGNUM pSrc) +{ + AssertReturn(pDst->fSensitive >= pSrc->fSensitive, VERR_BIGNUM_SENSITIVE_INPUT); + int rc = rtBigNumUnscramble(pDst); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pDst); + rc = rtBigNumUnscramble((PRTBIGNUM)pSrc); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pSrc); + if ( pDst->fSensitive == pSrc->fSensitive + || pDst->fSensitive) + { + if (pDst->cAllocated >= pSrc->cUsed) + { + if (pDst->cUsed > pSrc->cUsed) + RT_BZERO(&pDst->pauElements[pSrc->cUsed], (pDst->cUsed - pSrc->cUsed) * 
RTBIGNUM_ELEMENT_SIZE); + pDst->cUsed = pSrc->cUsed; + pDst->fNegative = pSrc->fNegative; + memcpy(pDst->pauElements, pSrc->pauElements, pSrc->cUsed * RTBIGNUM_ELEMENT_SIZE); + } + else + { + rc = rtBigNumGrow(pDst, pSrc->cUsed, pSrc->cUsed); + if (RT_SUCCESS(rc)) + { + pDst->fNegative = pSrc->fNegative; + memcpy(pDst->pauElements, pSrc->pauElements, pSrc->cUsed * RTBIGNUM_ELEMENT_SIZE); + } + } + } + else + rc = VERR_BIGNUM_SENSITIVE_INPUT; + rtBigNumScramble((PRTBIGNUM)pSrc); + } + rtBigNumScramble(pDst); + } + return rc; +} + + +/** + * Same as RTBigNumBitWidth, except that it ignore the signed bit. + * + * The number must be unscrambled. + * + * @returns The effective width of the magnitude, in bits. Returns 0 if the + * value is zero. + * @param pBigNum The bit number. + */ +static uint32_t rtBigNumMagnitudeBitWidth(PCRTBIGNUM pBigNum) +{ + uint32_t idxLast = pBigNum->cUsed; + if (idxLast) + { + idxLast--; + RTBIGNUMELEMENT uLast = pBigNum->pauElements[idxLast]; Assert(uLast); + return rtBigNumElementBitCount(uLast) + idxLast * RTBIGNUM_ELEMENT_BITS; + } + return 0; +} + + +RTDECL(uint32_t) RTBigNumBitWidth(PCRTBIGNUM pBigNum) +{ + uint32_t idxLast = pBigNum->cUsed; + if (idxLast) + { + idxLast--; + rtBigNumUnscramble((PRTBIGNUM)pBigNum); + RTBIGNUMELEMENT uLast = pBigNum->pauElements[idxLast]; Assert(uLast); + rtBigNumScramble((PRTBIGNUM)pBigNum); + return rtBigNumElementBitCount(uLast) + idxLast * RTBIGNUM_ELEMENT_BITS + pBigNum->fNegative; + } + return 0; +} + + +RTDECL(uint32_t) RTBigNumByteWidth(PCRTBIGNUM pBigNum) +{ + uint32_t cBits = RTBigNumBitWidth(pBigNum); + return (cBits + 7) / 8; +} + + +RTDECL(int) RTBigNumToBytesBigEndian(PCRTBIGNUM pBigNum, void *pvBuf, size_t cbWanted) +{ + AssertPtrReturn(pvBuf, VERR_INVALID_POINTER); + AssertReturn(cbWanted > 0, VERR_INVALID_PARAMETER); + + int rc = rtBigNumUnscramble((PRTBIGNUM)pBigNum); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pBigNum); + rc = VINF_SUCCESS; + if (pBigNum->cUsed != 0) + { + 
uint8_t *pbDst = (uint8_t *)pvBuf; + pbDst += cbWanted - 1; + for (uint32_t i = 0; i < pBigNum->cUsed; i++) + { + RTBIGNUMELEMENT uElement = pBigNum->pauElements[i]; + if (pBigNum->fNegative) + uElement = (RTBIGNUMELEMENT)0 - uElement - (i > 0); + if (cbWanted >= sizeof(uElement)) + { + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; +#if RTBIGNUM_ELEMENT_SIZE == 8 + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; +#elif RTBIGNUM_ELEMENT_SIZE != 4 +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + cbWanted -= sizeof(uElement); + } + else + { + + uint32_t cBitsLeft = RTBIGNUM_ELEMENT_BITS; + while (cbWanted > 0) + { + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + cBitsLeft -= 8; + cbWanted--; + } + Assert(cBitsLeft > 0); Assert(cBitsLeft < RTBIGNUM_ELEMENT_BITS); + if ( i + 1 < pBigNum->cUsed + || ( !pBigNum->fNegative + ? uElement != 0 + : uElement != ((RTBIGNUMELEMENT)1 << cBitsLeft) - 1U ) ) + rc = VERR_BUFFER_OVERFLOW; + break; + } + } + + /* Sign extend the number to the desired output size. */ + if (cbWanted > 0) + memset(pbDst - cbWanted, pBigNum->fNegative ? 
0 : 0xff, cbWanted); + } + else + RT_BZERO(pvBuf, cbWanted); + rtBigNumScramble((PRTBIGNUM)pBigNum); + } + return rc; +} + + +RTDECL(int) RTBigNumCompare(PRTBIGNUM pLeft, PRTBIGNUM pRight) +{ + int rc = rtBigNumUnscramble(pLeft); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pLeft); + rc = rtBigNumUnscramble(pRight); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pRight); + if (pLeft->fNegative == pRight->fNegative) + { + if (pLeft->cUsed == pRight->cUsed) + { + rc = 0; + uint32_t i = pLeft->cUsed; + while (i-- > 0) + if (pLeft->pauElements[i] != pRight->pauElements[i]) + { + rc = pLeft->pauElements[i] < pRight->pauElements[i] ? -1 : 1; + break; + } + if (pLeft->fNegative) + rc = -rc; + } + else + rc = !pLeft->fNegative + ? pLeft->cUsed < pRight->cUsed ? -1 : 1 + : pLeft->cUsed < pRight->cUsed ? 1 : -1; + } + else + rc = pLeft->fNegative ? -1 : 1; + + rtBigNumScramble(pRight); + } + rtBigNumScramble(pLeft); + } + return rc; +} + + +RTDECL(int) RTBigNumCompareWithU64(PRTBIGNUM pLeft, uint64_t uRight) +{ + int rc = rtBigNumUnscramble(pLeft); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pLeft); + if (!pLeft->fNegative) + { + if (pLeft->cUsed * RTBIGNUM_ELEMENT_SIZE <= sizeof(uRight)) + { + if (pLeft->cUsed == 0) + rc = uRight == 0 ? 0 : -1; + else + { +#if RTBIGNUM_ELEMENT_SIZE == 8 + uint64_t uLeft = rtBigNumGetElement(pLeft, 0); + if (uLeft < uRight) + rc = -1; + else + rc = uLeft == uRight ? 0 : 1; +#elif RTBIGNUM_ELEMENT_SIZE == 4 + uint32_t uSubLeft = rtBigNumGetElement(pLeft, 1); + uint32_t uSubRight = uRight >> 32; + if (uSubLeft == uSubRight) + { + uSubLeft = rtBigNumGetElement(pLeft, 0); + uSubRight = (uint32_t)uRight; + } + if (uSubLeft < uSubRight) + rc = -1; + else + rc = uSubLeft == uSubRight ? 
0 : 1; +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + } + } + else + rc = 1; + } + else + rc = -1; + rtBigNumScramble(pLeft); + } + return rc; +} + + +RTDECL(int) RTBigNumCompareWithS64(PRTBIGNUM pLeft, int64_t iRight) +{ + int rc = rtBigNumUnscramble(pLeft); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pLeft); + if (pLeft->fNegative == (unsigned)(iRight < 0)) /* (unsigned cast is for MSC weirdness) */ + { + AssertCompile(RTBIGNUM_ELEMENT_SIZE <= sizeof(iRight)); + if (pLeft->cUsed * RTBIGNUM_ELEMENT_SIZE <= sizeof(iRight)) + { + uint64_t uRightMagn = !pLeft->fNegative ? (uint64_t)iRight : (uint64_t)-iRight; +#if RTBIGNUM_ELEMENT_SIZE == 8 + uint64_t uLeft = rtBigNumGetElement(pLeft, 0); + if (uLeft < uRightMagn) + rc = -1; + else + rc = uLeft == (uint64_t)uRightMagn ? 0 : 1; +#elif RTBIGNUM_ELEMENT_SIZE == 4 + uint32_t uSubLeft = rtBigNumGetElement(pLeft, 1); + uint32_t uSubRight = uRightMagn >> 32; + if (uSubLeft == uSubRight) + { + uSubLeft = rtBigNumGetElement(pLeft, 0); + uSubRight = (uint32_t)uRightMagn; + } + if (uSubLeft < uSubRight) + rc = -1; + else + rc = uSubLeft == uSubRight ? 0 : 1; +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + if (pLeft->fNegative) + rc = -rc; + } + else + rc = pLeft->fNegative ? -1 : 1; + } + else + rc = pLeft->fNegative ? -1 : 1; + rtBigNumScramble(pLeft); + } + return rc; +} + + +/** + * Compares the magnitude values of two big numbers. + * + * @retval -1 if pLeft is smaller than pRight. + * @retval 0 if pLeft is equal to pRight. + * @retval 1 if pLeft is larger than pRight. + * @param pLeft The left side number. + * @param pRight The right side number. 
+ */ +static int rtBigNumMagnitudeCompare(PCRTBIGNUM pLeft, PCRTBIGNUM pRight) +{ + Assert(!pLeft->fCurScrambled); Assert(!pRight->fCurScrambled); + int rc; + uint32_t i = pLeft->cUsed; + if (i == pRight->cUsed) + { + rc = 0; + while (i-- > 0) + if (pLeft->pauElements[i] != pRight->pauElements[i]) + { + rc = pLeft->pauElements[i] < pRight->pauElements[i] ? -1 : 1; + break; + } + } + else + rc = i < pRight->cUsed ? -1 : 1; + return rc; +} + + +/** + * Copies the magnitude of on number (@a pSrc) to another (@a pBigNum). + * + * The variables must be unscrambled. The sign flag is not considered nor + * touched. + * + * @returns IPRT status code. + * @param pDst The destination number. + * @param pSrc The source number. + */ +DECLINLINE(int) rtBigNumMagnitudeCopy(PRTBIGNUM pDst, PCRTBIGNUM pSrc) +{ + int rc = rtBigNumSetUsed(pDst, pSrc->cUsed); + if (RT_SUCCESS(rc)) + memcpy(pDst->pauElements, pSrc->pauElements, pSrc->cUsed * RTBIGNUM_ELEMENT_SIZE); + return rc; +} + + + +/** + * Adds two magnitudes and stores them into a third. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched. + * + * @returns IPRT status code. + * @param pResult The resultant. + * @param pAugend To whom it shall be addede. + * @param pAddend The nombre to addede. + */ +static int rtBigNumMagnitudeAdd(PRTBIGNUM pResult, PCRTBIGNUM pAugend, PCRTBIGNUM pAddend) +{ + Assert(!pResult->fCurScrambled); Assert(!pAugend->fCurScrambled); Assert(!pAddend->fCurScrambled); + Assert(pResult != pAugend); Assert(pResult != pAddend); + + uint32_t cElements = RT_MAX(pAugend->cUsed, pAddend->cUsed); + int rc = rtBigNumSetUsed(pResult, cElements); + if (RT_SUCCESS(rc)) + { + /* + * The primitive way, requires at least two additions for each entry + * without machine code help. 
+ */ + RTBIGNUMELEMENT fCarry = 0; + for (uint32_t i = 0; i < cElements; i++) + pResult->pauElements[i] = rtBigNumElementAddWithCarry(rtBigNumGetElement(pAugend, i), + rtBigNumGetElement(pAddend, i), + &fCarry); + if (fCarry) + { + rc = rtBigNumSetUsed(pResult, cElements + 1); + if (RT_SUCCESS(rc)) + pResult->pauElements[cElements++] = 1; + } + Assert(pResult->cUsed == cElements || RT_FAILURE_NP(rc)); + } + + return rc; +} + + +/** + * Substracts a smaller (or equal) magnitude from another one and stores it into + * a third. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched. For this reason, the @a pMinuend must be larger or equal to @a + * pSubtrahend. + * + * @returns IPRT status code. + * @param pResult There to store the result. + * @param pMinuend What to subtract from. + * @param pSubtrahend What to subtract. + */ +static int rtBigNumMagnitudeSub(PRTBIGNUM pResult, PCRTBIGNUM pMinuend, PCRTBIGNUM pSubtrahend) +{ + Assert(!pResult->fCurScrambled); Assert(!pMinuend->fCurScrambled); Assert(!pSubtrahend->fCurScrambled); + Assert(pResult != pMinuend); Assert(pResult != pSubtrahend); + Assert(pMinuend->cUsed >= pSubtrahend->cUsed); + + int rc; + if (pSubtrahend->cUsed) + { + /* + * Resize the result. In the assembly case, ensure that all three arrays + * has the same number of used entries, possibly with an extra zero + * element on 64-bit systems. + */ + rc = rtBigNumSetUsedEx(pResult, pMinuend->cUsed, RTBIGNUM_ZERO_ALIGN(pMinuend->cUsed)); +#ifdef IPRT_BIGINT_WITH_ASM + if (RT_SUCCESS(rc)) + rc = rtBigNumEnsureExtraZeroElements((PRTBIGNUM)pMinuend, RTBIGNUM_ZERO_ALIGN(pMinuend->cUsed)); + if (RT_SUCCESS(rc)) + rc = rtBigNumEnsureExtraZeroElements((PRTBIGNUM)pSubtrahend, RTBIGNUM_ZERO_ALIGN(pMinuend->cUsed)); +#endif + if (RT_SUCCESS(rc)) + { +#ifdef IPRT_BIGINT_WITH_ASM + /* + * Call assembly to do the work. 
+ */ + rtBigNumMagnitudeSubAssemblyWorker(pResult->pauElements, pMinuend->pauElements, + pSubtrahend->pauElements, pMinuend->cUsed); +# ifdef RT_STRICT + RTBIGNUMELEMENT fBorrow = 0; + for (uint32_t i = 0; i < pMinuend->cUsed; i++) + { + RTBIGNUMELEMENT uCorrect = rtBigNumElementSubWithBorrow(pMinuend->pauElements[i], rtBigNumGetElement(pSubtrahend, i), &fBorrow); + AssertMsg(pResult->pauElements[i] == uCorrect, ("[%u]=%#x, expected %#x\n", i, pResult->pauElements[i], uCorrect)); + } +# endif +#else + /* + * The primitive C way. + */ + RTBIGNUMELEMENT fBorrow = 0; + for (uint32_t i = 0; i < pMinuend->cUsed; i++) + pResult->pauElements[i] = rtBigNumElementSubWithBorrow(pMinuend->pauElements[i], + rtBigNumGetElement(pSubtrahend, i), + &fBorrow); + Assert(fBorrow == 0); +#endif + + /* + * Trim the result. + */ + rtBigNumStripTrailingZeros(pResult); + } + } + /* + * Special case: Subtrahend is zero. + */ + else + rc = rtBigNumMagnitudeCopy(pResult, pMinuend); + + return rc; +} + + +/** + * Substracts a smaller (or equal) magnitude from another one and stores the + * result into the first. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched. For this reason, the @a pMinuendResult must be larger or equal to + * @a pSubtrahend. + * + * @returns IPRT status code (memory alloc error). + * @param pMinuendResult What to subtract from and return as result. + * @param pSubtrahend What to subtract. + */ +static int rtBigNumMagnitudeSubThis(PRTBIGNUM pMinuendResult, PCRTBIGNUM pSubtrahend) +{ + Assert(!pMinuendResult->fCurScrambled); Assert(!pSubtrahend->fCurScrambled); + Assert(pMinuendResult != pSubtrahend); + Assert(pMinuendResult->cUsed >= pSubtrahend->cUsed); + +#ifdef IPRT_BIGINT_WITH_ASM + /* + * Use the assembly worker. Requires same sized element arrays, so zero extend them. 
+ */ + int rc = rtBigNumEnsureExtraZeroElements(pMinuendResult, RTBIGNUM_ZERO_ALIGN(pMinuendResult->cUsed)); + if (RT_SUCCESS(rc)) + rc = rtBigNumEnsureExtraZeroElements((PRTBIGNUM)pSubtrahend, RTBIGNUM_ZERO_ALIGN(pMinuendResult->cUsed)); + if (RT_FAILURE(rc)) + return rc; + rtBigNumMagnitudeSubThisAssemblyWorker(pMinuendResult->pauElements, pSubtrahend->pauElements, pMinuendResult->cUsed); +#else + /* + * The primitive way, as usual. + */ + RTBIGNUMELEMENT fBorrow = 0; + for (uint32_t i = 0; i < pMinuendResult->cUsed; i++) + pMinuendResult->pauElements[i] = rtBigNumElementSubWithBorrow(pMinuendResult->pauElements[i], + rtBigNumGetElement(pSubtrahend, i), + &fBorrow); + Assert(fBorrow == 0); +#endif + + /* + * Trim the result. + */ + rtBigNumStripTrailingZeros(pMinuendResult); + + return VINF_SUCCESS; +} + + +RTDECL(int) RTBigNumAdd(PRTBIGNUM pResult, PCRTBIGNUM pAugend, PCRTBIGNUM pAddend) +{ + Assert(pResult != pAugend); Assert(pResult != pAddend); + AssertReturn(pResult->fSensitive >= (pAugend->fSensitive | pAddend->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + rc = rtBigNumUnscramble((PRTBIGNUM)pAugend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pAugend); + rc = rtBigNumUnscramble((PRTBIGNUM)pAddend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pAddend); + + /* + * Same sign: Add magnitude, keep sign. + * 1 + 1 = 2 + * (-1) + (-1) = -2 + */ + if (pAugend->fNegative == pAddend->fNegative) + { + pResult->fNegative = pAugend->fNegative; + rc = rtBigNumMagnitudeAdd(pResult, pAugend, pAddend); + } + /* + * Different sign: Subtract smaller from larger, keep sign of larger. 
+ * (-5) + 3 = -2 + * 5 + (-3) = 2 + * (-1) + 3 = 2 + * 1 + (-3) = -2 + */ + else if (rtBigNumMagnitudeCompare(pAugend, pAddend) >= 0) + { + pResult->fNegative = pAugend->fNegative; + rc = rtBigNumMagnitudeSub(pResult, pAugend, pAddend); + if (!pResult->cUsed) + pResult->fNegative = 0; + } + else + { + pResult->fNegative = pAddend->fNegative; + rc = rtBigNumMagnitudeSub(pResult, pAddend, pAugend); + } + rtBigNumScramble((PRTBIGNUM)pAddend); + } + rtBigNumScramble((PRTBIGNUM)pAugend); + } + rtBigNumScramble(pResult); + } + return rc; +} + + +RTDECL(int) RTBigNumSubtract(PRTBIGNUM pResult, PCRTBIGNUM pMinuend, PCRTBIGNUM pSubtrahend) +{ + Assert(pResult != pMinuend); Assert(pResult != pSubtrahend); + AssertReturn(pResult->fSensitive >= (pMinuend->fSensitive | pSubtrahend->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + if (pMinuend != pSubtrahend) + { + rc = rtBigNumUnscramble((PRTBIGNUM)pMinuend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pMinuend); + rc = rtBigNumUnscramble((PRTBIGNUM)pSubtrahend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pSubtrahend); + + /* + * Different sign: Add magnitude, keep sign of first. + * 1 - (-2) == 3 + * -1 - 2 == -3 + */ + if (pMinuend->fNegative != pSubtrahend->fNegative) + { + pResult->fNegative = pMinuend->fNegative; + rc = rtBigNumMagnitudeAdd(pResult, pMinuend, pSubtrahend); + } + /* + * Same sign, minuend has greater or equal absolute value: Subtract, keep sign of first. + * 10 - 7 = 3 + */ + else if (rtBigNumMagnitudeCompare(pMinuend, pSubtrahend) >= 0) + { + pResult->fNegative = pMinuend->fNegative; + rc = rtBigNumMagnitudeSub(pResult, pMinuend, pSubtrahend); + } + /* + * Same sign, subtrahend is larger: Reverse and subtract, invert sign of first. 
+                     *      7 - 10 = -3
+                     *      -1 - (-3) = 2
+                     */
+                    else
+                    {
+                        pResult->fNegative = !pMinuend->fNegative;
+                        rc = rtBigNumMagnitudeSub(pResult, pSubtrahend, pMinuend);
+                    }
+                    rtBigNumScramble((PRTBIGNUM)pSubtrahend);
+                }
+                rtBigNumScramble((PRTBIGNUM)pMinuend);
+            }
+        }
+        else
+        {
+            /* Minuend and subtrahend are the same object, so the difference is zero. */
+            pResult->fNegative = 0;
+            rtBigNumSetUsed(pResult, 0);
+        }
+        rtBigNumScramble(pResult);
+    }
+    return rc;
+}
+
+/** Flips the sign flag of @a pThis in place; always returns VINF_SUCCESS. */
+RTDECL(int) RTBigNumNegateThis(PRTBIGNUM pThis)
+{
+    pThis->fNegative = !pThis->fNegative;
+    return VINF_SUCCESS;
+}
+
+/** Assigns @a pBigNum to @a pResult and then flips the sign of @a pResult. */
+RTDECL(int) RTBigNumNegate(PRTBIGNUM pResult, PCRTBIGNUM pBigNum)
+{
+    int rc = RTBigNumAssign(pResult, pBigNum);
+    if (RT_SUCCESS(rc))
+        rc = RTBigNumNegateThis(pResult);
+    return rc;
+}
+
+
+/**
+ * Multiplies the magnitudes of two values, letting the caller care about the
+ * sign bit.
+ *
+ * Note that a zero factor yields a zero result with the sign flag cleared.
+ *
+ * @returns IPRT status code.
+ * @param   pResult         Where to store the result.
+ * @param   pMultiplicand   The first value.
+ * @param   pMultiplier     The second value.
+ */
+static int rtBigNumMagnitudeMultiply(PRTBIGNUM pResult, PCRTBIGNUM pMultiplicand, PCRTBIGNUM pMultiplier)
+{
+    Assert(pResult != pMultiplicand); Assert(pResult != pMultiplier);
+    Assert(!pResult->fCurScrambled); Assert(!pMultiplicand->fCurScrambled); Assert(!pMultiplier->fCurScrambled);
+
+    /*
+     * Multiplication involving zero is zero.
+     */
+    if (!pMultiplicand->cUsed || !pMultiplier->cUsed)
+    {
+        pResult->fNegative = 0;
+        rtBigNumSetUsed(pResult, 0);
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * Allocate a result array that is the sum of the two factors, initialize
+     * it to zero.
+     */
+    uint32_t cMax = pMultiplicand->cUsed + pMultiplier->cUsed;
+    int rc = rtBigNumSetUsed(pResult, cMax);
+    if (RT_SUCCESS(rc))
+    {
+        RT_BZERO(pResult->pauElements, pResult->cUsed * RTBIGNUM_ELEMENT_SIZE);
+
+#ifdef IPRT_BIGINT_WITH_ASM
+        rtBigNumMagnitudeMultiplyAssemblyWorker(pResult->pauElements,
+                                                pMultiplier->pauElements, pMultiplier->cUsed,
+                                                pMultiplicand->pauElements, pMultiplicand->cUsed);
+#else
+        /* Schoolbook multiplication: add each element product into the result at position i + j. */
+        for (uint32_t i = 0; i < pMultiplier->cUsed; i++)
+        {
+            RTBIGNUMELEMENT uMultiplier = pMultiplier->pauElements[i];
+            for (uint32_t j = 0; j < pMultiplicand->cUsed; j++)
+            {
+                RTBIGNUMELEMENT uHi;
+                RTBIGNUMELEMENT uLo;
+#if RTBIGNUM_ELEMENT_SIZE == 4
+                uint64_t u64 = ASMMult2xU32RetU64(pMultiplicand->pauElements[j], uMultiplier);
+                uLo = (uint32_t)u64;
+                uHi = u64 >> 32;
+#elif RTBIGNUM_ELEMENT_SIZE == 8
+                uLo = ASMMult2xU64Ret2xU64(pMultiplicand->pauElements[j], uMultiplier, &uHi);
+#else
+# error "Invalid RTBIGNUM_ELEMENT_SIZE value"
+#endif
+                RTBIGNUMELEMENT fCarry = 0;
+                uint64_t k = i + j;
+                pResult->pauElements[k] = rtBigNumElementAddWithCarry(pResult->pauElements[k], uLo, &fCarry);
+                k++;
+                pResult->pauElements[k] = rtBigNumElementAddWithCarry(pResult->pauElements[k], uHi, &fCarry);
+                while (fCarry) /* ripple any remaining carry into the higher elements */
+                {
+                    k++;
+                    pResult->pauElements[k] = rtBigNumElementAddWithCarry(pResult->pauElements[k], 0, &fCarry);
+                }
+                Assert(k < cMax);
+            }
+        }
+#endif
+
+        /* It's possible we overestimated the output size by 1 element. */
+        rtBigNumStripTrailingZeros(pResult);
+    }
+    return rc;
+}
+
+/**
+ * Multiplies two big numbers: pResult = pMultiplicand * pMultiplier.
+ *
+ * @returns IPRT status code.
+ * @param   pResult         Where to store the product.  Asserted to be
+ *                          distinct from both inputs.
+ * @param   pMultiplicand   The first value.
+ * @param   pMultiplier     The second value.
+ */
+RTDECL(int) RTBigNumMultiply(PRTBIGNUM pResult, PCRTBIGNUM pMultiplicand, PCRTBIGNUM pMultiplier)
+{
+    Assert(pResult != pMultiplicand); Assert(pResult != pMultiplier);
+    AssertReturn(pResult->fSensitive >= (pMultiplicand->fSensitive | pMultiplier->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT);
+
+    int rc = rtBigNumUnscramble(pResult);
+    if (RT_SUCCESS(rc))
+    {
+        RTBIGNUM_ASSERT_VALID(pResult);
+        rc = rtBigNumUnscramble((PRTBIGNUM)pMultiplicand);
+        if (RT_SUCCESS(rc))
+        {
+            RTBIGNUM_ASSERT_VALID(pMultiplicand);
+            rc = rtBigNumUnscramble((PRTBIGNUM)pMultiplier);
+            if (RT_SUCCESS(rc))
+            {
+                RTBIGNUM_ASSERT_VALID(pMultiplier);
+
+                /*
+                 * The sign values follow XOR rules:
+                 *      -1 *  1 = -1;   1 ^ 0 = 1
+                 *       1 * -1 = -1;   0 ^ 1 = 1
+                 *      -1 * -1 =  1;   1 ^ 1 = 0
+                 *       1 *  1 =  1;   0 ^ 0 = 0
+                 */
+                pResult->fNegative = pMultiplicand->fNegative ^ pMultiplier->fNegative;
+                rc = rtBigNumMagnitudeMultiply(pResult, pMultiplicand, pMultiplier);
+
+                rtBigNumScramble((PRTBIGNUM)pMultiplier);
+            }
+            rtBigNumScramble((PRTBIGNUM)pMultiplicand);
+        }
+        rtBigNumScramble(pResult);
+    }
+    return rc;
+}
+
+
+#if 0 /* unused */
+/**
+ * Clears a bit in the magnitude of @a pBigNum.
+ *
+ * The variables must be unscrambled.
+ *
+ * @param   pBigNum         The big number.
+ * @param   iBit            The bit to clear (0-based).
+ */
+DECLINLINE(void) rtBigNumMagnitudeClearBit(PRTBIGNUM pBigNum, uint32_t iBit)
+{
+    uint32_t iElement = iBit / RTBIGNUM_ELEMENT_BITS;
+    if (iElement < pBigNum->cUsed)
+    {
+        iBit &= RTBIGNUM_ELEMENT_BITS - 1;
+        pBigNum->pauElements[iElement] &= ~RTBIGNUM_ELEMENT_BIT(iBit);
+        if (iElement + 1 == pBigNum->cUsed && !pBigNum->pauElements[iElement])
+            rtBigNumStripTrailingZeros(pBigNum);
+    }
+}
+#endif /* unused */
+
+
+/**
+ * Sets a bit in the magnitude of @a pBigNum.
+ *
+ * The variables must be unscrambled.
+ *
+ * @returns IPRT status code.
+ * @param   pBigNum         The big number.
+ * @param   iBit            The bit to set (0-based).
+ */
+DECLINLINE(int) rtBigNumMagnitudeSetBit(PRTBIGNUM pBigNum, uint32_t iBit)
+{
+    uint32_t iElement = iBit / RTBIGNUM_ELEMENT_BITS;
+    int rc = rtBigNumEnsureElementPresent(pBigNum, iElement);
+    if (RT_SUCCESS(rc))
+    {
+        iBit &= RTBIGNUM_ELEMENT_BITS - 1; /* bit index within the element */
+        pBigNum->pauElements[iElement] |= RTBIGNUM_ELEMENT_BIT(iBit);
+        return VINF_SUCCESS;
+    }
+    return rc;
+}
+
+
+#if 0 /* unused */
+/**
+ * Writes a bit in the magnitude of @a pBigNum.
+ *
+ * The variables must be unscrambled.
+ *
+ * @returns IPRT status code.
+ * @param   pBigNum         The big number.
+ * @param   iBit            The bit to write (0-based).
+ * @param   fValue          The bit value.
+ */
+DECLINLINE(int) rtBigNumMagnitudeWriteBit(PRTBIGNUM pBigNum, uint32_t iBit, bool fValue)
+{
+    if (fValue)
+        return rtBigNumMagnitudeSetBit(pBigNum, iBit);
+    rtBigNumMagnitudeClearBit(pBigNum, iBit);
+    return VINF_SUCCESS;
+}
+#endif
+
+
+/**
+ * Returns the given magnitude bit.
+ *
+ * The variables must be unscrambled.
+ *
+ * @returns The bit value (1 or 0).  Bits beyond the used elements read as 0.
+ * @param   pBigNum         The big number.
+ * @param   iBit            The bit to return (0-based).
+ */
+DECLINLINE(RTBIGNUMELEMENT) rtBigNumMagnitudeGetBit(PCRTBIGNUM pBigNum, uint32_t iBit)
+{
+    uint32_t iElement = iBit / RTBIGNUM_ELEMENT_BITS;
+    if (iElement < pBigNum->cUsed)
+    {
+        iBit &= RTBIGNUM_ELEMENT_BITS - 1;
+        return (pBigNum->pauElements[iElement] >> iBit) & 1;
+    }
+    return 0;
+}
+
+
+/**
+ * Shifts the magnitude left by one.
+ *
+ * The variables must be unscrambled.
+ *
+ * @returns IPRT status code.
+ * @param   pBigNum         The big number.
+ * @param   uCarry          The value to shift in at the bottom (0 or 1).
+ */
+DECLINLINE(int) rtBigNumMagnitudeShiftLeftOne(PRTBIGNUM pBigNum, RTBIGNUMELEMENT uCarry)
+{
+    Assert(uCarry <= 1);
+
+    /* Do the shifting. */
+    uint32_t cUsed = pBigNum->cUsed;
+#ifdef IPRT_BIGINT_WITH_ASM
+    uCarry = rtBigNumMagnitudeShiftLeftOneAssemblyWorker(pBigNum->pauElements, cUsed, uCarry);
+#else
+    for (uint32_t i = 0; i < cUsed; i++)
+    {
+        RTBIGNUMELEMENT uTmp = pBigNum->pauElements[i];
+        pBigNum->pauElements[i] = (uTmp << 1) | uCarry;
+        uCarry = uTmp >> (RTBIGNUM_ELEMENT_BITS - 1); /* top bit becomes the next carry */
+    }
+#endif
+
+    /* If we still carry a bit, we need to increase the size. */
+    if (uCarry)
+    {
+        int rc = rtBigNumSetUsed(pBigNum, cUsed + 1);
+        AssertRCReturn(rc, rc);
+        pBigNum->pauElements[cUsed] = uCarry;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Shifts the magnitude left by @a cBits.
+ *
+ * The variables must be unscrambled.
+ *
+ * @returns IPRT status code.  VERR_OUT_OF_RANGE if the resulting bit width
+ *          does not fit in 32 bits.
+ * @param   pResult         Where to store the result.
+ * @param   pValue          The value to shift.
+ * @param   cBits           The shift count.
+ */
+static int rtBigNumMagnitudeShiftLeft(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
+{
+    int rc;
+    if (cBits)
+    {
+        uint32_t cBitsNew = rtBigNumMagnitudeBitWidth(pValue);
+        if (cBitsNew > 0)
+        {
+            if (cBitsNew + cBits > cBitsNew) /* guards against 32-bit wrap-around */
+            {
+                cBitsNew += cBits;
+                rc = rtBigNumSetUsedEx(pResult, 0, RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS);
+                if (RT_SUCCESS(rc))
+                    rc = rtBigNumSetUsed(pResult, RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS);
+                if (RT_SUCCESS(rc))
+                {
+                    uint32_t const cLeft = pValue->cUsed;
+                    PCRTBIGNUMELEMENT pauSrc = pValue->pauElements;
+                    PRTBIGNUMELEMENT pauDst = pResult->pauElements;
+
+                    /* Whole-element part of the shift: skip the low destination elements. */
+                    Assert(ASMMemIsZero(pauDst, (cBits / RTBIGNUM_ELEMENT_BITS) * RTBIGNUM_ELEMENT_SIZE));
+                    pauDst += cBits / RTBIGNUM_ELEMENT_BITS;
+
+                    /* Sub-element part of the shift. */
+                    cBits &= RTBIGNUM_ELEMENT_BITS - 1;
+                    if (cBits)
+                    {
+                        RTBIGNUMELEMENT uPrev = 0;
+                        for (uint32_t i = 0; i < cLeft; i++)
+                        {
+                            RTBIGNUMELEMENT uCur = pauSrc[i];
+                            pauDst[i] = (uCur << cBits) | (uPrev >> (RTBIGNUM_ELEMENT_BITS - cBits));
+                            uPrev = uCur;
+                        }
+                        uPrev >>= RTBIGNUM_ELEMENT_BITS - cBits;
+                        if (uPrev)
+                            pauDst[pValue->cUsed] = uPrev;
+                    }
+                    else
+                        memcpy(pauDst, pauSrc, cLeft * RTBIGNUM_ELEMENT_SIZE);
+                }
+            }
+            else
+                rc = VERR_OUT_OF_RANGE;
+        }
+        /* Shifting zero always yields a zero result. */
+        else
+            rc = rtBigNumSetUsed(pResult, 0);
+    }
+    else
+        rc = rtBigNumMagnitudeCopy(pResult, pValue);
+    return rc;
+}
+
+/**
+ * Shifts a big number left by @a cBits, keeping the sign of @a pValue.
+ *
+ * @returns IPRT status code.
+ * @param   pResult         Where to store the result.  Asserted to be
+ *                          distinct from @a pValue.
+ * @param   pValue          The value to shift.
+ * @param   cBits           The shift count.
+ */
+RTDECL(int) RTBigNumShiftLeft(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
+{
+    Assert(pResult != pValue);
+    AssertReturn(pResult->fSensitive >= pValue->fSensitive, VERR_BIGNUM_SENSITIVE_INPUT);
+
+    int rc = rtBigNumUnscramble(pResult);
+    if (RT_SUCCESS(rc))
+    {
+        RTBIGNUM_ASSERT_VALID(pResult);
+        rc = rtBigNumUnscramble((PRTBIGNUM)pValue);
+        if (RT_SUCCESS(rc))
+        {
+            RTBIGNUM_ASSERT_VALID(pValue);
+
+            pResult->fNegative = pValue->fNegative;
+            rc = rtBigNumMagnitudeShiftLeft(pResult, pValue, cBits);
+
+            rtBigNumScramble((PRTBIGNUM)pValue);
+        }
+        rtBigNumScramble(pResult);
+    }
+    return rc;
+}
+
+
+/**
+ * Shifts the magnitude right by @a cBits.
+ *
+ * The variables must be unscrambled.
+ *
+ * @returns IPRT status code.
+ * @param   pResult         Where to store the result.  Set to zero when
+ *                          @a cBits is at least the bit width of @a pValue.
+ * @param   pValue          The value to shift.
+ * @param   cBits           The shift count.
+ */
+static int rtBigNumMagnitudeShiftRight(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
+{
+    int rc;
+    if (cBits)
+    {
+        uint32_t cBitsNew = rtBigNumMagnitudeBitWidth(pValue);
+        if (cBitsNew > cBits)
+        {
+            cBitsNew -= cBits;
+            uint32_t cElementsNew = RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS;
+            rc = rtBigNumSetUsed(pResult, cElementsNew);
+            if (RT_SUCCESS(rc))
+            {
+                uint32_t i = cElementsNew;
+                PCRTBIGNUMELEMENT pauSrc = pValue->pauElements;
+                PRTBIGNUMELEMENT pauDst = pResult->pauElements;
+
+                /* Whole-element part of the shift: skip the low source elements. */
+                pauSrc += cBits / RTBIGNUM_ELEMENT_BITS;
+
+                /* Sub-element part of the shift, working from the top element down. */
+                cBits &= RTBIGNUM_ELEMENT_BITS - 1;
+                if (cBits)
+                {
+                    /* Don't read past the last used source element. */
+                    RTBIGNUMELEMENT uPrev = &pauSrc[i] == &pValue->pauElements[pValue->cUsed] ? 0 : pauSrc[i];
+                    while (i-- > 0)
+                    {
+                        RTBIGNUMELEMENT uCur = pauSrc[i];
+                        pauDst[i] = (uCur >> cBits) | (uPrev << (RTBIGNUM_ELEMENT_BITS - cBits));
+                        uPrev = uCur;
+                    }
+                }
+                else
+                    memcpy(pauDst, pauSrc, i * RTBIGNUM_ELEMENT_SIZE);
+            }
+        }
+        else
+            rc = rtBigNumSetUsed(pResult, 0);
+    }
+    else
+        rc = rtBigNumMagnitudeCopy(pResult, pValue);
+    return rc;
+}
+
+/**
+ * Shifts a big number right by @a cBits (magnitude shift), keeping the sign of
+ * @a pValue unless the result is zero.
+ *
+ * @returns IPRT status code.
+ * @param   pResult         Where to store the result.  Asserted to be
+ *                          distinct from @a pValue.
+ * @param   pValue          The value to shift.
+ * @param   cBits           The shift count.
+ */
+RTDECL(int) RTBigNumShiftRight(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
+{
+    Assert(pResult != pValue);
+    AssertReturn(pResult->fSensitive >= pValue->fSensitive, VERR_BIGNUM_SENSITIVE_INPUT);
+
+    int rc = rtBigNumUnscramble(pResult);
+    if (RT_SUCCESS(rc))
+    {
+        RTBIGNUM_ASSERT_VALID(pResult);
+        rc = rtBigNumUnscramble((PRTBIGNUM)pValue);
+        if (RT_SUCCESS(rc))
+        {
+            RTBIGNUM_ASSERT_VALID(pValue);
+
+            pResult->fNegative = pValue->fNegative;
+            rc = rtBigNumMagnitudeShiftRight(pResult, pValue, cBits);
+            if (!pResult->cUsed)
+                pResult->fNegative = 0;
+
+            rtBigNumScramble((PRTBIGNUM)pValue);
+        }
+        rtBigNumScramble(pResult);
+    }
+    return rc;
+}
+
+
+/**
+ * Implements the D3 test for Qhat decrementation.
+ *
+ * @returns True if Qhat should be decremented.
+ * @param   puQhat              Pointer to Qhat.
+ * @param   uRhat               The remainder.
+ * @param   uDivisorY           The penultimate divisor element.
+ * @param   uDividendJMinus2    The j-2 dividend element.
+ */
+DECLINLINE(bool) rtBigNumKnuthD3_ShouldDecrementQhat(RTBIGNUMELEMENT2X const *puQhat, RTBIGNUMELEMENT uRhat,
+                                                     RTBIGNUMELEMENT uDivisorY, RTBIGNUMELEMENT uDividendJMinus2)
+{
+    if (puQhat->s.Lo == RTBIGNUM_ELEMENT_MAX && puQhat->s.Hi == 0)
+        return true;
+    /* Otherwise decrement when Qhat * uDivisorY > (uRhat << element bits) + uDividendJMinus2. */
+#if RTBIGNUM_ELEMENT_BITS == 64
+    RTBIGNUMELEMENT2X TmpLeft;
+    RTUInt128MulByU64(&TmpLeft, puQhat, uDivisorY);
+
+    RTBIGNUMELEMENT2X TmpRight;
+    TmpRight.s.Lo = 0;
+    TmpRight.s.Hi = uRhat;
+    RTUInt128AssignAddU64(&TmpRight, uDividendJMinus2);
+
+    if (RTUInt128Compare(&TmpLeft, &TmpRight) > 0)
+        return true;
+#else
+    if (puQhat->u * uDivisorY > ((uint64_t)uRhat << 32) + uDividendJMinus2)
+        return true;
+#endif
+    return false;
+}
+
+
+/**
+ * C implementation of the D3 step of Knuth's division algorithm.
+ *
+ * This estimates a value Qhat that will be used as quotient "digit" (element)
+ * at the current level of the division (j).
+ *
+ * @returns The Qhat value we've estimated.
+ * @param   pauDividendJN   Pointer to the j+n (normalized) dividend element.
+ *                          Will access up to two elements prior to this.
+ * @param   uDivZ           The last element in the (normalized) divisor.
+ * @param   uDivY           The penultimate element in the (normalized) divisor.
+ */
+DECLINLINE(RTBIGNUMELEMENT) rtBigNumKnuthD3_EstimateQhat(PCRTBIGNUMELEMENT pauDividendJN,
+                                                         RTBIGNUMELEMENT uDivZ, RTBIGNUMELEMENT uDivY)
+{
+    RTBIGNUMELEMENT2X uQhat;
+    RTBIGNUMELEMENT uRhat;
+    RTBIGNUMELEMENT uDividendJN = pauDividendJN[0];
+    Assert(uDividendJN <= uDivZ);
+    if (uDividendJN != uDivZ)
+        rtBigNumElement2xDiv2xBy1x(&uQhat, &uRhat, uDividendJN, pauDividendJN[-1], uDivZ);
+    else
+    {
+        /*
+         * This is the case where we end up with an initial Qhat that's all Fs.
+         */
+        /* Calc the remainder for max Qhat value. */
+        RTBIGNUMELEMENT2X uTmp1; /* (v[j+n] << bits) + v[j+n-1] */
+        uTmp1.s.Hi = uDivZ;
+        uTmp1.s.Lo = pauDividendJN[-1];
+
+        RTBIGNUMELEMENT2X uTmp2; /* uQhat * uDividendJN, i.e. RTBIGNUM_ELEMENT_MAX * uDivZ here */
+        uTmp2.s.Hi = uDivZ - 1;
+        uTmp2.s.Lo = 0 - uDivZ;
+#if RTBIGNUM_ELEMENT_BITS == 64
+        RTUInt128AssignSub(&uTmp1, &uTmp2);
+#else
+        uTmp1.u -= uTmp2.u;
+#endif
+        /* If we overflowed the remainder, don't bother trying to adjust. */
+        if (uTmp1.s.Hi)
+            return RTBIGNUM_ELEMENT_MAX;
+
+        uRhat = uTmp1.s.Lo;
+        uQhat.s.Lo = RTBIGNUM_ELEMENT_MAX;
+        uQhat.s.Hi = 0;
+    }
+
+    /*
+     * Adjust Q to eliminate all cases where it's two too large and most cases
+     * where it's one too large.
+     */
+    while (rtBigNumKnuthD3_ShouldDecrementQhat(&uQhat, uRhat, uDivY, pauDividendJN[-2]))
+    {
+        rtBigNumElement2xDec(&uQhat);
+        uRhat += uDivZ;
+        if (uRhat < uDivZ /* overflow */ || uRhat == RTBIGNUM_ELEMENT_MAX)
+            break;
+    }
+
+    return uQhat.s.Lo;
+}
+
+
+#ifdef IPRT_BIGINT_WITH_ASM
+DECLASM(bool) rtBigNumKnuthD4_MulSub(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor,
+                                     uint32_t cDivisor, RTBIGNUMELEMENT uQhat);
+#else
+/**
+ * C implementation of the D4 step of Knuth's division algorithm.
+ *
+ * This subtracts Divisor * Qhat from the dividend at the current J index.
+ *
+ * @returns true if negative result (unlikely), false if positive.
+ * @param   pauDividendJ    Pointer to the j-th (normalized) dividend element.
+ *                          Elements 0 thru @a cDivisor (inclusive) are updated.
+ * @param   pauDivisor      The (normalized) divisor elements.
+ * @param   cDivisor        The number of divisor elements.
+ * @param   uQhat           The estimated quotient element to multiply the
+ *                          divisor by.
+ */
+DECLINLINE(bool) rtBigNumKnuthD4_MulSub(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor,
+                                        uint32_t cDivisor, RTBIGNUMELEMENT uQhat)
+{
+    uint32_t i;
+    bool fBorrow = false;
+    RTBIGNUMELEMENT uMulCarry = 0;
+    for (i = 0; i < cDivisor; i++)
+    {
+        /* uSub = uQhat * divisor[i] + carry from the previous element product. */
+        RTBIGNUMELEMENT2X uSub;
+# if RTBIGNUM_ELEMENT_BITS == 64
+        RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]);
+        RTUInt128AssignAddU64(&uSub, uMulCarry);
+# else
+        uSub.u = (uint64_t)uQhat * pauDivisor[i] + uMulCarry;
+# endif
+        uMulCarry = uSub.s.Hi;
+
+        /* Subtract the low half (plus any pending borrow) from the dividend element. */
+        RTBIGNUMELEMENT uDividendI = pauDividendJ[i];
+        if (!fBorrow)
+        {
+            fBorrow = uDividendI < uSub.s.Lo;
+            uDividendI -= uSub.s.Lo;
+        }
+        else
+        {
+            fBorrow = uDividendI <= uSub.s.Lo;
+            uDividendI -= uSub.s.Lo + 1;
+        }
+        pauDividendJ[i] = uDividendI;
+    }
+
+    /* Carry and borrow into the final dividend element. */
+    RTBIGNUMELEMENT uDividendI = pauDividendJ[i];
+    if (!fBorrow)
+    {
+        fBorrow = uDividendI < uMulCarry;
+        pauDividendJ[i] = uDividendI - uMulCarry;
+    }
+    else
+    {
+        fBorrow = uDividendI <= uMulCarry;
+        pauDividendJ[i] = uDividendI - uMulCarry - 1;
+    }
+
+    return fBorrow;
+}
+#endif /* !IPRT_BIGINT_WITH_ASM */
+
+
+/**
+ * C implementation of the D6 step of Knuth's division algorithm.
+ *
+ * This adds the divisor to the dividend to undo the negative value step D4
+ * produced.  This is not a very frequent occurrence.
+ *
+ * @param   pauDividendJ    Pointer to the j-th (normalized) dividend element.
+ *                          Elements 0 thru @a cDivisor (inclusive) are updated.
+ * @param   pauDivisor      The (normalized) divisor elements.
+ * @param   cDivisor        The number of divisor elements.
+ */
+DECLINLINE(void) rtBigNumKnuthD6_AddBack(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor, uint32_t cDivisor)
+{
+    RTBIGNUMELEMENT2X uTmp;
+    uTmp.s.Lo = 0;
+
+    uint32_t i;
+    for (i = 0; i < cDivisor; i++)
+    {
+        /* uTmp.s.Lo holds the carry from the previous element on entry. */
+        uTmp.s.Hi = 0;
+#if RTBIGNUM_ELEMENT_BITS == 64
+        RTUInt128AssignAddU64(&uTmp, pauDivisor[i]);
+        RTUInt128AssignAddU64(&uTmp, pauDividendJ[i]);
+#else
+        uTmp.u += pauDivisor[i];
+        uTmp.u += pauDividendJ[i];
+#endif
+        pauDividendJ[i] = uTmp.s.Lo;
+        uTmp.s.Lo = uTmp.s.Hi;
+    }
+
+    /* The final dividend entry. */
+    Assert(pauDividendJ[i] + uTmp.s.Lo < uTmp.s.Lo);
+    pauDividendJ[i] += uTmp.s.Lo;
+}
+
+
+/**
+ * Knuth's division (core).
+ *
+ * @returns IPRT status code.
+ * @param   pQuotient       Where to return the quotient.  Can be NULL.
+ * @param   pRemainder      Where to return the remainder.
+ * @param   pDividend       What to divide.  Asserted to have at least as many
+ *                          elements as the divisor.
+ * @param   pDivisor        What to divide by.  Asserted to have more than one
+ *                          element.
+ */
+static int rtBigNumMagnitudeDivideKnuth(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    Assert(pDivisor->cUsed > 1);
+    uint32_t const cDivisor = pDivisor->cUsed;
+    Assert(pDividend->cUsed >= cDivisor);
+
+    /*
+     * Make sure we've got enough space in the quotient, so we can build it
+     * without any trouble come step D5.
+     */
+    int rc;
+    if (pQuotient)
+    {
+        rc = rtBigNumSetUsedEx(pQuotient, 0, pDividend->cUsed - cDivisor + 1);
+        if (RT_SUCCESS(rc))
+            rc = rtBigNumSetUsed(pQuotient, pDividend->cUsed - cDivisor + 1);
+        if (RT_FAILURE(rc))
+            return rc;
+    }
+
+    /*
+     * D1. Normalize.  The goal here is to make sure the last element in the
+     * divisor is greater than RTBIGNUM_ELEMENT_MAX/2.  We must also make sure
+     * we can access element pDividend->cUsed of the normalized dividend.
+ */ + RTBIGNUM NormDividend; + RTBIGNUM NormDivisor; + PCRTBIGNUM pNormDivisor = &NormDivisor; + rtBigNumInitZeroTemplate(&NormDivisor, pDividend); + + uint32_t cNormShift = (RTBIGNUM_ELEMENT_BITS - rtBigNumMagnitudeBitWidth(pDivisor)) & (RTBIGNUM_ELEMENT_BITS - 1); + if (cNormShift) + { + rtBigNumInitZeroTemplate(&NormDividend, pDividend); + rc = rtBigNumMagnitudeShiftLeft(&NormDividend, pDividend, cNormShift); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeShiftLeft(&NormDivisor, pDivisor, cNormShift); + } + else + { + pNormDivisor = pDivisor; + rc = rtBigNumCloneInternal(&NormDividend, pDividend); + } + if (RT_SUCCESS(rc) && pDividend->cUsed == NormDividend.cUsed) + rc = rtBigNumEnsureExtraZeroElements(&NormDividend, NormDividend.cUsed + 1); + if (RT_SUCCESS(rc)) + { + /* + * D2. Initialize the j index so we can loop thru the elements in the + * dividend that makes it larger than the divisor. + */ + uint32_t j = pDividend->cUsed - cDivisor; + + RTBIGNUMELEMENT const DivZ = pNormDivisor->pauElements[cDivisor - 1]; + RTBIGNUMELEMENT const DivY = pNormDivisor->pauElements[cDivisor - 2]; + for (;;) + { + /* + * D3. Estimate a Q' by dividing the j and j-1 dividen elements by + * the last divisor element, then adjust against the next elements. + */ + RTBIGNUMELEMENT uQhat = rtBigNumKnuthD3_EstimateQhat(&NormDividend.pauElements[j + cDivisor], DivZ, DivY); + + /* + * D4. Multiply and subtract. + */ + bool fNegative = rtBigNumKnuthD4_MulSub(&NormDividend.pauElements[j], pNormDivisor->pauElements, cDivisor, uQhat); + + /* + * D5. Test remainder. + * D6. Add back. + */ + if (fNegative) + { +//__debugbreak(); + rtBigNumKnuthD6_AddBack(&NormDividend.pauElements[j], pNormDivisor->pauElements, cDivisor); + uQhat--; + } + + if (pQuotient) + pQuotient->pauElements[j] = uQhat; + + /* + * D7. Loop on j. + */ + if (j == 0) + break; + j--; + } + + /* + * D8. Unnormalize the remainder. 
+         */
+        rtBigNumStripTrailingZeros(&NormDividend);
+        if (cNormShift)
+            rc = rtBigNumMagnitudeShiftRight(pRemainder, &NormDividend, cNormShift);
+        else
+            rc = rtBigNumMagnitudeCopy(pRemainder, &NormDividend);
+        if (pQuotient)
+            rtBigNumStripTrailingZeros(pQuotient);
+    }
+
+    /*
+     * Delete temporary variables.
+     */
+    RTBigNumDestroy(&NormDividend);
+    if (pNormDivisor == &NormDivisor)
+        RTBigNumDestroy(&NormDivisor);
+    return rc;
+}
+
+/**
+ * Bit-by-bit long division of magnitudes.
+ *
+ * The dividend bits are shifted into @a pRemainder from the top down; whenever
+ * the remainder reaches the divisor, the divisor is subtracted and the
+ * corresponding quotient bit is set.  This builds on whatever value the two
+ * outputs hold on entry (rtBigNumMagnitudeDivide zeroes both before calling).
+ *
+ * @returns IPRT status code.
+ * @param   pQuotient       Where to return the quotient.
+ * @param   pRemainder      Where to return the remainder.
+ * @param   pDividend       What to divide.
+ * @param   pDivisor        What to divide by.
+ */
+static int rtBigNumMagnitudeDivideSlowLong(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    /*
+     * Do very simple long division.  This ain't fast, but it does the trick.
+     */
+    int rc = VINF_SUCCESS;
+    uint32_t iBit = rtBigNumMagnitudeBitWidth(pDividend);
+    while (iBit-- > 0)
+    {
+        rc = rtBigNumMagnitudeShiftLeftOne(pRemainder, rtBigNumMagnitudeGetBit(pDividend, iBit));
+        AssertRCBreak(rc);
+        int iDiff = rtBigNumMagnitudeCompare(pRemainder, pDivisor);
+        if (iDiff >= 0)
+        {
+            if (iDiff != 0)
+            {
+                rc = rtBigNumMagnitudeSubThis(pRemainder, pDivisor);
+                AssertRCBreak(rc);
+            }
+            else
+                rtBigNumSetUsed(pRemainder, 0); /* remainder equals divisor, so it becomes zero */
+            rc = rtBigNumMagnitudeSetBit(pQuotient, iBit);
+            AssertRCBreak(rc);
+        }
+    }
+
+    /* This shouldn't be necessary. */
+    rtBigNumStripTrailingZeros(pQuotient);
+    rtBigNumStripTrailingZeros(pRemainder);
+
+    return rc;
+}
+
+
+/**
+ * Divides the magnitudes of two values, letting the caller care about the sign
+ * bit.
+ *
+ * All variables must be unscrambled.  The sign flag is not considered nor
+ * touched, this means the caller has to check for zero outputs.
+ *
+ * @returns IPRT status code.
+ * @param   pQuotient       Where to return the quotient.
+ * @param   pRemainder      Where to return the remainder.
+ * @param   pDividend       What to divide.
+ * @param   pDivisor        What to divide by.
+ * @param   fForceLong      Force long division.
+ */
+static int rtBigNumMagnitudeDivide(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor,
+                                   bool fForceLong)
+{
+    Assert(pQuotient != pDividend); Assert(pQuotient != pDivisor); Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); Assert(pRemainder != pQuotient);
+    Assert(!pQuotient->fCurScrambled); Assert(!pRemainder->fCurScrambled); Assert(!pDividend->fCurScrambled); Assert(!pDivisor->fCurScrambled);
+
+    /*
+     * Just set both output values to zero as that's the return for several
+     * special cases and the initial state of the general case.
+     */
+    rtBigNumSetUsed(pQuotient, 0);
+    rtBigNumSetUsed(pRemainder, 0);
+
+    /*
+     * Dividing something by zero is undefined.
+     * Dividing zero by something is zero, unless the divisor is also zero.
+     */
+    if (!pDivisor->cUsed || !pDividend->cUsed)
+        return pDivisor->cUsed ? VINF_SUCCESS : VERR_BIGNUM_DIV_BY_ZERO;
+
+    /*
+     * Dividing by one?  Quotient = dividend, no remainder.
+     */
+    if (pDivisor->cUsed == 1 && pDivisor->pauElements[0] == 1)
+        return rtBigNumMagnitudeCopy(pQuotient, pDividend);
+
+    /*
+     * Dividend smaller than the divisor.  Zero quotient, the remainder is the
+     * whole dividend.
+     */
+    int iDiff = rtBigNumMagnitudeCompare(pDividend, pDivisor);
+    if (iDiff < 0)
+        return rtBigNumMagnitudeCopy(pRemainder, pDividend);
+
+    /*
+     * Since we already have done the compare, check if the two values are the
+     * same.  The result is 1 and no remainder then.
+     */
+    if (iDiff == 0)
+    {
+        int rc = rtBigNumSetUsed(pQuotient, 1);
+        if (RT_SUCCESS(rc))
+            pQuotient->pauElements[0] = 1;
+        return rc;
+    }
+
+    /*
+     * Sort out special cases before going to the preferred or select algorithm.
+     */
+    int rc;
+    if (pDividend->cUsed <= 2 && !fForceLong)
+    {
+        if (pDividend->cUsed < 2)
+        {
+            /*
+             * Single element division.
+             */
+            RTBIGNUMELEMENT uQ = pDividend->pauElements[0] / pDivisor->pauElements[0];
+            RTBIGNUMELEMENT uR = pDividend->pauElements[0] % pDivisor->pauElements[0];
+            rc = VINF_SUCCESS;
+            if (uQ) /* outputs were zeroed above, so only non-zero values need storing */
+            {
+                rc = rtBigNumSetUsed(pQuotient, 1);
+                if (RT_SUCCESS(rc))
+                    pQuotient->pauElements[0] = uQ;
+            }
+            if (uR && RT_SUCCESS(rc))
+            {
+                rc = rtBigNumSetUsed(pRemainder, 1);
+                if (RT_SUCCESS(rc))
+                    pRemainder->pauElements[0] = uR;
+            }
+        }
+        else
+        {
+            /*
+             * Two elements dividend by a one or two element divisor.
+             */
+            RTBIGNUMELEMENT2X uQ, uR;
+            if (pDivisor->cUsed == 1)
+            {
+                rtBigNumElement2xDiv2xBy1x(&uQ, &uR.s.Lo, pDividend->pauElements[1], pDividend->pauElements[0],
+                                           pDivisor->pauElements[0]);
+                uR.s.Hi = 0;
+            }
+            else
+                rtBigNumElement2xDiv(&uQ, &uR, pDividend->pauElements[1], pDividend->pauElements[0],
+                                     pDivisor->pauElements[1], pDivisor->pauElements[0]);
+            rc = rtBigNumElement2xCopyToMagnitude(&uQ, pQuotient);
+            if (RT_SUCCESS(rc))
+                rc = rtBigNumElement2xCopyToMagnitude(&uR, pRemainder);
+        }
+    }
+    /*
+     * Decide upon which algorithm to use.  Knuth requires a divisor that's at
+     * least 2 elements big.
+     */
+    else if (pDivisor->cUsed < 2 || fForceLong)
+        rc = rtBigNumMagnitudeDivideSlowLong(pQuotient, pRemainder, pDividend, pDivisor);
+    else
+        rc = rtBigNumMagnitudeDivideKnuth(pQuotient, pRemainder, pDividend, pDivisor);
+    return rc;
+}
+
+/**
+ * Common worker for RTBigNumDivide and RTBigNumDivideLong: unscrambles the
+ * operands, sets the signs, divides the magnitudes and scrambles again.
+ *
+ * @returns IPRT status code.
+ * @param   pQuotient       Where to return the quotient.
+ * @param   pRemainder      Where to return the remainder.
+ * @param   pDividend       What to divide.
+ * @param   pDivisor        What to divide by.
+ * @param   fForceLong      Passed on to rtBigNumMagnitudeDivide.
+ */
+static int rtBigNumDivideCommon(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder,
+                                PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor, bool fForceLong)
+{
+    Assert(pQuotient != pDividend); Assert(pQuotient != pDivisor); Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); Assert(pRemainder != pQuotient);
+    AssertReturn(pQuotient->fSensitive >= (pDividend->fSensitive | pDivisor->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT);
+    AssertReturn(pRemainder->fSensitive >= (pDividend->fSensitive | pDivisor->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT);
+
+    int rc = rtBigNumUnscramble(pQuotient);
+    if (RT_SUCCESS(rc))
+    {
+        RTBIGNUM_ASSERT_VALID(pQuotient);
+        rc = rtBigNumUnscramble(pRemainder);
+        if (RT_SUCCESS(rc))
+        {
+            RTBIGNUM_ASSERT_VALID(pRemainder);
+            rc = rtBigNumUnscramble((PRTBIGNUM)pDividend);
+            if (RT_SUCCESS(rc))
+            {
+                RTBIGNUM_ASSERT_VALID(pDividend);
+                rc = rtBigNumUnscramble((PRTBIGNUM)pDivisor);
+                if (RT_SUCCESS(rc))
+                {
+                    RTBIGNUM_ASSERT_VALID(pDivisor);
+
+                    /*
+                     * The sign value of the remainder is the same as the dividend.
+                     * The sign values of the quotient follow XOR rules, just like multiplication:
+                     *      -3 /  2 = -1; r=-1;     1 ^ 0 = 1
+                     *       3 / -2 = -1; r= 1;     0 ^ 1 = 1
+                     *      -3 / -2 =  1; r=-1;     1 ^ 1 = 0
+                     *       3 /  2 =  1; r= 1;     0 ^ 0 = 0
+                     */
+                    pQuotient->fNegative = pDividend->fNegative ^ pDivisor->fNegative;
+                    pRemainder->fNegative = pDividend->fNegative;
+
+                    rc = rtBigNumMagnitudeDivide(pQuotient, pRemainder, pDividend, pDivisor, fForceLong);
+
+                    /* Zero results never carry a sign. */
+                    if (pQuotient->cUsed == 0)
+                        pQuotient->fNegative = 0;
+                    if (pRemainder->cUsed == 0)
+                        pRemainder->fNegative = 0;
+
+                    rtBigNumScramble((PRTBIGNUM)pDivisor);
+                }
+                rtBigNumScramble((PRTBIGNUM)pDividend);
+            }
+            rtBigNumScramble(pRemainder);
+        }
+        rtBigNumScramble(pQuotient);
+    }
+    return rc;
+}
+
+/** Divides @a pDividend by @a pDivisor without forcing the long division path. */
+RTDECL(int) RTBigNumDivide(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    return rtBigNumDivideCommon(pQuotient, pRemainder, pDividend, pDivisor, false /*fForceLong*/);
+}
+
+/** Divides @a pDividend by @a pDivisor, forcing the long division path. */
+RTDECL(int) RTBigNumDivideLong(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    return rtBigNumDivideCommon(pQuotient, pRemainder, pDividend, pDivisor, true /*fForceLong*/);
+}
+
+
+/**
+ * Calculates the modulus of a magnitude value, leaving the sign bit to the
+ * caller.
+ *
+ * All variables must be unscrambled.  The sign flag is not considered nor
+ * touched, this means the caller has to check for zero outputs.
+ *
+ * @returns IPRT status code.
+ * @param   pRemainder      Where to return the remainder.
+ * @param   pDividend       What to divide.
+ * @param   pDivisor        What to divide by.
+ */
+static int rtBigNumMagnitudeModulo(PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor);
+    Assert(!pRemainder->fCurScrambled); Assert(!pDividend->fCurScrambled); Assert(!pDivisor->fCurScrambled);
+
+    /*
+     * Just set the output value to zero as that's the return for several
+     * special cases and the initial state of the general case.
+ */ + rtBigNumSetUsed(pRemainder, 0); + + /* + * Dividing something by zero is undefined. + * Diving zero by something is zero, unless the divsor is also zero. + */ + if (!pDivisor->cUsed || !pDividend->cUsed) + return pDivisor->cUsed ? VINF_SUCCESS : VERR_BIGNUM_DIV_BY_ZERO; + + /* + * Dividing by one? Quotient = dividend, no remainder. + */ + if (pDivisor->cUsed == 1 && pDivisor->pauElements[0] == 1) + return VINF_SUCCESS; + + /* + * Dividend smaller than the divisor. Zero quotient, all divisor. + */ + int iDiff = rtBigNumMagnitudeCompare(pDividend, pDivisor); + if (iDiff < 0) + return rtBigNumMagnitudeCopy(pRemainder, pDividend); + + /* + * Since we already have done the compare, check if the two values are the + * same. The result is 1 and no remainder then. + */ + if (iDiff == 0) + return VINF_SUCCESS; + + /** @todo optimize small numbers. */ + int rc = VINF_SUCCESS; + if (pDivisor->cUsed < 2) + { + /* + * Do very simple long division. This ain't fast, but it does the trick. + */ + uint32_t iBit = rtBigNumMagnitudeBitWidth(pDividend); + while (iBit-- > 0) + { + rc = rtBigNumMagnitudeShiftLeftOne(pRemainder, rtBigNumMagnitudeGetBit(pDividend, iBit)); + AssertRCBreak(rc); + iDiff = rtBigNumMagnitudeCompare(pRemainder, pDivisor); + if (iDiff >= 0) + { + if (iDiff != 0) + { + rc = rtBigNumMagnitudeSubThis(pRemainder, pDivisor); + AssertRCBreak(rc); + } + else + rtBigNumSetUsed(pRemainder, 0); + } + } + } + else + { + /* + * Join paths with division. + */ + rc = rtBigNumMagnitudeDivideKnuth(NULL, pRemainder, pDividend, pDivisor); + } + + /* This shouldn't be necessary. 
*/ + rtBigNumStripTrailingZeros(pRemainder); + return rc; +} + + +RTDECL(int) RTBigNumModulo(PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor) +{ + Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); + AssertReturn(pRemainder->fSensitive >= (pDividend->fSensitive | pDivisor->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pRemainder); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pRemainder); + rc = rtBigNumUnscramble((PRTBIGNUM)pDividend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pDividend); + rc = rtBigNumUnscramble((PRTBIGNUM)pDivisor); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pDivisor); + + /* + * The sign value of the remainder is the same as the dividend. + */ + pRemainder->fNegative = pDividend->fNegative; + + rc = rtBigNumMagnitudeModulo(pRemainder, pDividend, pDivisor); + + if (pRemainder->cUsed == 0) + pRemainder->fNegative = 0; + + rtBigNumScramble((PRTBIGNUM)pDivisor); + } + rtBigNumScramble((PRTBIGNUM)pDividend); + } + rtBigNumScramble(pRemainder); + } + return rc; +} + + + +/** + * Exponentiate the magnitude. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched, this means the caller have to reject negative exponents. + * + * @returns IPRT status code. + * @param pResult Where to return power. + * @param pBase The base value. + * @param pExponent The exponent (assumed positive or zero). + */ +static int rtBigNumMagnitudeExponentiate(PRTBIGNUM pResult, PCRTBIGNUM pBase, PCRTBIGNUM pExponent) +{ + Assert(pResult != pBase); Assert(pResult != pExponent); + Assert(!pResult->fCurScrambled); Assert(!pBase->fCurScrambled); Assert(!pExponent->fCurScrambled); + + /* + * A couple of special cases. + */ + int rc; + /* base ^ 0 => 1. */ + if (pExponent->cUsed == 0) + { + rc = rtBigNumSetUsed(pResult, 1); + if (RT_SUCCESS(rc)) + pResult->pauElements[0] = 1; + return rc; + } + + /* base ^ 1 => base. 
*/ + if (pExponent->cUsed == 1 && pExponent->pauElements[0] == 1) + return rtBigNumMagnitudeCopy(pResult, pBase); + + /* + * Set up. + */ + /* Init temporary power-of-two variable to base. */ + RTBIGNUM Pow2; + rc = rtBigNumCloneInternal(&Pow2, pBase); + if (RT_SUCCESS(rc)) + { + /* Init result to 1. */ + rc = rtBigNumSetUsed(pResult, 1); + if (RT_SUCCESS(rc)) + { + pResult->pauElements[0] = 1; + + /* Make a temporary variable that we can use for temporary storage of the result. */ + RTBIGNUM TmpMultiplicand; + rc = rtBigNumCloneInternal(&TmpMultiplicand, pResult); + if (RT_SUCCESS(rc)) + { + /* + * Exponentiation by squaring. Reduces the number of + * multiplications to: NumBitsSet(Exponent) + BitWidth(Exponent). + */ + uint32_t const cExpBits = rtBigNumMagnitudeBitWidth(pExponent); + uint32_t iBit = 0; + for (;;) + { + if (rtBigNumMagnitudeGetBit(pExponent, iBit) != 0) + { + rc = rtBigNumMagnitudeCopy(&TmpMultiplicand, pResult); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeMultiply(pResult, &TmpMultiplicand, &Pow2); + if (RT_FAILURE(rc)) + break; + } + + /* Done? */ + iBit++; + if (iBit >= cExpBits) + break; + + /* Not done yet, square the base again. 
*/ + rc = rtBigNumMagnitudeCopy(&TmpMultiplicand, &Pow2); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeMultiply(&Pow2, &TmpMultiplicand, &TmpMultiplicand); + if (RT_FAILURE(rc)) + break; + } + + RTBigNumDestroy(&TmpMultiplicand); + } + } + RTBigNumDestroy(&Pow2); + } + return rc; +} + + +RTDECL(int) RTBigNumExponentiate(PRTBIGNUM pResult, PCRTBIGNUM pBase, PCRTBIGNUM pExponent) +{ + Assert(pResult != pBase); Assert(pResult != pExponent); + AssertReturn(pResult->fSensitive >= (pBase->fSensitive | pExponent->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + rc = rtBigNumUnscramble((PRTBIGNUM)pBase); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pBase); + rc = rtBigNumUnscramble((PRTBIGNUM)pExponent); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pExponent); + if (!pExponent->fNegative) + { + pResult->fNegative = pBase->fNegative; /* sign unchanged. */ + rc = rtBigNumMagnitudeExponentiate(pResult, pBase, pExponent); + } + else + rc = VERR_BIGNUM_NEGATIVE_EXPONENT; + + rtBigNumScramble((PRTBIGNUM)pExponent); + } + rtBigNumScramble((PRTBIGNUM)pBase); + } + rtBigNumScramble(pResult); + } + return rc; +} + + +/** + * Modular exponentiation, magnitudes only. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched, this means the caller have to reject negative exponents and do any + * other necessary sign bit fiddling. + * + * @returns IPRT status code. + * @param pResult Where to return the remainder of the power. + * @param pBase The base value. + * @param pExponent The exponent (assumed positive or zero). + * @param pModulus The modulus value (or divisor if you like). 
+ */
+static int rtBigNumMagnitudeModExp(PRTBIGNUM pResult, PRTBIGNUM pBase, PRTBIGNUM pExponent, PRTBIGNUM pModulus)
+{
+    Assert(pResult != pBase); Assert(pResult != pExponent); Assert(pResult != pModulus);
+    Assert(!pResult->fCurScrambled); Assert(!pBase->fCurScrambled); Assert(!pExponent->fCurScrambled); Assert(!pModulus->fCurScrambled);
+    int rc;
+
+    /*
+     * Check some special cases to get them out of the way.
+     */
+    /* Div by 0 => invalid. */
+    if (pModulus->cUsed == 0)
+        return VERR_BIGNUM_DIV_BY_ZERO;
+
+    /* Div by 1 => no remainder. */
+    if (pModulus->cUsed == 1 && pModulus->pauElements[0] == 1)
+    {
+        rtBigNumSetUsed(pResult, 0);
+        return VINF_SUCCESS;
+    }
+
+    /* base ^ 0 => 1. */
+    if (pExponent->cUsed == 0)
+    {
+        rc = rtBigNumSetUsed(pResult, 1);
+        if (RT_SUCCESS(rc))
+            pResult->pauElements[0] = 1;
+        return rc;
+    }
+
+    /* base ^ 1 => base. */
+    if (pExponent->cUsed == 1 && pExponent->pauElements[0] == 1)
+        return rtBigNumMagnitudeModulo(pResult, pBase, pModulus);
+
+    /*
+     * Set up.
+     */
+    /* Result = 1; preallocate space for the result while at it. */
+    rc = rtBigNumSetUsed(pResult, pModulus->cUsed + 1);
+    if (RT_SUCCESS(rc))
+        rc = rtBigNumSetUsed(pResult, 1);
+    if (RT_SUCCESS(rc))
+    {
+        pResult->pauElements[0] = 1;
+
+        /* ModBase = pBase or pBase % pModulus depending on the difference in size. */
+        RTBIGNUM Pow2;
+        if (pBase->cUsed <= pModulus->cUsed + pModulus->cUsed / 2)
+            rc = rtBigNumCloneInternal(&Pow2, pBase);
+        else
+            rc = rtBigNumMagnitudeModulo(rtBigNumInitZeroTemplate(&Pow2, pBase), pBase, pModulus);
+
+        /* Need a couple of temporary variables. */
+        RTBIGNUM TmpMultiplicand;
+        rtBigNumInitZeroTemplate(&TmpMultiplicand, pResult);
+
+        RTBIGNUM TmpProduct;
+        rtBigNumInitZeroTemplate(&TmpProduct, pResult);
+
+        /*
+         * Only enter the loop when Pow2 was set up successfully.  Otherwise
+         * the loop would overwrite the failure status in rc and could report
+         * success for a result computed from an unusable Pow2.
+         */
+        if (RT_SUCCESS(rc))
+        {
+            /*
+             * We combine the exponentiation by squaring with the fact that:
+             *      (a*b) mod n = ( (a mod n) * (b mod n) ) mod n
+             *
+             * Thus, we can reduce the size of intermediate results by mod'ing them
+             * in each step.
+             */
+            uint32_t const  cExpBits = rtBigNumMagnitudeBitWidth(pExponent);
+            uint32_t        iBit     = 0;
+            for (;;)
+            {
+                if (rtBigNumMagnitudeGetBit(pExponent, iBit) != 0)
+                {
+                    rc = rtBigNumMagnitudeCopy(&TmpMultiplicand, pResult);
+                    if (RT_SUCCESS(rc))
+                        rc = rtBigNumMagnitudeMultiply(&TmpProduct, &TmpMultiplicand, &Pow2);
+                    if (RT_SUCCESS(rc))
+                        rc = rtBigNumMagnitudeModulo(pResult, &TmpProduct, pModulus);
+                    if (RT_FAILURE(rc))
+                        break;
+                }
+
+                /* Done? */
+                iBit++;
+                if (iBit >= cExpBits)
+                    break;
+
+                /* Not done yet, square and mod the base again. */
+                rc = rtBigNumMagnitudeCopy(&TmpMultiplicand, &Pow2);
+                if (RT_SUCCESS(rc))
+                    rc = rtBigNumMagnitudeMultiply(&TmpProduct, &TmpMultiplicand, &TmpMultiplicand);
+                if (RT_SUCCESS(rc))
+                    rc = rtBigNumMagnitudeModulo(&Pow2, &TmpProduct, pModulus);
+                if (RT_FAILURE(rc))
+                    break;
+            }
+        }
+
+        /* The temporaries are released on every path, exactly as before. */
+        RTBigNumDestroy(&TmpMultiplicand);
+        RTBigNumDestroy(&TmpProduct);
+        RTBigNumDestroy(&Pow2);
+    }
+    return rc;
+}
+ + +RTDECL(int) RTBigNumModExp(PRTBIGNUM pResult, PRTBIGNUM pBase, PRTBIGNUM pExponent, PRTBIGNUM pModulus) +{ + Assert(pResult != pBase); Assert(pResult != pBase); Assert(pResult != pExponent); Assert(pResult != pModulus); + AssertReturn(pResult->fSensitive >= (pBase->fSensitive | pExponent->fSensitive | pModulus->fSensitive), + VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + rc = rtBigNumUnscramble((PRTBIGNUM)pBase); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pBase); + rc = rtBigNumUnscramble((PRTBIGNUM)pExponent); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pExponent); + rc = rtBigNumUnscramble((PRTBIGNUM)pModulus); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pModulus); + if (!pExponent->fNegative) + { + pResult->fNegative = pModulus->fNegative; /* pBase ^ pExponent / pModulus; result = remainder.
*/ + rc = rtBigNumMagnitudeModExp(pResult, pBase, pExponent, pModulus); + } + else + rc = VERR_BIGNUM_NEGATIVE_EXPONENT; + rtBigNumScramble((PRTBIGNUM)pModulus); + } + rtBigNumScramble((PRTBIGNUM)pExponent); + } + rtBigNumScramble((PRTBIGNUM)pBase); + } + rtBigNumScramble(pResult); + } + return rc; +} + diff --git a/src/VBox/Runtime/common/math/ceill.asm b/src/VBox/Runtime/common/math/ceill.asm new file mode 100644 index 00000000..b96f26c9 --- /dev/null +++ b/src/VBox/Runtime/common/math/ceill.asm @@ -0,0 +1,60 @@ +; $Id: ceill.asm $ +;; @file +; IPRT - No-CRT ceill - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE
+
+;;
+; Compute the smallest integral value not less than lrd.
+; @returns st(0)
+; @param    lrd     [xBP + xCB*2]
+BEGINPROC RT_NOCRT(ceill)
+        push    xBP
+        mov     xBP, xSP
+        sub     xSP, 10h                ; Scratch for the saved and the modified control word.
+
+        fld     tword [xBP + xCB*2]     ; ST(0) = lrd
+
+        ; Make it round up by modifying the fpu control word.
+        fstcw   [xBP - 10h]             ; Save the caller's control word.
+        mov     eax, [xBP - 10h]        ; Loads 32 bits; the and below drops everything above bit 15.
+        or      eax, 00800h             ; Set bit 11 ...
+        and     eax, 0fbffh             ; ... and clear bit 10: rounding control = 10b (round up).
+        mov     [xBP - 08h], eax
+        fldcw   [xBP - 08h]
+
+        ; Round ST(0) to integer.
+        frndint
+
+        ; Restore the fpu control word.
+        fldcw   [xBP - 10h]
+
+        leave
+        ret
+ENDPROC RT_NOCRT(ceill)
+
diff --git a/src/VBox/Runtime/common/math/cosl.asm b/src/VBox/Runtime/common/math/cosl.asm new file mode 100644 index 00000000..6c4f1921 --- /dev/null +++ b/src/VBox/Runtime/common/math/cosl.asm @@ -0,0 +1,62 @@ +; $Id: cosl.asm $ +;; @file +; IPRT - No-CRT cosl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; compute the cosine of lrd, measured in radians.
+; @returns st(0)
+; @param    lrd     [xBP + xCB*2]
+BEGINPROC RT_NOCRT(cosl)
+        push    xBP
+        mov     xBP, xSP
+        sub     xSP, 10h
+
+        fld     tword [xBP + xCB*2]     ; ST(0) = lrd
+        fcos
+        fnstsw  ax
+        test    ah, 4                   ; C2 (status word bit 10) set => operand out of range, ST(0) untouched.
+        jz      .done
+
+        ; Out of range: reduce the argument modulo 2*pi and try again.
+        fldpi
+        fadd    st0, st0                ; ST(0) = 2*pi, ST(1) = lrd
+        fxch    st1                     ; ST(0) = lrd,  ST(1) = 2*pi
+.again:
+        fprem1                          ; ST(0) = partial remainder of ST(0) / ST(1)
+        fnstsw  ax
+        test    ah, 4                   ; C2 set => reduction incomplete, iterate.
+        jnz     .again
+
+        ; Store the reduced argument over the 2*pi slot and pop, so that ST(0)
+        ; holds the reduced argument.  (Popping with 'fstp st0' would discard
+        ; it and leave 2*pi behind instead.)
+        fstp    st1
+        fcos
+
+.done:
+        leave
+        ret
+ENDPROC RT_NOCRT(cosl)
+
diff --git a/src/VBox/Runtime/common/math/fabs.asm b/src/VBox/Runtime/common/math/fabs.asm new file mode 100644 index 00000000..71896700 --- /dev/null +++ b/src/VBox/Runtime/common/math/fabs.asm @@ -0,0 +1,58 @@ +; $Id: fabs.asm $ +;; @file +; IPRT - No-CRT fabs - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the absolute value of rd (|rd|).
+; @returns 32-bit: st(0)   64-bit: xmm0
+; @param    rd      32-bit: [ebp + 8]   64-bit: xmm0
+BEGINPROC RT_NOCRT(fabs)
+        push    xBP
+        mov     xBP, xSP
+
+%ifdef RT_ARCH_AMD64
+        sub     xSP, 10h                ; Scratch slot for moving the value between SSE and x87.
+
+        movsd   [xSP], xmm0             ; Spill the double argument ...
+        fld     qword [xSP]             ; ... and load it onto the x87 stack.
+
+        fabs
+
+        fstp    qword [xSP]             ; Store the result as a double ...
+        movsd   xmm0, [xSP]             ; ... and return it in xmm0.
+
+%else
+        fld     qword [xBP + xCB*2]     ; Argument is on the stack; the result stays in ST(0).
+        fabs
+%endif
+
+        leave
+        ret
+ENDPROC RT_NOCRT(fabs)
+
diff --git a/src/VBox/Runtime/common/math/fabsf.asm b/src/VBox/Runtime/common/math/fabsf.asm new file mode 100644 index 00000000..6b209f15 --- /dev/null +++ b/src/VBox/Runtime/common/math/fabsf.asm @@ -0,0 +1,58 @@ +; $Id: fabsf.asm $ +;; @file +; IPRT - No-CRT fabsf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the absolute value of rf (|rf|).
+; @returns 32-bit: st(0)   64-bit: xmm0
+; @param    rf      32-bit: [ebp + 8]   64-bit: xmm0
+BEGINPROC RT_NOCRT(fabsf)
+        push    xBP
+        mov     xBP, xSP
+
+%ifdef RT_ARCH_AMD64
+        sub     xSP, 10h                ; Scratch slot for moving the value between SSE and x87.
+
+        ; The value is a single precision float, so move exactly 32 bits with
+        ; movss (as floorf does) rather than 64 bits with movsd.
+        movss   [xSP], xmm0
+        fld     dword [xSP]
+
+        fabs
+
+        fstp    dword [xSP]
+        movss   xmm0, [xSP]             ; Return the float in xmm0.
+
+%else
+        fld     dword [xBP + xCB*2]     ; Argument is on the stack; the result stays in ST(0).
+        fabs
+%endif
+
+        leave
+        ret
+ENDPROC RT_NOCRT(fabsf)
+
diff --git a/src/VBox/Runtime/common/math/fabsl.asm b/src/VBox/Runtime/common/math/fabsl.asm new file mode 100644 index 00000000..eba919e7 --- /dev/null +++ b/src/VBox/Runtime/common/math/fabsl.asm @@ -0,0 +1,46 @@ +; $Id: fabsl.asm $ +;; @file +; IPRT - No-CRT fabsl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the absolute value of lrd (|lrd|).
+; @returns st(0)
+; @param    lrd     [xBP + xCB*2]
+BEGINPROC RT_NOCRT(fabsl)
+        push    xBP
+        mov     xBP, xSP
+
+        fld     tword [xBP + xCB*2]     ; ST(0) = lrd
+        fabs
+
+.done:
+        leave
+        ret
+ENDPROC RT_NOCRT(fabsl)
+
diff --git a/src/VBox/Runtime/common/math/floor.asm b/src/VBox/Runtime/common/math/floor.asm new file mode 100644 index 00000000..474d005e --- /dev/null +++ b/src/VBox/Runtime/common/math/floor.asm @@ -0,0 +1,68 @@ +; $Id: floor.asm $ +;; @file +; IPRT - No-CRT floor - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE
+
+;;
+; Compute the largest integral value not greater than rd.
+; @returns 32-bit: st(0)   64-bit: xmm0
+; @param    rd      32-bit: [ebp + 8]   64-bit: xmm0
+BEGINPROC RT_NOCRT(floor)
+        push    xBP
+        mov     xBP, xSP
+        sub     xSP, 10h                ; Scratch: xSP now equals xBP - 10h.
+
+%ifdef RT_ARCH_AMD64
+        movsd   [xSP], xmm0             ; Spill the argument and load it onto the x87 stack.
+        fld     qword [xSP]
+%else
+        fld     qword [xBP + xCB*2]
+%endif
+
+        ; Make it round down by modifying the fpu control word.
+        fstcw   [xBP - 10h]             ; Save the caller's control word (reuses the spill slot, already consumed).
+        mov     eax, [xBP - 10h]        ; Loads 32 bits; the and below drops everything above bit 15.
+        or      eax, 00400h             ; Set bit 10 ...
+        and     eax, 0f7ffh             ; ... and clear bit 11: rounding control = 01b (round down).
+        mov     [xBP - 08h], eax
+        fldcw   [xBP - 08h]
+
+        ; Round ST(0) to integer.
+        frndint
+
+        ; Restore the fpu control word.
+        fldcw   [xBP - 10h]
+
+%ifdef RT_ARCH_AMD64
+        fstp    qword [xSP]             ; The control word is restored, so the slot can take the result.
+        movsd   xmm0, [xSP]
+%endif
+        leave
+        ret
+ENDPROC RT_NOCRT(floor)
+
diff --git a/src/VBox/Runtime/common/math/floorf.asm b/src/VBox/Runtime/common/math/floorf.asm new file mode 100644 index 00000000..10a1ba7a --- /dev/null +++ b/src/VBox/Runtime/common/math/floorf.asm @@ -0,0 +1,68 @@ +; $Id: floorf.asm $ +;; @file +; IPRT - No-CRT floorf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the largest integral value not greater than rf.
+; @returns 32-bit: st(0)   64-bit: xmm0
+; @param    rf      32-bit: [ebp + 8]   64-bit: xmm0
+BEGINPROC RT_NOCRT(floorf)
+        push    xBP
+        mov     xBP, xSP
+        sub     xSP, 10h                ; Scratch: xSP now equals xBP - 10h.
+
+%ifdef RT_ARCH_AMD64
+        movss   [xSP], xmm0             ; Spill the argument and load it onto the x87 stack.
+        fld     dword [xSP]
+%else
+        fld     dword [xBP + xCB*2]
+%endif
+
+        ; Make it round down by modifying the fpu control word.
+        fstcw   [xBP - 10h]             ; Save the caller's control word (reuses the spill slot, already consumed).
+        mov     eax, [xBP - 10h]        ; Loads 32 bits; the and below drops everything above bit 15.
+        or      eax, 00400h             ; Set bit 10 ...
+        and     eax, 0f7ffh             ; ... and clear bit 11: rounding control = 01b (round down).
+        mov     [xBP - 08h], eax
+        fldcw   [xBP - 08h]
+
+        ; Round ST(0) to integer.
+        frndint
+
+        ; Restore the fpu control word.
+        fldcw   [xBP - 10h]
+
+%ifdef RT_ARCH_AMD64
+        fstp    dword [xSP]             ; The control word is restored, so the slot can take the result.
+        movss   xmm0, [xSP]
+%endif
+        leave
+        ret
+ENDPROC RT_NOCRT(floorf)
+
diff --git a/src/VBox/Runtime/common/math/floorl.asm b/src/VBox/Runtime/common/math/floorl.asm new file mode 100644 index 00000000..612e786b --- /dev/null +++ b/src/VBox/Runtime/common/math/floorl.asm @@ -0,0 +1,59 @@ +; $Id: floorl.asm $ +;; @file +; IPRT - No-CRT floorl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both.
+;
+
+%include "iprt/asmdefs.mac"
+
+BEGINCODE
+
+;;
+; Compute the largest integral value not greater than lrd.
+; @returns st(0)
+; @param    lrd     [xBP + xCB*2]
+BEGINPROC RT_NOCRT(floorl)
+        push    xBP
+        mov     xBP, xSP
+        sub     xSP, 10h                ; Scratch for the saved and the modified control word.
+
+        fld     tword [xBP + xCB*2]     ; ST(0) = lrd
+
+        ; Make it round down by modifying the fpu control word.
+        fstcw   [xBP - 10h]             ; Save the caller's control word.
+        mov     eax, [xBP - 10h]        ; Loads 32 bits; the and below drops everything above bit 15.
+        or      eax, 00400h             ; Set bit 10 ...
+        and     eax, 0f7ffh             ; ... and clear bit 11: rounding control = 01b (round down).
+        mov     [xBP - 08h], eax
+        fldcw   [xBP - 08h]
+
+        ; Round ST(0) to integer.
+        frndint
+
+        ; Restore the fpu control word.
+        fldcw   [xBP - 10h]
+
+        leave
+        ret
+ENDPROC RT_NOCRT(floorl)
+
diff --git a/src/VBox/Runtime/common/math/gcc/Makefile.kup b/src/VBox/Runtime/common/math/gcc/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/Makefile.kup diff --git a/src/VBox/Runtime/common/math/gcc/adddi3.c b/src/VBox/Runtime/common/math/gcc/adddi3.c new file mode 100644 index 00000000..ce2b1d85 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/adddi3.c @@ -0,0 +1,63 @@ +/* $NetBSD: adddi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)adddi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: adddi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Add two quads. This is trivial since a one-bit carry from a single + * u_int addition x+y occurs if and only if the sum x+y is less than + * either x or y (the choice to compare with x or y is arbitrary). 
+ */ +quad_t +__adddi3(a, b) + quad_t a, b; +{ + union uu aa, bb, sum; + + aa.q = a; + bb.q = b; + sum.ul[L] = aa.ul[L] + bb.ul[L]; + sum.ul[H] = aa.ul[H] + bb.ul[H] + (sum.ul[L] < bb.ul[L]); + return (sum.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/anddi3.c b/src/VBox/Runtime/common/math/gcc/anddi3.c new file mode 100644 index 00000000..2f35ced8 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/anddi3.c @@ -0,0 +1,61 @@ +/* $NetBSD: anddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)anddi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: anddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return a & b, in quad. + */ +quad_t +__anddi3(a, b) + quad_t a, b; +{ + union uu aa, bb; + + aa.q = a; + bb.q = b; + aa.ul[0] &= bb.ul[0]; + aa.ul[1] &= bb.ul[1]; + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/ashldi3.c b/src/VBox/Runtime/common/math/gcc/ashldi3.c new file mode 100644 index 00000000..e7df3c18 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/ashldi3.c @@ -0,0 +1,70 @@ +/* $NetBSD: ashldi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)ashldi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: ashldi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Shift a (signed) quad value left (arithmetic shift left). + * This is the same as logical shift left! 
+ */ +quad_t +__ashldi3(a, shift) + quad_t a; + qshift_t shift; +{ + union uu aa; + + if (shift == 0) + return(a); + aa.q = a; + if (shift >= INT_BITS) { + aa.ul[H] = aa.ul[L] << (shift - INT_BITS); + aa.ul[L] = 0; + } else { + aa.ul[H] = (aa.ul[H] << shift) | + (aa.ul[L] >> (INT_BITS - shift)); + aa.ul[L] <<= shift; + } + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/ashrdi3.c b/src/VBox/Runtime/common/math/gcc/ashrdi3.c new file mode 100644 index 00000000..aaa1c71b --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/ashrdi3.c @@ -0,0 +1,82 @@ +/* $NetBSD: ashrdi3.c,v 1.10 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)ashrdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: ashrdi3.c,v 1.10 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Shift a (signed) quad value right (arithmetic shift right). + */ +quad_t +__ashrdi3(a, shift) + quad_t a; + qshift_t shift; +{ + union uu aa; + + if (shift == 0) + return(a); + aa.q = a; + if (shift >= INT_BITS) { + int s; + + /* + * Smear bits rightward using the machine's right-shift + * method, whether that is sign extension or zero fill, + * to get the `sign word' s. Note that shifting by + * INT_BITS is undefined, so we shift (INT_BITS-1), + * then 1 more, to get our answer. 
+ */ + /* LINTED inherits machine dependency */ + s = (aa.sl[H] >> (INT_BITS - 1)) >> 1; + /* LINTED inherits machine dependency*/ + aa.ul[L] = aa.sl[H] >> (shift - INT_BITS); + aa.ul[H] = s; + } else { + aa.ul[L] = (aa.ul[L] >> shift) | + (aa.ul[H] << (INT_BITS - shift)); + /* LINTED inherits machine dependency */ + aa.sl[H] >>= shift; + } + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/cmpdi2.c b/src/VBox/Runtime/common/math/gcc/cmpdi2.c new file mode 100644 index 00000000..c876eb4e --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/cmpdi2.c @@ -0,0 +1,62 @@ +/* $NetBSD: cmpdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)cmpdi2.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: cmpdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return 0, 1, or 2 as a <, =, > b respectively. + * Both a and b are considered signed---which means only the high word is + * signed. + */ +int +__cmpdi2(a, b) + quad_t a, b; +{ + union uu aa, bb; + + aa.q = a; + bb.q = b; + return (aa.sl[H] < bb.sl[H] ? 0 : aa.sl[H] > bb.sl[H] ? 2 : + aa.ul[L] < bb.ul[L] ? 0 : aa.ul[L] > bb.ul[L] ? 2 : 1); +} diff --git a/src/VBox/Runtime/common/math/gcc/divdi3.c b/src/VBox/Runtime/common/math/gcc/divdi3.c new file mode 100644 index 00000000..eecc17ad --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/divdi3.c @@ -0,0 +1,70 @@ +/* $NetBSD: divdi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)divdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: divdi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Divide two signed quads. + * ??? 
if -1/2 should produce -1 on this machine, this code is wrong + */ +quad_t +__divdi3(a, b) + quad_t a, b; +{ + u_quad_t ua, ub, uq; + int neg = 0; + + ua = a; + ub = b; + + if (a < 0) + ua = -ua, neg ^= 1; + if (b < 0) + ub = -ub, neg ^= 1; + + uq = __qdivrem(ua, ub, (u_quad_t *)0); + if (neg) + uq = - uq; + return uq; +} diff --git a/src/VBox/Runtime/common/math/gcc/iordi3.c b/src/VBox/Runtime/common/math/gcc/iordi3.c new file mode 100644 index 00000000..3d0a7eda --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/iordi3.c @@ -0,0 +1,61 @@ +/* $NetBSD: iordi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)iordi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: iordi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return a | b, in quad. + */ +quad_t +__iordi3(a, b) + quad_t a, b; +{ + union uu aa, bb; + + aa.q = a; + bb.q = b; + aa.ul[0] |= bb.ul[0]; + aa.ul[1] |= bb.ul[1]; + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/lshldi3.c b/src/VBox/Runtime/common/math/gcc/lshldi3.c new file mode 100644 index 00000000..611cb08d --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/lshldi3.c @@ -0,0 +1,70 @@ +/* $NetBSD: lshldi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)lshldi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: lshldi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Shift an (unsigned) quad value left (logical shift left). + * This is the same as arithmetic shift left! 
+ */ +quad_t +__lshldi3(a, shift) + quad_t a; + qshift_t shift; +{ + union uu aa; + + if (shift == 0) + return(a); + aa.q = a; + if (shift >= INT_BITS) { + aa.ul[H] = aa.ul[L] << (shift - INT_BITS); + aa.ul[L] = 0; + } else { + aa.ul[H] = (aa.ul[H] << shift) | + (aa.ul[L] >> (INT_BITS - shift)); + aa.ul[L] <<= shift; + } + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/lshrdi3.c b/src/VBox/Runtime/common/math/gcc/lshrdi3.c new file mode 100644 index 00000000..3dba60c3 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/lshrdi3.c @@ -0,0 +1,69 @@ +/* $NetBSD: lshrdi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)lshrdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: lshrdi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Shift an (unsigned) quad value right (logical shift right). + */ +quad_t +__lshrdi3(a, shift) + quad_t a; + qshift_t shift; +{ + union uu aa; + + if (shift == 0) + return(a); + aa.q = a; + if (shift >= INT_BITS) { + aa.ul[L] = aa.ul[H] >> (shift - INT_BITS); + aa.ul[H] = 0; + } else { + aa.ul[L] = (aa.ul[L] >> shift) | + (aa.ul[H] << (INT_BITS - shift)); + aa.ul[H] >>= shift; + } + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/moddi3.c b/src/VBox/Runtime/common/math/gcc/moddi3.c new file mode 100644 index 00000000..764ea01d --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/moddi3.c @@ -0,0 +1,70 @@ +/* $NetBSD: moddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)moddi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: moddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return remainder after dividing two signed quads. + * + * XXX we assume a % b < 0 iff a < 0, but this is actually machine-dependent. 
+ */ +quad_t +__moddi3(a, b) + quad_t a, b; +{ + u_quad_t ua, ub, ur; + int neg = 0; + + ua = a; + ub = b; + + if (a < 0) + ua = -ua, neg ^= 1; + if (b < 0) + ub = -ub; + (void)__qdivrem(ua, ub, &ur); + if (neg) + ur = -ur; + return (ur); +} diff --git a/src/VBox/Runtime/common/math/gcc/muldi3.c b/src/VBox/Runtime/common/math/gcc/muldi3.c new file mode 100644 index 00000000..370ef3d2 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/muldi3.c @@ -0,0 +1,249 @@ +/* $NetBSD: muldi3.c,v 1.10 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)muldi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: muldi3.c,v 1.10 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Multiply two quads. + * + * Our algorithm is based on the following. Split incoming quad values + * u and v (where u,v >= 0) into + * + * u = 2^n u1 + u0 (n = number of bits in `u_int', usu. 32) + * + * and + * + * v = 2^n v1 + v0 + * + * Then + * + * uv = 2^2n u1 v1 + 2^n u1 v0 + 2^n v1 u0 + u0 v0 + * = 2^2n u1 v1 + 2^n (u1 v0 + v1 u0) + u0 v0 + * + * Now add 2^n u1 v1 to the first term and subtract it from the middle, + * and add 2^n u0 v0 to the last term and subtract it from the middle. + * This gives: + * + * uv = (2^2n + 2^n) (u1 v1) + + * (2^n) (u1 v0 - u1 v1 + u0 v1 - u0 v0) + + * (2^n + 1) (u0 v0) + * + * Factoring the middle a bit gives us: + * + * uv = (2^2n + 2^n) (u1 v1) + [u1v1 = high] + * (2^n) (u1 - u0) (v0 - v1) + [(u1-u0)... = mid] + * (2^n + 1) (u0 v0) [u0v0 = low] + * + * The terms (u1 v1), (u1 - u0) (v0 - v1), and (u0 v0) can all be done + * in just half the precision of the original. (Note that either or both + * of (u1 - u0) or (v0 - v1) may be negative.) + * + * This algorithm is from Knuth vol. 2 (2nd ed), section 4.3.3, p. 278.
+ * + * Since C does not give us a `int * int = quad' operator, we split + * our input quads into two ints, then split the two ints into two + * shorts. We can then calculate `short * short = int' in native + * arithmetic. + * + * Our product should, strictly speaking, be a `long quad', with 128 + * bits, but we are going to discard the upper 64. In other words, + * we are not interested in uv, but rather in (uv mod 2^2n). This + * makes some of the terms above vanish, and we get: + * + * (2^n)(high) + (2^n)(mid) + (2^n + 1)(low) + * + * or + * + * (2^n)(high + mid + low) + low + * + * Furthermore, `high' and `mid' can be computed mod 2^n, as any factor + * of 2^n in either one will also vanish. Only `low' need be computed + * mod 2^2n, and only because of the final term above. + */ +static quad_t __lmulq(u_int, u_int); + +quad_t +__muldi3(a, b) + quad_t a, b; +{ + union uu u, v, low, prod; + u_int high, mid, udiff, vdiff; + int negall, negmid; +#define u1 u.ul[H] +#define u0 u.ul[L] +#define v1 v.ul[H] +#define v0 v.ul[L] + + /* + * Get u and v such that u, v >= 0. When this is finished, + * u1, u0, v1, and v0 will be directly accessible through the + * int fields. + */ + if (a >= 0) + u.q = a, negall = 0; + else + u.q = -a, negall = 1; + if (b >= 0) + v.q = b; + else + v.q = -b, negall ^= 1; + + if (u1 == 0 && v1 == 0) { + /* + * An (I hope) important optimization occurs when u1 and v1 + * are both 0. This should be common since most numbers + * are small. Here the product is just u0*v0. + */ + prod.q = __lmulq(u0, v0); + } else { + /* + * Compute the three intermediate products, remembering + * whether the middle term is negative. We can discard + * any upper bits in high and mid, so we can use native + * u_int * u_int => u_int arithmetic. 
+ */ + low.q = __lmulq(u0, v0); + + if (u1 >= u0) + negmid = 0, udiff = u1 - u0; + else + negmid = 1, udiff = u0 - u1; + if (v0 >= v1) + vdiff = v0 - v1; + else + vdiff = v1 - v0, negmid ^= 1; + mid = udiff * vdiff; + + high = u1 * v1; + + /* + * Assemble the final product. + */ + prod.ul[H] = high + (negmid ? -mid : mid) + low.ul[L] + + low.ul[H]; + prod.ul[L] = low.ul[L]; + } + return (negall ? -prod.q : prod.q); +#undef u1 +#undef u0 +#undef v1 +#undef v0 +} + +/* + * Multiply two 2N-bit ints to produce a 4N-bit quad, where N is half + * the number of bits in an int (whatever that is---the code below + * does not care as long as quad.h does its part of the bargain---but + * typically N==16). + * + * We use the same algorithm from Knuth, but this time the modulo refinement + * does not apply. On the other hand, since N is half the size of an int, + * we can get away with native multiplication---none of our input terms + * exceeds (UINT_MAX >> 1). + * + * Note that, for u_int l, the quad-precision result + * + * l << N + * + * splits into high and low ints as HHALF(l) and LHUP(l) respectively. + */ +static quad_t +__lmulq(u_int u, u_int v) +{ + u_int u1, u0, v1, v0, udiff, vdiff, high, mid, low; + u_int prodh, prodl, was; + union uu prod; + int neg; + + u1 = HHALF(u); + u0 = LHALF(u); + v1 = HHALF(v); + v0 = LHALF(v); + + low = u0 * v0; + + /* This is the same small-number optimization as before. 
*/ + if (u1 == 0 && v1 == 0) + return (low); + + if (u1 >= u0) + udiff = u1 - u0, neg = 0; + else + udiff = u0 - u1, neg = 1; + if (v0 >= v1) + vdiff = v0 - v1; + else + vdiff = v1 - v0, neg ^= 1; + mid = udiff * vdiff; + + high = u1 * v1; + + /* prod = (high << 2N) + (high << N); */ + prodh = high + HHALF(high); + prodl = LHUP(high); + + /* if (neg) prod -= mid << N; else prod += mid << N; */ + if (neg) { + was = prodl; + prodl -= LHUP(mid); + prodh -= HHALF(mid) + (prodl > was); + } else { + was = prodl; + prodl += LHUP(mid); + prodh += HHALF(mid) + (prodl < was); + } + + /* prod += low << N */ + was = prodl; + prodl += LHUP(low); + prodh += HHALF(low) + (prodl < was); + /* ... + low; */ + if ((prodl += low) < low) + prodh++; + + /* return 4N-bit product */ + prod.ul[H] = prodh; + prod.ul[L] = prodl; + return (prod.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/negdi2.c b/src/VBox/Runtime/common/math/gcc/negdi2.c new file mode 100644 index 00000000..2eafcffa --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/negdi2.c @@ -0,0 +1,60 @@ +/* $NetBSD: negdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)negdi2.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: negdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return -a (or, equivalently, 0 - a), in quad. See subdi3.c. + */ +quad_t +__negdi2(a) + quad_t a; +{ + union uu aa, res; + + aa.q = a; + res.ul[L] = -aa.ul[L]; + res.ul[H] = -aa.ul[H] - (res.ul[L] > 0); + return (res.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/notdi2.c b/src/VBox/Runtime/common/math/gcc/notdi2.c new file mode 100644 index 00000000..c671e037 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/notdi2.c @@ -0,0 +1,61 @@ +/* $NetBSD: notdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)notdi2.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: notdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return ~a. 
For some reason gcc calls this `one's complement' rather + * than `not'. + */ +quad_t +__one_cmpldi2(a) + quad_t a; +{ + union uu aa; + + aa.q = a; + aa.ul[0] = ~aa.ul[0]; + aa.ul[1] = ~aa.ul[1]; + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/qdivrem.c b/src/VBox/Runtime/common/math/gcc/qdivrem.c new file mode 100644 index 00000000..7ca2d38c --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/qdivrem.c @@ -0,0 +1,285 @@ +/* $NetBSD: qdivrem.c,v 1.12 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)qdivrem.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: qdivrem.c,v 1.12 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ + +#include "quad.h" + +#define B ((int)1 << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((u_int)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B: use unsigned short if they fit */ +#if UINT_MAX == 0xffffffffU && USHRT_MAX >= 0xffff +typedef unsigned short digit; +#else +typedef u_int digit; +#endif + +static void shl __P((digit *p, int len, int sh)); + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within u_int. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +u_quad_t +__qdivrem(uq, vq, arq) + u_quad_t uq, vq, *arq; +{ + union uu tmp; + digit *u, *v, *q; + digit v1, v2; + u_int qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. 
*/ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return (tmp.q); + } + if (uq < vq) { + if (arq) + *arq = uq; + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = (digit)HHALF(tmp.ul[H]); + u[2] = (digit)LHALF(tmp.ul[H]); + u[3] = (digit)HHALF(tmp.ul[L]); + u[4] = (digit)LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = (digit)HHALF(tmp.ul[H]); + v[2] = (digit)LHALF(tmp.ul[H]); + v[3] = (digit)HHALF(tmp.ul[L]); + v[4] = (digit)LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + u_int rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = (digit)(u[1] / t); + rbj = COMBINE(u[1] % t, u[2]); + q2 = (digit)(rbj / t); + rbj = COMBINE(rbj % t, u[3]); + q3 = (digit)(rbj / t); + rbj = COMBINE(rbj % t, u[4]); + q4 = (digit)(rbj / t); + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return (tmp.q); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. 
+ * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + u_int nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { + qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = (digit)LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = (digit)LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = (digit)LHALF(t); + t = HHALF(t); + } + u[j] = (digit)LHALF(u[j] + t); + } + q[j] = (digit)qhat; + } while (++j <= m); /* D7: loop on j. 
*/ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (digit)(((u_int)u[i] >> d) | + LHALF((u_int)u[i - 1] << (HALF_BITS - d))); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.q); +} + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + */ +static void +shl(digit *p, int len, int sh) +{ + int i; + + for (i = 0; i < len; i++) + p[i] = (digit)(LHALF((u_int)p[i] << sh) | + ((u_int)p[i + 1] >> (HALF_BITS - sh))); + p[i] = (digit)(LHALF((u_int)p[i] << sh)); +} diff --git a/src/VBox/Runtime/common/math/gcc/quad.h b/src/VBox/Runtime/common/math/gcc/quad.h new file mode 100644 index 00000000..c4197795 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/quad.h @@ -0,0 +1,174 @@ +/* $NetBSD: quad.h,v 1.17 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)quad.h 8.1 (Berkeley) 6/4/93 + */ + +#ifndef IPRT_INCLUDED_COMMON_MATH_quad_h +#define IPRT_INCLUDED_COMMON_MATH_quad_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +/* + * Quad arithmetic. + * + * This library makes the following assumptions: + * + * - The type long long (aka quad_t) exists. + * + * - A quad variable is exactly twice as long as `int'. + * + * - The machine's arithmetic is two's complement. + * + * This library can provide 128-bit arithmetic on a machine with 128-bit + * quads and 64-bit ints, for instance, or 96-bit arithmetic on machines + * with 48-bit ints. 
+ */ + +#if 0 /* iprt */ +#include <sys/types.h> +#if !defined(_KERNEL) && !defined(_STANDALONE) +#include <limits.h> +#else +#include <machine/limits.h> +#endif +#else /* iprt */ +# include <iprt/types.h> +# include <iprt/nocrt/limits.h> +# undef __P +# define __P(a) a +# undef __GNUC_PREREQ__ +# define __GNUC_PREREQ__(m1,m2) 1 +# if 1 /* ASSUMES: little endian */ +# define _QUAD_HIGHWORD 1 +# define _QUAD_LOWWORD 0 +# else +# define _QUAD_HIGHWORD 0 +# define _QUAD_LOWWORD 1 +# endif +# if !defined(RT_OS_LINUX) || !defined(__KERNEL__) /* (linux/types.h defines u_int) */ + typedef unsigned int u_int; +# endif +# if !defined(RT_OS_SOLARIS) + typedef int64_t quad_t; +# else +# define quad_t int64_t +# endif + typedef uint64_t u_quad_t; + typedef quad_t *qaddr_t; +#endif /* iprt */ + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + quad_t q; /* as a (signed) quad */ + u_quad_t uq; /* as an unsigned quad */ + int sl[2]; /* as two signed ints */ + u_int ul[2]; /* as two unsigned ints */ +}; + +/* + * Define high and low parts of a quad_t. + */ +#define H _QUAD_HIGHWORD +#define L _QUAD_LOWWORD + +/* + * Total number of bits in a quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +#define QUAD_BITS (sizeof(quad_t) * CHAR_BIT) +#define INT_BITS (sizeof(int) * CHAR_BIT) +#define HALF_BITS (sizeof(int) * CHAR_BIT / 2) + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of long, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_int/2)). (`x' must actually be u_int.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(int)*CHAR_BIT/2). 
+ */ +#define HHALF(x) ((u_int)(x) >> HALF_BITS) +#define LHALF(x) ((u_int)(x) & (((int)1 << HALF_BITS) - 1)) +#define LHUP(x) ((u_int)(x) << HALF_BITS) + +/* + * XXX + * Compensate for gcc 1 vs gcc 2. Gcc 1 defines ?sh?di3's second argument + * as u_quad_t, while gcc 2 correctly uses int. Unfortunately, we still use + * both compilers. + */ +#if __GNUC_PREREQ__(2, 0) || defined(lint) +typedef unsigned int qshift_t; +#else +typedef u_quad_t qshift_t; +#endif + +RT_C_DECLS_BEGIN +quad_t __adddi3 __P((quad_t, quad_t)); +quad_t __anddi3 __P((quad_t, quad_t)); +quad_t __ashldi3 __P((quad_t, qshift_t)); +quad_t __ashrdi3 __P((quad_t, qshift_t)); +int __cmpdi2 __P((quad_t, quad_t )); +quad_t __divdi3 __P((quad_t, quad_t)); +quad_t __fixdfdi __P((double)); +quad_t __fixsfdi __P((float)); +u_quad_t __fixunsdfdi __P((double)); +u_quad_t __fixunssfdi __P((float)); +double __floatdidf __P((quad_t)); +float __floatdisf __P((quad_t)); +double __floatunsdidf __P((u_quad_t)); +quad_t __iordi3 __P((quad_t, quad_t)); +quad_t __lshldi3 __P((quad_t, qshift_t)); +quad_t __lshrdi3 __P((quad_t, qshift_t)); +quad_t __moddi3 __P((quad_t, quad_t)); +quad_t __muldi3 __P((quad_t, quad_t)); +quad_t __negdi2 __P((quad_t)); +quad_t __one_cmpldi2 __P((quad_t)); +u_quad_t __qdivrem __P((u_quad_t, u_quad_t, u_quad_t *)); +quad_t __subdi3 __P((quad_t, quad_t)); +int __ucmpdi2 __P((u_quad_t, u_quad_t)); +u_quad_t __udivdi3 __P((u_quad_t, u_quad_t )); +u_quad_t __umoddi3 __P((u_quad_t, u_quad_t )); +quad_t __xordi3 __P((quad_t, quad_t)); +RT_C_DECLS_END + +#endif + diff --git a/src/VBox/Runtime/common/math/gcc/subdi3.c b/src/VBox/Runtime/common/math/gcc/subdi3.c new file mode 100644 index 00000000..2751acc2 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/subdi3.c @@ -0,0 +1,62 @@ +/* $NetBSD: subdi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)subdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: subdi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Subtract two quad values. This is trivial since a one-bit carry + * from a single u_int difference x-y occurs if and only if (x-y) > x. + */ +quad_t +__subdi3(a, b) + quad_t a, b; +{ + union uu aa, bb, diff; + + aa.q = a; + bb.q = b; + diff.ul[L] = aa.ul[L] - bb.ul[L]; + diff.ul[H] = aa.ul[H] - bb.ul[H] - (diff.ul[L] > aa.ul[L]); + return (diff.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/ucmpdi2.c b/src/VBox/Runtime/common/math/gcc/ucmpdi2.c new file mode 100644 index 00000000..47d79164 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/ucmpdi2.c @@ -0,0 +1,61 @@ +/* $NetBSD: ucmpdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)ucmpdi2.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: ucmpdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return 0, 1, or 2 as a <, =, > b respectively. + * Neither a nor b are considered signed. + */ +int +__ucmpdi2(a, b) + u_quad_t a, b; +{ + union uu aa, bb; + + aa.uq = a; + bb.uq = b; + return (aa.ul[H] < bb.ul[H] ? 0 : aa.ul[H] > bb.ul[H] ? 2 : + aa.ul[L] < bb.ul[L] ? 0 : aa.ul[L] > bb.ul[L] ? 2 : 1); +} diff --git a/src/VBox/Runtime/common/math/gcc/udivdi3.c b/src/VBox/Runtime/common/math/gcc/udivdi3.c new file mode 100644 index 00000000..9069f4d2 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/udivdi3.c @@ -0,0 +1,56 @@ +/* $NetBSD: udivdi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)udivdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: udivdi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Divide two unsigned quads. 
+ */ +u_quad_t +__udivdi3(a, b) + u_quad_t a, b; +{ + + return (__qdivrem(a, b, (u_quad_t *)0)); +} diff --git a/src/VBox/Runtime/common/math/gcc/udivmoddi4.c b/src/VBox/Runtime/common/math/gcc/udivmoddi4.c new file mode 100644 index 00000000..771fd071 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/udivmoddi4.c @@ -0,0 +1,55 @@ +/* $Id: udivmoddi4.c $ */ +/** @file + * IPRT - __udivmoddi4 implementation + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL) only, as it comes in the "COPYING.CDDL" file of the + * VirtualBox OSE distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + */ + +#include <iprt/stdint.h> +#include <iprt/uint64.h> + +uint64_t __udivmoddi4(uint64_t u64A, uint64_t u64B, uint64_t *pu64R); + +/** + * __udivmoddi4() implementation to satisfy external references from 32-bit + * code generated by gcc-7 or later. + * + * @param u64A The divident value. + * @param u64B The divisor value. + * @param pu64R A pointer to the reminder. May be NULL. 
+ * @returns u64A / u64B + */ +uint64_t __udivmoddi4(uint64_t u64A, uint64_t u64B, uint64_t *pu64R) +{ + RTUINT64U Divident; + RTUINT64U Divisor; + RTUINT64U Quotient; + RTUINT64U Reminder; + Divident.u = u64A; + Divisor.u = u64B; + Quotient.u = 0; /* shut up gcc 10 */ + Reminder.u = 0; /* shut up gcc 10 */ + RTUInt64DivRem(&Quotient, &Reminder, &Divident, &Divisor); + if (pu64R) + *pu64R = Reminder.u; + return Quotient.u; +} diff --git a/src/VBox/Runtime/common/math/gcc/umoddi3.c b/src/VBox/Runtime/common/math/gcc/umoddi3.c new file mode 100644 index 00000000..2e65ecab --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/umoddi3.c @@ -0,0 +1,58 @@ +/* $NetBSD: umoddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)umoddi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: umoddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return remainder after dividing two unsigned quads. + */ +u_quad_t +__umoddi3(a, b) + u_quad_t a, b; +{ + u_quad_t r; + + (void)__qdivrem(a, b, &r); + return (r); +} diff --git a/src/VBox/Runtime/common/math/gcc/xordi3.c b/src/VBox/Runtime/common/math/gcc/xordi3.c new file mode 100644 index 00000000..aa5db229 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/xordi3.c @@ -0,0 +1,61 @@ +/* $NetBSD: xordi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)xordi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: xordi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return a ^ b, in quad. + */ +quad_t +__xordi3(a, b) + quad_t a, b; +{ + union uu aa, bb; + + aa.q = a; + bb.q = b; + aa.ul[0] ^= bb.ul[0]; + aa.ul[1] ^= bb.ul[1]; + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/ldexpl.asm b/src/VBox/Runtime/common/math/ldexpl.asm new file mode 100644 index 00000000..112124f1 --- /dev/null +++ b/src/VBox/Runtime/common/math/ldexpl.asm @@ -0,0 +1,55 @@ +; $Id: ldexpl.asm $ +;; @file +; IPRT - No-CRT ldexpl - AMD64 & X86. 
+; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Computes lrd * 2^exp +; @returns st(0) +; @param lrd [rbp + xCB*2] +; @param exp [ebp + 14h] gcc:edi msc:ecx +BEGINPROC RT_NOCRT(ldexpl) + push xBP + mov xBP, xSP + sub xSP, 10h + + ; load exp +%ifdef RT_ARCH_AMD64 ; ASSUMES ONLY GCC HERE! + mov [rsp], edi + fild dword [rsp] +%else + fild dword [ebp + xCB*2 + RTLRD_CB] +%endif + fld tword [xBP + xCB*2] + fscale + fstp st1 + + leave + ret +ENDPROC RT_NOCRT(ldexpl) + diff --git a/src/VBox/Runtime/common/math/llrint.asm b/src/VBox/Runtime/common/math/llrint.asm new file mode 100644 index 00000000..1c9ee326 --- /dev/null +++ b/src/VBox/Runtime/common/math/llrint.asm @@ -0,0 +1,53 @@ +; $Id: llrint.asm $ +;; @file +; IPRT - No-CRT llrint - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. 
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round rd to the nearest integer value, rounding according to the current rounding direction. +; @returns 32-bit: edx:eax 64-bit: rax +; @param rd 32-bit: [esp + 4h] 64-bit: xmm0 +BEGINPROC RT_NOCRT(llrint) +%ifdef RT_ARCH_AMD64 + cvtsd2si rax, xmm0 +%else + push ebp + mov ebp, esp + sub esp, 8h + + fld qword [ebp + 8h] + fistp qword [esp] + fwait + mov eax, [esp] + mov edx, [esp + 4] + + leave +%endif + ret +ENDPROC RT_NOCRT(llrint) + diff --git a/src/VBox/Runtime/common/math/llrintf.asm b/src/VBox/Runtime/common/math/llrintf.asm new file mode 100644 index 00000000..92174463 --- /dev/null +++ b/src/VBox/Runtime/common/math/llrintf.asm @@ -0,0 +1,53 @@ +; $Id: llrintf.asm $ +;; @file +; IPRT - No-CRT llrintf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. 
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round rd to the nearest integer value, rounding according to the current rounding direction. +; @returns 32-bit: edx:eax 64-bit: rax +; @param rf 32-bit: [esp + 4h] 64-bit: xmm0 +BEGINPROC RT_NOCRT(llrintf) +%ifdef RT_ARCH_AMD64 + cvtss2si rax, xmm0 +%else + push ebp + mov ebp, esp + sub esp, 8h + + fld dword [ebp + 8h] + fistp qword [esp] + fwait + mov eax, [esp] + mov edx, [esp + 4] + + leave +%endif + ret +ENDPROC RT_NOCRT(llrintf) + diff --git a/src/VBox/Runtime/common/math/llrintl.asm b/src/VBox/Runtime/common/math/llrintl.asm new file mode 100644 index 00000000..7ae72939 --- /dev/null +++ b/src/VBox/Runtime/common/math/llrintl.asm @@ -0,0 +1,53 @@ +; $Id: llrintl.asm $ +;; @file +; IPRT - No-CRT llrintl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. 
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round rd to the nearest integer value, rounding according to the current rounding direction. +; @returns 32-bit: edx:eax 64-bit: rax +; @param lrd [rbp + xCB*2] +BEGINPROC RT_NOCRT(llrintl) + push xBP + mov xBP, xSP + sub xSP, 10h + + fld tword [xBP + xCB*2] + fistp qword [xSP] + fwait +%ifdef RT_ARCH_AMD64 + mov rax, [xSP] +%else + mov eax, [xSP] + mov edx, [xSP + 4] +%endif + + leave + ret +ENDPROC RT_NOCRT(llrintl) + diff --git a/src/VBox/Runtime/common/math/logl.asm b/src/VBox/Runtime/common/math/logl.asm new file mode 100644 index 00000000..392a10ff --- /dev/null +++ b/src/VBox/Runtime/common/math/logl.asm @@ -0,0 +1,65 @@ +; $Id: logl.asm $ +;; @file +; IPRT - No-CRT logl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. 
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the natural logarithm of lrd. +; @returns st(0) +; @param lrd [xBP + xCB*2] +BEGINPROC RT_NOCRT(logl) + push xBP + mov xBP, xSP + sub xSP, 10h + + fldln2 ; st0=log(2) + fld tword [xBP + xCB*2] ; st1=log(2) st0=lrd + fld st0 ; st2=log(2) st1=lrd st0=lrd + fsub qword [.one xWrtRIP] ; st2=log(2) st1=lrd st0=lrd-1.0 + fld st0 ; st3=log(2) st2=lrd st1=lrd-1.0 st0=lrd-1.0 + fabs ; st3=log(2) st2=lrd st1=lrd-1.0 st0=abs(lrd-1.0) + fcomp qword [.limit xWrtRIP] ; st2=log(2) st1=lrd st0=lrd-1.0 + fnstsw ax + and eax, 04500h ; keep only the C3, C2 and C0 condition bits + jnz .use_st1 ; any bit set: abs(lrd-1.0) is not greater than .limit + + fstp st0 ; st1=log(2) st0=lrd + fyl2x ; log(lrd) + jmp .done + +.use_st1: + fstp st1 ; st1=log(2) st0=lrd-1.0 + fyl2xp1 ; log(lrd), computed from lrd-1.0 + +.done: + leave + ret +.one: dq 1.0 +.limit: dq 0.29 +ENDPROC RT_NOCRT(logl) + diff --git a/src/VBox/Runtime/common/math/lrint.asm b/src/VBox/Runtime/common/math/lrint.asm new file mode 100644 index 00000000..47b5bf5c --- /dev/null +++ b/src/VBox/Runtime/common/math/lrint.asm @@ -0,0 +1,52 @@ +; $Id: lrint.asm $ +;; @file +; IPRT - No-CRT lrint - AMD64 & X86.
+; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round rd to the nearest integer value, rounding according to the current rounding direction. +; @returns 32-bit: eax 64-bit: rax +; @param rd 32-bit: [esp + 4h] 64-bit: xmm0 +BEGINPROC RT_NOCRT(lrint) +%ifdef RT_ARCH_AMD64 + cvtsd2si rax, xmm0 ; double precision value in xmm0 -> 64-bit integer in rax +%else + push ebp + mov ebp, esp + sub esp, 8h ; scratch space for the integer result + + fld qword [ebp + 8h] ; st0 = rd + fistp dword [esp] ; store st0 as a 32-bit integer and pop it + fwait + mov eax, [esp] + + leave +%endif + ret +ENDPROC RT_NOCRT(lrint) + diff --git a/src/VBox/Runtime/common/math/lrintf.asm b/src/VBox/Runtime/common/math/lrintf.asm new file mode 100644 index 00000000..026355ed --- /dev/null +++ b/src/VBox/Runtime/common/math/lrintf.asm @@ -0,0 +1,52 @@ +; $Id: lrintf.asm $ +;; @file +; IPRT - No-CRT lrintf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org.
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round rf to the nearest integer value, rounding according to the current rounding direction. +; @returns 32-bit: eax 64-bit: rax +; @param rf 32-bit: [esp + 4h] 64-bit: xmm0 +BEGINPROC RT_NOCRT(lrintf) +%ifdef RT_ARCH_AMD64 + cvtss2si rax, xmm0 ; single precision value in xmm0 -> 64-bit integer in rax +%else + push ebp + mov ebp, esp + sub esp, 8h ; scratch space for the integer result + + fld dword [ebp + 8h] ; st0 = rf + fistp dword [esp] ; store st0 as a 32-bit integer and pop it + fwait + mov eax, [esp] + + leave +%endif + ret +ENDPROC RT_NOCRT(lrintf) + diff --git a/src/VBox/Runtime/common/math/lrintl.asm b/src/VBox/Runtime/common/math/lrintl.asm new file mode 100644 index 00000000..36287c93 --- /dev/null +++ b/src/VBox/Runtime/common/math/lrintl.asm @@ -0,0 +1,54 @@ +; $Id: lrintl.asm $ +;; @file +; IPRT - No-CRT lrintl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org.
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round lrd to the nearest integer value, rounding according to the current rounding direction. +; @returns 32-bit: eax 64-bit: rax +; @param lrd [xBP + xCB*2] +BEGINPROC RT_NOCRT(lrintl) + push xBP + mov xBP, xSP + sub xSP, 10h ; scratch space for the integer result + + fld tword [xBP + xCB*2] ; st0 = lrd +%ifdef RT_ARCH_AMD64 + fistp qword [xSP] ; store st0 as a 64-bit integer and pop it + fwait + mov rax, [xSP] +%else + fistp dword [xSP] ; store st0 as a 32-bit integer and pop it + fwait + mov eax, [xSP] +%endif + + leave + ret +ENDPROC RT_NOCRT(lrintl) + diff --git a/src/VBox/Runtime/common/math/remainder.asm b/src/VBox/Runtime/common/math/remainder.asm new file mode 100644 index 00000000..13d628c0 --- /dev/null +++ b/src/VBox/Runtime/common/math/remainder.asm @@ -0,0 +1,67 @@ +; $Id: remainder.asm $ +;; @file +; IPRT - No-CRT remainder - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org.
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Remainder of rd1 / rd2 as computed by FPREM1, see SUS. +; @returns 32-bit: st(0) 64-bit: xmm0 +; @param rd1 [ebp + 8h] xmm0 +; @param rd2 [ebp + 10h] xmm1 +BEGINPROC RT_NOCRT(remainder) + push xBP + mov xBP, xSP + sub xSP, 20h +;int3 + +%ifdef RT_ARCH_AMD64 + movsd [rsp + 10h], xmm1 + movsd [rsp], xmm0 + fld qword [rsp + 10h] ; st0 = rd2 + fld qword [rsp] ; st1 = rd2 st0 = rd1 +%else + fld qword [ebp + 10h] ; st0 = rd2 + fld qword [ebp + 8h] ; st1 = rd2 st0 = rd1 +%endif + +.again: + fprem1 ; st0 = (partial) remainder of st0 / st1 + fstsw ax + test ah, 04h ; C2 set means the reduction is incomplete + jnz .again ; keep reducing the partial remainder until it is complete + fstp st1 ; drop the divisor, the remainder stays in st0 + +%ifdef RT_ARCH_AMD64 + fstp qword [rsp] ; move the result from the x87 stack into xmm0 + movsd xmm0, [rsp] +%endif + + leave + ret +ENDPROC RT_NOCRT(remainder) + diff --git a/src/VBox/Runtime/common/math/remainderf.asm b/src/VBox/Runtime/common/math/remainderf.asm new file mode 100644 index 00000000..0f9a3f76 --- /dev/null +++ b/src/VBox/Runtime/common/math/remainderf.asm @@ -0,0 +1,66 @@ +; $Id: remainderf.asm $ +;; @file +; IPRT - No-CRT remainderf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org.
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Remainder of rf1 / rf2 as computed by FPREM1, see SUS. +; @returns 32-bit: st(0) 64-bit: xmm0 +; @param rf1 [ebp + 08h] xmm0 +; @param rf2 [ebp + 0ch] xmm1 +BEGINPROC RT_NOCRT(remainderf) + push xBP + mov xBP, xSP + sub xSP, 20h + +%ifdef RT_ARCH_AMD64 + movss [rsp], xmm1 + movss [rsp + 10h], xmm0 + fld dword [rsp] ; st0 = rf2 + fld dword [rsp + 10h] ; st1 = rf2 st0 = rf1 +%else + fld dword [ebp + 0ch] ; st0 = rf2 + fld dword [ebp + 8h] ; st1 = rf2 st0 = rf1 +%endif + +.again: + fprem1 ; st0 = (partial) remainder of st0 / st1 + fstsw ax + test ah, 04h ; C2 set means the reduction is incomplete + jnz .again ; keep reducing the partial remainder until it is complete + fstp st1 ; drop the divisor, the remainder stays in st0 + +%ifdef RT_ARCH_AMD64 + fstp dword [rsp] ; move the result from the x87 stack into xmm0 + movss xmm0, [rsp] +%endif + + leave + ret +ENDPROC RT_NOCRT(remainderf) + diff --git a/src/VBox/Runtime/common/math/remainderl.asm b/src/VBox/Runtime/common/math/remainderl.asm new file mode 100644 index 00000000..6a976dc9 --- /dev/null +++ b/src/VBox/Runtime/common/math/remainderl.asm @@ -0,0 +1,58 @@ +; $Id: remainderl.asm $ +;; @file +; IPRT - No-CRT remainderl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org.
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Remainder of lrd1 / lrd2 as computed by FPREM1, see SUS. +; @returns st(0) +; @param lrd1 [rbp + 10h] +; @param lrd2 [rbp + 20h] +BEGINPROC RT_NOCRT(remainderl) + push xBP + mov xBP, xSP + +%ifdef RT_ARCH_AMD64 + fld tword [rbp + 10h + RTLRD_CB] ; st0 = lrd2 + fld tword [rbp + 10h] ; st1 = lrd2 st0 = lrd1 +%else + fld tword [ebp + 8h + RTLRD_CB] ; st0 = lrd2 + fld tword [ebp + 8h] ; st1 = lrd2 st0 = lrd1 +%endif + +.again: + fprem1 ; st0 = (partial) remainder of st0 / st1 + fstsw ax + test ah, 04h ; C2 set means the reduction is incomplete + jnz .again ; keep reducing the partial remainder until it is complete + fstp st1 ; drop the divisor, the remainder stays in st0 + + leave + ret +ENDPROC RT_NOCRT(remainderl) + diff --git a/src/VBox/Runtime/common/math/sinl.asm b/src/VBox/Runtime/common/math/sinl.asm new file mode 100644 index 00000000..f38eedd8 --- /dev/null +++ b/src/VBox/Runtime/common/math/sinl.asm @@ -0,0 +1,61 @@ +; $Id: sinl.asm $ +;; @file +; IPRT - No-CRT sinl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org.
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the sine of lrd +; @returns st(0) +; @param lrd [xBP + xCB*2] +BEGINPROC RT_NOCRT(sinl) + push xBP + mov xBP, xSP + sub xSP, 10h + + fld tword [xBP + xCB*2] ; st0 = lrd + fsin + fnstsw ax + test ah, 04h ; check for C2 (fsin did not process the operand) + jz .done + + fldpi ; st1 = lrd st0 = pi + fadd st0 ; st1 = lrd st0 = 2*pi + fxch st1 ; st1 = 2*pi st0 = lrd +.again: + fprem1 ; reduce lrd modulo 2*pi + fnstsw ax + test ah, 04h ; C2 set: only a partial remainder so far + jnz .again + fstp st1 ; drop 2*pi, keep the reduced argument in st0 + fsin + +.done: + leave + ret +ENDPROC RT_NOCRT(sinl) + diff --git a/src/VBox/Runtime/common/math/tanl.asm b/src/VBox/Runtime/common/math/tanl.asm new file mode 100644 index 00000000..585baded --- /dev/null +++ b/src/VBox/Runtime/common/math/tanl.asm @@ -0,0 +1,62 @@ +; $Id: tanl.asm $ +;; @file +; IPRT - No-CRT tanl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution.
VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the tangent of lrd +; @returns st(0) +; @param lrd [xBP + xCB*2] +BEGINPROC RT_NOCRT(tanl) + push xBP + mov xBP, xSP + sub xSP, 10h + + fld tword [xBP + xCB*2] ; st0 = lrd + fptan + fnstsw ax + test ah, 04h ; check for C2 + jz .done + + fldpi ; st1 = lrd st0 = pi + fadd st0 ; st1 = lrd st0 = 2*pi + fxch st1 ; st1 = 2*pi st0 = lrd +.again: + fprem1 ; reduce lrd modulo 2*pi + fnstsw ax + test ah, 04h ; C2 set: only a partial remainder so far + jnz .again + fstp st1 ; drop 2*pi, keep the reduced argument in st0 + fptan + +.done: + fstp st0 ; pop the extra value fptan leaves on top of the result (1.0 per the instruction definition) + leave + ret +ENDPROC RT_NOCRT(tanl) + diff --git a/src/VBox/Runtime/common/math/trunc.asm b/src/VBox/Runtime/common/math/trunc.asm new file mode 100644 index 00000000..65131e9f --- /dev/null +++ b/src/VBox/Runtime/common/math/trunc.asm @@ -0,0 +1,68 @@ +; $Id: trunc.asm $ +;; @file +; IPRT - No-CRT trunc - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round to truncated integer value. +; @returns 32-bit: st(0) 64-bit: xmm0 +; @param rd 32-bit: [ebp + 8] 64-bit: xmm0 +BEGINPROC RT_NOCRT(trunc) + push xBP + mov xBP, xSP + sub xSP, 10h + +%ifdef RT_ARCH_AMD64 + movsd [xSP], xmm0 + fld qword [xSP] ; st0 = rd +%else + fld qword [xBP + xCB*2] ; st0 = rd +%endif + + ; Make frndint truncate (round toward zero) by modifying the fpu control word. + fstcw [xBP - 10h] ; save the caller's control word + mov eax, [xBP - 10h] + or eax, 00c00h ; set both rounding control bits + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; Round ST(0) to integer. + frndint + + ; Restore the fpu control word. + fldcw [xBP - 10h] + +%ifdef RT_ARCH_AMD64 + fstp qword [xSP] ; move the result from the x87 stack into xmm0 + movsd xmm0, [xSP] +%endif + leave + ret +ENDPROC RT_NOCRT(trunc) + diff --git a/src/VBox/Runtime/common/math/truncf.asm b/src/VBox/Runtime/common/math/truncf.asm new file mode 100644 index 00000000..92a8f651 --- /dev/null +++ b/src/VBox/Runtime/common/math/truncf.asm @@ -0,0 +1,68 @@ +; $Id: truncf.asm $ +;; @file +; IPRT - No-CRT truncf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution.
VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round to truncated integer value. +; @returns 32-bit: st(0) 64-bit: xmm0 +; @param rf 32-bit: [ebp + 8] 64-bit: xmm0 +BEGINPROC RT_NOCRT(truncf) + push xBP + mov xBP, xSP + sub xSP, 10h + +%ifdef RT_ARCH_AMD64 + movss [xSP], xmm0 + fld dword [xSP] ; st0 = rf +%else + fld dword [xBP + xCB*2] ; st0 = rf +%endif + + ; Make frndint truncate (round toward zero) by modifying the fpu control word. + fstcw [xBP - 10h] ; save the caller's control word + mov eax, [xBP - 10h] + or eax, 00c00h ; set both rounding control bits + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; Round ST(0) to integer. + frndint + + ; Restore the fpu control word. + fldcw [xBP - 10h] + +%ifdef RT_ARCH_AMD64 + fstp dword [xSP] ; move the result from the x87 stack into xmm0 + movss xmm0, [xSP] +%endif + leave + ret +ENDPROC RT_NOCRT(truncf) + diff --git a/src/VBox/Runtime/common/math/truncl.asm b/src/VBox/Runtime/common/math/truncl.asm new file mode 100644 index 00000000..08a60863 --- /dev/null +++ b/src/VBox/Runtime/common/math/truncl.asm @@ -0,0 +1,59 @@ +; $Id: truncl.asm $ +;; @file +; IPRT - No-CRT truncl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org.
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Round to truncated integer value. +; @returns st(0) +; @param lrd [xBP + xCB*2] +BEGINPROC RT_NOCRT(truncl) + push xBP + mov xBP, xSP + sub xSP, 10h + + fld tword [xBP + xCB*2] ; st0 = lrd + + ; Make frndint truncate (round toward zero) by modifying the fpu control word. + fstcw [xBP - 10h] ; save the caller's control word + mov eax, [xBP - 10h] + or eax, 00c00h ; set both rounding control bits + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; Round ST(0) to integer. + frndint + + ; Restore the fpu control word. + fldcw [xBP - 10h] + + leave + ret +ENDPROC RT_NOCRT(truncl) + diff --git a/src/VBox/Runtime/common/math/watcom/I8D-x86-32.asm b/src/VBox/Runtime/common/math/watcom/I8D-x86-32.asm new file mode 100644 index 00000000..2e608222 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/I8D-x86-32.asm @@ -0,0 +1,98 @@ +; $Id: I8D-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit signed integer division. +; + +; +; Copyright (C) 2007-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org.
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + +extern __U8D + + +;; +; 64-bit signed integer division. +; +; @returns EDX:EAX Quotient, ECX:EBX Remainder. +; @param EDX:EAX Dividend. +; @param ECX:EBX Divisor +; +global __I8D +__I8D: + ; + ; We use __U8D to do the work, we take care of the signedness. + ; + or edx, edx ; sets SF from the high dword of the dividend + js .negative_dividend + + or ecx, ecx ; sets SF from the high dword of the divisor + js .negative_divisor_positive_dividend + jmp __U8D ; both non-negative: __U8D returns directly to our caller + + +.negative_divisor_positive_dividend: + ; negate the divisor, do unsigned division, and negate the quotient. + neg ecx ; ECX:EBX = -ECX:EBX + neg ebx + sbb ecx, 0 + + call __U8D + + neg edx ; EDX:EAX = -EDX:EAX + neg eax + sbb edx, 0 + ret + +.negative_dividend: + neg edx ; EDX:EAX = -EDX:EAX + neg eax + sbb edx, 0 + + or ecx, ecx ; sets SF from the high dword of the divisor + js .negative_dividend_negative_divisor + +.negative_dividend_positive_divisor: + ; negate the dividend (above), do unsigned division, and negate both quotient and remainder + call __U8D + + neg edx ; EDX:EAX = -EDX:EAX + neg eax + sbb edx, 0 + +.return_negated_remainder: + neg ecx ; ECX:EBX = -ECX:EBX + neg ebx + sbb ecx, 0 + ret + +.negative_dividend_negative_divisor: + ; negate both dividend (above) and divisor, do unsigned division, and negate the remainder.
+ neg ecx ; ECX:EBX = -ECX:EBX + neg ebx + sbb ecx, 0 + + call __U8D + jmp .return_negated_remainder + diff --git a/src/VBox/Runtime/common/math/watcom/RTWatcomUInt64Div.c b/src/VBox/Runtime/common/math/watcom/RTWatcomUInt64Div.c new file mode 100644 index 00000000..a90727b6 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/RTWatcomUInt64Div.c @@ -0,0 +1,38 @@ +/* $Id: RTWatcomUInt64Div.c $ */ +/** @file + * BS3Kit - Unsigned 64-bit division (compiler support routine helper). + */ + +/* + * Copyright (C) 2007-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL) only, as it comes in the "COPYING.CDDL" file of the + * VirtualBox OSE distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both.
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include <iprt/uint64.h> + +/** C helper for the Watcom 64-bit division support code: forwards to RTUInt64DivRem with paQuotientReminder[0] and [1] as its two outputs (presumably quotient, then remainder). */ +DECLASM(void) RTWatcomUInt64Div(RTUINT64U uDividend, RTUINT64U uDivisor, RTUINT64U RT_FAR *paQuotientReminder) +{ + RTUInt64DivRem(&paQuotientReminder[0], &paQuotientReminder[1], &uDividend, &uDivisor); +} + diff --git a/src/VBox/Runtime/common/math/watcom/U8D-x86-32.asm b/src/VBox/Runtime/common/math/watcom/U8D-x86-32.asm new file mode 100644 index 00000000..22ae71f3 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/U8D-x86-32.asm @@ -0,0 +1,74 @@ +; $Id: U8D-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit unsigned integer division. +; + +; +; Copyright (C) 2007-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both.
+; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + +extern NAME(RTWatcomUInt64Div) + + +;; +; 64-bit unsigned integer division. +; +; @returns EDX:EAX Quotient, ECX:EBX Remainder. +; @param EDX:EAX Dividend. +; @param ECX:EBX Divisor +; +global __U8D +__U8D: + ; + ; Convert to a C __cdecl call - not doing this in assembly. + ; + + ; Set up a frame, allocating 16 bytes for the result buffer. + push ebp + mov ebp, esp + sub esp, 10h + + ; Pointer to the return buffer. + push esp ; ESP still equals EBP - 10h here, i.e. the start of the buffer + + ; The divisor. + push ecx + push ebx + + ; The dividend. + push edx + push eax + + call NAME(RTWatcomUInt64Div) + + ; Load the result. + mov ecx, [ebp - 10h + 12] ; remainder, high dword + mov ebx, [ebp - 10h + 8] ; remainder, low dword + mov edx, [ebp - 10h + 4] ; quotient, high dword + mov eax, [ebp - 10h] ; quotient, low dword + + leave ; also discards the pushed arguments + ret + diff --git a/src/VBox/Runtime/common/math/watcom/U8LS-x86-32.asm b/src/VBox/Runtime/common/math/watcom/U8LS-x86-32.asm new file mode 100644 index 00000000..ed4e0d57 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/U8LS-x86-32.asm @@ -0,0 +1,64 @@ +; $Id: U8LS-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit integer left shift. +; + +; +; Copyright (C) 2007-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL.
+; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; 64-bit integer left shift. +; +; @returns EDX:EAX +; @param EDX:EAX Value to shift. +; @param BL Shift count (it's specified as ECX:EBX, but we only use BL). +; +global __U8LS +global __I8LS +__U8LS: +__I8LS: + push ecx ; Preserve ECX even though we would be allowed to clobber it. + + mov cl, bl + and cl, 3fh ; Shift count modulo 64. + test cl, 20h ; Is the count 32 or more? + jz .shift_lt_32 + + ; 32 or more: the low dword moves into the high dword and the low dword becomes zero. + mov edx, eax + shl edx, cl ; SHL only uses the lower 5 bits of CL. + xor eax, eax + pop ecx + ret + +.shift_lt_32: + ; Less than 32: shift EDX:EAX as a pair. + shld edx, eax, cl + shl eax, cl + + pop ecx + ret + diff --git a/src/VBox/Runtime/common/math/watcom/U8M-I8M-x86-32.asm b/src/VBox/Runtime/common/math/watcom/U8M-I8M-x86-32.asm new file mode 100644 index 00000000..1291dcd5 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/U8M-I8M-x86-32.asm @@ -0,0 +1,77 @@ +; $Id: U8M-I8M-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit signed & unsigned integer multiplication. +; + +; +; Copyright (C) 2007-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL.
+; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + + +;; +; 64-bit signed & unsigned integer multiplication. +; +; @returns EDX:EAX product +; @param EDX:EAX Factor #1. +; @param ECX:EBX Factor #2. +; @uses ECX, EBX +; +global __U8M +__U8M: +global __I8M +__I8M: + ; + ; See if this is a pure 32-bit multiplication. We might get lucky. + ; + test edx, edx ; F1.hi != 0? + jnz .complicated + test ecx, ecx ; F2.hi != 0? + jnz .complicated + + mul ebx ; eax * ebx -> edx:eax + ret + +.complicated: + push eax ; save F1.lo + push edx ; save F1.hi + + ; ecx = F1.lo * F2.hi (the overflow in edx can be ignored here) + mul ecx + mov ecx, eax + + ; ecx += F1.hi * F2.lo (edx can be ignored again) + pop eax ; eax = F1.hi + mul ebx + add ecx, eax + + ; edx:eax = F1.lo * F2.lo + pop eax ; eax = F1.lo + mul ebx + + ; Add ecx to the high part (edx). + add edx, ecx + + ret + diff --git a/src/VBox/Runtime/common/math/watcom/U8RS-x86-32.asm b/src/VBox/Runtime/common/math/watcom/U8RS-x86-32.asm new file mode 100644 index 00000000..2c4f7d38 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/U8RS-x86-32.asm @@ -0,0 +1,63 @@ +; $Id: U8RS-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit unsigned integer right shift. +; + +; +; Copyright (C) 2007-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+%include "iprt/asmdefs.mac"
+
+
+BEGINCODE
+
+
+;;
+; 64-bit unsigned integer right shift.
+;
+; The count is first reduced to 0..63.  Counts below 32 shift the EDX:EAX
+; pair with SHRD/SHR.  Counts of 32 and up move the high dword into the low
+; dword, shift it by the low 5 bits of the count and zero the high dword.
+; Zeroes are shifted in from the top (SHR), i.e. this is a logical shift.
+; ECX is saved and restored around the whole operation.
+;
+; @returns EDX:EAX  The shifted value.
+; @param   EDX:EAX  Value to shift.
+; @param   BL       Shift count (it's specified as ECX:EBX, but we only use BL).
+;
+global __U8RS
+__U8RS:
+        push    ecx                     ; We're allowed to trash ECX, but why bother.
+
+        mov     cl, bl                  ; The shift instructions below take their count from CL.
+        and     cl, 3fh                 ; Keep the count within 0..63.
+        test    cl, 20h                 ; Bit 5 set means the count is 32 or more.
+        jnz     .big_shift
+
+        ; Shifting less than 32.
+        shrd    eax, edx, cl            ; Low dword takes in the bits leaving the bottom of EDX.
+        shr     edx, cl
+
+.return:
+        pop     ecx                     ; Restore the caller's ECX.
+        ret
+
+.big_shift:
+        ; Shifting 32 or more.
+        mov     eax, edx                ; The high dword becomes the low dword ...
+        shr     eax, cl                 ; Only uses lower 5 bits.
+        xor     edx, edx                ; ... and nothing is left in the high dword.
+        jmp     .return
+
diff --git a/src/VBox/Runtime/common/math/x86/Makefile.kup b/src/VBox/Runtime/common/math/x86/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Runtime/common/math/x86/Makefile.kup
diff --git a/src/VBox/Runtime/common/math/x86/fenv-x86.c b/src/VBox/Runtime/common/math/x86/fenv-x86.c
new file mode 100644
index 00000000..c48ae291
--- /dev/null
+++ b/src/VBox/Runtime/common/math/x86/fenv-x86.c
@@ -0,0 +1,218 @@
+/*-
+ * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $
+ */
+
+/*
+ * The commented-out block lists the includes of the FreeBSD original (see the
+ * $FreeBSD$ tag above); the two IPRT headers below are included instead.
+ */
+/*#include "namespace.h"
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <machine/npx.h>
+#include "fenv.h"*/
+#include <iprt/types.h>
+#include <iprt/nocrt/fenv.h>
+
+/*
+ * Static initializer for __fe_dfl_env (presumably the default floating-point
+ * environment), currently compiled out.
+ * NOTE(review): what each value means depends on the fenv_t layout, which is
+ * not declared in this file -- check it against the header before enabling.
+ */
+#if 0
+const fenv_t __fe_dfl_env = {
+	__INITIAL_NPXCW__,
+	0x0000,
+	0x0000,
+	0x1f80,
+	0xffffffff,
+	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff }
+};
+#endif
+
+/*
+ * SSE availability.  Starts out as __SSE_YES when the compiler defines
+ * __SSE__ and as __SSE_UNK otherwise; __test_sse() below overwrites it with
+ * __SSE_YES or __SSE_NO.
+ */
+enum __sse_support __has_sse =
+#ifdef __SSE__
+	__SSE_YES;
+#else
+	__SSE_UNK;
+#endif
+
+/*
+ * Inline assembly helpers for __test_sse().  They use the 32-bit instruction
+ * forms (pushfl/popfl/pushl/popl).
+ *
+ * getfl(x)    - store the current EFLAGS value in *x (pushfl, then pop into
+ *               the operand).
+ * setfl(x)    - load EFLAGS from x (push the operand, then popfl).
+ * cpuid_dx(x) - execute CPUID with EAX = 1 and store the resulting EDX in
+ *               *x.  EBX is saved and restored by hand around the
+ *               instruction; EAX and ECX are declared as clobbered.
+ */
+#define	getfl(x)	__asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x)))
+#define	setfl(x)	__asm __volatile("pushl %0\n\tpopfl" : : "g" (x))
+#define	cpuid_dx(x)	__asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t"	\
+				 "cpuid\n\tpopl %%ebx"			\
+				: "=d" (*(x)) : : "eax", "ecx")
+
+/*
+ * Test for SSE support on this processor.
We need to do this because + * we need to use ldmxcsr/stmxcsr to get correct results if any part + * of the program was compiled to use SSE floating-point, but we can't + * use SSE on older processors. + */ +int +__test_sse(void) +{ + int flag, nflag; + int dx_features; + + /* Am I a 486? */ + getfl(&flag); + nflag = flag ^ 0x200000; + setfl(nflag); + getfl(&nflag); + if (flag != nflag) { + /* Not a 486, so CPUID should work. */ + cpuid_dx(&dx_features); + if (dx_features & 0x2000000) { + __has_sse = __SSE_YES; + return (1); + } + } + __has_sse = __SSE_NO; + return (0); +} + +#if 0 /* later */ +int +_STD(fesetexceptflag)(const fexcept_t *flagp, int excepts) +{ + fenv_t env; + int mxcsr; + + __fnstenv(&env); + env.__status &= ~excepts; + env.__status |= *flagp & excepts; + __fldenv(env); + + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + mxcsr &= ~excepts; + mxcsr |= *flagp & excepts; + __ldmxcsr(mxcsr); + } + + return (0); +} + +int +_STD(feraiseexcept)(int excepts) +{ + fexcept_t ex = excepts; + + fesetexceptflag(&ex, excepts); + __fwait(); + return (0); +} + +int +_STD(fegetenv)(fenv_t *envp) +{ + int control, mxcsr; + + /* + * fnstenv masks all exceptions, so we need to save and + * restore the control word to avoid this side effect. 
+ */ + __fnstcw(&control); + __fnstenv(envp); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + __set_mxcsr(*envp, mxcsr); + } + __fldcw(control); + return (0); +} + +int +_STD(feholdexcept)(fenv_t *envp) +{ + int mxcsr; + + __fnstenv(envp); + __fnclex(); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + __set_mxcsr(*envp, mxcsr); + mxcsr &= ~FE_ALL_EXCEPT; + mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT; + __ldmxcsr(mxcsr); + } + return (0); +} + +int +_STD(feupdateenv)(const fenv_t *envp) +{ + int mxcsr, status; + + __fnstsw(&status); + if (__HAS_SSE()) + __stmxcsr(&mxcsr); + else + mxcsr = 0; + fesetenv(envp); + feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); + return (0); +} + +int +__feenableexcept(int mask) +{ + int mxcsr, control, omask; + + mask &= FE_ALL_EXCEPT; + __fnstcw(&control); + if (__HAS_SSE()) + __stmxcsr(&mxcsr); + else + mxcsr = 0; + omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; + control &= ~mask; + __fldcw(control); + if (__HAS_SSE()) { + mxcsr &= ~(mask << _SSE_EMASK_SHIFT); + __ldmxcsr(mxcsr); + } + return (~omask); +} + +int +__fedisableexcept(int mask) +{ + int mxcsr, control, omask; + + mask &= FE_ALL_EXCEPT; + __fnstcw(&control); + if (__HAS_SSE()) + __stmxcsr(&mxcsr); + else + mxcsr = 0; + omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; + control |= mask; + __fldcw(control); + if (__HAS_SSE()) { + mxcsr |= mask << _SSE_EMASK_SHIFT; + __ldmxcsr(mxcsr); + } + return (~omask); +} + +__weak_reference(__feenableexcept, feenableexcept); +__weak_reference(__fedisableexcept, fedisableexcept); +#endif /* later */ |