summaryrefslogtreecommitdiffstats
path: root/src/VBox/Runtime/common/math/cos.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/VBox/Runtime/common/math/cos.asm')
-rw-r--r--src/VBox/Runtime/common/math/cos.asm213
1 files changed, 213 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/math/cos.asm b/src/VBox/Runtime/common/math/cos.asm
new file mode 100644
index 00000000..da83ef81
--- /dev/null
+++ b/src/VBox/Runtime/common/math/cos.asm
@@ -0,0 +1,213 @@
+; $Id: cos.asm $
+;; @file
+; IPRT - No-CRT cos - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "iprt/x86.mac"
+
+
+BEGINCODE
+
+;;
+; Compute the cosine of rd, measured in radians.
+;
+; @returns st(0) / xmm0
+; @param rd [rbp + xCB*2] / xmm0
+;
+RT_NOCRT_BEGINPROC cos
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+ sub xSP, 20h
+ SEH64_ALLOCATE_STACK 20h
+ SEH64_END_PROLOGUE
+
+%ifdef RT_OS_WINDOWS
+ ;
+ ; Make sure we use full precision and not the windows default of 53 bits.
+ ;
+;; @todo not sure if this makes any difference...
+ fnstcw [xBP - 20h]
+ mov ax, [xBP - 20h]
+ or ax, X86_FCW_PC_64 ; includes both bits, so no need to clear the mask.
+ mov [xBP - 1ch], ax
+ fldcw [xBP - 1ch]
+%endif
+
+ ;
+ ; Load the input into st0.
+ ;
+%ifdef RT_ARCH_AMD64
+ movsd [xBP - 10h], xmm0
+ fld qword [xBP - 10h]
+%else
+ fld qword [xBP + xCB*2]
+%endif
+
+ ;
+ ; The FCOS instruction has a very narrow range (-3pi/8 to 3pi/8) where it
+ ; works reliably, so outside that we'll use the FSIN instruction instead
+ ; as it has a larger good range (-5pi/4 to 1pi/4 for cosine).
+ ; Input conversion follows: cos(x) = sin(x + pi/2)
+ ;
+ ; We examin the input and weed out non-finit numbers first.
+ ;
+
+ ; We only do the range check on normal finite numbers.
+ fxam
+ fnstsw ax
+ and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
+ cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
+ je .finite
+ cmp ax, X86_FSW_C3 ; Zero
+ je .zero
+ cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
+ je .zero
+ cmp ax, X86_FSW_C0 ; NaN - must handle it special,
+ je .nan
+
+ ; Pass infinities and unsupported inputs to fcos, assuming it does the right thing.
+ ; We also jump here if we get a finite number in the "good" range, see below.
+.do_fcos:
+ fcos
+ jmp .return_val
+
+ ;
+ ; Finite number.
+ ;
+ ; First check if it's a very tiny number where we can simply return 1.
+ ; Next check if it's in the range where FCOS is reasonable, otherwise
+ ; go to FSIN to do the work.
+ ;
+.finite:
+ fld st0
+ fabs
+ fld qword [.s_r64TinyCosTo1 xWrtRIP]
+ fcomip st1
+ ja .zero_extra_pop
+
+.not_that_tiny_input:
+ fld qword [.s_r64FCosOkay xWrtRIP]
+ fcomip st1
+ ffreep st0 ; pop fabs(input)
+ ja .do_fcos ; jmp if fabs(input) < .s_r64FCosOkay
+
+ ;
+ ; If we have a positive number we subtract 3pi/2, for negative we add pi/2.
+ ; We still have the FXAM result in AX.
+ ;
+.outside_fcos_range:
+ test ax, X86_FSW_C1 ; The sign bit.
+ jnz .adjust_negative_to_sine
+
+ ; Calc -3pi/2 using FPU-internal pi constant.
+ fldpi
+ fadd st0, st0 ; st0=2pi
+ fldpi
+ fdiv qword [.s_r64Two xWrtRIP] ; st1=2pi; st0=pi/2
+ fsubp st1, st0 ; st0=3pi/2
+ fchs ; st0=-3pi/2
+ jmp .make_sine_adjustment
+
+.adjust_negative_to_sine:
+ ; Calc +pi/2.
+ fldpi
+ fdiv qword [.s_r64Two xWrtRIP] ; st1=2pi; st0=pi/2
+
+.make_sine_adjustment:
+ faddp st1, st0
+
+ ;
+ ; Call internal sine worker to calculate st0=sin(st0)
+ ;
+.do_sine:
+ mov ecx, 1 ; double
+ extern NAME(rtNoCrtMathSinCore)
+ call NAME(rtNoCrtMathSinCore)
+
+ ;
+ ; Return st0.
+ ;
+.return_val:
+%ifdef RT_ARCH_AMD64
+ fstp qword [xBP - 10h]
+ movsd xmm0, [xBP - 10h]
+%endif
+%ifdef RT_OS_WINDOWS
+ fldcw [xBP - 20h] ; restore original
+%endif
+.return:
+ leave
+ ret
+
+ ;
+ ; cos(+/-0) = +1.0
+ ;
+.zero_extra_pop:
+ ffreep st0
+.zero:
+ ffreep st0
+ fld1
+ jmp .return_val
+
+ ;
+ ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
+ ; to QNaN when masked).
+ ;
+.nan:
+%ifdef RT_ARCH_AMD64
+ ffreep st0
+%endif
+ jmp .return
+
+ ;
+ ; Local constants.
+ ;
+ALIGNCODE(8)
+ ; About 2**-27. When fabs(input) is below this limit we can consider cos(input) ~= 1.0.
+.s_r64TinyCosTo1:
+ dq 7.4505806e-9
+
+ ; The absolute limit for the range which FCOS is expected to produce reasonable results.
+.s_r64FCosOkay:
+ dq 1.1780972450961724644225 ; 3*pi/8
+
+.s_r64Two:
+ dq 2.0
+ENDPROC RT_NOCRT(cos)
+