diff options
Diffstat (limited to 'src/VBox/Runtime/common/math/fmaf-asm.asm')
-rw-r--r-- | src/VBox/Runtime/common/math/fmaf-asm.asm | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/math/fmaf-asm.asm b/src/VBox/Runtime/common/math/fmaf-asm.asm new file mode 100644 index 00000000..105044e1 --- /dev/null +++ b/src/VBox/Runtime/common/math/fmaf-asm.asm @@ -0,0 +1,104 @@ +; $Id: fmaf-asm.asm $ +;; @file +; IPRT - No-CRT fmaf alternatives - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Fused multiplication and add, intel version. +; +; @returns st(0) / xmm0 +; @param r32Factor1 [rbp + 08h] / xmm0 +; @param r32Factor2 [rbp + 0ch] / xmm1 +; @param r32Addend [rbp + 10h] / xmm2 +BEGINPROC rtNoCrtMathFma3f + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + SEH64_END_PROLOGUE + +%ifdef RT_ARCH_X86 + movss xmm0, dword [xBP + xCB*2 + 00h] + movss xmm1, dword [xBP + xCB*2 + 04h] + movss xmm2, dword [xBP + xCB*2 + 08h] +%endif + + vfmadd132ss xmm0, xmm2, xmm1 ; xmm0 = xmm0 * xmm1 + xmm2 (132 = multiply op1 with op3 and add op2) + +%ifdef RT_ARCH_X86 + sub xSP, 10h + movss [xSP], xmm0 + fld dword [xSP] +%endif + leave + ret +ENDPROC rtNoCrtMathFma3f + + +;; +; Fused multiplication and add, amd version. +; +; @returns st(0) / xmm0 +; @param r32Factor1 [rbp + 08h] / xmm0 +; @param r32Factor2 [rbp + 10h] / xmm1 +; @param r32Addend [rbp + 18h] / xmm2 +BEGINPROC rtNoCrtMathFma4f + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + SEH64_END_PROLOGUE + +%ifdef RT_ARCH_X86 + movss xmm0, dword [xBP + xCB*2 + 00h] + movss xmm1, dword [xBP + xCB*2 + 04h] + movss xmm2, dword [xBP + xCB*2 + 08h] +%endif + + vfmaddss xmm0, xmm0, xmm1, xmm2 ; xmm0 = xmm0 * xmm1 + xmm2 + +%ifdef RT_ARCH_X86 + sub xSP, 10h + movss [xSP], xmm0 + fld dword [xSP] +%endif + leave + ret +ENDPROC rtNoCrtMathFma4f + |