diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:17:27 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:17:27 +0000 |
commit | f215e02bf85f68d3a6106c2a1f4f7f063f819064 (patch) | |
tree | 6bb5b92c046312c4e95ac2620b10ddf482d3fa8b /src/VBox/Runtime/common/math | |
parent | Initial commit. (diff) | |
download | virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.tar.xz virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.zip |
Adding upstream version 7.0.14-dfsg.upstream/7.0.14-dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/Runtime/common/math')
152 files changed, 17880 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/math/Makefile.kup b/src/VBox/Runtime/common/math/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Runtime/common/math/Makefile.kup diff --git a/src/VBox/Runtime/common/math/RTUInt128MulByU64.asm b/src/VBox/Runtime/common/math/RTUInt128MulByU64.asm new file mode 100644 index 00000000..9c14fc5b --- /dev/null +++ b/src/VBox/Runtime/common/math/RTUInt128MulByU64.asm @@ -0,0 +1,91 @@ +; $Id: RTUInt128MulByU64.asm $ +;; @file +; IPRT - RTUInt128MulByU64 - AMD64 implementation. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. 
+; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "internal/bignum.mac" + + +BEGINCODE + +;; +; Multiplies a 128-bit number with a 64-bit one, keeping only the low 128 bits of the product. +; +; @returns puResult. +; @param puResult x86:[ebp + 8] gcc:rdi msc:rcx +; @param puValue1 x86:[ebp + 12] gcc:rsi msc:rdx +; @param uValue2 x86:[ebp + 16] gcc:rdx msc:r8 +; +RT_BEGINPROC RTUInt128MulByU64 +; SEH64_SET_FRAME_xSP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define puResult rdi + %define puValue1 rsi + %define uValue2 r8 + mov r8, rdx ; uValue2 arrives in rdx, which MUL overwrites. + %else + %define puResult rcx + %define puValue1 r9 + %define uValue2 r8 + mov r9, rdx ; puValue1 arrives in rdx, which MUL overwrites. + %endif + + ; puValue1->s.Lo * uValue2 + mov rax, [puValue1] + mul uValue2 + mov [puResult], rax ; Store the low half of the result. + mov r11, rdx ; Save the upper 64 bits of this product for later. + + ; puValue1->s.Hi * uValue2 + mov rax, [puValue1 + 8] + mul uValue2 + add r11, rax ; Calc the high half of the result; the carry and rdx are discarded. + mov [puResult + 8], r11 ; Store the high half of the result. + + mov rax, puResult ; Return puResult. + +;%elifdef RT_ARCH_X86 +%else + %error "unsupported arch" +%endif + + ret +ENDPROC RTUInt128MulByU64 + diff --git a/src/VBox/Runtime/common/math/RTUInt128MulByU64Ex.asm b/src/VBox/Runtime/common/math/RTUInt128MulByU64Ex.asm new file mode 100644 index 00000000..6e913677 --- /dev/null +++ b/src/VBox/Runtime/common/math/RTUInt128MulByU64Ex.asm @@ -0,0 +1,95 @@ +; $Id: RTUInt128MulByU64Ex.asm $ +;; @file +; IPRT - RTUInt128MulByU64Ex - AMD64 implementation. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. 
+; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "internal/bignum.mac" + + +BEGINCODE + +;; +; Multiplies a 128-bit number with a 64-bit one, returning a 256-bit result (the 4th 64-bit part is always written as zero). +; +; @returns puResult. +; @param puResult x86:[ebp + 8] gcc:rdi msc:rcx +; @param puValue1 x86:[ebp + 12] gcc:rsi msc:rdx +; @param uValue2 x86:[ebp + 16] gcc:rdx msc:r8 +; +RT_BEGINPROC RTUInt128MulByU64Ex +; SEH64_SET_FRAME_xSP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define puResult rdi + %define puValue1 rsi + %define uValue2 r8 + mov r8, rdx ; uValue2 arrives in rdx, which MUL overwrites. + %else + %define puResult rcx + %define puValue1 r9 + %define uValue2 r8 + mov r9, rdx ; puValue1 arrives in rdx, which MUL overwrites. + %endif + + ; puValue1->s.Lo * uValue2 + mov rax, [puValue1] + mul uValue2 + mov [puResult], rax ; Store the 1st 64-bit part of the result. + mov r11, rdx ; Save the upper 64 bits for later. + + ; puValue1->s.Hi * uValue2 + mov rax, [puValue1 + 8] + mul uValue2 + add r11, rax ; 2nd part = saved upper bits + low 64 bits of this product. + adc rdx, 0 ; Propagate the carry into the 3rd part. + mov [puResult + 8], r11 ; Store the 2nd 64-bit part of the result. 
+ mov [puResult + 16], rdx ; Store the 3rd 64-bit part of the result. + xor r10, r10 ; r10 = 0. + mov [puResult + 24], r10 ; Store the 4th 64-bit part of the result (always zero). + + mov rax, puResult ; Return puResult. + +;%elifdef RT_ARCH_X86 +%else + %error "unsupported arch" +%endif + + ret +ENDPROC RTUInt128MulByU64Ex + diff --git a/src/VBox/Runtime/common/math/__fpclassifyd.cpp b/src/VBox/Runtime/common/math/__fpclassifyd.cpp new file mode 100644 index 00000000..d07957ba --- /dev/null +++ b/src/VBox/Runtime/common/math/__fpclassifyd.cpp @@ -0,0 +1,66 @@ +/* $Id: __fpclassifyd.cpp $ */ +/** @file + * IPRT - No-CRT - __fpclassifyd(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __fpclassifyd +int RT_NOCRT(__fpclassifyd)(double rd) /* Classifies rd, returning one of the RT_NOCRT_FP_* category values. */ +{ + AssertCompile(sizeof(rd) == sizeof(RTFLOAT64U)); + RTFLOAT64U u; + u.rd = rd; /* View the value through the RTFLOAT64U union for the RTFLOAT64U_IS_* checks below. */ + if (RTFLOAT64U_IS_ZERO(&u)) + return RT_NOCRT_FP_ZERO; + if (RTFLOAT64U_IS_NORMAL(&u)) + return RT_NOCRT_FP_NORMAL; + if (RTFLOAT64U_IS_NAN(&u)) + return RT_NOCRT_FP_NAN; + if (RTFLOAT64U_IS_INF(&u)) + return RT_NOCRT_FP_INFINITE; + Assert(RTFLOAT64U_IS_SUBNORMAL(&u)); /* Subnormal is the only category expected to remain at this point. */ + return RT_NOCRT_FP_SUBNORMAL; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__fpclassifyd); + diff --git a/src/VBox/Runtime/common/math/__fpclassifyf.cpp b/src/VBox/Runtime/common/math/__fpclassifyf.cpp new file mode 100644 index 00000000..ca674eab --- /dev/null +++ b/src/VBox/Runtime/common/math/__fpclassifyf.cpp @@ -0,0 +1,66 @@ +/* $Id: __fpclassifyf.cpp $ */ +/** @file + * IPRT - No-CRT - __fpclassifyf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __fpclassifyf +int RT_NOCRT(__fpclassifyf)(float r32) /* Classifies r32, returning one of the RT_NOCRT_FP_* category values. */ +{ + AssertCompile(sizeof(r32) == sizeof(RTFLOAT32U)); + RTFLOAT32U u; + u.r = r32; /* View the value through the RTFLOAT32U union for the RTFLOAT32U_IS_* checks below. */ + if (RTFLOAT32U_IS_ZERO(&u)) + return RT_NOCRT_FP_ZERO; + if (RTFLOAT32U_IS_NORMAL(&u)) + return RT_NOCRT_FP_NORMAL; + if (RTFLOAT32U_IS_NAN(&u)) + return RT_NOCRT_FP_NAN; + if (RTFLOAT32U_IS_INF(&u)) + return RT_NOCRT_FP_INFINITE; + Assert(RTFLOAT32U_IS_SUBNORMAL(&u)); /* Subnormal is the only category expected to remain at this point. */ + return RT_NOCRT_FP_SUBNORMAL; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__fpclassifyf); + diff --git a/src/VBox/Runtime/common/math/__fpclassifyl.cpp b/src/VBox/Runtime/common/math/__fpclassifyl.cpp new file mode 100644 index 00000000..4a29baa4 --- /dev/null +++ b/src/VBox/Runtime/common/math/__fpclassifyl.cpp @@ -0,0 +1,101 @@ +/* $Id: __fpclassifyl.cpp $ */ +/** @file + * 
IPRT - No-CRT - __fpclassifyl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __fpclassifyl +int RT_NOCRT(__fpclassifyl)(long double lrd) /* Classifies lrd, returning one of the RT_NOCRT_FP_* category values. */ +{ +#ifdef RT_COMPILER_WITH_128BIT_LONG_DOUBLE /* 128-bit long double: classify via RTFLOAT128U. */ + RTFLOAT128U u; + u.rd = lrd; + if (RTFLOAT128U_IS_ZERO(&u)) + return RT_NOCRT_FP_ZERO; + if (RTFLOAT128U_IS_NORMAL(&u)) + return RT_NOCRT_FP_NORMAL; + if (RTFLOAT128U_IS_NAN(&u)) + return RT_NOCRT_FP_NAN; + if (RTFLOAT128U_IS_INF(&u)) + return RT_NOCRT_FP_INFINITE; + Assert(RTFLOAT128U_IS_SUBNORMAL(&u)); /* Subnormal is the only category expected to remain at this point. */ + return RT_NOCRT_FP_SUBNORMAL; + +#elif defined(RT_COMPILER_WITH_80BIT_LONG_DOUBLE) /* 80-bit long double: classify via RTFLOAT80U2. */ + RTFLOAT80U2 u; + u.lrd = lrd; + if (RTFLOAT80U_IS_ZERO(&u)) + return RT_NOCRT_FP_ZERO; + if (RTFLOAT80U_IS_NORMAL(&u)) + return RT_NOCRT_FP_NORMAL; + if (RTFLOAT80U_IS_NAN(&u)) + return RT_NOCRT_FP_NAN; + if (RTFLOAT80U_IS_INF(&u)) + return RT_NOCRT_FP_INFINITE; + if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(&u)) + return RT_NOCRT_FP_SUBNORMAL; + + /* Following i387 invalid operand rules here. Adjust as needed for + other architectures. 
*/ + Assert(RTFLOAT80U_IS_387_INVALID(&u)); + return RT_NOCRT_FP_NAN; /* Whatever is left (asserted to be a 387-invalid encoding) is reported as NaN. */ + +#else /* Fallback: long double is handled exactly like a 64-bit double. */ + AssertCompile(sizeof(lrd) == sizeof(uint64_t)); /* Build fails here if long double is not 64 bits wide. */ + RTFLOAT64U u; + u.rd = lrd; + if (RTFLOAT64U_IS_ZERO(&u)) + return RT_NOCRT_FP_ZERO; + if (RTFLOAT64U_IS_NORMAL(&u)) + return RT_NOCRT_FP_NORMAL; + if (RTFLOAT64U_IS_NAN(&u)) + return RT_NOCRT_FP_NAN; + if (RTFLOAT64U_IS_INF(&u)) + return RT_NOCRT_FP_INFINITE; + Assert(RTFLOAT64U_IS_SUBNORMAL(&u)); /* Subnormal is the only category expected to remain at this point. */ + return RT_NOCRT_FP_SUBNORMAL; +#endif +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__fpclassifyl); + diff --git a/src/VBox/Runtime/common/math/__isfinite.cpp b/src/VBox/Runtime/common/math/__isfinite.cpp new file mode 100644 index 00000000..dadb17cb --- /dev/null +++ b/src/VBox/Runtime/common/math/__isfinite.cpp @@ -0,0 +1,57 @@ +/* $Id: __isfinite.cpp $ */ +/** @file + * IPRT - No-CRT - __isfinite(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. 
+ * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isfinite +int RT_NOCRT(__isfinite)(double rd) /* Returns non-zero when rd is zero, normal or subnormal; zero otherwise. */ +{ + AssertCompile(sizeof(rd) == sizeof(RTFLOAT64U)); + RTFLOAT64U u; + u.rd = rd; /* View the value through the RTFLOAT64U union for the RTFLOAT64U_IS_* checks below. */ + return RTFLOAT64U_IS_ZERO(&u) || RTFLOAT64U_IS_NORMAL(&u) || RTFLOAT64U_IS_SUBNORMAL(&u); +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isfinite); + diff --git a/src/VBox/Runtime/common/math/__isfinitef.cpp b/src/VBox/Runtime/common/math/__isfinitef.cpp new file mode 100644 index 00000000..39dae5f8 --- /dev/null +++ b/src/VBox/Runtime/common/math/__isfinitef.cpp @@ -0,0 +1,57 @@ +/* $Id: __isfinitef.cpp $ */ +/** @file + * IPRT - No-CRT - __isfinitef(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isfinitef +int RT_NOCRT(__isfinitef)(float r32) /* Returns non-zero when r32 is zero, normal or subnormal; zero otherwise. */ +{ + AssertCompile(sizeof(r32) == sizeof(RTFLOAT32U)); + RTFLOAT32U u; + u.r = r32; /* View the value through the RTFLOAT32U union for the RTFLOAT32U_IS_* checks below. */ + return RTFLOAT32U_IS_ZERO(&u) || RTFLOAT32U_IS_NORMAL(&u) || RTFLOAT32U_IS_SUBNORMAL(&u); +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isfinitef); + diff --git a/src/VBox/Runtime/common/math/__isfinitel.cpp b/src/VBox/Runtime/common/math/__isfinitel.cpp new file mode 100644 index 00000000..3b655781 --- /dev/null +++ b/src/VBox/Runtime/common/math/__isfinitel.cpp @@ -0,0 +1,63 @@ +/* $Id: __isfinitel.cpp $ */ +/** @file + * IPRT - No-CRT - __isfinitel(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isfinitel +int RT_NOCRT(__isfinitel)(long double lrd) /* Returns non-zero when lrd is zero, normal or subnormal; zero otherwise. */ +{ +#ifdef RT_COMPILER_WITH_80BIT_LONG_DOUBLE /* 80-bit long double: test via RTFLOAT80U2. */ + RTFLOAT80U2 u; + u.lrd = lrd; + return RTFLOAT80U_IS_ZERO(&u) || RTFLOAT80U_IS_NORMAL(&u) || RTFLOAT80U_IS_SUBNORMAL(&u); /* NOTE(review): __fpclassifyl() uses RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL for this category - confirm both agree on pseudo-denormals. */ +#else /* NOTE(review): no RT_COMPILER_WITH_128BIT_LONG_DOUBLE branch here, unlike __fpclassifyl(). */ + AssertCompile(sizeof(lrd) == sizeof(RTFLOAT64U)); /* Build fails here if long double is not the size of RTFLOAT64U. */ + RTFLOAT64U u; + u.rd = lrd; + return RTFLOAT64U_IS_ZERO(&u) || RTFLOAT64U_IS_NORMAL(&u) || RTFLOAT64U_IS_SUBNORMAL(&u); +#endif +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isfinitel); + diff --git a/src/VBox/Runtime/common/math/__isinff.cpp b/src/VBox/Runtime/common/math/__isinff.cpp new file mode 100644 index 00000000..8d4f74e4 --- /dev/null +++ b/src/VBox/Runtime/common/math/__isinff.cpp @@ -0,0 +1,57 @@ +/* $Id: __isinff.cpp $ */ +/** @file + * IPRT - No-CRT - __isinff(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isinff +int RT_NOCRT(__isinff)(float r32) /* Returns the RTFLOAT32U_IS_INF() result for r32. */ +{ + AssertCompile(sizeof(r32) == sizeof(RTFLOAT32U)); + RTFLOAT32U u; + u.r = r32; /* View the value through the RTFLOAT32U union for the check below. */ + return RTFLOAT32U_IS_INF(&u); +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isinff); + diff --git a/src/VBox/Runtime/common/math/__isinfl.cpp b/src/VBox/Runtime/common/math/__isinfl.cpp new file mode 100644 index 00000000..6f9f5400 --- /dev/null +++ b/src/VBox/Runtime/common/math/__isinfl.cpp @@ -0,0 +1,63 @@ +/* $Id: __isinfl.cpp $ */ +/** @file + * IPRT - No-CRT - __isinfl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isinfl +int RT_NOCRT(__isinfl)(long double lrd) /* Returns the RTFLOAT80U_IS_INF() or RTFLOAT64U_IS_INF() result for lrd, depending on the long double layout. */ +{ +#ifdef RT_COMPILER_WITH_80BIT_LONG_DOUBLE /* 80-bit long double: test via RTFLOAT80U2. */ + RTFLOAT80U2 u; + u.lrd = lrd; + return RTFLOAT80U_IS_INF(&u); +#else /* NOTE(review): no RT_COMPILER_WITH_128BIT_LONG_DOUBLE branch here, unlike __fpclassifyl(). */ + AssertCompile(sizeof(lrd) == sizeof(RTFLOAT64U)); /* Build fails here if long double is not the size of RTFLOAT64U. */ + RTFLOAT64U u; + u.rd = lrd; + return RTFLOAT64U_IS_INF(&u); +#endif +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isinfl); + diff --git a/src/VBox/Runtime/common/math/__isnanl.cpp b/src/VBox/Runtime/common/math/__isnanl.cpp new file mode 100644 index 00000000..7b80a721 --- /dev/null +++ b/src/VBox/Runtime/common/math/__isnanl.cpp @@ -0,0 +1,63 @@ +/* $Id: __isnanl.cpp $ */ +/** @file + * IPRT - No-CRT - __isnanl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isnanl +int RT_NOCRT(__isnanl)(long double lrd) /* Returns non-zero when lrd is a NaN (80-bit layout: also for 387-invalid encodings). */ +{ +#ifdef RT_COMPILER_WITH_80BIT_LONG_DOUBLE /* 80-bit long double: test via RTFLOAT80U2. */ + RTFLOAT80U2 u; + u.lrd = lrd; + return RTFLOAT80U_IS_NAN(&u) || RTFLOAT80U_IS_387_INVALID(&u); /* PORTME: 387-invalid encodings are reported as NaN, as in __fpclassifyl(). */ +#else /* NOTE(review): no RT_COMPILER_WITH_128BIT_LONG_DOUBLE branch here, unlike __fpclassifyl(). */ + AssertCompile(sizeof(lrd) == sizeof(RTFLOAT64U)); /* Build fails here if long double is not the size of RTFLOAT64U. */ + RTFLOAT64U u; + u.rd = lrd; + return RTFLOAT64U_IS_NAN(&u); +#endif +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isnanl); + diff --git a/src/VBox/Runtime/common/math/__isnormal.cpp b/src/VBox/Runtime/common/math/__isnormal.cpp new file mode 100644 index 00000000..0d34f3a5 --- /dev/null +++ b/src/VBox/Runtime/common/math/__isnormal.cpp @@ -0,0 +1,57 @@ +/* $Id: __isnormal.cpp $ */ +/** @file + * IPRT - No-CRT - __isnormal(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isnormal +int RT_NOCRT(__isnormal)(double rd) /* Returns the RTFLOAT64U_IS_NORMAL() result for rd. */ +{ + AssertCompile(sizeof(rd) == sizeof(RTFLOAT64U)); + RTFLOAT64U u; + u.rd = rd; /* View the value through the RTFLOAT64U union for the check below. */ + return RTFLOAT64U_IS_NORMAL(&u); +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isnormal); + diff --git a/src/VBox/Runtime/common/math/__isnormalf.cpp b/src/VBox/Runtime/common/math/__isnormalf.cpp new file mode 100644 index 00000000..e0754293 --- /dev/null +++ b/src/VBox/Runtime/common/math/__isnormalf.cpp @@ -0,0 +1,57 @@ +/* $Id: __isnormalf.cpp $ */ +/** @file + * IPRT - No-CRT - __isnormalf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isnormalf +int RT_NOCRT(__isnormalf)(float r32) +{ + AssertCompile(sizeof(r32) == sizeof(RTFLOAT32U)); + RTFLOAT32U u; + u.r = r32; + return RTFLOAT32U_IS_NORMAL(&u); +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isnormalf); + diff --git a/src/VBox/Runtime/common/math/__isnormall.cpp b/src/VBox/Runtime/common/math/__isnormall.cpp new file mode 100644 index 00000000..3c153b2b --- /dev/null +++ b/src/VBox/Runtime/common/math/__isnormall.cpp @@ -0,0 +1,63 @@ +/* $Id: __isnormall.cpp $ */ +/** @file + * IPRT - No-CRT - __isnormall(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __isnormall +int RT_NOCRT(__isnormall)(long double lrd) +{ +#ifdef RT_COMPILER_WITH_80BIT_LONG_DOUBLE + RTFLOAT80U2 u; + u.lrd = lrd; + return RTFLOAT80U_IS_NORMAL(&u); +#else + AssertCompile(sizeof(lrd) == sizeof(RTFLOAT64U)); + RTFLOAT64U u; + u.rd = lrd; + return RTFLOAT64U_IS_NORMAL(&u); +#endif +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__isnormall); + diff --git a/src/VBox/Runtime/common/math/__signbit.cpp b/src/VBox/Runtime/common/math/__signbit.cpp new file mode 100644 index 00000000..576bedcd --- /dev/null +++ b/src/VBox/Runtime/common/math/__signbit.cpp @@ -0,0 +1,57 @@ +/* 
$Id: __signbit.cpp $ */ +/** @file + * IPRT - No-CRT - __signbit(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __signbit +int RT_NOCRT(__signbit)(double rd) +{ + AssertCompile(sizeof(rd) == sizeof(RTFLOAT64U)); + RTFLOAT64U u; + u.rd = rd; + return u.s.fSign; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__signbit); + diff --git a/src/VBox/Runtime/common/math/__signbitf.cpp b/src/VBox/Runtime/common/math/__signbitf.cpp new file mode 100644 index 00000000..e47a4301 --- /dev/null +++ b/src/VBox/Runtime/common/math/__signbitf.cpp @@ -0,0 +1,57 @@ +/* $Id: __signbitf.cpp $ */ +/** @file + * IPRT - No-CRT - __signbitf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __signbitf +int RT_NOCRT(__signbitf)(float r32) +{ + AssertCompile(sizeof(r32) == sizeof(RTFLOAT32U)); + RTFLOAT32U u; + u.r = r32; + return u.s.fSign; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__signbitf); + diff --git a/src/VBox/Runtime/common/math/__signbitl.cpp b/src/VBox/Runtime/common/math/__signbitl.cpp new file mode 100644 index 00000000..ece02d0d --- /dev/null +++ b/src/VBox/Runtime/common/math/__signbitl.cpp @@ -0,0 +1,62 @@ +/* $Id: __signbitl.cpp $ */ +/** @file + * IPRT - No-CRT - __signbitl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef __signbitl +int RT_NOCRT(__signbitl)(long double lrd) +{ +#ifdef RT_COMPILER_WITH_80BIT_LONG_DOUBLE + RTFLOAT80U2 u; + u.lrd = lrd; +#else + AssertCompile(sizeof(lrd) == sizeof(RTFLOAT64U)); + RTFLOAT64U u; + u.rd = lrd; +#endif + return u.s.fSign; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__signbitl); + diff --git a/src/VBox/Runtime/common/math/atan.asm b/src/VBox/Runtime/common/math/atan.asm new file mode 100644 index 00000000..58fdd24a --- /dev/null +++ b/src/VBox/Runtime/common/math/atan.asm @@ -0,0 +1,77 @@ +; $Id: atan.asm $ +;; @file +; IPRT - No-CRT atan - AMD64 & X86. 
+; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; Arctangent (partial). 
+; +; @returns st(0) / xmm0 +; @param rd [rbp + 8] / xmm0 +RT_NOCRT_BEGINPROC atan + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +%ifdef RT_ARCH_AMD64 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h +%endif + SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + movsd [xSP], xmm0 + fld qword [xSP] +%else + fld qword [xBP + xCB*2] +%endif + fld1 + + fpatan + +%ifdef RT_ARCH_AMD64 + fstp qword [xSP] + movsd xmm0, [xSP] +%endif + leave + ret +ENDPROC RT_NOCRT(atan) + diff --git a/src/VBox/Runtime/common/math/atan2.asm b/src/VBox/Runtime/common/math/atan2.asm new file mode 100644 index 00000000..9f01f7e2 --- /dev/null +++ b/src/VBox/Runtime/common/math/atan2.asm @@ -0,0 +1,72 @@ +; $Id: atan2.asm $ +;; @file +; IPRT - No-CRT atan2 - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. 
+; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Two-argument arctangent (partial), computed with the x87 fpatan instruction. +; +; @returns st(0) / xmm0 +; @param rdY [xBP + xCB*2] / xmm0 +; @param rdX [xBP + xCB*2 + 8] / xmm1 +RT_NOCRT_BEGINPROC atan2 + push xBP + mov xBP, xSP + +%ifdef RT_ARCH_AMD64 + sub xSP, 20h + movsd [xSP + 10h], xmm1 + movsd [xSP], xmm0 + fld qword [xSP] + fld qword [xSP + 10h] +%else + fld qword [xBP + xCB*2] + fld qword [xBP + xCB*2 + 8] +%endif + + fpatan + +%ifdef RT_ARCH_AMD64 + fstp qword [xSP] + movsd xmm0, [xSP] +%endif + leave + ret +ENDPROC RT_NOCRT(atan2) + diff --git a/src/VBox/Runtime/common/math/atan2f.asm b/src/VBox/Runtime/common/math/atan2f.asm new file mode 100644 index 00000000..6245cbba --- /dev/null +++ b/src/VBox/Runtime/common/math/atan2f.asm @@ -0,0 +1,72 @@ +; $Id: atan2f.asm $ +;; @file +; IPRT - No-CRT atan2f - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. 
+; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Two-argument arctangent (partial), single precision, computed with the x87 fpatan instruction. +; +; @returns st(0) / xmm0 +; @param r32Y [xBP + xCB*2] / xmm0 +; @param r32X [xBP + xCB*2 + 4] / xmm1 +RT_NOCRT_BEGINPROC atan2f + push xBP + mov xBP, xSP + +%ifdef RT_ARCH_AMD64 + sub xSP, 20h + movss [xSP + 10h], xmm1 + movss [xSP], xmm0 + fld dword [xSP] + fld dword [xSP + 10h] +%else + fld dword [xBP + xCB*2] + fld dword [xBP + xCB*2 + 4] +%endif + + fpatan + +%ifdef RT_ARCH_AMD64 + fstp dword [xSP] + movss xmm0, [xSP] +%endif + leave + ret +ENDPROC RT_NOCRT(atan2f) + diff --git a/src/VBox/Runtime/common/math/atanf.asm b/src/VBox/Runtime/common/math/atanf.asm new file mode 100644 index 00000000..7b0862ae --- /dev/null +++ b/src/VBox/Runtime/common/math/atanf.asm @@ -0,0 +1,77 @@ +; $Id: atanf.asm $ +;; @file +; IPRT - No-CRT atanf - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. 
+; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; Arctangent (partial). +; +; @returns st(0) / xmm0 +; @param r32 [rbp + 8] / xmm0 +RT_NOCRT_BEGINPROC atanf + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +%ifdef RT_ARCH_AMD64 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h +%endif + SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + movss [xSP], xmm0 + fld dword [xSP] +%else + fld dword [xBP + xCB*2] +%endif + fld1 + + fpatan + +%ifdef RT_ARCH_AMD64 + fstp dword [xSP] + movss xmm0, [xSP] +%endif + leave + ret +ENDPROC RT_NOCRT(atanf) + diff --git a/src/VBox/Runtime/common/math/bignum-amd64-x86.asm b/src/VBox/Runtime/common/math/bignum-amd64-x86.asm new file mode 100644 index 00000000..32ea98aa --- /dev/null +++ b/src/VBox/Runtime/common/math/bignum-amd64-x86.asm @@ -0,0 +1,891 @@ +; $Id: bignum-amd64-x86.asm $ +;; @file +; IPRT - Big Integer Numbers, AMD64 and X86 Assembly Workers +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. 
+; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +;********************************************************************************************************************************* +;* Header Files * +;********************************************************************************************************************************* +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "internal/bignum.mac" + + +;********************************************************************************************************************************* +;* Defined Constants And Macros * +;********************************************************************************************************************************* +%ifdef RT_ARCH_AMD64 + %macro sahf 0 + %error "SAHF not supported on ancient AMD64" + %endmacro + %macro lahf 0 + %error "LAHF not supported on ancient AMD64" + %endmacro +%endif + + +BEGINCODE + +;; +; Subtracts a number (pauSubtrahend) from a larger number (pauMinuend) and +; stores the result in pauResult. 
+; +; All three numbers are zero padded such that a borrow can be carried one (or +; two for 64-bit) elements beyond the end of the largest number. +; +; @returns nothing. +; @param pauResult x86:[ebp + 8] gcc:rdi msc:rcx +; @param pauMinuend x86:[ebp + 12] gcc:rsi msc:rdx +; @param pauSubtrahend x86:[ebp + 16] gcc:rdx msc:r8 +; @param cUsed x86:[ebp + 20] gcc:rcx msc:r9 +; +BEGINPROC rtBigNumMagnitudeSubAssemblyWorker + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define pauResult rdi + %define pauMinuend rsi + %define pauSubtrahend rdx + %define cUsed ecx + %else + %define pauResult rcx + %define pauMinuend rdx + %define pauSubtrahend r8 + %define cUsed r9d + %endif + xor r11d, r11d ; index register. + + %if RTBIGNUM_ELEMENT_SIZE == 4 + add cUsed, 1 ; cUsed = RT_ALIGN(cUsed, 2) / 2 + shr cUsed, 1 + %endif + cmp cUsed, 8 ; Skip the big loop if small number. + jb .small_job + + mov r10d, cUsed + shr r10d, 3 + clc +.big_loop: + mov rax, [pauMinuend + r11] + sbb rax, [pauSubtrahend + r11] + mov [pauResult + r11], rax + mov rax, [pauMinuend + r11 + 8] + sbb rax, [pauSubtrahend + r11 + 8] + mov [pauResult + r11 + 8], rax + mov rax, [pauMinuend + r11 + 16] + sbb rax, [pauSubtrahend + r11 + 16] + mov [pauResult + r11 + 16], rax + mov rax, [pauMinuend + r11 + 24] + sbb rax, [pauSubtrahend + r11 + 24] + mov [pauResult + r11 + 24], rax + mov rax, [pauMinuend + r11 + 32] + sbb rax, [pauSubtrahend + r11 + 32] + mov [pauResult + r11 + 32], rax + mov rax, [pauMinuend + r11 + 40] + sbb rax, [pauSubtrahend + r11 + 40] + mov [pauResult + r11 + 40], rax + mov rax, [pauMinuend + r11 + 48] + sbb rax, [pauSubtrahend + r11 + 48] + mov [pauResult + r11 + 48], rax + mov rax, [pauMinuend + r11 + 56] + sbb rax, [pauSubtrahend + r11 + 56] + mov [pauResult + r11 + 56], rax + lea r11, [r11 + 64] + dec r10d ; Does not change CF. 
+ jnz .big_loop + + %if 0 ; Ancient AMD64 CPUs do not have lahf/sahf, thus the mess in the %else. + lahf ; Save CF + and cUsed, 7 ; Up to seven odd rounds. + jz .done + sahf ; Restore CF. + jmp .small_loop ; Skip the clc at .small_job, CF must survive. + %else + jnc .no_carry + and cUsed, 7 ; Up to seven odd rounds. + jz .done + stc + jmp .small_loop ; Skip the clc at .small_job, CF must survive. +.no_carry: + and cUsed, 7 ; Up to seven odd rounds. + jz .done + %endif +.small_job: + clc +.small_loop: + mov rax, [pauMinuend + r11] + sbb rax, [pauSubtrahend + r11] + mov [pauResult + r11], rax + lea r11, [r11 + 8] + dec cUsed ; does not change CF. + jnz .small_loop + %ifdef RT_STRICT + jnc .done + int3 + %endif +.done: + +%elifdef RT_ARCH_X86 + push edi + push esi + push ebx + + mov edi, [ebp + 08h] ; pauResult + %define pauResult edi + mov ecx, [ebp + 0ch] ; pauMinuend + %define pauMinuend ecx + mov edx, [ebp + 10h] ; pauSubtrahend + %define pauSubtrahend edx + mov esi, [ebp + 14h] ; cUsed + %define cUsed esi + + xor ebx, ebx ; index register. + + cmp cUsed, 8 ; Skip the big loop if small number. + jb .small_job + + shr cUsed, 3 + clc +.big_loop: + mov eax, [pauMinuend + ebx] + sbb eax, [pauSubtrahend + ebx] + mov [pauResult + ebx], eax + mov eax, [pauMinuend + ebx + 4] + sbb eax, [pauSubtrahend + ebx + 4] + mov [pauResult + ebx + 4], eax + mov eax, [pauMinuend + ebx + 8] + sbb eax, [pauSubtrahend + ebx + 8] + mov [pauResult + ebx + 8], eax + mov eax, [pauMinuend + ebx + 12] + sbb eax, [pauSubtrahend + ebx + 12] + mov [pauResult + ebx + 12], eax + mov eax, [pauMinuend + ebx + 16] + sbb eax, [pauSubtrahend + ebx + 16] + mov [pauResult + ebx + 16], eax + mov eax, [pauMinuend + ebx + 20] + sbb eax, [pauSubtrahend + ebx + 20] + mov [pauResult + ebx + 20], eax + mov eax, [pauMinuend + ebx + 24] + sbb eax, [pauSubtrahend + ebx + 24] + mov [pauResult + ebx + 24], eax + mov eax, [pauMinuend + ebx + 28] + sbb eax, [pauSubtrahend + ebx + 28] + mov [pauResult + ebx + 28], eax + lea ebx, [ebx + 32] + dec cUsed ; Does not change CF. 
+ jnz .big_loop + + lahf ; Save CF + mov cUsed, [ebp + 14h] ; Up to seven odd rounds. + and cUsed, 7 + jz .done + sahf ; Restore CF. + jmp .small_loop ; Skip the clc at .small_job, CF must survive. + +.small_job: + clc +.small_loop: + mov eax, [pauMinuend + ebx] + sbb eax, [pauSubtrahend + ebx] + mov [pauResult + ebx], eax + lea ebx, [ebx + 4] + dec cUsed ; Does not change CF + jnz .small_loop + %ifdef RT_STRICT + jnc .done + int3 + %endif +.done: + + pop ebx + pop esi + pop edi +%else + %error "Unsupported arch" +%endif + + leave + ret +%undef pauResult +%undef pauMinuend +%undef pauSubtrahend +%undef cUsed +ENDPROC rtBigNumMagnitudeSubAssemblyWorker + + + +;; +; Subtracts a number (pauSubtrahend) from a larger number (pauResultMinuend), +; storing the result in place in pauResultMinuend. +; +; Both numbers are zero padded such that a borrow can be carried one (or +; two for 64-bit) elements beyond the end of the largest number. +; +; @returns nothing. +; @param pauResultMinuend x86:[ebp + 8] gcc:rdi msc:rcx +; @param pauSubtrahend x86:[ebp + 12] gcc:rsi msc:rdx +; @param cUsed x86:[ebp + 16] gcc:rdx msc:r8 +; +BEGINPROC rtBigNumMagnitudeSubThisAssemblyWorker + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define pauResultMinuend rdi + %define pauSubtrahend rsi + %define cUsed edx + %else + %define pauResultMinuend rcx + %define pauSubtrahend rdx + %define cUsed r8d + %endif + xor r11d, r11d ; index register. + + %if RTBIGNUM_ELEMENT_SIZE == 4 + add cUsed, 1 ; cUsed = RT_ALIGN(cUsed, 2) / 2 + shr cUsed, 1 + %endif + cmp cUsed, 8 ; Skip the big loop if small number. 
+ jb .small_job + + mov r10d, cUsed + shr r10d, 3 + clc +.big_loop: + mov rax, [pauSubtrahend + r11] + sbb [pauResultMinuend + r11], rax + mov rax, [pauSubtrahend + r11 + 8] + sbb [pauResultMinuend + r11 + 8], rax + mov rax, [pauSubtrahend + r11 + 16] + sbb [pauResultMinuend + r11 + 16], rax + mov rax, [pauSubtrahend + r11 + 24] + sbb [pauResultMinuend + r11 + 24], rax + mov rax, [pauSubtrahend + r11 + 32] + sbb [pauResultMinuend + r11 + 32], rax + mov rax, [pauSubtrahend + r11 + 40] + sbb [pauResultMinuend + r11 + 40], rax + mov rax, [pauSubtrahend + r11 + 48] + sbb [pauResultMinuend + r11 + 48], rax + mov rax, [pauSubtrahend + r11 + 56] + sbb [pauResultMinuend + r11 + 56], rax + lea r11, [r11 + 64] + dec r10d ; Does not change CF. + jnz .big_loop + + %if 0 ; Ancient AMD64 CPUs do not have lahf/sahf, thus the mess in the %else. + lahf ; Save CF + and cUsed, 7 ; Up to seven odd rounds. + jz .done + sahf ; Restore CF. + jmp .small_loop ; Skip the clc at .small_job, CF must survive. + %else + jnc .no_carry + and cUsed, 7 ; Up to seven odd rounds. + jz .done + stc + jmp .small_loop ; Skip the clc at .small_job, CF must survive. +.no_carry: + and cUsed, 7 ; Up to seven odd rounds. + jz .done + %endif +.small_job: + clc +.small_loop: + mov rax, [pauSubtrahend + r11] + sbb [pauResultMinuend + r11], rax + lea r11, [r11 + 8] + dec cUsed ; does not change CF. + jnz .small_loop + %ifdef RT_STRICT + jnc .done + int3 + %endif +.done: + +%elifdef RT_ARCH_X86 + push edi + push ebx + + mov edi, [ebp + 08h] ; pauResultMinuend + %define pauResultMinuend edi + mov edx, [ebp + 0ch] ; pauSubtrahend + %define pauSubtrahend edx + mov ecx, [ebp + 10h] ; cUsed + %define cUsed ecx + + xor ebx, ebx ; index register. + + cmp cUsed, 8 ; Skip the big loop if small number. 
+ jb .small_job + + shr cUsed, 3 + clc +.big_loop: + mov eax, [pauSubtrahend + ebx] + sbb [pauResultMinuend + ebx], eax + mov eax, [pauSubtrahend + ebx + 4] + sbb [pauResultMinuend + ebx + 4], eax + mov eax, [pauSubtrahend + ebx + 8] + sbb [pauResultMinuend + ebx + 8], eax + mov eax, [pauSubtrahend + ebx + 12] + sbb [pauResultMinuend + ebx + 12], eax + mov eax, [pauSubtrahend + ebx + 16] + sbb [pauResultMinuend + ebx + 16], eax + mov eax, [pauSubtrahend + ebx + 20] + sbb [pauResultMinuend + ebx + 20], eax + mov eax, [pauSubtrahend + ebx + 24] + sbb [pauResultMinuend + ebx + 24], eax + mov eax, [pauSubtrahend + ebx + 28] + sbb [pauResultMinuend + ebx + 28], eax + lea ebx, [ebx + 32] + dec cUsed ; Does not change CF. + jnz .big_loop + + lahf ; Save CF + mov cUsed, [ebp + 10h] ; Up to seven odd rounds. + and cUsed, 7 + jz .done + sahf ; Restore CF. + jmp .small_loop ; Skip the clc at .small_job, CF must survive. + +.small_job: + clc +.small_loop: + mov eax, [pauSubtrahend + ebx] + sbb [pauResultMinuend + ebx], eax + lea ebx, [ebx + 4] + dec cUsed ; Does not change CF + jnz .small_loop + %ifdef RT_STRICT + jnc .done + int3 + %endif +.done: + + pop ebx + pop edi +%else + %error "Unsupported arch" +%endif + + leave + ret +ENDPROC rtBigNumMagnitudeSubThisAssemblyWorker + + +;; +; Shifts an element array one bit to the left, returning the final carry value. +; +; On 64-bit hosts the array is always zero padded to a multiple of 8 bytes, so +; we can use 64-bit operand sizes even if the element type is 32-bit. +; +; @returns The final carry value. 
+; @param pauElements x86:[ebp + 8] gcc:rdi msc:rcx +; @param cUsed x86:[ebp + 12] gcc:rsi msc:rdx +; @param uCarry x86:[ebp + 16] gcc:rdx msc:r8 +; +BEGINPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %define pauElements rdi + %define cUsed esi + %define uCarry edx + %else + %define pauElements rcx + %define cUsed edx + %define uCarry r8d + %endif +%elifdef RT_ARCH_X86 + %define pauElements ecx + mov pauElements, [ebp + 08h] + %define cUsed edx + mov cUsed, [ebp + 0ch] + %define uCarry eax + mov uCarry, [ebp + 10h] +%else + %error "Unsupported arch." +%endif + ; Lots to do? + cmp cUsed, 8 + jae .big_loop_init + + ; Check for empty array. + test cUsed, cUsed + jz .no_elements + jmp .small_loop_init + + ; Big loop - 8 unrolled loop iterations. +.big_loop_init: +%ifdef RT_ARCH_AMD64 + mov r11d, cUsed +%endif + shr cUsed, 3 + test uCarry, uCarry ; clear the carry flag + jz .big_loop + stc +.big_loop: +%if RTBIGNUM_ELEMENT_SIZE == 8 + rcl qword [pauElements], 1 + rcl qword [pauElements + 8], 1 + rcl qword [pauElements + 16], 1 + rcl qword [pauElements + 24], 1 + rcl qword [pauElements + 32], 1 + rcl qword [pauElements + 40], 1 + rcl qword [pauElements + 48], 1 + rcl qword [pauElements + 56], 1 + lea pauElements, [pauElements + 64] +%else + rcl dword [pauElements], 1 + rcl dword [pauElements + 4], 1 + rcl dword [pauElements + 8], 1 + rcl dword [pauElements + 12], 1 + rcl dword [pauElements + 16], 1 + rcl dword [pauElements + 20], 1 + rcl dword [pauElements + 24], 1 + rcl dword [pauElements + 28], 1 + lea pauElements, [pauElements + 32] +%endif + dec cUsed + jnz .big_loop + + ; More to do? + pushf ; save carry flag (uCarry no longer used on x86). +%ifdef RT_ARCH_AMD64 + mov cUsed, r11d +%else + mov cUsed, [ebp + 0ch] +%endif + and cUsed, 7 + jz .restore_cf_and_return ; Jump if we're good and done. + popf ; Restore CF. 
+ jmp .small_loop ; Deal with the odd rounds. +.restore_cf_and_return: + popf + jmp .carry_to_eax + + ; Small loop - One round at the time. +.small_loop_init: + test uCarry, uCarry ; clear the carry flag + jz .small_loop + stc +.small_loop: +%if RTBIGNUM_ELEMENT_SIZE == 8 + rcl qword [pauElements], 1 + lea pauElements, [pauElements + 8] +%else + rcl dword [pauElements], 1 + lea pauElements, [pauElements + 4] +%endif + dec cUsed + jnz .small_loop + + ; Calculate return value. +.carry_to_eax: + mov eax, 0 ; mov rather than xor: CF must survive until the jnc below. + jnc .return + inc eax +.return: + leave + ret + +.no_elements: + mov eax, uCarry + jmp .return +ENDPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker + + +;; +; Performs a 128-bit by 64-bit division on 64-bit and +; a 64-bit by 32-bit division on 32-bit. +; +; @returns nothing. +; @param puQuotient x86:[ebp + 8] gcc:rdi msc:rcx Double element. +; @param puRemainder x86:[ebp + 12] gcc:rsi msc:rdx Normal element. +; @param uDividendHi x86:[ebp + 16] gcc:rdx msc:r8 +; @param uDividendLo x86:[ebp + 20] gcc:rcx msc:r9 +; @param uDivisor x86:[ebp + 24] gcc:r8 msc:[rbp + 30h] +; +BEGINPROC rtBigNumElement2xDiv2xBy1x + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %if RTBIGNUM_ELEMENT_SIZE == 4 + %error "sorry not implemented yet." + sorry not implemented yet. 
+ %endif + + %define uDividendHi rdx + %define uDividendLo rax + %ifdef ASM_CALL64_GCC + %define uDivisor r8 + %define puQuotient rdi + %define puRemainder rsi + mov rax, rcx + %else + %define puQuotient rcx + %define puRemainder r11 + %define uDivisor r10 + mov r11, rdx + mov r10, [rbp + 30h] + mov rdx, r8 + mov rax, r9 + %endif + +%elifdef RT_ARCH_X86 + push edi + push ebx + + %define uDividendHi edx + mov uDividendHi, [ebp + 10h] + %define uDividendLo eax + mov uDividendLo, [ebp + 14h] + %define uDivisor ecx + mov uDivisor, [ebp + 18h] + %define puQuotient edi + mov puQuotient, [ebp + 08h] + %define puRemainder ebx + mov puRemainder, [ebp + 0ch] +%else + %error "Unsupported arch." +%endif + +%ifdef RT_STRICT + ; + ; The divisor shall not be zero. + ; + test uDivisor, uDivisor + jnz .divisor_not_zero + int3 +.divisor_not_zero: +%endif + + ; + ; Avoid division overflow. This will calculate the high part of the quotient. + ; + mov RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], 0 + cmp uDividendHi, uDivisor + jb .do_divide + push xAX + mov xAX, xDX + xor edx, edx + div uDivisor ; xAX = high quotient, xDX = uDividendHi % uDivisor (now below the divisor) + mov RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], xAX + pop xAX + + ; + ; Perform the division and store the result. + ; +.do_divide: + div uDivisor + mov RTBIGNUM_ELEMENT_PRE [puQuotient], xAX + mov RTBIGNUM_ELEMENT_PRE [puRemainder], xDX + + +%ifdef RT_ARCH_X86 + pop ebx + pop edi +%endif + leave + ret +ENDPROC rtBigNumElement2xDiv2xBy1x + + +;; +; Performs the core of long multiplication. +; +; @returns nothing. +; @param pauResult x86:[ebp + 8] gcc:rdi msc:rcx Initialized to zero. 
+; @param pauMultiplier x86:[ebp + 12] gcc:rsi msc:rdx +; @param cMultiplier x86:[ebp + 16] gcc:rdx msc:r8 +; @param pauMultiplicand x86:[ebp + 20] gcc:rcx msc:r9 +; @param cMultiplicand x86:[ebp + 24] gcc:r8 msc:[rbp + 30h] +; +BEGINPROC rtBigNumMagnitudeMultiplyAssemblyWorker + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %if RTBIGNUM_ELEMENT_SIZE == 4 + %error "sorry not implemented yet." + sorry not implemented yet. + %endif + + %ifdef ASM_CALL64_GCC + %define pauResult rdi + %define pauMultiplier rsi + %define cMultiplier r9 + %define pauMultiplicand rcx + %define cMultiplicand r8 + mov r9d, edx ; cMultiplier + mov r8d, r8d ; cMultiplicand - paranoia + %define uMultiplier r10 + %define iMultiplicand r11 + %else + %define pauResult rcx + %define pauMultiplier r11 + %define cMultiplier r8 + %define pauMultiplicand r9 + %define cMultiplicand r10 + mov pauMultiplier, rdx + mov r10d, dword [rbp + 30h] ; cMultiplicand + mov r8d, r8d ; cMultiplier - paranoia + %define uMultiplier r12 + push r12 + %define iMultiplicand r13 + push r13 + %endif + +%elifdef RT_ARCH_X86 + push edi + push esi + push ebx + sub esp, 10h + %define pauResult edi + mov pauResult, [ebp + 08h] + %define pauMultiplier dword [ebp + 0ch] + %define cMultiplier dword [ebp + 10h] + %define pauMultiplicand ecx + mov pauMultiplicand, [ebp + 14h] + %define cMultiplicand dword [ebp + 18h] + %define uMultiplier dword [ebp - 10h] + %define iMultiplicand ebx + +%else + %error "Unsupported arch." +%endif + + ; + ; Check that the multiplicand isn't empty (avoids an extra jump in the inner loop). + ; + cmp cMultiplicand, 0 + je .done + + ; + ; Loop thru each element in the multiplier. 
+ ; + ; while (cMultiplier-- > 0) +.multiplier_loop: + cmp cMultiplier, 0 + jz .done + dec cMultiplier + + ; uMultiplier = *pauMultiplier +%ifdef RT_ARCH_X86 + mov edx, pauMultiplier + mov eax, [edx] + mov uMultiplier, eax +%else + mov uMultiplier, [pauMultiplier] +%endif + ; for (iMultiplicand = 0; iMultiplicand < cMultiplicand; iMultiplicand++) + xor iMultiplicand, iMultiplicand +.multiplicand_loop: + mov xAX, [pauMultiplicand + iMultiplicand * RTBIGNUM_ELEMENT_SIZE] + mul uMultiplier + add [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE], xAX + adc [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE + RTBIGNUM_ELEMENT_SIZE], xDX + jnc .next_multiplicand + lea xDX, [iMultiplicand + 2] +.next_adc: + adc RTBIGNUM_ELEMENT_PRE [pauResult + xDX * RTBIGNUM_ELEMENT_SIZE], 0 + inc xDX + jc .next_adc + +.next_multiplicand: + inc iMultiplicand ; iMultiplicand++ + cmp iMultiplicand, cMultiplicand ; iMultiplicand < cMultiplicand + jb .multiplicand_loop + + ; Advance and loop on multiplier. + add pauMultiplier, RTBIGNUM_ELEMENT_SIZE + add pauResult, RTBIGNUM_ELEMENT_SIZE + jmp .multiplier_loop + +.done: + +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + %else + pop r13 + pop r12 + %endif +%elifdef RT_ARCH_X86 + add esp, 10h + pop ebx + pop esi + pop edi +%endif + leave + ret +ENDPROC rtBigNumMagnitudeMultiplyAssemblyWorker + +;; +; Assembly implementation of the D4 step of Knuth's division algorithm. +; +; This subtracts Divisor * Qhat from the dividend at the current J index. +; +; @returns true if negative result (unlikely), false if positive. +; @param pauDividendJ x86:[ebp + 8] gcc:rdi msc:rcx Initialized to zero. 
+; @param pauDivisor x86:[ebp + 12] gcc:rsi msc:rdx +; @param cDivisor x86:[ebp + 16] gcc:edx msc:r8d +; @param uQhat x86:[ebp + 20] gcc:rcx msc:r9 +; +BEGINPROC rtBigNumKnuthD4_MulSub + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + %if RTBIGNUM_ELEMENT_SIZE == 4 + %error "sorry not implemented yet." + sorry not implemented yet. + %endif + + %ifdef ASM_CALL64_GCC + %define pauDividendJ rdi + %define pauDivisor rsi + %define cDivisor r8 + %define uQhat rcx + mov r8d, edx ; cDivisor + %define uMulCarry r11 + %else + %define pauDividendJ rcx + %define pauDivisor r10 + %define cDivisor r8 + %define uQhat r9 + mov r10, rdx ; pauDivisor + mov r8d, r8d ; cDivisor - paranoia + %define uMulCarry r11 + %endif + +%elifdef RT_ARCH_X86 + push edi + push esi + push ebx + %define pauDividendJ edi + mov pauDividendJ, [ebp + 08h] + %define pauDivisor esi + mov pauDivisor, [ebp + 0ch] + %define cDivisor ecx + mov cDivisor, [ebp + 10h] + %define uQhat dword [ebp + 14h] + %define uMulCarry ebx +%else + %error "Unsupported arch." +%endif + +%ifdef RT_STRICT + ; + ; Some sanity checks. + ; + cmp cDivisor, 0 + jne .cDivisor_not_zero + int3 +.cDivisor_not_zero: +%endif + + ; + ; Initialize the loop. + ; + xor uMulCarry, uMulCarry + + ; + ; do ... while (--cDivisor > 0); + ; +.the_loop: + ; RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]); + mov xAX, uQhat + mul RTBIGNUM_ELEMENT_PRE [pauDivisor] + ; RTUInt128AssignAddU64(&uSub, uMulCarry); + add xAX, uMulCarry + adc xDX, 0 + mov uMulCarry, xDX + ; Subtract uSub.s.Lo+fCarry from pauDividendJ[i] + sub [pauDividendJ], xAX + adc uMulCarry, 0 +%ifdef RT_STRICT + jnc .uMulCarry_did_not_overflow + int3 +.uMulCarry_did_not_overflow: +%endif + + ; Advance. 
+ add pauDividendJ, RTBIGNUM_ELEMENT_SIZE + add pauDivisor, RTBIGNUM_ELEMENT_SIZE + dec cDivisor + jnz .the_loop + + ; + ; Final dividend element (no corresponding divisor element). 
+ ; + sub [pauDividendJ], uMulCarry + sbb eax, eax + and eax, 1 + +.done: +%ifdef RT_ARCH_AMD64 +%elifdef RT_ARCH_X86 + pop ebx + pop esi + pop edi +%endif + leave + ret +ENDPROC rtBigNumKnuthD4_MulSub + diff --git a/src/VBox/Runtime/common/math/bignum.cpp b/src/VBox/Runtime/common/math/bignum.cpp new file mode 100644 index 00000000..efa67644 --- /dev/null +++ b/src/VBox/Runtime/common/math/bignum.cpp @@ -0,0 +1,2877 @@ +/* $Id: bignum.cpp $ */ +/** @file + * IPRT - Big Integer Numbers. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +/*#ifdef IN_RING3 +# define RTMEM_WRAP_TO_EF_APIS +#endif*/ +#include "internal/iprt.h" +#include <iprt/bignum.h> + +#include <iprt/asm.h> +#include <iprt/asm-math.h> +#include <iprt/err.h> +#include <iprt/mem.h> +#include <iprt/memsafer.h> +#include <iprt/string.h> +#if RTBIGNUM_ELEMENT_BITS == 64 +# include <iprt/uint128.h> +#endif + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Allocation alignment in elements. */ +#ifndef RTMEM_WRAP_TO_EF_APIS +# define RTBIGNUM_ALIGNMENT 4U +#else +# define RTBIGNUM_ALIGNMENT 1U +#endif + +/** The max size (in bytes) of an elements array. */ +#define RTBIGNUM_MAX_SIZE _4M + + +/** Assert the validity of a big number structure pointer in strict builds. */ +#ifdef RT_STRICT +# define RTBIGNUM_ASSERT_VALID(a_pBigNum) \ + do { \ + AssertPtr(a_pBigNum); \ + Assert(!(a_pBigNum)->fCurScrambled); \ + Assert( (a_pBigNum)->cUsed == (a_pBigNum)->cAllocated \ + || ASMMemIsZero(&(a_pBigNum)->pauElements[(a_pBigNum)->cUsed], \ + ((a_pBigNum)->cAllocated - (a_pBigNum)->cUsed) * RTBIGNUM_ELEMENT_SIZE)); \ + } while (0) +#else +# define RTBIGNUM_ASSERT_VALID(a_pBigNum) do {} while (0) +#endif + + +/** Enable assembly optimizations. */ +#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) +# define IPRT_BIGINT_WITH_ASM +#endif + + +/** @def RTBIGNUM_ZERO_ALIGN + * For calculating the rtBigNumEnsureExtraZeroElements argument from cUsed. 
+ * This has to do with 64-bit assembly instruction operating as RTBIGNUMELEMENT + * was 64-bit on some hosts. + */ +#if defined(IPRT_BIGINT_WITH_ASM) && ARCH_BITS == 64 && RTBIGNUM_ELEMENT_SIZE == 4 && defined(RT_LITTLE_ENDIAN) +# define RTBIGNUM_ZERO_ALIGN(a_cUsed) RT_ALIGN_32(a_cUsed, 2) +#elif defined(IPRT_BIGINT_WITH_ASM) +# define RTBIGNUM_ZERO_ALIGN(a_cUsed) (a_cUsed) +#else +# define RTBIGNUM_ZERO_ALIGN(a_cUsed) (a_cUsed) +#endif + +#define RTBIGNUMELEMENT_HALF_MASK ( ((RTBIGNUMELEMENT)1 << (RTBIGNUM_ELEMENT_BITS / 2)) - (RTBIGNUMELEMENT)1) +#define RTBIGNUMELEMENT_LO_HALF(a_uElement) ( (RTBIGNUMELEMENT_HALF_MASK) & (a_uElement) ) +#define RTBIGNUMELEMENT_HI_HALF(a_uElement) ( (a_uElement) >> (RTBIGNUM_ELEMENT_BITS / 2) ) + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** Type the size of two elements. 
*/ +#if RTBIGNUM_ELEMENT_BITS == 64 +typedef RTUINT128U RTBIGNUMELEMENT2X; +#else +typedef RTUINT64U RTBIGNUMELEMENT2X; +#endif + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +DECLINLINE(int) rtBigNumSetUsed(PRTBIGNUM pBigNum, uint32_t cNewUsed); + +#ifdef IPRT_BIGINT_WITH_ASM +/* bignum-amd64-x86.asm: */ +DECLASM(void) rtBigNumMagnitudeSubAssemblyWorker(RTBIGNUMELEMENT *pauResult, RTBIGNUMELEMENT const *pauMinuend, + RTBIGNUMELEMENT const *pauSubtrahend, uint32_t cUsed); +DECLASM(void) rtBigNumMagnitudeSubThisAssemblyWorker(RTBIGNUMELEMENT *pauMinuendResult, RTBIGNUMELEMENT const *pauSubtrahend, + uint32_t cUsed); +DECLASM(RTBIGNUMELEMENT) rtBigNumMagnitudeShiftLeftOneAssemblyWorker(RTBIGNUMELEMENT *pauElements, uint32_t cUsed, + RTBIGNUMELEMENT uCarry); +DECLASM(void) rtBigNumElement2xDiv2xBy1x(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT *puRemainder, + RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo, RTBIGNUMELEMENT uDivisor); +DECLASM(void) rtBigNumMagnitudeMultiplyAssemblyWorker(PRTBIGNUMELEMENT pauResult, + PCRTBIGNUMELEMENT pauMultiplier, uint32_t cMultiplier, + PCRTBIGNUMELEMENT pauMultiplicand, uint32_t cMultiplicand); +#endif + + + + + +/** @name Functions working on one element. + * @{ */ + +DECLINLINE(uint32_t) rtBigNumElementBitCount(RTBIGNUMELEMENT uElement) +{ +#if RTBIGNUM_ELEMENT_SIZE == 8 + if (uElement >> 32) + return ASMBitLastSetU32((uint32_t)(uElement >> 32)) + 32; + return ASMBitLastSetU32((uint32_t)uElement); +#elif RTBIGNUM_ELEMENT_SIZE == 4 + return ASMBitLastSetU32(uElement); +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif +} + + +/** + * Does addition with carry. + * + * This is a candidate for inline assembly on some platforms. 
+ * + * @returns The result (the sum) + * @param uAugend What to add to. + * @param uAddend What to add to it. + * @param pfCarry Where to read the input carry and return the output + * carry. + */ +DECLINLINE(RTBIGNUMELEMENT) rtBigNumElementAddWithCarry(RTBIGNUMELEMENT uAugend, RTBIGNUMELEMENT uAddend, + RTBIGNUMELEMENT *pfCarry) +{ + RTBIGNUMELEMENT uRet = uAugend + uAddend; + if (!*pfCarry) + *pfCarry = uRet < uAugend; + else + { + uRet += 1; + *pfCarry = uRet <= uAugend; + } + return uRet; +} + + +#if !defined(IPRT_BIGINT_WITH_ASM) || defined(RT_STRICT) +/** + * Does subtraction with borrow. + * + * This is a candidate for inline assembly on some platforms. + * + * @returns The result (the difference) + * @param uMinuend What to subtract from. + * @param uSubtrahend What to subtract. + * @param pfBorrow Where to read the input borrow and return the output + * borrow. + */ +DECLINLINE(RTBIGNUMELEMENT) rtBigNumElementSubWithBorrow(RTBIGNUMELEMENT uMinuend, RTBIGNUMELEMENT uSubtrahend, + RTBIGNUMELEMENT *pfBorrow) +{ + RTBIGNUMELEMENT uRet = uMinuend - uSubtrahend - *pfBorrow; + + /* Figure out if we borrowed. */ + *pfBorrow = !*pfBorrow ? uMinuend < uSubtrahend : uMinuend <= uSubtrahend; + return uRet; +} +#endif + +/** @} */ + + + + +/** @name Double element primitives. 
+ * @{ */ + +static int rtBigNumElement2xCopyToMagnitude(RTBIGNUMELEMENT2X const *pValue2x, PRTBIGNUM pDst) +{ + int rc; + if (pValue2x->s.Hi) + { + rc = rtBigNumSetUsed(pDst, 2); + if (RT_SUCCESS(rc)) + { + pDst->pauElements[0] = pValue2x->s.Lo; + pDst->pauElements[1] = pValue2x->s.Hi; + } + } + else if (pValue2x->s.Lo) + { + rc = rtBigNumSetUsed(pDst, 1); + if (RT_SUCCESS(rc)) + pDst->pauElements[0] = pValue2x->s.Lo; + } + else + rc = rtBigNumSetUsed(pDst, 0); + return rc; +} + +static void rtBigNumElement2xDiv(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT2X *puRemainder, + RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo, + RTBIGNUMELEMENT uDivisorHi, RTBIGNUMELEMENT uDivisorLo) +{ + RTBIGNUMELEMENT2X uDividend; + uDividend.s.Lo = uDividendLo; + uDividend.s.Hi = uDividendHi; + + RTBIGNUMELEMENT2X uDivisor; + uDivisor.s.Lo = uDivisorLo; + uDivisor.s.Hi = uDivisorHi; + +#if RTBIGNUM_ELEMENT_BITS == 64 + RTUInt128DivRem(puQuotient, puRemainder, &uDividend, &uDivisor); +#else + puQuotient->u = uDividend.u / uDivisor.u; + puRemainder->u = uDividend.u % uDivisor.u; +#endif +} + +#ifndef IPRT_BIGINT_WITH_ASM +static void rtBigNumElement2xDiv2xBy1x(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT *puRemainder, + RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo, RTBIGNUMELEMENT uDivisor) +{ + RTBIGNUMELEMENT2X uDividend; + uDividend.s.Lo = uDividendLo; + uDividend.s.Hi = uDividendHi; + +# if RTBIGNUM_ELEMENT_BITS == 64 + RTBIGNUMELEMENT2X uRemainder2x; + RTBIGNUMELEMENT2X uDivisor2x; + uDivisor2x.s.Hi = 0; + uDivisor2x.s.Lo = uDivisor; + /** @todo optimize this. 
*/ + RTUInt128DivRem(puQuotient, &uRemainder2x, &uDividend, &uDivisor2x); + *puRemainder = uRemainder2x.s.Lo; +# else + puQuotient->u = uDividend.u / uDivisor; + /* puRemainder points at a plain element, not a 2x union, so store through it directly. */ + *puRemainder = (RTBIGNUMELEMENT)(uDividend.u % uDivisor); +# endif +} +#endif + +DECLINLINE(void) rtBigNumElement2xDec(RTBIGNUMELEMENT2X *puValue) +{ +#if RTBIGNUM_ELEMENT_BITS == 64 + if (puValue->s.Lo-- == 0) + puValue->s.Hi--; +#else + puValue->u -= 1; +#endif +} + +#if 0 /* unused */ +DECLINLINE(void) rtBigNumElement2xAdd1x(RTBIGNUMELEMENT2X *puValue, RTBIGNUMELEMENT uAdd) +{ +#if RTBIGNUM_ELEMENT_BITS == 64 + RTUInt128AssignAddU64(puValue, uAdd); +#else + puValue->u += uAdd; +#endif +} +#endif /* unused */ + +/** @} */ + + + + + +/** + * Scrambles a big number if required. + * + * @param pBigNum The big number. + */ +DECLINLINE(void) rtBigNumScramble(PRTBIGNUM pBigNum) +{ + if (pBigNum->fSensitive) + { + AssertReturnVoid(!pBigNum->fCurScrambled); + if (pBigNum->pauElements) + { + int rc = RTMemSaferScramble(pBigNum->pauElements, pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); AssertRC(rc); + pBigNum->fCurScrambled = RT_SUCCESS(rc); + } + else + pBigNum->fCurScrambled = true; + } +} + + +/** + * Unscrambles a big number if required. + * + * @returns IPRT status code. + * @param pBigNum The big number. + */ +DECLINLINE(int) rtBigNumUnscramble(PRTBIGNUM pBigNum) +{ + if (pBigNum->fSensitive) + { + AssertReturn(pBigNum->fCurScrambled, VERR_INTERNAL_ERROR_2); + if (pBigNum->pauElements) + { + int rc = RTMemSaferUnscramble(pBigNum->pauElements, pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); AssertRC(rc); + pBigNum->fCurScrambled = !RT_SUCCESS(rc); + return rc; + } + else + pBigNum->fCurScrambled = false; + } + return VINF_SUCCESS; +} + + +/** + * Getter function for pauElements which extends the array to infinity. + * + * @returns The element value. + * @param pBigNum The big number. 
+ * @param iElement The element index. 
+ */ +DECLINLINE(RTBIGNUMELEMENT) rtBigNumGetElement(PCRTBIGNUM pBigNum, uint32_t iElement) +{ + if (iElement < pBigNum->cUsed) + return pBigNum->pauElements[iElement]; + return 0; +} + + +/** + * Grows the pauElements array so it can fit at least @a cNewUsed entries. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param cNewUsed The new cUsed value. + * @param cMinElements The minimum number of elements. + */ +static int rtBigNumGrow(PRTBIGNUM pBigNum, uint32_t cNewUsed, uint32_t cMinElements) +{ + Assert(cMinElements >= cNewUsed); + uint32_t const cbOld = pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE; + uint32_t const cNew = RT_ALIGN_32(cMinElements, RTBIGNUM_ALIGNMENT); + uint32_t const cbNew = cNew * RTBIGNUM_ELEMENT_SIZE; + Assert(cbNew > cbOld); + if (cbNew <= RTBIGNUM_MAX_SIZE && cbNew > cbOld) + { + void *pvNew; + if (pBigNum->fSensitive) + pvNew = RTMemSaferReallocZ(cbOld, pBigNum->pauElements, cbNew); + else + pvNew = RTMemRealloc(pBigNum->pauElements, cbNew); + if (RT_LIKELY(pvNew)) + { + if (cbNew > cbOld) + RT_BZERO((char *)pvNew + cbOld, cbNew - cbOld); + if (pBigNum->cUsed > cNewUsed) + RT_BZERO((RTBIGNUMELEMENT *)pvNew + cNewUsed, (pBigNum->cUsed - cNewUsed) * RTBIGNUM_ELEMENT_SIZE); + + pBigNum->pauElements = (RTBIGNUMELEMENT *)pvNew; + pBigNum->cUsed = cNewUsed; + pBigNum->cAllocated = cNew; + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; + } + return VERR_OUT_OF_RANGE; +} + + +/** + * Changes the cUsed member, growing the pauElements array if necessary. + * + * Any elements added to the array will be initialized to zero. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param cNewUsed The new cUsed value. 
+ */ +DECLINLINE(int) rtBigNumSetUsed(PRTBIGNUM pBigNum, uint32_t cNewUsed) +{ + if (pBigNum->cAllocated >= cNewUsed) + { + if (pBigNum->cUsed > cNewUsed) + RT_BZERO(&pBigNum->pauElements[cNewUsed], (pBigNum->cUsed - cNewUsed) * RTBIGNUM_ELEMENT_SIZE); +#ifdef RT_STRICT + else if (pBigNum->cUsed != cNewUsed) + Assert(ASMMemIsZero(&pBigNum->pauElements[pBigNum->cUsed], (cNewUsed - pBigNum->cUsed) * RTBIGNUM_ELEMENT_SIZE)); +#endif + pBigNum->cUsed = cNewUsed; + return VINF_SUCCESS; + } + return rtBigNumGrow(pBigNum, cNewUsed, cNewUsed); +} + + +/** + * Extended version of rtBigNumSetUsed that also allow specifying the number of + * zero elements required. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param cNewUsed The new cUsed value. + * @param cMinElements The minimum number of elements allocated. The + * difference between @a cNewUsed and @a cMinElements + * is initialized to zero because all free elements are + * zero. + */ +DECLINLINE(int) rtBigNumSetUsedEx(PRTBIGNUM pBigNum, uint32_t cNewUsed, uint32_t cMinElements) +{ + if (pBigNum->cAllocated >= cMinElements) + { + if (pBigNum->cUsed > cNewUsed) + RT_BZERO(&pBigNum->pauElements[cNewUsed], (pBigNum->cUsed - cNewUsed) * RTBIGNUM_ELEMENT_SIZE); +#ifdef RT_STRICT + else if (pBigNum->cUsed != cNewUsed) + Assert(ASMMemIsZero(&pBigNum->pauElements[pBigNum->cUsed], (cNewUsed - pBigNum->cUsed) * RTBIGNUM_ELEMENT_SIZE)); +#endif + pBigNum->cUsed = cNewUsed; + return VINF_SUCCESS; + } + return rtBigNumGrow(pBigNum, cNewUsed, cMinElements); +} + + +/** + * For ensuring zero padding of pauElements for sub/add with carry assembly + * operations. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param cElements The number of elements that must be in the elements + * array array, where those after pBigNum->cUsed must + * be zero. 
+ */ +DECLINLINE(int) rtBigNumEnsureExtraZeroElements(PRTBIGNUM pBigNum, uint32_t cElements) +{ + if (pBigNum->cAllocated >= cElements) + { + Assert( pBigNum->cAllocated == pBigNum->cUsed + || ASMMemIsZero(&pBigNum->pauElements[pBigNum->cUsed], + (pBigNum->cAllocated - pBigNum->cUsed) * RTBIGNUM_ELEMENT_SIZE)); + return VINF_SUCCESS; + } + return rtBigNumGrow(pBigNum, pBigNum->cUsed, cElements); +} + + +/** + * The slow part of rtBigNumEnsureElementPresent where we need to do actual zero + * extending. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param iElement The element we wish to access. + */ +static int rtBigNumEnsureElementPresentSlow(PRTBIGNUM pBigNum, uint32_t iElement) +{ + uint32_t const cOldUsed = pBigNum->cUsed; + int rc = rtBigNumSetUsed(pBigNum, iElement + 1); + if (RT_SUCCESS(rc)) + { + RT_BZERO(&pBigNum->pauElements[cOldUsed], (iElement + 1 - cOldUsed) * RTBIGNUM_ELEMENT_SIZE); + return VINF_SUCCESS; + } + return rc; +} + + +/** + * Zero extends the element array to make sure a the specified element index is + * accessible. + * + * This is typically used with bit operations and self modifying methods. Any + * new elements added will be initialized to zero. The caller is responsible + * for there not being any trailing zero elements. + * + * The number must be unscrambled. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param iElement The element we wish to access. + */ +DECLINLINE(int) rtBigNumEnsureElementPresent(PRTBIGNUM pBigNum, uint32_t iElement) +{ + if (iElement < pBigNum->cUsed) + return VINF_SUCCESS; + return rtBigNumEnsureElementPresentSlow(pBigNum, iElement); +} + + +/** + * Strips zero elements from the magnitude value. + * + * @param pBigNum The big number to strip. 
+ */ +static void rtBigNumStripTrailingZeros(PRTBIGNUM pBigNum) +{ + uint32_t i = pBigNum->cUsed; + while (i > 0 && pBigNum->pauElements[i - 1] == 0) + i--; + pBigNum->cUsed = i; +} + + +/** + * Initialize the big number to zero. + * + * @returns @a pBigNum + * @param pBigNum The big number. + * @param fFlags The flags. + * @internal + */ +DECLINLINE(PRTBIGNUM) rtBigNumInitZeroInternal(PRTBIGNUM pBigNum, uint32_t fFlags) +{ + RT_ZERO(*pBigNum); + pBigNum->fSensitive = RT_BOOL(fFlags & RTBIGNUMINIT_F_SENSITIVE); + return pBigNum; +} + + +/** + * Initialize the big number to zero from a template variable. + * + * @returns @a pBigNum + * @param pBigNum The big number. + * @param pTemplate The template big number. + * @internal + */ +DECLINLINE(PRTBIGNUM) rtBigNumInitZeroTemplate(PRTBIGNUM pBigNum, PCRTBIGNUM pTemplate) +{ + RT_ZERO(*pBigNum); + pBigNum->fSensitive = pTemplate->fSensitive; + return pBigNum; +} + + +RTDECL(int) RTBigNumInit(PRTBIGNUM pBigNum, uint32_t fFlags, void const *pvRaw, size_t cbRaw) +{ + /* + * Validate input. + */ + AssertPtrReturn(pBigNum, VERR_INVALID_POINTER); + AssertReturn(RT_BOOL(fFlags & RTBIGNUMINIT_F_ENDIAN_BIG) ^ RT_BOOL(fFlags & RTBIGNUMINIT_F_ENDIAN_LITTLE), + VERR_INVALID_PARAMETER); + AssertReturn(RT_BOOL(fFlags & RTBIGNUMINIT_F_UNSIGNED) ^ RT_BOOL(fFlags & RTBIGNUMINIT_F_SIGNED), VERR_INVALID_PARAMETER); + if (cbRaw) + AssertPtrReturn(pvRaw, VERR_INVALID_POINTER); + + /* + * Initalize the big number to zero. + */ + rtBigNumInitZeroInternal(pBigNum, fFlags); + + /* + * Strip the input and figure the sign flag. 
+ */ + uint8_t const *pb = (uint8_t const *)pvRaw; + if (cbRaw) + { + if (fFlags & RTBIGNUMINIT_F_ENDIAN_LITTLE) + { + if (fFlags & RTBIGNUMINIT_F_UNSIGNED) + { + while (cbRaw > 0 && pb[cbRaw - 1] == 0) + cbRaw--; + } + else + { + if (pb[cbRaw - 1] >> 7) + { + pBigNum->fNegative = 1; + while (cbRaw > 1 && pb[cbRaw - 1] == 0xff) + cbRaw--; + } + else + while (cbRaw > 0 && pb[cbRaw - 1] == 0) + cbRaw--; + } + } + else + { + if (fFlags & RTBIGNUMINIT_F_UNSIGNED) + { + while (cbRaw > 0 && *pb == 0) + pb++, cbRaw--; + } + else + { + if (*pb >> 7) + { + pBigNum->fNegative = 1; + while (cbRaw > 1 && *pb == 0xff) + pb++, cbRaw--; + } + else + while (cbRaw > 0 && *pb == 0) + pb++, cbRaw--; + } + } + } + + /* + * Allocate memory for the elements. + */ + size_t cbAligned = RT_ALIGN_Z(cbRaw, RTBIGNUM_ELEMENT_SIZE); + if (RT_UNLIKELY(cbAligned >= RTBIGNUM_MAX_SIZE)) + return VERR_OUT_OF_RANGE; + pBigNum->cUsed = (uint32_t)cbAligned / RTBIGNUM_ELEMENT_SIZE; + if (pBigNum->cUsed) + { + pBigNum->cAllocated = RT_ALIGN_32(pBigNum->cUsed, RTBIGNUM_ALIGNMENT); + if (pBigNum->fSensitive) + pBigNum->pauElements = (RTBIGNUMELEMENT *)RTMemSaferAllocZ(pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + else + pBigNum->pauElements = (RTBIGNUMELEMENT *)RTMemAlloc(pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + if (RT_UNLIKELY(!pBigNum->pauElements)) + return VERR_NO_MEMORY; + + /* + * Initialize the array. + */ + uint32_t i = 0; + if (fFlags & RTBIGNUMINIT_F_ENDIAN_LITTLE) + { + while (cbRaw >= RTBIGNUM_ELEMENT_SIZE) + { +#if RTBIGNUM_ELEMENT_SIZE == 8 + pBigNum->pauElements[i] = RT_MAKE_U64_FROM_U8(pb[0], pb[1], pb[2], pb[3], pb[4], pb[5], pb[6], pb[7]); +#elif RTBIGNUM_ELEMENT_SIZE == 4 + pBigNum->pauElements[i] = RT_MAKE_U32_FROM_U8(pb[0], pb[1], pb[2], pb[3]); +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + i++; + pb += RTBIGNUM_ELEMENT_SIZE; + cbRaw -= RTBIGNUM_ELEMENT_SIZE; + } + + if (cbRaw > 0) + { + RTBIGNUMELEMENT uLast = pBigNum->fNegative ? 
~(RTBIGNUMELEMENT)0 : 0; + switch (cbRaw) + { + default: AssertFailed(); +#if RTBIGNUM_ELEMENT_SIZE == 8 + RT_FALL_THRU(); + case 7: uLast = (uLast << 8) | pb[6]; RT_FALL_THRU(); + case 6: uLast = (uLast << 8) | pb[5]; RT_FALL_THRU(); + case 5: uLast = (uLast << 8) | pb[4]; RT_FALL_THRU(); + case 4: uLast = (uLast << 8) | pb[3]; +#endif + RT_FALL_THRU(); + case 3: uLast = (uLast << 8) | pb[2]; RT_FALL_THRU(); + case 2: uLast = (uLast << 8) | pb[1]; RT_FALL_THRU(); + case 1: uLast = (uLast << 8) | pb[0]; + } + pBigNum->pauElements[i] = uLast; + } + } + else + { + pb += cbRaw; + while (cbRaw >= RTBIGNUM_ELEMENT_SIZE) + { + pb -= RTBIGNUM_ELEMENT_SIZE; +#if RTBIGNUM_ELEMENT_SIZE == 8 + pBigNum->pauElements[i] = RT_MAKE_U64_FROM_U8(pb[7], pb[6], pb[5], pb[4], pb[3], pb[2], pb[1], pb[0]); +#elif RTBIGNUM_ELEMENT_SIZE == 4 + pBigNum->pauElements[i] = RT_MAKE_U32_FROM_U8(pb[3], pb[2], pb[1], pb[0]); +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + i++; + cbRaw -= RTBIGNUM_ELEMENT_SIZE; + } + + if (cbRaw > 0) + { + RTBIGNUMELEMENT uLast = pBigNum->fNegative ? ~(RTBIGNUMELEMENT)0 : 0; + pb -= cbRaw; + switch (cbRaw) + { + default: AssertFailed(); +#if RTBIGNUM_ELEMENT_SIZE == 8 + RT_FALL_THRU(); + case 7: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 6: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 5: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 4: uLast = (uLast << 8) | *pb++; +#endif + RT_FALL_THRU(); + case 3: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 2: uLast = (uLast << 8) | *pb++; RT_FALL_THRU(); + case 1: uLast = (uLast << 8) | *pb++; + } + pBigNum->pauElements[i] = uLast; + } + } + + /* + * If negative, negate it so we get a positive magnitude value in pauElements. + */ + if (pBigNum->fNegative) + { + pBigNum->pauElements[0] = 0U - pBigNum->pauElements[0]; + for (i = 1; i < pBigNum->cUsed; i++) + pBigNum->pauElements[i] = 0U - pBigNum->pauElements[i] - 1U; + } + + /* + * Clear unused elements. 
+ */ + if (pBigNum->cUsed != pBigNum->cAllocated) + { + RTBIGNUMELEMENT *puUnused = &pBigNum->pauElements[pBigNum->cUsed]; + AssertCompile(RTBIGNUM_ALIGNMENT <= 4); + switch (pBigNum->cAllocated - pBigNum->cUsed) + { + default: AssertFailed(); RT_FALL_THRU(); + case 3: *puUnused++ = 0; RT_FALL_THRU(); + case 2: *puUnused++ = 0; RT_FALL_THRU(); + case 1: *puUnused++ = 0; + } + } + RTBIGNUM_ASSERT_VALID(pBigNum); + } + + rtBigNumScramble(pBigNum); + return VINF_SUCCESS; +} + + +RTDECL(int) RTBigNumInitZero(PRTBIGNUM pBigNum, uint32_t fFlags) +{ + AssertReturn(!(fFlags & ~RTBIGNUMINIT_F_SENSITIVE), VERR_INVALID_PARAMETER); + AssertPtrReturn(pBigNum, VERR_INVALID_POINTER); + + rtBigNumInitZeroInternal(pBigNum, fFlags); + rtBigNumScramble(pBigNum); + return VINF_SUCCESS; +} + + +/** + * Internal clone function that assumes the caller takes care of scrambling. + * + * @returns IPRT status code. + * @param pBigNum The target number. + * @param pSrc The source number. + */ +static int rtBigNumCloneInternal(PRTBIGNUM pBigNum, PCRTBIGNUM pSrc) +{ + Assert(!pSrc->fCurScrambled); + int rc = VINF_SUCCESS; + + /* + * Copy over the data. + */ + RT_ZERO(*pBigNum); + pBigNum->fNegative = pSrc->fNegative; + pBigNum->fSensitive = pSrc->fSensitive; + pBigNum->cUsed = pSrc->cUsed; + if (pSrc->cUsed) + { + /* Duplicate the element array. 
*/ + pBigNum->cAllocated = RT_ALIGN_32(pBigNum->cUsed, RTBIGNUM_ALIGNMENT); + if (pBigNum->fSensitive) + pBigNum->pauElements = (RTBIGNUMELEMENT *)RTMemSaferAllocZ(pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + else + pBigNum->pauElements = (RTBIGNUMELEMENT *)RTMemAlloc(pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + if (RT_LIKELY(pBigNum->pauElements)) + { + memcpy(pBigNum->pauElements, pSrc->pauElements, pBigNum->cUsed * RTBIGNUM_ELEMENT_SIZE); + if (pBigNum->cUsed != pBigNum->cAllocated) + RT_BZERO(&pBigNum->pauElements[pBigNum->cUsed], (pBigNum->cAllocated - pBigNum->cUsed) * RTBIGNUM_ELEMENT_SIZE); + } + else + { + RT_ZERO(*pBigNum); + rc = VERR_NO_MEMORY; + } + } + return rc; +} + + +RTDECL(int) RTBigNumClone(PRTBIGNUM pBigNum, PCRTBIGNUM pSrc) +{ + int rc = rtBigNumUnscramble((PRTBIGNUM)pSrc); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pSrc); + rc = rtBigNumCloneInternal(pBigNum, pSrc); + if (RT_SUCCESS(rc)) + rtBigNumScramble(pBigNum); + rtBigNumScramble((PRTBIGNUM)pSrc); + } + return rc; +} + + +RTDECL(int) RTBigNumDestroy(PRTBIGNUM pBigNum) +{ + if (pBigNum) + { + if (pBigNum->pauElements) + { + Assert(pBigNum->cAllocated > 0); + if (!pBigNum->fSensitive) + RTMemFree(pBigNum->pauElements); + else + { + RTMemSaferFree(pBigNum->pauElements, pBigNum->cAllocated * RTBIGNUM_ELEMENT_SIZE); + RT_ZERO(*pBigNum); + } + pBigNum->pauElements = NULL; + } + } + return VINF_SUCCESS; +} + + +RTDECL(int) RTBigNumAssign(PRTBIGNUM pDst, PCRTBIGNUM pSrc) +{ + AssertReturn(pDst->fSensitive >= pSrc->fSensitive, VERR_BIGNUM_SENSITIVE_INPUT); + int rc = rtBigNumUnscramble(pDst); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pDst); + rc = rtBigNumUnscramble((PRTBIGNUM)pSrc); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pSrc); + if ( pDst->fSensitive == pSrc->fSensitive + || pDst->fSensitive) + { + if (pDst->cAllocated >= pSrc->cUsed) + { + if (pDst->cUsed > pSrc->cUsed) + RT_BZERO(&pDst->pauElements[pSrc->cUsed], (pDst->cUsed - pSrc->cUsed) * 
RTBIGNUM_ELEMENT_SIZE); + pDst->cUsed = pSrc->cUsed; + pDst->fNegative = pSrc->fNegative; + memcpy(pDst->pauElements, pSrc->pauElements, pSrc->cUsed * RTBIGNUM_ELEMENT_SIZE); + } + else + { + rc = rtBigNumGrow(pDst, pSrc->cUsed, pSrc->cUsed); + if (RT_SUCCESS(rc)) + { + pDst->fNegative = pSrc->fNegative; + memcpy(pDst->pauElements, pSrc->pauElements, pSrc->cUsed * RTBIGNUM_ELEMENT_SIZE); + } + } + } + else + rc = VERR_BIGNUM_SENSITIVE_INPUT; + rtBigNumScramble((PRTBIGNUM)pSrc); + } + rtBigNumScramble(pDst); + } + return rc; +} + + +/** + * Same as RTBigNumBitWidth, except that it ignore the signed bit. + * + * The number must be unscrambled. + * + * @returns The effective width of the magnitude, in bits. Returns 0 if the + * value is zero. + * @param pBigNum The bit number. + */ +static uint32_t rtBigNumMagnitudeBitWidth(PCRTBIGNUM pBigNum) +{ + uint32_t idxLast = pBigNum->cUsed; + if (idxLast) + { + idxLast--; + RTBIGNUMELEMENT uLast = pBigNum->pauElements[idxLast]; Assert(uLast); + return rtBigNumElementBitCount(uLast) + idxLast * RTBIGNUM_ELEMENT_BITS; + } + return 0; +} + + +RTDECL(uint32_t) RTBigNumBitWidth(PCRTBIGNUM pBigNum) +{ + uint32_t idxLast = pBigNum->cUsed; + if (idxLast) + { + idxLast--; + rtBigNumUnscramble((PRTBIGNUM)pBigNum); + RTBIGNUMELEMENT uLast = pBigNum->pauElements[idxLast]; Assert(uLast); + rtBigNumScramble((PRTBIGNUM)pBigNum); + return rtBigNumElementBitCount(uLast) + idxLast * RTBIGNUM_ELEMENT_BITS + pBigNum->fNegative; + } + return 0; +} + + +RTDECL(uint32_t) RTBigNumByteWidth(PCRTBIGNUM pBigNum) +{ + uint32_t cBits = RTBigNumBitWidth(pBigNum); + return (cBits + 7) / 8; +} + + +RTDECL(int) RTBigNumToBytesBigEndian(PCRTBIGNUM pBigNum, void *pvBuf, size_t cbWanted) +{ + AssertPtrReturn(pvBuf, VERR_INVALID_POINTER); + AssertReturn(cbWanted > 0, VERR_INVALID_PARAMETER); + + int rc = rtBigNumUnscramble((PRTBIGNUM)pBigNum); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pBigNum); + rc = VINF_SUCCESS; + if (pBigNum->cUsed != 0) + { + 
uint8_t *pbDst = (uint8_t *)pvBuf; + pbDst += cbWanted - 1; + for (uint32_t i = 0; i < pBigNum->cUsed; i++) + { + RTBIGNUMELEMENT uElement = pBigNum->pauElements[i]; + if (pBigNum->fNegative) + uElement = (RTBIGNUMELEMENT)0 - uElement - (i > 0); + if (cbWanted >= sizeof(uElement)) + { + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; +#if RTBIGNUM_ELEMENT_SIZE == 8 + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + *pbDst-- = (uint8_t)uElement; +#elif RTBIGNUM_ELEMENT_SIZE != 4 +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + cbWanted -= sizeof(uElement); + } + else + { + + uint32_t cBitsLeft = RTBIGNUM_ELEMENT_BITS; + while (cbWanted > 0) + { + *pbDst-- = (uint8_t)uElement; + uElement >>= 8; + cBitsLeft -= 8; + cbWanted--; + } + Assert(cBitsLeft > 0); Assert(cBitsLeft < RTBIGNUM_ELEMENT_BITS); + if ( i + 1 < pBigNum->cUsed + || ( !pBigNum->fNegative + ? uElement != 0 + : uElement != ((RTBIGNUMELEMENT)1 << cBitsLeft) - 1U ) ) + rc = VERR_BUFFER_OVERFLOW; + break; + } + } + + /* Sign extend the number to the desired output size. */ + if (cbWanted > 0) + memset(pbDst - cbWanted, pBigNum->fNegative ? 
0 : 0xff, cbWanted); + } + else + RT_BZERO(pvBuf, cbWanted); + rtBigNumScramble((PRTBIGNUM)pBigNum); + } + return rc; +} + + +RTDECL(int) RTBigNumCompare(PRTBIGNUM pLeft, PRTBIGNUM pRight) +{ + int rc = rtBigNumUnscramble(pLeft); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pLeft); + rc = rtBigNumUnscramble(pRight); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pRight); + if (pLeft->fNegative == pRight->fNegative) + { + if (pLeft->cUsed == pRight->cUsed) + { + rc = 0; + uint32_t i = pLeft->cUsed; + while (i-- > 0) + if (pLeft->pauElements[i] != pRight->pauElements[i]) + { + rc = pLeft->pauElements[i] < pRight->pauElements[i] ? -1 : 1; + break; + } + if (pLeft->fNegative) + rc = -rc; + } + else + rc = !pLeft->fNegative + ? pLeft->cUsed < pRight->cUsed ? -1 : 1 + : pLeft->cUsed < pRight->cUsed ? 1 : -1; + } + else + rc = pLeft->fNegative ? -1 : 1; + + rtBigNumScramble(pRight); + } + rtBigNumScramble(pLeft); + } + return rc; +} + + +RTDECL(int) RTBigNumCompareWithU64(PRTBIGNUM pLeft, uint64_t uRight) +{ + int rc = rtBigNumUnscramble(pLeft); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pLeft); + if (!pLeft->fNegative) + { + if (pLeft->cUsed * RTBIGNUM_ELEMENT_SIZE <= sizeof(uRight)) + { + if (pLeft->cUsed == 0) + rc = uRight == 0 ? 0 : -1; + else + { +#if RTBIGNUM_ELEMENT_SIZE == 8 + uint64_t uLeft = rtBigNumGetElement(pLeft, 0); + if (uLeft < uRight) + rc = -1; + else + rc = uLeft == uRight ? 0 : 1; +#elif RTBIGNUM_ELEMENT_SIZE == 4 + uint32_t uSubLeft = rtBigNumGetElement(pLeft, 1); + uint32_t uSubRight = uRight >> 32; + if (uSubLeft == uSubRight) + { + uSubLeft = rtBigNumGetElement(pLeft, 0); + uSubRight = (uint32_t)uRight; + } + if (uSubLeft < uSubRight) + rc = -1; + else + rc = uSubLeft == uSubRight ? 
0 : 1; +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + } + } + else + rc = 1; + } + else + rc = -1; + rtBigNumScramble(pLeft); + } + return rc; +} + + +RTDECL(int) RTBigNumCompareWithS64(PRTBIGNUM pLeft, int64_t iRight) +{ + int rc = rtBigNumUnscramble(pLeft); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pLeft); + if (pLeft->fNegative == (unsigned)(iRight < 0)) /* (unsigned cast is for MSC weirdness) */ + { + AssertCompile(RTBIGNUM_ELEMENT_SIZE <= sizeof(iRight)); + if (pLeft->cUsed * RTBIGNUM_ELEMENT_SIZE <= sizeof(iRight)) + { + uint64_t uRightMagn = !pLeft->fNegative ? (uint64_t)iRight : (uint64_t)-iRight; +#if RTBIGNUM_ELEMENT_SIZE == 8 + uint64_t uLeft = rtBigNumGetElement(pLeft, 0); + if (uLeft < uRightMagn) + rc = -1; + else + rc = uLeft == (uint64_t)uRightMagn ? 0 : 1; +#elif RTBIGNUM_ELEMENT_SIZE == 4 + uint32_t uSubLeft = rtBigNumGetElement(pLeft, 1); + uint32_t uSubRight = uRightMagn >> 32; + if (uSubLeft == uSubRight) + { + uSubLeft = rtBigNumGetElement(pLeft, 0); + uSubRight = (uint32_t)uRightMagn; + } + if (uSubLeft < uSubRight) + rc = -1; + else + rc = uSubLeft == uSubRight ? 0 : 1; +#else +# error "Bad RTBIGNUM_ELEMENT_SIZE value" +#endif + if (pLeft->fNegative) + rc = -rc; + } + else + rc = pLeft->fNegative ? -1 : 1; + } + else + rc = pLeft->fNegative ? -1 : 1; + rtBigNumScramble(pLeft); + } + return rc; +} + + +/** + * Compares the magnitude values of two big numbers. + * + * @retval -1 if pLeft is smaller than pRight. + * @retval 0 if pLeft is equal to pRight. + * @retval 1 if pLeft is larger than pRight. + * @param pLeft The left side number. + * @param pRight The right side number. 
+ */ +static int rtBigNumMagnitudeCompare(PCRTBIGNUM pLeft, PCRTBIGNUM pRight) +{ + Assert(!pLeft->fCurScrambled); Assert(!pRight->fCurScrambled); + int rc; + uint32_t i = pLeft->cUsed; + if (i == pRight->cUsed) + { + rc = 0; + while (i-- > 0) + if (pLeft->pauElements[i] != pRight->pauElements[i]) + { + rc = pLeft->pauElements[i] < pRight->pauElements[i] ? -1 : 1; + break; + } + } + else + rc = i < pRight->cUsed ? -1 : 1; + return rc; +} + + +/** + * Copies the magnitude of on number (@a pSrc) to another (@a pBigNum). + * + * The variables must be unscrambled. The sign flag is not considered nor + * touched. + * + * @returns IPRT status code. + * @param pDst The destination number. + * @param pSrc The source number. + */ +DECLINLINE(int) rtBigNumMagnitudeCopy(PRTBIGNUM pDst, PCRTBIGNUM pSrc) +{ + int rc = rtBigNumSetUsed(pDst, pSrc->cUsed); + if (RT_SUCCESS(rc)) + memcpy(pDst->pauElements, pSrc->pauElements, pSrc->cUsed * RTBIGNUM_ELEMENT_SIZE); + return rc; +} + + + +/** + * Adds two magnitudes and stores them into a third. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched. + * + * @returns IPRT status code. + * @param pResult The resultant. + * @param pAugend To whom it shall be addede. + * @param pAddend The nombre to addede. + */ +static int rtBigNumMagnitudeAdd(PRTBIGNUM pResult, PCRTBIGNUM pAugend, PCRTBIGNUM pAddend) +{ + Assert(!pResult->fCurScrambled); Assert(!pAugend->fCurScrambled); Assert(!pAddend->fCurScrambled); + Assert(pResult != pAugend); Assert(pResult != pAddend); + + uint32_t cElements = RT_MAX(pAugend->cUsed, pAddend->cUsed); + int rc = rtBigNumSetUsed(pResult, cElements); + if (RT_SUCCESS(rc)) + { + /* + * The primitive way, requires at least two additions for each entry + * without machine code help. 
+ */ + RTBIGNUMELEMENT fCarry = 0; + for (uint32_t i = 0; i < cElements; i++) + pResult->pauElements[i] = rtBigNumElementAddWithCarry(rtBigNumGetElement(pAugend, i), + rtBigNumGetElement(pAddend, i), + &fCarry); + if (fCarry) + { + rc = rtBigNumSetUsed(pResult, cElements + 1); + if (RT_SUCCESS(rc)) + pResult->pauElements[cElements++] = 1; + } + Assert(pResult->cUsed == cElements || RT_FAILURE_NP(rc)); + } + + return rc; +} + + +/** + * Substracts a smaller (or equal) magnitude from another one and stores it into + * a third. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched. For this reason, the @a pMinuend must be larger or equal to @a + * pSubtrahend. + * + * @returns IPRT status code. + * @param pResult There to store the result. + * @param pMinuend What to subtract from. + * @param pSubtrahend What to subtract. + */ +static int rtBigNumMagnitudeSub(PRTBIGNUM pResult, PCRTBIGNUM pMinuend, PCRTBIGNUM pSubtrahend) +{ + Assert(!pResult->fCurScrambled); Assert(!pMinuend->fCurScrambled); Assert(!pSubtrahend->fCurScrambled); + Assert(pResult != pMinuend); Assert(pResult != pSubtrahend); + Assert(pMinuend->cUsed >= pSubtrahend->cUsed); + + int rc; + if (pSubtrahend->cUsed) + { + /* + * Resize the result. In the assembly case, ensure that all three arrays + * has the same number of used entries, possibly with an extra zero + * element on 64-bit systems. + */ + rc = rtBigNumSetUsedEx(pResult, pMinuend->cUsed, RTBIGNUM_ZERO_ALIGN(pMinuend->cUsed)); +#ifdef IPRT_BIGINT_WITH_ASM + if (RT_SUCCESS(rc)) + rc = rtBigNumEnsureExtraZeroElements((PRTBIGNUM)pMinuend, RTBIGNUM_ZERO_ALIGN(pMinuend->cUsed)); + if (RT_SUCCESS(rc)) + rc = rtBigNumEnsureExtraZeroElements((PRTBIGNUM)pSubtrahend, RTBIGNUM_ZERO_ALIGN(pMinuend->cUsed)); +#endif + if (RT_SUCCESS(rc)) + { +#ifdef IPRT_BIGINT_WITH_ASM + /* + * Call assembly to do the work. 
+ */ + rtBigNumMagnitudeSubAssemblyWorker(pResult->pauElements, pMinuend->pauElements, + pSubtrahend->pauElements, pMinuend->cUsed); +# ifdef RT_STRICT + RTBIGNUMELEMENT fBorrow = 0; + for (uint32_t i = 0; i < pMinuend->cUsed; i++) + { + RTBIGNUMELEMENT uCorrect = rtBigNumElementSubWithBorrow(pMinuend->pauElements[i], rtBigNumGetElement(pSubtrahend, i), &fBorrow); + AssertMsg(pResult->pauElements[i] == uCorrect, ("[%u]=%#x, expected %#x\n", i, pResult->pauElements[i], uCorrect)); + } +# endif +#else + /* + * The primitive C way. + */ + RTBIGNUMELEMENT fBorrow = 0; + for (uint32_t i = 0; i < pMinuend->cUsed; i++) + pResult->pauElements[i] = rtBigNumElementSubWithBorrow(pMinuend->pauElements[i], + rtBigNumGetElement(pSubtrahend, i), + &fBorrow); + Assert(fBorrow == 0); +#endif + + /* + * Trim the result. + */ + rtBigNumStripTrailingZeros(pResult); + } + } + /* + * Special case: Subtrahend is zero. + */ + else + rc = rtBigNumMagnitudeCopy(pResult, pMinuend); + + return rc; +} + + +/** + * Substracts a smaller (or equal) magnitude from another one and stores the + * result into the first. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched. For this reason, the @a pMinuendResult must be larger or equal to + * @a pSubtrahend. + * + * @returns IPRT status code (memory alloc error). + * @param pMinuendResult What to subtract from and return as result. + * @param pSubtrahend What to subtract. + */ +static int rtBigNumMagnitudeSubThis(PRTBIGNUM pMinuendResult, PCRTBIGNUM pSubtrahend) +{ + Assert(!pMinuendResult->fCurScrambled); Assert(!pSubtrahend->fCurScrambled); + Assert(pMinuendResult != pSubtrahend); + Assert(pMinuendResult->cUsed >= pSubtrahend->cUsed); + +#ifdef IPRT_BIGINT_WITH_ASM + /* + * Use the assembly worker. Requires same sized element arrays, so zero extend them. 
+ */ + int rc = rtBigNumEnsureExtraZeroElements(pMinuendResult, RTBIGNUM_ZERO_ALIGN(pMinuendResult->cUsed)); + if (RT_SUCCESS(rc)) + rc = rtBigNumEnsureExtraZeroElements((PRTBIGNUM)pSubtrahend, RTBIGNUM_ZERO_ALIGN(pMinuendResult->cUsed)); + if (RT_FAILURE(rc)) + return rc; + rtBigNumMagnitudeSubThisAssemblyWorker(pMinuendResult->pauElements, pSubtrahend->pauElements, pMinuendResult->cUsed); +#else + /* + * The primitive way, as usual. + */ + RTBIGNUMELEMENT fBorrow = 0; + for (uint32_t i = 0; i < pMinuendResult->cUsed; i++) + pMinuendResult->pauElements[i] = rtBigNumElementSubWithBorrow(pMinuendResult->pauElements[i], + rtBigNumGetElement(pSubtrahend, i), + &fBorrow); + Assert(fBorrow == 0); +#endif + + /* + * Trim the result. + */ + rtBigNumStripTrailingZeros(pMinuendResult); + + return VINF_SUCCESS; +} + + +RTDECL(int) RTBigNumAdd(PRTBIGNUM pResult, PCRTBIGNUM pAugend, PCRTBIGNUM pAddend) +{ + Assert(pResult != pAugend); Assert(pResult != pAddend); + AssertReturn(pResult->fSensitive >= (pAugend->fSensitive | pAddend->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + rc = rtBigNumUnscramble((PRTBIGNUM)pAugend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pAugend); + rc = rtBigNumUnscramble((PRTBIGNUM)pAddend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pAddend); + + /* + * Same sign: Add magnitude, keep sign. + * 1 + 1 = 2 + * (-1) + (-1) = -2 + */ + if (pAugend->fNegative == pAddend->fNegative) + { + pResult->fNegative = pAugend->fNegative; + rc = rtBigNumMagnitudeAdd(pResult, pAugend, pAddend); + } + /* + * Different sign: Subtract smaller from larger, keep sign of larger. 
+ * (-5) + 3 = -2 + * 5 + (-3) = 2 + * (-1) + 3 = 2 + * 1 + (-3) = -2 + */ + else if (rtBigNumMagnitudeCompare(pAugend, pAddend) >= 0) + { + pResult->fNegative = pAugend->fNegative; + rc = rtBigNumMagnitudeSub(pResult, pAugend, pAddend); + if (!pResult->cUsed) + pResult->fNegative = 0; + } + else + { + pResult->fNegative = pAddend->fNegative; + rc = rtBigNumMagnitudeSub(pResult, pAddend, pAugend); + } + rtBigNumScramble((PRTBIGNUM)pAddend); + } + rtBigNumScramble((PRTBIGNUM)pAugend); + } + rtBigNumScramble(pResult); + } + return rc; +} + + +RTDECL(int) RTBigNumSubtract(PRTBIGNUM pResult, PCRTBIGNUM pMinuend, PCRTBIGNUM pSubtrahend) +{ + Assert(pResult != pMinuend); Assert(pResult != pSubtrahend); + AssertReturn(pResult->fSensitive >= (pMinuend->fSensitive | pSubtrahend->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + if (pMinuend != pSubtrahend) + { + rc = rtBigNumUnscramble((PRTBIGNUM)pMinuend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pMinuend); + rc = rtBigNumUnscramble((PRTBIGNUM)pSubtrahend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pSubtrahend); + + /* + * Different sign: Add magnitude, keep sign of first. + * 1 - (-2) == 3 + * -1 - 2 == -3 + */ + if (pMinuend->fNegative != pSubtrahend->fNegative) + { + pResult->fNegative = pMinuend->fNegative; + rc = rtBigNumMagnitudeAdd(pResult, pMinuend, pSubtrahend); + } + /* + * Same sign, minuend has greater or equal absolute value: Subtract, keep sign of first. + * 10 - 7 = 3 + */ + else if (rtBigNumMagnitudeCompare(pMinuend, pSubtrahend) >= 0) + { + pResult->fNegative = pMinuend->fNegative; + rc = rtBigNumMagnitudeSub(pResult, pMinuend, pSubtrahend); + } + /* + * Same sign, subtrahend is larger: Reverse and subtract, invert sign of first. 
+ * 7 - 10 = -3 + * -1 - (-3) = 2 + */ + else + { + pResult->fNegative = !pMinuend->fNegative; + rc = rtBigNumMagnitudeSub(pResult, pSubtrahend, pMinuend); + } + rtBigNumScramble((PRTBIGNUM)pSubtrahend); + } + rtBigNumScramble((PRTBIGNUM)pMinuend); + } + } + else + { + /* zero. */ + pResult->fNegative = 0; + rtBigNumSetUsed(pResult, 0); + } + rtBigNumScramble(pResult); + } + return rc; +} + + +RTDECL(int) RTBigNumNegateThis(PRTBIGNUM pThis) +{ + pThis->fNegative = !pThis->fNegative; + return VINF_SUCCESS; +} + + +RTDECL(int) RTBigNumNegate(PRTBIGNUM pResult, PCRTBIGNUM pBigNum) +{ + int rc = RTBigNumAssign(pResult, pBigNum); + if (RT_SUCCESS(rc)) + rc = RTBigNumNegateThis(pResult); + return rc; +} + + +/** + * Multiplies the magnitudes of two values, letting the caller care about the + * sign bit. + * + * @returns IPRT status code. + * @param pResult Where to store the result. + * @param pMultiplicand The first value. + * @param pMultiplier The second value. + */ +static int rtBigNumMagnitudeMultiply(PRTBIGNUM pResult, PCRTBIGNUM pMultiplicand, PCRTBIGNUM pMultiplier) +{ + Assert(pResult != pMultiplicand); Assert(pResult != pMultiplier); + Assert(!pResult->fCurScrambled); Assert(!pMultiplicand->fCurScrambled); Assert(!pMultiplier->fCurScrambled); + + /* + * Multiplication involving zero is zero. + */ + if (!pMultiplicand->cUsed || !pMultiplier->cUsed) + { + pResult->fNegative = 0; + rtBigNumSetUsed(pResult, 0); + return VINF_SUCCESS; + } + + /* + * Allocate a result array that is the sum of the two factors, initialize + * it to zero. 
+ */ + uint32_t cMax = pMultiplicand->cUsed + pMultiplier->cUsed; + int rc = rtBigNumSetUsed(pResult, cMax); + if (RT_SUCCESS(rc)) + { + RT_BZERO(pResult->pauElements, pResult->cUsed * RTBIGNUM_ELEMENT_SIZE); + +#ifdef IPRT_BIGINT_WITH_ASM + rtBigNumMagnitudeMultiplyAssemblyWorker(pResult->pauElements, + pMultiplier->pauElements, pMultiplier->cUsed, + pMultiplicand->pauElements, pMultiplicand->cUsed); +#else + for (uint32_t i = 0; i < pMultiplier->cUsed; i++) + { + RTBIGNUMELEMENT uMultiplier = pMultiplier->pauElements[i]; + for (uint32_t j = 0; j < pMultiplicand->cUsed; j++) + { + RTBIGNUMELEMENT uHi; + RTBIGNUMELEMENT uLo; +#if RTBIGNUM_ELEMENT_SIZE == 4 + uint64_t u64 = ASMMult2xU32RetU64(pMultiplicand->pauElements[j], uMultiplier); + uLo = (uint32_t)u64; + uHi = u64 >> 32; +#elif RTBIGNUM_ELEMENT_SIZE == 8 + uLo = ASMMult2xU64Ret2xU64(pMultiplicand->pauElements[j], uMultiplier, &uHi); +#else +# error "Invalid RTBIGNUM_ELEMENT_SIZE value" +#endif + RTBIGNUMELEMENT fCarry = 0; + uint64_t k = i + j; + pResult->pauElements[k] = rtBigNumElementAddWithCarry(pResult->pauElements[k], uLo, &fCarry); + k++; + pResult->pauElements[k] = rtBigNumElementAddWithCarry(pResult->pauElements[k], uHi, &fCarry); + while (fCarry) + { + k++; + pResult->pauElements[k] = rtBigNumElementAddWithCarry(pResult->pauElements[k], 0, &fCarry); + } + Assert(k < cMax); + } + } +#endif + + /* It's possible we overestimated the output size by 1 element. 
*/ + rtBigNumStripTrailingZeros(pResult); + } + return rc; +} + + +RTDECL(int) RTBigNumMultiply(PRTBIGNUM pResult, PCRTBIGNUM pMultiplicand, PCRTBIGNUM pMultiplier) +{ + Assert(pResult != pMultiplicand); Assert(pResult != pMultiplier); + AssertReturn(pResult->fSensitive >= (pMultiplicand->fSensitive | pMultiplier->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + rc = rtBigNumUnscramble((PRTBIGNUM)pMultiplicand); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pMultiplicand); + rc = rtBigNumUnscramble((PRTBIGNUM)pMultiplier); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pMultiplier); + + /* + * The sign values follow XOR rules: + * -1 * 1 = -1; 1 ^ 0 = 1 + * 1 * -1 = -1; 1 ^ 0 = 1 + * -1 * -1 = 1; 1 ^ 1 = 0 + * 1 * 1 = 1; 0 ^ 0 = 0 + */ + pResult->fNegative = pMultiplicand->fNegative ^ pMultiplier->fNegative; + rc = rtBigNumMagnitudeMultiply(pResult, pMultiplicand, pMultiplier); + + rtBigNumScramble((PRTBIGNUM)pMultiplier); + } + rtBigNumScramble((PRTBIGNUM)pMultiplicand); + } + rtBigNumScramble(pResult); + } + return rc; +} + + +#if 0 /* unused */ +/** + * Clears a bit in the magnitude of @a pBigNum. + * + * The variables must be unscrambled. + * + * @param pBigNum The big number. + * @param iBit The bit to clear (0-based). + */ +DECLINLINE(void) rtBigNumMagnitudeClearBit(PRTBIGNUM pBigNum, uint32_t iBit) +{ + uint32_t iElement = iBit / RTBIGNUM_ELEMENT_BITS; + if (iElement < pBigNum->cUsed) + { + iBit &= RTBIGNUM_ELEMENT_BITS - 1; + pBigNum->pauElements[iElement] &= ~RTBIGNUM_ELEMENT_BIT(iBit); + if (iElement + 1 == pBigNum->cUsed && !pBigNum->pauElements[iElement]) + rtBigNumStripTrailingZeros(pBigNum); + } +} +#endif /* unused */ + + +/** + * Sets a bit in the magnitude of @a pBigNum. + * + * The variables must be unscrambled. + * + * @returns IPRT status code. + * @param pBigNum The big number. + * @param iBit The bit to clear (0-based). 
 */
DECLINLINE(int) rtBigNumMagnitudeSetBit(PRTBIGNUM pBigNum, uint32_t iBit)
{
    /* Split the bit index into an element index and a bit offset within it. */
    uint32_t iElement = iBit / RTBIGNUM_ELEMENT_BITS;
    int rc = rtBigNumEnsureElementPresent(pBigNum, iElement);
    if (RT_SUCCESS(rc))
    {
        iBit &= RTBIGNUM_ELEMENT_BITS - 1;
        pBigNum->pauElements[iElement] |= RTBIGNUM_ELEMENT_BIT(iBit);
        return VINF_SUCCESS;
    }
    return rc;
}


#if 0 /* unused */
/**
 * Writes a bit in the magnitude of @a pBigNum.
 *
 * The variables must be unscrambled.
 *
 * @returns IPRT status code.
 * @param   pBigNum         The big number.
 * @param   iBit            The bit to write (0-based).
 * @param   fValue          The bit value.
 */
DECLINLINE(int) rtBigNumMagnitudeWriteBit(PRTBIGNUM pBigNum, uint32_t iBit, bool fValue)
{
    if (fValue)
        return rtBigNumMagnitudeSetBit(pBigNum, iBit);
    rtBigNumMagnitudeClearBit(pBigNum, iBit);
    return VINF_SUCCESS;
}
#endif


/**
 * Returns the given magnitude bit.
 *
 * The variables must be unscrambled.
 *
 * @returns The bit value (1 or 0).  Bits beyond the used elements read as 0.
 * @param   pBigNum         The big number.
 * @param   iBit            The bit to return (0-based).
 */
DECLINLINE(RTBIGNUMELEMENT) rtBigNumMagnitudeGetBit(PCRTBIGNUM pBigNum, uint32_t iBit)
{
    uint32_t iElement = iBit / RTBIGNUM_ELEMENT_BITS;
    if (iElement < pBigNum->cUsed)
    {
        iBit &= RTBIGNUM_ELEMENT_BITS - 1;
        return (pBigNum->pauElements[iElement] >> iBit) & 1;
    }
    return 0;
}


/**
 * Shifts the magnitude left by one.
 *
 * The variables must be unscrambled.
 *
 * @returns IPRT status code.
 * @param   pBigNum         The big number.
 * @param   uCarry          The value to shift in at the bottom (0 or 1).
 */
DECLINLINE(int) rtBigNumMagnitudeShiftLeftOne(PRTBIGNUM pBigNum, RTBIGNUMELEMENT uCarry)
{
    Assert(uCarry <= 1);

    /* Do the shifting. */
    uint32_t cUsed = pBigNum->cUsed;
#ifdef IPRT_BIGINT_WITH_ASM
    uCarry = rtBigNumMagnitudeShiftLeftOneAssemblyWorker(pBigNum->pauElements, cUsed, uCarry);
#else
    for (uint32_t i = 0; i < cUsed; i++)
    {
        RTBIGNUMELEMENT uTmp = pBigNum->pauElements[i];
        pBigNum->pauElements[i] = (uTmp << 1) | uCarry;
        /* The top bit of this element becomes the bottom bit of the next one. */
        uCarry = uTmp >> (RTBIGNUM_ELEMENT_BITS - 1);
    }
#endif

    /* If we still carry a bit, we need to increase the size. */
    if (uCarry)
    {
        int rc = rtBigNumSetUsed(pBigNum, cUsed + 1);
        AssertRCReturn(rc, rc);
        pBigNum->pauElements[cUsed] = uCarry;
    }

    return VINF_SUCCESS;
}


/**
 * Shifts the magnitude left by @a cBits.
 *
 * The variables must be unscrambled.
 *
 * @returns IPRT status code.  VERR_OUT_OF_RANGE if the resulting bit width
 *          does not fit in 32 bits.
 * @param   pResult         Where to store the result.
 * @param   pValue          The value to shift.
 * @param   cBits           The shift count.
 */
static int rtBigNumMagnitudeShiftLeft(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
{
    int rc;
    if (cBits)
    {
        uint32_t cBitsNew = rtBigNumMagnitudeBitWidth(pValue);
        if (cBitsNew > 0)
        {
            /* Guard against the new bit width wrapping around 32 bits. */
            if (cBitsNew + cBits > cBitsNew)
            {
                cBitsNew += cBits;
                /* NOTE(review): the SetUsedEx(0, n) + SetUsed(n) pair presumably yields n zeroed
                   elements (the Assert below relies on the low ones being zero) - confirm. */
                rc = rtBigNumSetUsedEx(pResult, 0, RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS);
                if (RT_SUCCESS(rc))
                    rc = rtBigNumSetUsed(pResult, RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS);
                if (RT_SUCCESS(rc))
                {
                    uint32_t const      cLeft  = pValue->cUsed;
                    PCRTBIGNUMELEMENT   pauSrc = pValue->pauElements;
                    PRTBIGNUMELEMENT    pauDst = pResult->pauElements;

                    /* Whole-element part of the shift: skip that many (zero) destination elements. */
                    Assert(ASMMemIsZero(pauDst, (cBits / RTBIGNUM_ELEMENT_BITS) * RTBIGNUM_ELEMENT_SIZE));
                    pauDst += cBits / RTBIGNUM_ELEMENT_BITS;

                    /* Sub-element part of the shift. */
                    cBits &= RTBIGNUM_ELEMENT_BITS - 1;
                    if (cBits)
                    {
                        RTBIGNUMELEMENT uPrev = 0;
                        for (uint32_t i = 0; i < cLeft; i++)
                        {
                            RTBIGNUMELEMENT uCur = pauSrc[i];
                            pauDst[i] = (uCur << cBits) | (uPrev >> (RTBIGNUM_ELEMENT_BITS - cBits));
                            uPrev = uCur;
                        }
                        /* Bits shifted out of the top source element spill into one more element. */
                        uPrev >>= RTBIGNUM_ELEMENT_BITS - cBits;
                        if (uPrev)
                            pauDst[pValue->cUsed] = uPrev;
                    }
                    else
                        memcpy(pauDst, pauSrc, cLeft * RTBIGNUM_ELEMENT_SIZE);
                }
            }
            else
                rc = VERR_OUT_OF_RANGE;
        }
        /* Shifting zero always yields a zero result. */
        else
            rc = rtBigNumSetUsed(pResult, 0);
    }
    else
        rc = rtBigNumMagnitudeCopy(pResult, pValue);
    return rc;
}


/**
 * Shifts a big number left: pResult = pValue << cBits, keeping the sign.
 *
 * @returns IPRT status code, VERR_BIGNUM_SENSITIVE_INPUT if the result is less
 *          sensitive than the input.
 * @param   pResult         Where to store the result; must differ from
 *                          @a pValue.
 * @param   pValue          The value to shift.
 * @param   cBits           The shift count.
 */
RTDECL(int) RTBigNumShiftLeft(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
{
    Assert(pResult != pValue);
    AssertReturn(pResult->fSensitive >= pValue->fSensitive, VERR_BIGNUM_SENSITIVE_INPUT);

    int rc = rtBigNumUnscramble(pResult);
    if (RT_SUCCESS(rc))
    {
        RTBIGNUM_ASSERT_VALID(pResult);
        rc = rtBigNumUnscramble((PRTBIGNUM)pValue);
        if (RT_SUCCESS(rc))
        {
            RTBIGNUM_ASSERT_VALID(pValue);

            pResult->fNegative = pValue->fNegative;
            rc = rtBigNumMagnitudeShiftLeft(pResult, pValue, cBits);

            rtBigNumScramble((PRTBIGNUM)pValue);
        }
        rtBigNumScramble(pResult);
    }
    return rc;
}


/**
 * Shifts the magnitude right by @a cBits.
 *
 * The variables must be unscrambled.
 *
 * @returns IPRT status code.
 * @param   pResult         Where to store the result.
 * @param   pValue          The value to shift.
 * @param   cBits           The shift count.
 */
static int rtBigNumMagnitudeShiftRight(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
{
    int rc;
    if (cBits)
    {
        uint32_t cBitsNew = rtBigNumMagnitudeBitWidth(pValue);
        if (cBitsNew > cBits)
        {
            cBitsNew -= cBits;
            uint32_t cElementsNew = RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS;
            rc = rtBigNumSetUsed(pResult, cElementsNew);
            if (RT_SUCCESS(rc))
            {
                uint32_t            i      = cElementsNew;
                PCRTBIGNUMELEMENT   pauSrc = pValue->pauElements;
                PRTBIGNUMELEMENT    pauDst = pResult->pauElements;

                /* Whole-element part of the shift: skip that many source elements. */
                pauSrc += cBits / RTBIGNUM_ELEMENT_BITS;

                cBits &= RTBIGNUM_ELEMENT_BITS - 1;
                if (cBits)
                {
                    /* Seed with the element above the result range, or zero when that
                       would be one past the last used source element. */
                    RTBIGNUMELEMENT uPrev = &pauSrc[i] == &pValue->pauElements[pValue->cUsed] ? 0 : pauSrc[i];
                    while (i-- > 0)
                    {
                        RTBIGNUMELEMENT uCur = pauSrc[i];
                        pauDst[i] = (uCur >> cBits) | (uPrev << (RTBIGNUM_ELEMENT_BITS - cBits));
                        uPrev = uCur;
                    }
                }
                else
                    memcpy(pauDst, pauSrc, i * RTBIGNUM_ELEMENT_SIZE);
            }
        }
        /* Everything is shifted out: the result is zero. */
        else
            rc = rtBigNumSetUsed(pResult, 0);
    }
    else
        rc = rtBigNumMagnitudeCopy(pResult, pValue);
    return rc;
}


/**
 * Shifts a big number right: pResult = pValue >> cBits (magnitude shift,
 * sign kept unless the result is zero).
 *
 * @returns IPRT status code, VERR_BIGNUM_SENSITIVE_INPUT if the result is less
 *          sensitive than the input.
 * @param   pResult         Where to store the result; must differ from
 *                          @a pValue.
 * @param   pValue          The value to shift.
 * @param   cBits           The shift count.
 */
RTDECL(int) RTBigNumShiftRight(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
{
    Assert(pResult != pValue);
    AssertReturn(pResult->fSensitive >= pValue->fSensitive, VERR_BIGNUM_SENSITIVE_INPUT);

    int rc = rtBigNumUnscramble(pResult);
    if (RT_SUCCESS(rc))
    {
        RTBIGNUM_ASSERT_VALID(pResult);
        rc = rtBigNumUnscramble((PRTBIGNUM)pValue);
        if (RT_SUCCESS(rc))
        {
            RTBIGNUM_ASSERT_VALID(pValue);

            pResult->fNegative = pValue->fNegative;
            rc = rtBigNumMagnitudeShiftRight(pResult, pValue, cBits);
            /* Zero is never negative. */
            if (!pResult->cUsed)
                pResult->fNegative = 0;

            rtBigNumScramble((PRTBIGNUM)pValue);
        }
        rtBigNumScramble(pResult);
    }
    return rc;
}


/**
 * Implements the D3 test for Qhat decrementation.
 *
 * @returns True if Qhat should be decremented.
 * @param   puQhat              Pointer to Qhat.
 * @param   uRhat               The remainder.
 * @param   uDivisorY           The penultimate divisor element.
 * @param   uDividendJMinus2    The j-2 dividend element.
 */
DECLINLINE(bool) rtBigNumKnuthD3_ShouldDecrementQhat(RTBIGNUMELEMENT2X const *puQhat, RTBIGNUMELEMENT uRhat,
                                                     RTBIGNUMELEMENT uDivisorY, RTBIGNUMELEMENT uDividendJMinus2)
{
    /* NOTE(review): Knuth's D3 tests qhat == b (Hi=1, Lo=0); this tests for the
       maximum single-element value instead - confirm this is intended. */
    if (puQhat->s.Lo == RTBIGNUM_ELEMENT_MAX && puQhat->s.Hi == 0)
        return true;
    /* Test whether Qhat * uDivisorY > (uRhat << element bits) + uDividendJMinus2. */
#if RTBIGNUM_ELEMENT_BITS == 64
    RTBIGNUMELEMENT2X TmpLeft;
    RTUInt128MulByU64(&TmpLeft, puQhat, uDivisorY);

    RTBIGNUMELEMENT2X TmpRight;
    TmpRight.s.Lo = 0;
    TmpRight.s.Hi = uRhat;
    RTUInt128AssignAddU64(&TmpRight, uDividendJMinus2);

    if (RTUInt128Compare(&TmpLeft, &TmpRight) > 0)
        return true;
#else
    if (puQhat->u * uDivisorY > ((uint64_t)uRhat << 32) + uDividendJMinus2)
        return true;
#endif
    return false;
}


/**
 * C implementation of the D3 step of Knuth's division algorithm.
 *
 * This estimates a value Qhat that will be used as quotient "digit" (element)
 * at the current level of the division (j).
 *
 * @returns The Qhat value we've estimated.
 * @param   pauDividendJN   Pointer to the j+n (normalized) dividend element.
 *                          Will access up to two elements prior to this.
 * @param   uDivZ           The last element in the (normalized) divisor.
 * @param   uDivY           The penultimate element in the (normalized) divisor.
 */
DECLINLINE(RTBIGNUMELEMENT) rtBigNumKnuthD3_EstimateQhat(PCRTBIGNUMELEMENT pauDividendJN,
                                                         RTBIGNUMELEMENT uDivZ, RTBIGNUMELEMENT uDivY)
{
    RTBIGNUMELEMENT2X   uQhat;
    RTBIGNUMELEMENT     uRhat;
    RTBIGNUMELEMENT     uDividendJN = pauDividendJN[0];
    Assert(uDividendJN <= uDivZ);
    /* Normal case: divide the top two dividend elements by the top divisor element. */
    if (uDividendJN != uDivZ)
        rtBigNumElement2xDiv2xBy1x(&uQhat, &uRhat, uDividendJN, pauDividendJN[-1], uDivZ);
    else
    {
        /*
         * This is the case where we end up with an initial Qhat that's all Fs.
         */
        /* Calc the remainder for max Qhat value. */
        RTBIGNUMELEMENT2X uTmp1;    /* (v[j+n] << bits) + v[J+N-1] */
        uTmp1.s.Hi = uDivZ;
        uTmp1.s.Lo = pauDividendJN[-1];

        RTBIGNUMELEMENT2X uTmp2;    /* uQhat * uDividendJN */
        uTmp2.s.Hi = uDivZ - 1;
        uTmp2.s.Lo = 0 - uDivZ;
#if RTBIGNUM_ELEMENT_BITS == 64
        RTUInt128AssignSub(&uTmp1, &uTmp2);
#else
        uTmp1.u -= uTmp2.u;
#endif
        /* If we overflowed the remainder, don't bother trying to adjust. */
        if (uTmp1.s.Hi)
            return RTBIGNUM_ELEMENT_MAX;

        uRhat      = uTmp1.s.Lo;
        uQhat.s.Lo = RTBIGNUM_ELEMENT_MAX;
        uQhat.s.Hi = 0;
    }

    /*
     * Adjust Q to eliminate all cases where it's two too large and most cases
     * where it's one too large.
     */
    while (rtBigNumKnuthD3_ShouldDecrementQhat(&uQhat, uRhat, uDivY, pauDividendJN[-2]))
    {
        rtBigNumElement2xDec(&uQhat);
        uRhat += uDivZ;
        /* Stop once the remainder no longer fits in a single element. */
        if (uRhat < uDivZ /* overflow */ || uRhat == RTBIGNUM_ELEMENT_MAX)
            break;
    }

    return uQhat.s.Lo;
}


#ifdef IPRT_BIGINT_WITH_ASM
DECLASM(bool) rtBigNumKnuthD4_MulSub(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor,
                                     uint32_t cDivisor, RTBIGNUMELEMENT uQhat);
#else
/**
 * C implementation of the D4 step of Knuth's division algorithm.
 *
 * This subtracts Divisor * Qhat from the dividend at the current J index.
 *
 * @returns true if negative result (unlikely), false if positive.
 * @param   pauDividendJ    Pointer to the j-th (normalized) dividend element.
 *                          Elements 0 thru @a cDivisor are updated.
 * @param   pauDivisor      The (normalized) divisor elements.
 * @param   cDivisor        The number of divisor elements.
 * @param   uQhat           The estimated quotient element to multiply by.
+ */ +DECLINLINE(bool) rtBigNumKnuthD4_MulSub(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor, + uint32_t cDivisor, RTBIGNUMELEMENT uQhat) +{ + uint32_t i; + bool fBorrow = false; + RTBIGNUMELEMENT uMulCarry = 0; + for (i = 0; i < cDivisor; i++) + { + RTBIGNUMELEMENT2X uSub; +# if RTBIGNUM_ELEMENT_BITS == 64 + RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]); + RTUInt128AssignAddU64(&uSub, uMulCarry); +# else + uSub.u = (uint64_t)uQhat * pauDivisor[i] + uMulCarry; +# endif + uMulCarry = uSub.s.Hi; + + RTBIGNUMELEMENT uDividendI = pauDividendJ[i]; + if (!fBorrow) + { + fBorrow = uDividendI < uSub.s.Lo; + uDividendI -= uSub.s.Lo; + } + else + { + fBorrow = uDividendI <= uSub.s.Lo; + uDividendI -= uSub.s.Lo + 1; + } + pauDividendJ[i] = uDividendI; + } + + /* Carry and borrow into the final dividend element. */ + RTBIGNUMELEMENT uDividendI = pauDividendJ[i]; + if (!fBorrow) + { + fBorrow = uDividendI < uMulCarry; + pauDividendJ[i] = uDividendI - uMulCarry; + } + else + { + fBorrow = uDividendI <= uMulCarry; + pauDividendJ[i] = uDividendI - uMulCarry - 1; + } + + return fBorrow; +} +#endif /* !IPRT_BIGINT_WITH_ASM */ + + +/** + * C implementation of the D6 step of Knuth's division algorithm. + * + * This adds the divisor to the dividend to undo the negative value step D4 + * produced. This is not very frequent occurence. + * + * @param pauDividendJ Pointer to the j-th (normalized) dividend element. + * Will access up to two elements prior to this. + * @param pauDivisor The last element in the (normalized) divisor. + * @param cDivisor The penultimate element in the (normalized) divisor. 
+ */ +DECLINLINE(void) rtBigNumKnuthD6_AddBack(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor, uint32_t cDivisor) +{ + RTBIGNUMELEMENT2X uTmp; + uTmp.s.Lo = 0; + + uint32_t i; + for (i = 0; i < cDivisor; i++) + { + uTmp.s.Hi = 0; +#if RTBIGNUM_ELEMENT_BITS == 64 + RTUInt128AssignAddU64(&uTmp, pauDivisor[i]); + RTUInt128AssignAddU64(&uTmp, pauDividendJ[i]); +#else + uTmp.u += pauDivisor[i]; + uTmp.u += pauDividendJ[i]; +#endif + pauDividendJ[i] = uTmp.s.Lo; + uTmp.s.Lo = uTmp.s.Hi; + } + + /* The final dividend entry. */ + Assert(pauDividendJ[i] + uTmp.s.Lo < uTmp.s.Lo); + pauDividendJ[i] += uTmp.s.Lo; +} + + +/** + * Knuth's division (core). + * + * @returns IPRT status code. + * @param pQuotient Where to return the quotient. Can be NULL. + * @param pRemainder Where to return the remainder. + * @param pDividend What to divide. + * @param pDivisor What to divide by. + */ +static int rtBigNumMagnitudeDivideKnuth(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor) +{ + Assert(pDivisor->cUsed > 1); + uint32_t const cDivisor = pDivisor->cUsed; + Assert(pDividend->cUsed >= cDivisor); + + /* + * Make sure we've got enough space in the quotient, so we can build it + * without any trouble come step D5. + */ + int rc; + if (pQuotient) + { + rc = rtBigNumSetUsedEx(pQuotient, 0, pDividend->cUsed - cDivisor + 1); + if (RT_SUCCESS(rc)) + rc = rtBigNumSetUsed(pQuotient, pDividend->cUsed - cDivisor + 1); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * D1. Normalize. The goal here is to make sure the last element in the + * divisor is greater than RTBIGNUMELEMENTS_MAX/2. We must also make sure + * we can access element pDividend->cUsed of the normalized dividend. 
+ */ + RTBIGNUM NormDividend; + RTBIGNUM NormDivisor; + PCRTBIGNUM pNormDivisor = &NormDivisor; + rtBigNumInitZeroTemplate(&NormDivisor, pDividend); + + uint32_t cNormShift = (RTBIGNUM_ELEMENT_BITS - rtBigNumMagnitudeBitWidth(pDivisor)) & (RTBIGNUM_ELEMENT_BITS - 1); + if (cNormShift) + { + rtBigNumInitZeroTemplate(&NormDividend, pDividend); + rc = rtBigNumMagnitudeShiftLeft(&NormDividend, pDividend, cNormShift); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeShiftLeft(&NormDivisor, pDivisor, cNormShift); + } + else + { + pNormDivisor = pDivisor; /* No shift needed: use the divisor as is; NormDivisor is not destroyed below in this case. */ + rc = rtBigNumCloneInternal(&NormDividend, pDividend); + } + if (RT_SUCCESS(rc) && pDividend->cUsed == NormDividend.cUsed) + rc = rtBigNumEnsureExtraZeroElements(&NormDividend, NormDividend.cUsed + 1); + if (RT_SUCCESS(rc)) + { + /* + * D2. Initialize the j index so we can loop thru the elements in the + * dividend that makes it larger than the divisor. + */ + uint32_t j = pDividend->cUsed - cDivisor; + + RTBIGNUMELEMENT const DivZ = pNormDivisor->pauElements[cDivisor - 1]; + RTBIGNUMELEMENT const DivY = pNormDivisor->pauElements[cDivisor - 2]; + for (;;) + { + /* + * D3. Estimate a Q' by dividing the j+n and j+n-1 dividend elements + * (n = cDivisor) by the last divisor element, then adjust against + * the next elements. + */ + RTBIGNUMELEMENT uQhat = rtBigNumKnuthD3_EstimateQhat(&NormDividend.pauElements[j + cDivisor], DivZ, DivY); + + /* + * D4. Multiply and subtract. + */ + bool fNegative = rtBigNumKnuthD4_MulSub(&NormDividend.pauElements[j], pNormDivisor->pauElements, cDivisor, uQhat); + + /* + * D5. Test remainder. + * D6. Add back. + */ + if (fNegative) + { +//__debugbreak(); + rtBigNumKnuthD6_AddBack(&NormDividend.pauElements[j], pNormDivisor->pauElements, cDivisor); + uQhat--; + } + + if (pQuotient) + pQuotient->pauElements[j] = uQhat; + + /* + * D7. Loop on j. + */ + if (j == 0) + break; + j--; + } + + /* + * D8. Unnormalize the remainder.
+ */ + rtBigNumStripTrailingZeros(&NormDividend); + if (cNormShift) + rc = rtBigNumMagnitudeShiftRight(pRemainder, &NormDividend, cNormShift); + else + rc = rtBigNumMagnitudeCopy(pRemainder, &NormDividend); + if (pQuotient) + rtBigNumStripTrailingZeros(pQuotient); + } + + /* + * Delete temporary variables. + */ + RTBigNumDestroy(&NormDividend); + if (pNormDivisor == &NormDivisor) + RTBigNumDestroy(&NormDivisor); + return rc; +} + +/** + * Bit-by-bit long division of magnitudes. + * + * Walks the dividend bits from the most significant one down: each bit is + * shifted into the remainder, and whenever the remainder is at least as big + * as the divisor, the divisor is subtracted and the matching quotient bit is + * set. The outputs are not cleared here; rtBigNumMagnitudeDivide zeroes them + * before calling. + * + * @returns IPRT status code. + * @param pQuotient Where the quotient bits are set. + * @param pRemainder Where the remainder is accumulated. + * @param pDividend What to divide. + * @param pDivisor What to divide by. + */ +static int rtBigNumMagnitudeDivideSlowLong(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor) +{ + /* + * Do very simple long division. This ain't fast, but it does the trick. + */ + int rc = VINF_SUCCESS; + uint32_t iBit = rtBigNumMagnitudeBitWidth(pDividend); + while (iBit-- > 0) + { + rc = rtBigNumMagnitudeShiftLeftOne(pRemainder, rtBigNumMagnitudeGetBit(pDividend, iBit)); + AssertRCBreak(rc); + int iDiff = rtBigNumMagnitudeCompare(pRemainder, pDivisor); + if (iDiff >= 0) + { + if (iDiff != 0) + { + rc = rtBigNumMagnitudeSubThis(pRemainder, pDivisor); + AssertRCBreak(rc); + } + else + rtBigNumSetUsed(pRemainder, 0); /* Remainder equals the divisor: the difference is zero. */ + rc = rtBigNumMagnitudeSetBit(pQuotient, iBit); + AssertRCBreak(rc); + } + } + + /* This shouldn't be necessary. */ + rtBigNumStripTrailingZeros(pQuotient); + rtBigNumStripTrailingZeros(pRemainder); + + return rc; +} + + +/** + * Divides the magnitudes of two values, letting the caller care about the sign + * bit. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched, this means the caller have to check for zero outputs. + * + * @returns IPRT status code. + * @param pQuotient Where to return the quotient. + * @param pRemainder Where to return the remainder. + * @param pDividend What to divide. + * @param pDivisor What to divide by. + * @param fForceLong Force long division.
+ */ +static int rtBigNumMagnitudeDivide(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor, + bool fForceLong) +{ + Assert(pQuotient != pDividend); Assert(pQuotient != pDivisor); Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); Assert(pRemainder != pQuotient); + Assert(!pQuotient->fCurScrambled); Assert(!pRemainder->fCurScrambled); Assert(!pDividend->fCurScrambled); Assert(!pDivisor->fCurScrambled); + + /* + * Just set both output values to zero as that's the return for several + * special cases and the initial state of the general case. + */ + rtBigNumSetUsed(pQuotient, 0); + rtBigNumSetUsed(pRemainder, 0); + + /* + * Dividing something by zero is undefined. + * Dividing zero by something is zero, unless the divisor is also zero. + */ + if (!pDivisor->cUsed || !pDividend->cUsed) + return pDivisor->cUsed ? VINF_SUCCESS : VERR_BIGNUM_DIV_BY_ZERO; + + /* + * Dividing by one? Quotient = dividend, no remainder. + */ + if (pDivisor->cUsed == 1 && pDivisor->pauElements[0] == 1) + return rtBigNumMagnitudeCopy(pQuotient, pDividend); + + /* + * Dividend smaller than the divisor. Zero quotient, the dividend is the + * remainder. + */ + int iDiff = rtBigNumMagnitudeCompare(pDividend, pDivisor); + if (iDiff < 0) + return rtBigNumMagnitudeCopy(pRemainder, pDividend); + + /* + * Since we already have done the compare, check if the two values are the + * same. The result is 1 and no remainder then. + */ + if (iDiff == 0) + { + int rc = rtBigNumSetUsed(pQuotient, 1); + if (RT_SUCCESS(rc)) + pQuotient->pauElements[0] = 1; + return rc; + } + + /* + * Sort out special cases before going to the preferred or selected + * algorithm. From here on the dividend is larger than the divisor. + */ + int rc; + if (pDividend->cUsed <= 2 && !fForceLong) + { + if (pDividend->cUsed < 2) + { + /* + * Single element division.
+ */ + RTBIGNUMELEMENT uQ = pDividend->pauElements[0] / pDivisor->pauElements[0]; + RTBIGNUMELEMENT uR = pDividend->pauElements[0] % pDivisor->pauElements[0]; + rc = VINF_SUCCESS; + if (uQ) + { + rc = rtBigNumSetUsed(pQuotient, 1); + if (RT_SUCCESS(rc)) + pQuotient->pauElements[0] = uQ; + } + if (uR && RT_SUCCESS(rc)) + { + rc = rtBigNumSetUsed(pRemainder, 1); + if (RT_SUCCESS(rc)) + pRemainder->pauElements[0] = uR; + } + } + else + { + /* + * Two element dividend divided by a one or two element divisor. + */ + RTBIGNUMELEMENT2X uQ, uR; + if (pDivisor->cUsed == 1) + { + rtBigNumElement2xDiv2xBy1x(&uQ, &uR.s.Lo, pDividend->pauElements[1], pDividend->pauElements[0], + pDivisor->pauElements[0]); + uR.s.Hi = 0; + } + else + rtBigNumElement2xDiv(&uQ, &uR, pDividend->pauElements[1], pDividend->pauElements[0], + pDivisor->pauElements[1], pDivisor->pauElements[0]); + rc = rtBigNumElement2xCopyToMagnitude(&uQ, pQuotient); + if (RT_SUCCESS(rc)) + rc = rtBigNumElement2xCopyToMagnitude(&uR, pRemainder); + } + } + /* + * Decide upon which algorithm to use. Knuth requires a divisor that's at + * least 2 elements big.
+ */ + else if (pDivisor->cUsed < 2 || fForceLong) + rc = rtBigNumMagnitudeDivideSlowLong(pQuotient, pRemainder, pDividend, pDivisor); + else + rc = rtBigNumMagnitudeDivideKnuth(pQuotient, pRemainder, pDividend, pDivisor); + return rc; +} + +/** + * Common worker for RTBigNumDivide and RTBigNumDivideLong. + * + * Unscrambles all four numbers, sets the signs of the two outputs, divides + * the magnitudes and scrambles everything again on the way out. + * + * @returns IPRT status code. VERR_BIGNUM_SENSITIVE_INPUT if an output is + * less sensitive than one of the inputs. + * @param pQuotient Where to return the quotient. + * @param pRemainder Where to return the remainder. + * @param pDividend What to divide. + * @param pDivisor What to divide by. + * @param fForceLong Passed on to rtBigNumMagnitudeDivide. + */ +static int rtBigNumDivideCommon(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, + PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor, bool fForceLong) +{ + Assert(pQuotient != pDividend); Assert(pQuotient != pDivisor); Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); Assert(pRemainder != pQuotient); + AssertReturn(pQuotient->fSensitive >= (pDividend->fSensitive | pDivisor->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + AssertReturn(pRemainder->fSensitive >= (pDividend->fSensitive | pDivisor->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pQuotient); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pQuotient); + rc = rtBigNumUnscramble(pRemainder); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pRemainder); + rc = rtBigNumUnscramble((PRTBIGNUM)pDividend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pDividend); + rc = rtBigNumUnscramble((PRTBIGNUM)pDivisor); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pDivisor); + + /* + * The sign value of the remainder is the same as the dividend.
+ * The sign values of the quotient follow XOR rules, just like multiplication: + * -3 / 2 = -1; r=-1; 1 ^ 0 = 1 + * 3 / -2 = -1; r= 1; 0 ^ 1 = 1 + * -3 / -2 = 1; r=-1; 1 ^ 1 = 0 + * 3 / 2 = 1; r= 1; 0 ^ 0 = 0 + */ + pQuotient->fNegative = pDividend->fNegative ^ pDivisor->fNegative; + pRemainder->fNegative = pDividend->fNegative; + + rc = rtBigNumMagnitudeDivide(pQuotient, pRemainder, pDividend, pDivisor, fForceLong); + + if (pQuotient->cUsed == 0) /* A zero output never keeps a negative sign. */ + pQuotient->fNegative = 0; + if (pRemainder->cUsed == 0) + pRemainder->fNegative = 0; + + rtBigNumScramble((PRTBIGNUM)pDivisor); + } + rtBigNumScramble((PRTBIGNUM)pDividend); + } + rtBigNumScramble(pRemainder); + } + rtBigNumScramble(pQuotient); + } + return rc; +} + + +RTDECL(int) RTBigNumDivide(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor) +{ + return rtBigNumDivideCommon(pQuotient, pRemainder, pDividend, pDivisor, false /*fForceLong*/); +} + + +RTDECL(int) RTBigNumDivideLong(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor) +{ + return rtBigNumDivideCommon(pQuotient, pRemainder, pDividend, pDivisor, true /*fForceLong*/); +} + + +/** + * Calculates the modulus of a magnitude value, leaving the sign bit to the + * caller. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched, this means the caller has to check for zero outputs. + * + * @returns IPRT status code. + * @param pRemainder Where to return the remainder. + * @param pDividend What to divide. + * @param pDivisor What to divide by. + */ +static int rtBigNumMagnitudeModulo(PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor) +{ + Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); + Assert(!pRemainder->fCurScrambled); Assert(!pDividend->fCurScrambled); Assert(!pDivisor->fCurScrambled); + + /* + * Just set the output value to zero as that's the return for several + * special case and the initial state of the general case.
+ */ + rtBigNumSetUsed(pRemainder, 0); + + /* + * Dividing something by zero is undefined. + * Dividing zero by something is zero, unless the divisor is also zero. + */ + if (!pDivisor->cUsed || !pDividend->cUsed) + return pDivisor->cUsed ? VINF_SUCCESS : VERR_BIGNUM_DIV_BY_ZERO; + + /* + * Dividing by one? Quotient = dividend, no remainder. + */ + if (pDivisor->cUsed == 1 && pDivisor->pauElements[0] == 1) + return VINF_SUCCESS; + + /* + * Dividend smaller than the divisor. Zero quotient, the dividend is the + * remainder. + */ + int iDiff = rtBigNumMagnitudeCompare(pDividend, pDivisor); + if (iDiff < 0) + return rtBigNumMagnitudeCopy(pRemainder, pDividend); + + /* + * Since we already have done the compare, check if the two values are the + * same. The quotient would be 1 and there is no remainder then. + */ + if (iDiff == 0) + return VINF_SUCCESS; + + /** @todo optimize small numbers. */ + int rc = VINF_SUCCESS; + if (pDivisor->cUsed < 2) + { + /* + * Do very simple long division. This ain't fast, but it does the trick. + * (Same loop as rtBigNumMagnitudeDivideSlowLong, minus the quotient.) + */ + uint32_t iBit = rtBigNumMagnitudeBitWidth(pDividend); + while (iBit-- > 0) + { + rc = rtBigNumMagnitudeShiftLeftOne(pRemainder, rtBigNumMagnitudeGetBit(pDividend, iBit)); + AssertRCBreak(rc); + iDiff = rtBigNumMagnitudeCompare(pRemainder, pDivisor); + if (iDiff >= 0) + { + if (iDiff != 0) + { + rc = rtBigNumMagnitudeSubThis(pRemainder, pDivisor); + AssertRCBreak(rc); + } + else + rtBigNumSetUsed(pRemainder, 0); + } + } + } + else + { + /* + * Join paths with division. No quotient is requested (NULL). + */ + rc = rtBigNumMagnitudeDivideKnuth(NULL, pRemainder, pDividend, pDivisor); + } + + /* This shouldn't be necessary.
*/ + rtBigNumStripTrailingZeros(pRemainder); + return rc; +} + + +RTDECL(int) RTBigNumModulo(PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor) +{ + Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); + AssertReturn(pRemainder->fSensitive >= (pDividend->fSensitive | pDivisor->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pRemainder); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pRemainder); + rc = rtBigNumUnscramble((PRTBIGNUM)pDividend); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pDividend); + rc = rtBigNumUnscramble((PRTBIGNUM)pDivisor); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pDivisor); + + /* + * The sign value of the remainder is the same as the dividend. + */ + pRemainder->fNegative = pDividend->fNegative; + + rc = rtBigNumMagnitudeModulo(pRemainder, pDividend, pDivisor); + + if (pRemainder->cUsed == 0) /* A zero remainder never keeps a negative sign. */ + pRemainder->fNegative = 0; + + rtBigNumScramble((PRTBIGNUM)pDivisor); + } + rtBigNumScramble((PRTBIGNUM)pDividend); + } + rtBigNumScramble(pRemainder); + } + return rc; +} + + + +/** + * Exponentiate the magnitude. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched, this means the caller has to reject negative exponents. + * + * @returns IPRT status code. + * @param pResult Where to return the power. + * @param pBase The base value. + * @param pExponent The exponent (assumed positive or zero). + */ +static int rtBigNumMagnitudeExponentiate(PRTBIGNUM pResult, PCRTBIGNUM pBase, PCRTBIGNUM pExponent) +{ + Assert(pResult != pBase); Assert(pResult != pExponent); + Assert(!pResult->fCurScrambled); Assert(!pBase->fCurScrambled); Assert(!pExponent->fCurScrambled); + + /* + * A couple of special cases. + */ + int rc; + /* base ^ 0 => 1. */ + if (pExponent->cUsed == 0) + { + rc = rtBigNumSetUsed(pResult, 1); + if (RT_SUCCESS(rc)) + pResult->pauElements[0] = 1; + return rc; + } + + /* base ^ 1 => base.
*/ + if (pExponent->cUsed == 1 && pExponent->pauElements[0] == 1) + return rtBigNumMagnitudeCopy(pResult, pBase); + + /* + * Set up. + */ + /* Pow2 holds base ^ (2 ^ iBit) during the loop below; it starts out as the base itself. */ + RTBIGNUM Pow2; + rc = rtBigNumCloneInternal(&Pow2, pBase); + if (RT_SUCCESS(rc)) + { + /* Init result to 1. */ + rc = rtBigNumSetUsed(pResult, 1); + if (RT_SUCCESS(rc)) + { + pResult->pauElements[0] = 1; + + /* Make a temporary variable that we can use for temporary storage of the result. */ + RTBIGNUM TmpMultiplicand; + rc = rtBigNumCloneInternal(&TmpMultiplicand, pResult); + if (RT_SUCCESS(rc)) + { + /* + * Exponentiation by squaring. Reduces the number of + * multiplications to: NumBitsSet(Exponent) + BitWidth(Exponent). + */ + uint32_t const cExpBits = rtBigNumMagnitudeBitWidth(pExponent); + uint32_t iBit = 0; + for (;;) + { + if (rtBigNumMagnitudeGetBit(pExponent, iBit) != 0) + { + rc = rtBigNumMagnitudeCopy(&TmpMultiplicand, pResult); /* The multiply gets a copy so its output differs from its inputs. */ + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeMultiply(pResult, &TmpMultiplicand, &Pow2); + if (RT_FAILURE(rc)) + break; + } + + /* Done? */ + iBit++; + if (iBit >= cExpBits) + break; + + /* Not done yet, square the base again.
*/ + rc = rtBigNumMagnitudeCopy(&TmpMultiplicand, &Pow2); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeMultiply(&Pow2, &TmpMultiplicand, &TmpMultiplicand); + if (RT_FAILURE(rc)) + break; + } + + RTBigNumDestroy(&TmpMultiplicand); + } + } + RTBigNumDestroy(&Pow2); + } + return rc; +} + + +RTDECL(int) RTBigNumExponentiate(PRTBIGNUM pResult, PCRTBIGNUM pBase, PCRTBIGNUM pExponent) +{ + Assert(pResult != pBase); Assert(pResult != pExponent); + AssertReturn(pResult->fSensitive >= (pBase->fSensitive | pExponent->fSensitive), VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + rc = rtBigNumUnscramble((PRTBIGNUM)pBase); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pBase); + rc = rtBigNumUnscramble((PRTBIGNUM)pExponent); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pExponent); + if (!pExponent->fNegative) + { + pResult->fNegative = pBase->fNegative; /* sign unchanged. NOTE(review): the base sign is copied regardless of exponent parity, so an even power (or power zero) of a negative base ends up flagged negative - confirm this is intended. */ + rc = rtBigNumMagnitudeExponentiate(pResult, pBase, pExponent); + } + else + rc = VERR_BIGNUM_NEGATIVE_EXPONENT; + + rtBigNumScramble((PRTBIGNUM)pExponent); + } + rtBigNumScramble((PRTBIGNUM)pBase); + } + rtBigNumScramble(pResult); + } + return rc; +} + + +/** + * Modular exponentiation, magnitudes only. + * + * All variables must be unscrambled. The sign flag is not considered nor + * touched, this means the caller have to reject negative exponents and do any + * other necessary sign bit fiddling. + * + * @returns IPRT status code. + * @param pResult Where to return the remainder of the power. + * @param pBase The base value. + * @param pExponent The exponent (assumed positive or zero). + * @param pModulus The modulus value (or divisor if you like).
+ */ +static int rtBigNumMagnitudeModExp(PRTBIGNUM pResult, PRTBIGNUM pBase, PRTBIGNUM pExponent, PRTBIGNUM pModulus) +{ + Assert(pResult != pBase); Assert(pResult != pExponent); Assert(pResult != pModulus); + Assert(!pResult->fCurScrambled); Assert(!pBase->fCurScrambled); Assert(!pExponent->fCurScrambled); Assert(!pModulus->fCurScrambled); + int rc; + + /* + * Check some special cases to get them out of the way. + */ + /* Div by 0 => invalid. */ + if (pModulus->cUsed == 0) + return VERR_BIGNUM_DIV_BY_ZERO; + + /* Div by 1 => no remainder. */ + if (pModulus->cUsed == 1 && pModulus->pauElements[0] == 1) + { + rtBigNumSetUsed(pResult, 0); + return VINF_SUCCESS; + } + + /* base ^ 0 => 1. */ + if (pExponent->cUsed == 0) + { + rc = rtBigNumSetUsed(pResult, 1); + if (RT_SUCCESS(rc)) + pResult->pauElements[0] = 1; + return rc; + } + + /* base ^ 1 => base. */ + if (pExponent->cUsed == 1 && pExponent->pauElements[0] == 1) + return rtBigNumMagnitudeModulo(pResult, pBase, pModulus); + + /* + * Set up. + */ + /* Result = 1; preallocate space for the result while at it. */ + rc = rtBigNumSetUsed(pResult, pModulus->cUsed + 1); + if (RT_SUCCESS(rc)) + rc = rtBigNumSetUsed(pResult, 1); + if (RT_SUCCESS(rc)) + { + pResult->pauElements[0] = 1; + + /* Pow2 = pBase or pBase % pModulus depending on the difference in size. */ + RTBIGNUM Pow2; + if (pBase->cUsed <= pModulus->cUsed + pModulus->cUsed / 2) + rc = rtBigNumCloneInternal(&Pow2, pBase); + else + rc = rtBigNumMagnitudeModulo(rtBigNumInitZeroTemplate(&Pow2, pBase), pBase, pModulus); + + /* Need a couple of temporary variables. */ + RTBIGNUM TmpMultiplicand; + rtBigNumInitZeroTemplate(&TmpMultiplicand, pResult); + + RTBIGNUM TmpProduct; + rtBigNumInitZeroTemplate(&TmpProduct, pResult); + + /* + * We combine the exponentiation by squaring with the fact that: + * (a*b) mod n = ( (a mod n) * (b mod n) ) mod n + * + * Thus, we can reduce the size of intermediate results by mod'ing them + * in each step. + * + * The loop is only entered when the Pow2 setup above succeeded; a + * setup failure is returned to the caller instead of being overwritten + * by the first status code produced in the loop.
+ */ + uint32_t const cExpBits = rtBigNumMagnitudeBitWidth(pExponent); + uint32_t iBit = 0; + while (RT_SUCCESS(rc)) + { + if (rtBigNumMagnitudeGetBit(pExponent, iBit) != 0) + { + rc = rtBigNumMagnitudeCopy(&TmpMultiplicand, pResult); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeMultiply(&TmpProduct, &TmpMultiplicand, &Pow2); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeModulo(pResult, &TmpProduct, pModulus); + if (RT_FAILURE(rc)) + break; + } + + /* Done? */ + iBit++; + if (iBit >= cExpBits) + break; + + /* Not done yet, square and mod the base again. */ + rc = rtBigNumMagnitudeCopy(&TmpMultiplicand, &Pow2); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeMultiply(&TmpProduct, &TmpMultiplicand, &TmpMultiplicand); + if (RT_SUCCESS(rc)) + rc = rtBigNumMagnitudeModulo(&Pow2, &TmpProduct, pModulus); + if (RT_FAILURE(rc)) + break; + } + + RTBigNumDestroy(&TmpMultiplicand); + RTBigNumDestroy(&TmpProduct); + RTBigNumDestroy(&Pow2); + } + return rc; +} + + +RTDECL(int) RTBigNumModExp(PRTBIGNUM pResult, PRTBIGNUM pBase, PRTBIGNUM pExponent, PRTBIGNUM pModulus) +{ + Assert(pResult != pBase); Assert(pResult != pExponent); Assert(pResult != pModulus); + AssertReturn(pResult->fSensitive >= (pBase->fSensitive | pExponent->fSensitive | pModulus->fSensitive), + VERR_BIGNUM_SENSITIVE_INPUT); + + int rc = rtBigNumUnscramble(pResult); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pResult); + rc = rtBigNumUnscramble((PRTBIGNUM)pBase); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pBase); + rc = rtBigNumUnscramble((PRTBIGNUM)pExponent); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pExponent); + rc = rtBigNumUnscramble((PRTBIGNUM)pModulus); + if (RT_SUCCESS(rc)) + { + RTBIGNUM_ASSERT_VALID(pModulus); + if (!pExponent->fNegative) + { + pResult->fNegative = pModulus->fNegative; /* pBase ^ pExponent / pModulus; result = remainder.
*/ + rc = rtBigNumMagnitudeModExp(pResult, pBase, pExponent, pModulus); + if (pResult->cUsed == 0) /* A zero result never keeps a negative sign, same as the divide and modulo wrappers. */ + pResult->fNegative = 0; + } + else + rc = VERR_BIGNUM_NEGATIVE_EXPONENT; + rtBigNumScramble((PRTBIGNUM)pModulus); + } + rtBigNumScramble((PRTBIGNUM)pExponent); + } + rtBigNumScramble((PRTBIGNUM)pBase); + } + rtBigNumScramble(pResult); + } + return rc; +} + diff --git a/src/VBox/Runtime/common/math/ceil.asm b/src/VBox/Runtime/common/math/ceil.asm new file mode 100644 index 00000000..b5acff75 --- /dev/null +++ b/src/VBox/Runtime/common/math/ceil.asm @@ -0,0 +1,79 @@ +; $Id: ceil.asm $ +;; @file +; IPRT - No-CRT ceil - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both.
+; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the smallest integral value not less than rd. +; @returns st(0) on x86, xmm0 on AMD64. +; @param rd [xBP + xCB*2] on x86, xmm0 on AMD64. +RT_NOCRT_BEGINPROC ceil + push xBP + mov xBP, xSP + sub xSP, 10h + +%ifdef RT_ARCH_AMD64 ;; @todo there is probably some sse instruction for this. + movsd [xSP], xmm0 + fld qword [xSP] +%else + fld qword [xBP + xCB*2] +%endif + + ; Make it round up by modifying the fpu control word. + ; [xBP - 10h] is the same slot as [xSP]; the argument has already been loaded from it. + fstcw [xBP - 10h] + mov eax, [xBP - 10h] ; dword read of a word store; the and below masks off the upper half. + or eax, 00800h ; set bit 11 + and eax, 0fbffh ; clear bit 10 => rounding control 10b (round up) + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; Round ST(0) to integer. + frndint + + ; Restore the fpu control word. + fldcw [xBP - 10h] + +%ifdef RT_ARCH_AMD64 + fstp qword [xSP] + movsd xmm0, [xSP] +%endif + leave + ret +ENDPROC RT_NOCRT(ceil) + diff --git a/src/VBox/Runtime/common/math/ceilf.asm b/src/VBox/Runtime/common/math/ceilf.asm new file mode 100644 index 00000000..c6d0dc2a --- /dev/null +++ b/src/VBox/Runtime/common/math/ceilf.asm @@ -0,0 +1,79 @@ +; $Id: ceilf.asm $ +;; @file +; IPRT - No-CRT ceilf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>.
+; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the smallest integral value not less than r32. +; @returns st(0) on x86, xmm0 on AMD64. +; @param r32 [xBP + xCB*2] on x86, xmm0 on AMD64. +RT_NOCRT_BEGINPROC ceilf + push xBP + mov xBP, xSP + sub xSP, 10h + +%ifdef RT_ARCH_AMD64 ;; @todo there is probably some sse instruction for this. + movss [xSP], xmm0 + fld dword [xSP] +%else + fld dword [xBP + xCB*2] +%endif + + ; Make it round up by modifying the fpu control word. + ; [xBP - 10h] is the same slot as [xSP]; the argument has already been loaded from it. + fstcw [xBP - 10h] + mov eax, [xBP - 10h] ; dword read of a word store; the and below masks off the upper half. + or eax, 00800h ; set bit 11 + and eax, 0fbffh ; clear bit 10 => rounding control 10b (round up) + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; Round ST(0) to integer. + frndint + + ; Restore the fpu control word. + fldcw [xBP - 10h] + +%ifdef RT_ARCH_AMD64 + fstp dword [xSP] + movss xmm0, [xSP] +%endif + leave + ret +ENDPROC RT_NOCRT(ceilf) + diff --git a/src/VBox/Runtime/common/math/ceill.asm b/src/VBox/Runtime/common/math/ceill.asm new file mode 100644 index 00000000..3f5057d9 --- /dev/null +++ b/src/VBox/Runtime/common/math/ceill.asm @@ -0,0 +1,70 @@ +; $Id: ceill.asm $ +;; @file +; IPRT - No-CRT ceill - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License.
+; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the smallest integral value not less than lrd. +; @returns st(0) +; @param lrd [xBP + xCB*2] (read as a tword from the stack on both architectures) +RT_NOCRT_BEGINPROC ceill + push xBP + mov xBP, xSP + sub xSP, 10h + + fld tword [xBP + xCB*2] + + ; Make it round up by modifying the fpu control word. + fstcw [xBP - 10h] + mov eax, [xBP - 10h] ; dword read of a word store; the and below masks off the upper half. + or eax, 00800h ; set bit 11 + and eax, 0fbffh ; clear bit 10 => rounding control 10b (round up) + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; Round ST(0) to integer. + frndint + + ; Restore the fpu control word. + fldcw [xBP - 10h] + + leave + ret +ENDPROC RT_NOCRT(ceill) + diff --git a/src/VBox/Runtime/common/math/consts.c b/src/VBox/Runtime/common/math/consts.c new file mode 100644 index 00000000..3711dc44 --- /dev/null +++ b/src/VBox/Runtime/common/math/consts.c @@ -0,0 +1,59 @@ +/* $Id: consts.c $ */ +/** @file + * IPRT - No-CRT - Math Constants. + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define IPRT_NO_CRT_FOR_3RD_PARTY
+#include "internal/nocrt.h"
+#include <iprt/nocrt/math.h>
+#include <iprt/assertcompile.h>
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/* Drop any macro named __infinity so the symbol below can be defined under that name. */
+#undef __infinity
+/** Bit pattern backing the no-CRT __infinity symbol: a 64-bit float initialised with
+ * zero in the first two initialiser arguments and RTFLOAT64U_EXP_MAX in the third.
+ * NOTE(review): presumably the arguments are (sign, fraction, exponent), i.e. +Inf -
+ * confirm against the RTFLOAT64U_INIT_C definition. */
+const union __infinity_un RT_NOCRT(__infinity) = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_MAX) };
+/* The initialiser above is only meaningful if a double is exactly one RTFLOAT64U. */
+AssertCompile(sizeof(double) == sizeof(RTFLOAT64U));
+RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__infinity);
+
+/* Drop any macro named __nanf so the symbol below can be defined under that name. */
+#undef __nanf
+/** Bit pattern backing the no-CRT __nanf symbol: a 32-bit float with RTFLOAT32U_EXP_MAX
+ * as the third initialiser argument and, in the second, the top fraction bit plus bit 0.
+ * NOTE(review): presumably (sign, fraction, exponent), which would make this a positive
+ * quiet NaN - confirm against the RTFLOAT32U_INIT definition. */
+const union __nanf_un RT_NOCRT(__nanf) = { RTFLOAT32U_INIT(0, RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1) | 1, RTFLOAT32U_EXP_MAX) };
+/* Same size guarantee for float versus RTFLOAT32U. */
+AssertCompile(sizeof(float) == sizeof(RTFLOAT32U));
+RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL_WITHOUT_UNDERSCORE(__nanf);
+
diff --git a/src/VBox/Runtime/common/math/copysign.cpp b/src/VBox/Runtime/common/math/copysign.cpp
new file mode 100644
index 00000000..db9fb370
--- /dev/null
+++ b/src/VBox/Runtime/common/math/copysign.cpp
@@ -0,0 +1,63 @@
+/* $Id: copysign.cpp $ */
+/** @file
+ * IPRT - No-CRT - copysign().
+ */
+
+/*
+ * Copyright (C) 2022-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> + + +#undef copysign +double RT_NOCRT(copysign)(double rdMagnitude, double rdSign) +{ + AssertCompile(sizeof(rdMagnitude) == sizeof(RTFLOAT64U)); +#ifndef RT_LITTLE_ENDIAN /* MSC outputs a lot better code for the alternative below. 
*/ + RTFLOAT64U uRet, uSign; + uSign.rd = rdSign; + uRet.rd = rdMagnitude; + uRet.s.fSign = uSign.s.fSign; +#else + RTFLOAT64U uRet; + uRet.u = (*(uint64_t const *)&rdMagnitude & (RT_BIT_64(63) - 1U)) | (*(uint64_t const *)&rdSign & RT_BIT_64(63)); +#endif + return uRet.rd; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(copysign); + diff --git a/src/VBox/Runtime/common/math/copysignf.cpp b/src/VBox/Runtime/common/math/copysignf.cpp new file mode 100644 index 00000000..6bc573be --- /dev/null +++ b/src/VBox/Runtime/common/math/copysignf.cpp @@ -0,0 +1,63 @@ +/* $Id: copysignf.cpp $ */ +/** @file + * IPRT - No-CRT - copysignf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> + + +#undef copysignf +float RT_NOCRT(copysignf)(float r32Magnitude, float r32Sign) +{ + AssertCompile(sizeof(r32Magnitude) == sizeof(RTFLOAT32U)); +#ifndef RT_LITTLE_ENDIAN /* MSC outputs better code for the alternative below. */ + RTFLOAT32U uRet, uSign; + uSign.r = r32Sign; + uRet.r = r32Magnitude; + uRet.s.fSign = uSign.s.fSign; +#else + RTFLOAT32U uRet; + uRet.u = (*((uint32_t const *)&r32Magnitude) & (RT_BIT_32(31) - 1U)) | (*((uint32_t const *)&r32Sign) & RT_BIT_32(31)); +#endif + return uRet.r; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(copysignf); + diff --git a/src/VBox/Runtime/common/math/copysignl.cpp b/src/VBox/Runtime/common/math/copysignl.cpp new file mode 100644 index 00000000..7b8b47f2 --- /dev/null +++ b/src/VBox/Runtime/common/math/copysignl.cpp @@ -0,0 +1,66 @@ +/* $Id: copysignl.cpp $ */ +/** @file + * IPRT - No-CRT - copysignl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> + + +#undef copysignl +long double RT_NOCRT(copysignl)(long double lrdMagnitude, long double lrdSign) +{ +#ifdef RT_COMPILER_WITH_80BIT_LONG_DOUBLE + RTFLOAT80U2 uRet, uSign; + uSign.lrd = lrdSign; + uRet.lrd = lrdMagnitude; + uRet.s.fSign = uSign.s.fSign; + return uRet.lrd; +#else + AssertCompile(sizeof(lrdMagnitude) == sizeof(RTFLOAT64U)); + RTFLOAT64U uRet, uSign; + uSign.rd = lrdSign; + uRet.rd = lrdMagnitude; + uRet.s.fSign = uSign.s.fSign; + return uRet.rd; +#endif +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(copysignl); + diff --git a/src/VBox/Runtime/common/math/cos.asm b/src/VBox/Runtime/common/math/cos.asm new file mode 100644 index 00000000..da83ef81 --- /dev/null +++ b/src/VBox/Runtime/common/math/cos.asm @@ -0,0 +1,213 @@ +; $Id: cos.asm $ +;; @file +; IPRT - No-CRT cos - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. 
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "iprt/x86.mac"
+
+
+BEGINCODE
+
+;;
+; Compute the cosine of rd, measured in radians.
+;
+; @returns st(0) / xmm0
+; @param    rd      [xBP + xCB*2] / xmm0
+;
+; Locals:   [xBP - 10h] = double transfer slot between xmm0 and the FPU (AMD64).
+;           [xBP - 1ch] = modified FPU control word (Windows only).
+;           [xBP - 20h] = FPU control word on entry (Windows only), reloaded
+;                         on every exit path.
+; Clobbers: ax, cx/ecx, flags, FPU stack, plus whatever rtNoCrtMathSinCore clobbers.
+;
+RT_NOCRT_BEGINPROC cos
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        sub     xSP, 20h
+        SEH64_ALLOCATE_STACK 20h
+        SEH64_END_PROLOGUE
+
+%ifdef RT_OS_WINDOWS
+        ;
+        ; Make sure we use full precision and not the windows default of 53 bits.
+        ;
+;; @todo not sure if this makes any difference...
+        fnstcw  [xBP - 20h]
+        mov     ax, [xBP - 20h]
+        or      ax, X86_FCW_PC_64       ; includes both bits, so no need to clear the mask.
+        mov     [xBP - 1ch], ax
+        fldcw   [xBP - 1ch]
+%endif
+
+        ;
+        ; Load the input into st0.
+        ;
+%ifdef RT_ARCH_AMD64
+        movsd   [xBP - 10h], xmm0
+        fld     qword [xBP - 10h]
+%else
+        fld     qword [xBP + xCB*2]
+%endif
+
+        ;
+        ; The FCOS instruction has a very narrow range (-3pi/8 to 3pi/8) where it
+        ; works reliably, so outside that we'll use the FSIN instruction instead
+        ; as it has a larger good range (-5pi/4 to 1pi/4 for cosine).
+        ; Input conversion follows: cos(x) = sin(x + pi/2)
+        ;
+        ; We examine the input and weed out non-finite numbers first.
+        ;
+
+        ; We only do the range check on normal finite numbers.
+        fxam
+        fnstsw  ax
+        mov     cx, ax                  ; Unmasked copy: C1 (sign) is needed further down,
+                                        ; and the classification mask below discards it.
+        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
+        cmp     ax, X86_FSW_C2          ; Normal finite number (excluding zero)
+        je      .finite
+        cmp     ax, X86_FSW_C3          ; Zero
+        je      .zero
+        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
+        je      .zero
+        cmp     ax, X86_FSW_C0          ; NaN - must handle it special,
+        je      .nan
+
+        ; Pass infinities and unsupported inputs to fcos, assuming it does the right thing.
+        ; We also jump here if we get a finite number in the "good" range, see below.
+.do_fcos:
+        fcos
+        jmp     .return_val
+
+        ;
+        ; Finite number.
+        ;
+        ; First check if it's a very tiny number where we can simply return 1.
+        ; Next check if it's in the range where FCOS is reasonable, otherwise
+        ; go to FSIN to do the work.
+        ;
+.finite:
+        fld     st0
+        fabs
+        fld     qword [.s_r64TinyCosTo1 xWrtRIP]
+        fcomip  st1
+        ja      .zero_extra_pop
+
+.not_that_tiny_input:
+        fld     qword [.s_r64FCosOkay xWrtRIP]
+        fcomip  st1
+        ffreep  st0                     ; pop fabs(input)
+        ja      .do_fcos                ; jmp if fabs(input) < .s_r64FCosOkay
+
+        ;
+        ; If we have a positive number we subtract 3pi/2, for negative we add pi/2.
+        ; The unmasked FXAM result is still in CX (nothing above touches it).
+        ;
+.outside_fcos_range:
+        test    cx, X86_FSW_C1          ; The sign bit.
+        jnz     .adjust_negative_to_sine
+
+        ; Calc -3pi/2 using FPU-internal pi constant.
+        fldpi
+        fadd    st0, st0                ; st0=2pi
+        fldpi
+        fdiv    qword [.s_r64Two xWrtRIP] ; st1=2pi; st0=pi/2
+        fsubp   st1, st0                ; st0=3pi/2
+        fchs                            ; st0=-3pi/2
+        jmp     .make_sine_adjustment
+
+.adjust_negative_to_sine:
+        ; Calc +pi/2.
+        fldpi
+        fdiv    qword [.s_r64Two xWrtRIP] ; st1=input; st0=pi/2
+
+.make_sine_adjustment:
+        faddp   st1, st0                ; st0=input + adjustment
+
+        ;
+        ; Call internal sine worker to calculate st0=sin(st0)
+        ;
+.do_sine:
+        mov     ecx, 1                  ; double
+        extern  NAME(rtNoCrtMathSinCore)
+        call    NAME(rtNoCrtMathSinCore)
+
+        ;
+        ; Return st0.
+        ;
+.return_val:
+%ifdef RT_ARCH_AMD64
+        fstp    qword [xBP - 10h]
+        movsd   xmm0, [xBP - 10h]
+%endif
+%ifdef RT_OS_WINDOWS
+        fldcw   [xBP - 20h]             ; restore original
+%endif
+.return:
+        leave
+        ret
+
+        ;
+        ; cos(+/-0) = +1.0
+        ;
+.zero_extra_pop:
+        ffreep  st0
+.zero:
+        ffreep  st0
+        fld1
+        jmp     .return_val
+
+        ;
+        ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
+        ; to QNaN when masked).  On AMD64 xmm0 still holds the caller's value, so
+        ; only the FPU copy is dropped.
+        ;
+.nan:
+%ifdef RT_ARCH_AMD64
+        ffreep  st0
+%endif
+%ifdef RT_OS_WINDOWS
+        fldcw   [xBP - 20h]             ; restore original; .return is past the restore above.
+%endif
+        jmp     .return
+
+        ;
+        ; Local constants.
+        ;
+ALIGNCODE(8)
+        ; About 2**-27. When fabs(input) is below this limit we can consider cos(input) ~= 1.0.
+.s_r64TinyCosTo1:
+        dq      7.4505806e-9
+
+        ; The absolute limit for the range which FCOS is expected to produce reasonable results.
+.s_r64FCosOkay:
+        dq      1.1780972450961724644225 ; 3*pi/8
+
+.s_r64Two:
+        dq      2.0
+ENDPROC   RT_NOCRT(cos)
+
diff --git a/src/VBox/Runtime/common/math/cosf.asm b/src/VBox/Runtime/common/math/cosf.asm
new file mode 100644
index 00000000..5f47ba14
--- /dev/null
+++ b/src/VBox/Runtime/common/math/cosf.asm
@@ -0,0 +1,213 @@
+; $Id: cosf.asm $
+;; @file
+; IPRT - No-CRT cosf - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "iprt/x86.mac"
+
+
+BEGINCODE
+
+;;
+; Compute the cosine of rf, measured in radians.
+;
+; @returns st(0) / xmm0
+; @param    rf      [xBP + xCB*2] / xmm0
+;
+; Locals:   [xBP - 10h] = float transfer slot between xmm0 and the FPU (AMD64).
+;           [xBP - 1ch] = modified FPU control word (Windows only).
+;           [xBP - 20h] = FPU control word on entry (Windows only), reloaded
+;                         on every exit path.
+; Clobbers: ax, cx/ecx, flags, FPU stack, plus whatever rtNoCrtMathSinCore clobbers.
+;
+RT_NOCRT_BEGINPROC cosf
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        sub     xSP, 20h
+        SEH64_ALLOCATE_STACK 20h
+        SEH64_END_PROLOGUE
+
+%ifdef RT_OS_WINDOWS
+        ;
+        ; Make sure we use full precision and not the windows default of 53 bits.
+        ;
+;; @todo not sure if this makes any difference...
+        fnstcw  [xBP - 20h]
+        mov     ax, [xBP - 20h]
+        or      ax, X86_FCW_PC_64       ; includes both bits, so no need to clear the mask.
+        mov     [xBP - 1ch], ax
+        fldcw   [xBP - 1ch]
+%endif
+
+        ;
+        ; Load the input into st0.
+        ;
+%ifdef RT_ARCH_AMD64
+        movss   [xBP - 10h], xmm0
+        fld     dword [xBP - 10h]
+%else
+        fld     dword [xBP + xCB*2]
+%endif
+
+        ;
+        ; The FCOS instruction has a very narrow range (-3pi/8 to 3pi/8) where it
+        ; works reliably, so outside that we'll use the FSIN instruction instead
+        ; as it has a larger good range (-5pi/4 to 1pi/4 for cosine).
+        ; Input conversion follows: cosf(x) = sinf(x + pi/2)
+        ;
+        ; We examine the input and weed out non-finite numbers first.
+        ;
+
+        ; We only do the range check on normal finite numbers.
+        fxam
+        fnstsw  ax
+        mov     cx, ax                  ; Unmasked copy: C1 (sign) is needed further down,
+                                        ; and the classification mask below discards it.
+        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
+        cmp     ax, X86_FSW_C2          ; Normal finite number (excluding zero)
+        je      .finite
+        cmp     ax, X86_FSW_C3          ; Zero
+        je      .zero
+        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
+        je      .zero
+        cmp     ax, X86_FSW_C0          ; NaN - must handle it special,
+        je      .nan
+
+        ; Pass infinities and unsupported inputs to fcos, assuming it does the right thing.
+        ; We also jump here if we get a finite number in the "good" range, see below.
+.do_fcos:
+        fcos
+        jmp     .return_val
+
+        ;
+        ; Finite number.
+        ;
+        ; First check if it's a very tiny number where we can simply return 1.
+        ; Next check if it's in the range where FCOS is reasonable, otherwise
+        ; go to FSIN to do the work.
+        ;
+.finite:
+        fld     st0
+        fabs
+        fld     qword [.s_r64TinyCosTo1 xWrtRIP]
+        fcomip  st1
+        ja      .zero_extra_pop
+
+.not_that_tiny_input:
+        fld     qword [.s_r64FCosOkay xWrtRIP]
+        fcomip  st1
+        ffreep  st0                     ; pop fabs(input)
+        ja      .do_fcos                ; jmp if fabs(input) < .s_r64FCosOkay
+
+        ;
+        ; If we have a positive number we subtract 3pi/2, for negative we add pi/2.
+        ; The unmasked FXAM result is still in CX (nothing above touches it).
+        ;
+.outside_fcos_range:
+        test    cx, X86_FSW_C1          ; The sign bit.
+        jnz     .adjust_negative_to_sine
+
+        ; Calc -3pi/2 using FPU-internal pi constant.
+        fldpi
+        fadd    st0, st0                ; st0=2pi
+        fldpi
+        fdiv    qword [.s_r64Two xWrtRIP] ; st1=2pi; st0=pi/2
+        fsubp   st1, st0                ; st0=3pi/2
+        fchs                            ; st0=-3pi/2
+        jmp     .make_sine_adjustment
+
+.adjust_negative_to_sine:
+        ; Calc +pi/2.
+        fldpi
+        fdiv    qword [.s_r64Two xWrtRIP] ; st1=input; st0=pi/2
+
+.make_sine_adjustment:
+        faddp   st1, st0                ; st0=input + adjustment
+
+        ;
+        ; Call internal sine worker to calculate st0=sin(st0)
+        ;
+.do_sine:
+        mov     ecx, 0                  ; NOTE(review): presumably selects float (cos.asm passes 1 for
+                                        ; double) - confirm against rtNoCrtMathSinCore.
+        extern  NAME(rtNoCrtMathSinCore)
+        call    NAME(rtNoCrtMathSinCore)
+
+        ;
+        ; Return st0.
+        ;
+.return_val:
+%ifdef RT_ARCH_AMD64
+        fstp    dword [xBP - 10h]
+        movss   xmm0, [xBP - 10h]
+%endif
+%ifdef RT_OS_WINDOWS
+        fldcw   [xBP - 20h]             ; restore original
+%endif
+.return:
+        leave
+        ret
+
+        ;
+        ; cosf(+/-0) = +1.0
+        ;
+.zero_extra_pop:
+        ffreep  st0
+.zero:
+        ffreep  st0
+        fld1
+        jmp     .return_val
+
+        ;
+        ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
+        ; to QNaN when masked).  On AMD64 xmm0 still holds the caller's value, so
+        ; only the FPU copy is dropped.
+        ;
+.nan:
+%ifdef RT_ARCH_AMD64
+        ffreep  st0
+%endif
+%ifdef RT_OS_WINDOWS
+        fldcw   [xBP - 20h]             ; restore original; .return is past the restore above.
+%endif
+        jmp     .return
+
+        ;
+        ; Local constants.
+        ;
+ALIGNCODE(8)
+        ; 2**-12. When fabs(input) is below this limit we can consider cosf(input) ~= 1.0.
+.s_r64TinyCosTo1:
+        dq      0.000244140625
+
+        ; The absolute limit for the range which FCOS is expected to produce reasonable results.
+.s_r64FCosOkay:
+        dq      1.1780972450961724644225 ; 3*pi/8
+
+.s_r64Two:
+        dq      2.0
+ENDPROC   RT_NOCRT(cosf)
+
diff --git a/src/VBox/Runtime/common/math/cosl.asm b/src/VBox/Runtime/common/math/cosl.asm
new file mode 100644
index 00000000..524789f8
--- /dev/null
+++ b/src/VBox/Runtime/common/math/cosl.asm
@@ -0,0 +1,72 @@
+; $Id: cosl.asm $
+;; @file
+; IPRT - No-CRT cosl - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+%include "iprt/asmdefs.mac"
+
+BEGINCODE
+
+;;
+; Compute the cosine of lrd, measured in radians.
+;
+; FCOS is tried on the raw input first.  If it reports the operand as out of
+; range (C2 set, st0 left untouched), the input is reduced modulo 2*pi with
+; FPREM1 and FCOS is applied to the remainder.
+;
+; @returns st(0)
+; @param    lrd     [xBP + xCB*2]
+; Clobbers: ax, flags, FPU stack.
+;
+RT_NOCRT_BEGINPROC cosl
+        push    xBP
+        mov     xBP, xSP
+        sub     xSP, 10h
+
+        fld     tword [xBP + xCB*2]
+        fcos
+        fnstsw  ax
+        test    ah, 4                   ; C2 (bit 10 of the status word): operand out of range?
+        jz      .done
+
+        ; Out of range: st0 is still the input.  Reduce it modulo 2*pi.
+        fldpi
+        fadd    st0, st0                ; st0=2pi; st1=input
+        fxch    st1                     ; st0=input; st1=2pi
+.again:
+        fprem1                          ; st0=partial remainder of st0 / st1
+        fnstsw  ax
+        test    ah, 4                   ; C2 set: reduction incomplete, go again.
+        jnz     .again
+
+        ; Drop the 2*pi divisor but keep the reduced argument: storing st0 into
+        ; st1 and popping leaves the remainder in st0.  (Popping st0 itself
+        ; would throw the remainder away and take the cosine of 2*pi.)
+        fstp    st1
+        fcos
+
+.done:
+        leave
+        ret
+ENDPROC   RT_NOCRT(cosl)
+
diff --git a/src/VBox/Runtime/common/math/exp.asm b/src/VBox/Runtime/common/math/exp.asm
new file mode 100644
index 00000000..4e954164
--- /dev/null
+++ b/src/VBox/Runtime/common/math/exp.asm
@@ -0,0 +1,151 @@
+; $Id: exp.asm $
+;; @file
+; IPRT - No-CRT exp - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "iprt/x86.mac"
+
+
+BEGINCODE
+
+; NOTE(review): declared here but not referenced anywhere in the exp procedure below.
+extern NAME(RT_NOCRT(feraiseexcept))
+
+;;
+; Compute e (2.7182818...) to the power of rd.
+;
+; The input is classified with FXAM: zero, infinity and NaN get dedicated
+; exits, everything else is converted to a power of two (input * log2(e)) and
+; evaluated as 2**fraction scaled by 2**integer.
+;
+; @returns st(0) / xmm0
+; @param    rd      [xBP + xCB*2] / xmm0
+;
+; Locals:   [xBP - 10h] = double transfer slot between xmm0 and the FPU (AMD64).
+; Clobbers: ax, cx, flags, FPU stack.
+;
+RT_NOCRT_BEGINPROC exp
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        sub     xSP, 20h
+        SEH64_ALLOCATE_STACK 20h
+        SEH64_END_PROLOGUE
+
+        ;
+        ; Load the input into st0.
+        ;
+%ifdef RT_ARCH_AMD64
+        movsd   [xBP - 10h], xmm0
+        fld     qword [xBP - 10h]
+%else
+        fld     qword [xBP + xCB*2]
+%endif
+
+        ;
+        ; Weed out non-normal values.
+        ;
+        fxam
+        fnstsw  ax
+        mov     cx, ax                  ; Unmasked copy; the .inf path tests C1 (sign) in it.
+        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
+        cmp     ax, X86_FSW_C2          ; Normal finite number (excluding zero)
+        je      .finite
+        cmp     ax, X86_FSW_C3          ; Zero
+        je      .zero
+        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals
+        je      .finite
+        cmp     ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity.
+        je      .inf
+        jmp     .nan                    ; Everything else takes the NaN exit.
+
+.finite:
+        ;
+        ; Convert to power of 2 and it'll be the same as exp2.
+        ;
+        fldl2e                          ; -> st0=log2(e); st1=input
+        fmulp                           ; -> st0=input*log2(e)
+
+        ;
+        ; Split the job in two on the fraction and integer input parts.
+        ;
+        fld     st0                     ; Push a copy of the input on the stack.
+        frndint                         ; st0 = (int)(input*log2(e))
+        fsub    st1, st0                ; st1 = input*log2(e) - (int)input*log2(e); i.e. st1 = fraction, st0 = integer.
+        fxch                            ; st0 = fraction, st1 = integer.
+
+        ; 1. Calculate on the fraction.
+        f2xm1                           ; st0 = 2**fraction - 1.0
+        fld1
+        faddp                           ; st0 = 2**fraction
+
+        ; 2. Apply the integer power of two.
+        fscale                          ; st0 = result; st1 = integer part of input.
+        fstp    st1                     ; st0 = result; no st1.
+
+        ;
+        ; Return st0.  On AMD64 the value is moved to xmm0 via the stack slot.
+        ;
+.return_val:
+%ifdef RT_ARCH_AMD64
+        fstp    qword [xBP - 10h]
+        movsd   xmm0, [xBP - 10h]
+%endif
+.return:
+        leave
+        ret
+
+        ;
+        ; +/-0.0: Return +1.0
+        ;
+.zero:
+        ffreep  st0
+        fld1
+        jmp     .return_val
+
+        ;
+        ; -Inf: Return +0.0.
+        ; +Inf: Return +Inf. Join path with NaN.
+        ;
+.inf:
+        test    cx, X86_FSW_C1          ; sign bit
+        jz      .nan
+        ffreep  st0
+        fldz
+        jmp     .return_val
+
+        ;
+        ; NaN: Return the input NaN value as is, if we can.  On AMD64 xmm0 still
+        ; holds the caller's value, so only the FPU copy is dropped; otherwise
+        ; the loaded value stays in st0 as the return value.
+        ;
+.nan:
+%ifdef RT_ARCH_AMD64
+        ffreep  st0
+%endif
+        jmp     .return
+ENDPROC   RT_NOCRT(exp)
+
diff --git a/src/VBox/Runtime/common/math/exp2.asm b/src/VBox/Runtime/common/math/exp2.asm
new file mode 100644
index 00000000..dc818135
--- /dev/null
+++ b/src/VBox/Runtime/common/math/exp2.asm
@@ -0,0 +1,117 @@
+; $Id: exp2.asm $
+;; @file
+; IPRT - No-CRT exp2 - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2022-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "iprt/x86.mac"
+
+BEGINCODE
+
+;;
+; Calculate two to the power of @a rd.
+;
+; NaN and +Inf are returned as they came in, -Inf yields zero, and every
+; other input is evaluated as 2**fraction scaled by 2**integer.
+;
+; @returns st(0) / xmm0
+; @param    rd      [xBP + xCB*2] / xmm0
+;
+; Locals:   [xSP] = double transfer slot between xmm0 and the FPU (AMD64 only;
+;           no stack space is allocated otherwise).
+; Clobbers: ax, flags, FPU stack.
+;
+RT_NOCRT_BEGINPROC exp2
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+%ifdef RT_ARCH_AMD64
+        sub     xSP, 10h
+        SEH64_ALLOCATE_STACK 10h
+%endif
+        SEH64_END_PROLOGUE
+
+        ;
+        ; Load the value into st(0).
+        ;
+%ifdef RT_ARCH_AMD64
+        movsd   [xSP], xmm0
+        fld     qword [xSP]
+%else
+        fld     qword [xBP + xCB*2]
+%endif
+
+        ;
+        ; Return immediately if NaN or infinity.
+        ;
+        fxam
+        fstsw   ax
+        test    ax, X86_FSW_C0          ; C0 is set for NaN, Infinity and Empty register. The latter is not the case.
+        jz      .input_ok
+%ifdef RT_ARCH_AMD64
+        ffreep  st0                     ; return the xmm0 register value unchanged, as FLD changes SNaN to QNaN.
+%endif
+        test    ax, X86_FSW_C2          ; C2 is clear for NaN (and Empty) but set for Infinity.
+        jz      .return_val2
+        test    ax, X86_FSW_C1          ; C1 = sign bit
+        jz      .return_val2            ; Not sign, return +Inf.
+%ifndef RT_ARCH_AMD64
+        ffreep  st0                     ; The AMD64 build already dropped st0 above.
+%endif
+        fldz                            ; Signed, so return zero as that's a good approximation for 2**-Inf.
+        jmp     .return_val
+.input_ok:
+
+        ;
+        ; Split the job in two on the fraction and integer input parts.
+        ;
+        fld     st0                     ; Push a copy of the input on the stack.
+        frndint                         ; st0 = (int)input
+        fsub    st1, st0                ; st1 = input - (int)input; i.e. st1 = fraction, st0 = integer.
+        fxch                            ; st0 = fraction, st1 = integer.
+
+        ; 1. Calculate on the fraction.
+        f2xm1                           ; st0 = 2**fraction - 1.0
+        fld1
+        faddp                           ; st0 = 2**fraction
+
+        ; 2. Apply the integer power of two.
+        fscale                          ; st0 = result; st1 = integer part of input.
+        fstp    st1                     ; st0 = result; no st1.
+
+        ; Exit with the result in st0; on AMD64 it is moved to xmm0 via the stack slot.
+.return_val:
+%ifdef RT_ARCH_AMD64
+        fstp    qword [xSP]
+        movsd   xmm0, [xSP]
+%endif
+        ; Exit without touching xmm0 / st0 (NaN and +Inf inputs).
+.return_val2:
+        leave
+        ret
+ENDPROC   RT_NOCRT(exp2)
+
diff --git a/src/VBox/Runtime/common/math/exp2f.asm b/src/VBox/Runtime/common/math/exp2f.asm
new file mode 100644
index 00000000..90b48c75
--- /dev/null
+++ b/src/VBox/Runtime/common/math/exp2f.asm
@@ -0,0 +1,117 @@
+; $Id: exp2f.asm $
+;; @file
+; IPRT - No-CRT exp2f - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2022-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "iprt/x86.mac"
+
+BEGINCODE
+
+;;
+; Calculate two to the power of @a r32.
+;
+; NaN and +Inf are returned as they came in, -Inf yields zero, and every
+; other input is evaluated as 2**fraction scaled by 2**integer.
+;
+; @returns st(0) / xmm0
+; @param    r32     [xBP + xCB*2] / xmm0
+;
+; Locals:   [xSP] = float transfer slot between xmm0 and the FPU (AMD64 only;
+;           no stack space is allocated otherwise).
+; Clobbers: ax, flags, FPU stack.
+;
+RT_NOCRT_BEGINPROC exp2f
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+%ifdef RT_ARCH_AMD64
+        sub     xSP, 10h
+        SEH64_ALLOCATE_STACK 10h
+%endif
+        SEH64_END_PROLOGUE
+
+        ;
+        ; Load the value into st(0).
+        ;
+%ifdef RT_ARCH_AMD64
+        movss   [xSP], xmm0
+        fld     dword [xSP]
+%else
+        fld     dword [xBP + xCB*2]
+%endif
+
+        ;
+        ; Return immediately if NaN or infinity.
+        ;
+        fxam
+        fstsw   ax
+        test    ax, X86_FSW_C0          ; C0 is set for NaN, Infinity and Empty register. The latter is not the case.
+        jz      .input_ok
+%ifdef RT_ARCH_AMD64
+        ffreep  st0                     ; return the xmm0 register value unchanged, as FLD changes SNaN to QNaN.
+%endif
+        test    ax, X86_FSW_C2          ; C2 is clear for NaN (and Empty) but set for Infinity.
+        jz      .return_val2
+        test    ax, X86_FSW_C1          ; C1 = sign bit
+        jz      .return_val2            ; Not sign, return +Inf.
+%ifndef RT_ARCH_AMD64
+        ffreep  st0                     ; The AMD64 build already dropped st0 above.
+%endif
+        fldz                            ; Signed, so return zero as that's a good approximation for 2**-Inf.
+        jmp     .return_val
+.input_ok:
+
+        ;
+        ; Split the job in two on the fraction and integer input parts.
+        ;
+        fld     st0                     ; Push a copy of the input on the stack.
+        frndint                         ; st0 = (int)input
+        fsub    st1, st0                ; st1 = input - (int)input; i.e. st1 = fraction, st0 = integer.
+        fxch                            ; st0 = fraction, st1 = integer.
+
+        ; 1. Calculate on the fraction.
+        f2xm1                           ; st0 = 2**fraction - 1.0
+        fld1
+        faddp                           ; st0 = 2**fraction
+
+        ; 2. Apply the integer power of two.
+        fscale                          ; st0 = result; st1 = integer part of input.
+        fstp    st1                     ; st0 = result; no st1.
+
+        ; Exit with the result in st0; on AMD64 it is moved to xmm0 via the stack slot.
+.return_val:
+%ifdef RT_ARCH_AMD64
+        fstp    dword [xSP]
+        movss   xmm0, [xSP]
+%endif
+        ; Exit without touching xmm0 / st0 (NaN and +Inf inputs).
+.return_val2:
+        leave
+        ret
+ENDPROC   RT_NOCRT(exp2f)
+
diff --git a/src/VBox/Runtime/common/math/expf.asm b/src/VBox/Runtime/common/math/expf.asm
new file mode 100644
index 00000000..81d12434
--- /dev/null
+++ b/src/VBox/Runtime/common/math/expf.asm
@@ -0,0 +1,151 @@
+; $Id: expf.asm $
+;; @file
+; IPRT - No-CRT expf - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "iprt/x86.mac"
+
+
+BEGINCODE
+
+extern NAME(RT_NOCRT(feraiseexcept))
+
+;;
+; Compute the e (2.7182818...) to the power of rd.
+; @returns st(0) / xmm0 +; @param rd [xSP + xCB*2] / xmm0 +RT_NOCRT_BEGINPROC expf + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 20h + SEH64_ALLOCATE_STACK 20h + SEH64_END_PROLOGUE + + ; + ; Load the input into st0. + ; +%ifdef RT_ARCH_AMD64 + movss [xBP - 10h], xmm0 + fld dword [xBP - 10h] +%else + fld dword [xBP + xCB*2] +%endif + + ; + ; Weed out non-normal values. + ; + fxam + fnstsw ax + mov cx, ax + and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 + cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) + je .finite + cmp ax, X86_FSW_C3 ; Zero + je .zero + cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals + je .finite + cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. + je .inf + jmp .nan + +.finite: + ; + ; Convert to power of 2 and it'll be the same as exp2. + ; + fldl2e ; -> st0=log2(e); st1=input + fmulp ; -> st0=input*log2(e) + + ; + ; Split the job in two on the fraction and integer input parts. + ; + fld st0 ; Push a copy of the input on the stack. + frndint ; st0 = (int)(input*log2(e)) + fsub st1, st0 ; st1 = input*log2(e) - (int)input*log2(e); i.e. st1 = fraction, st0 = integer. + fxch ; st0 = fraction, st1 = integer. + + ; 1. Calculate on the fraction. + f2xm1 ; st0 = 2**fraction - 1.0 + fld1 + faddp ; st0 = 2**fraction + + ; 2. Apply the integer power of two. + fscale ; st0 = result; st1 = integer part of input. + fstp st1 ; st0 = result; no st1. + + ; + ; Return st0. + ; +.return_val: +%ifdef RT_ARCH_AMD64 + fstp dword [xBP - 10h] + movss xmm0, [xBP - 10h] +%endif +.return: + leave + ret + + ; + ; +/-0.0: Return +1.0 + ; +.zero: + ffreep st0 + fld1 + jmp .return_val + + ; + ; -Inf: Return +0.0. + ; +Inf: Return +Inf. Join path with NaN. + ; +.inf: + test cx, X86_FSW_C1 ; sign bit + jz .nan + ffreep st0 + fldz + jmp .return_val + + ; + ; NaN: Return the input NaN value as is, if we can. 
+ ; +.nan: +%ifdef RT_ARCH_AMD64 + ffreep st0 +%endif + jmp .return +ENDPROC RT_NOCRT(expf) + diff --git a/src/VBox/Runtime/common/math/fabs.asm b/src/VBox/Runtime/common/math/fabs.asm new file mode 100644 index 00000000..a8e69d28 --- /dev/null +++ b/src/VBox/Runtime/common/math/fabs.asm @@ -0,0 +1,73 @@ +; $Id: fabs.asm $ +;; @file +; IPRT - No-CRT fabs - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; Compute the absolute value of rd (|rd|). 
+; @returns 32-bit: st(0) 64-bit: xmm0 +; @param rd 32-bit: [ebp + 8] 64-bit: xmm0 +RT_NOCRT_BEGINPROC fabs + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + andps xmm0, [g_r64ClearSignMask xWrtRIP] +%else + fld qword [xBP + xCB*2] ; This turns SNaN into QNaN. + fabs +%endif + + leave + ret +ENDPROC RT_NOCRT(fabs) + +ALIGNCODE(16) +g_r64ClearSignMask: + dd 0ffffffffh + dd 07fffffffh + + dd 0ffffffffh + dd 07fffffffh + diff --git a/src/VBox/Runtime/common/math/fabsf.asm b/src/VBox/Runtime/common/math/fabsf.asm new file mode 100644 index 00000000..840c89b8 --- /dev/null +++ b/src/VBox/Runtime/common/math/fabsf.asm @@ -0,0 +1,72 @@ +; $Id: fabsf.asm $ +;; @file +; IPRT - No-CRT fabsf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. 
+; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; Compute the absolute value of rf (|rf|). +; @returns 32-bit: st(0) 64-bit: xmm0 +; @param rf 32-bit: [ebp + 8] 64-bit: xmm0 +RT_NOCRT_BEGINPROC fabsf + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + SEH64_END_PROLOGUE + +%ifdef RT_ARCH_AMD64 + andps xmm0, [g_r32ClearSignMask xWrtRIP] +%else + fld dword [xBP + xCB*2] ; This turns SNaN into QNaN. + fabs +%endif + + leave + ret +ENDPROC RT_NOCRT(fabsf) + +ALIGNCODE(16) +g_r32ClearSignMask: + dd 07fffffffh + dd 07fffffffh + dd 07fffffffh + dd 07fffffffh + diff --git a/src/VBox/Runtime/common/math/fabsl.asm b/src/VBox/Runtime/common/math/fabsl.asm new file mode 100644 index 00000000..d2781de3 --- /dev/null +++ b/src/VBox/Runtime/common/math/fabsl.asm @@ -0,0 +1,61 @@ +; $Id: fabsl.asm $ +;; @file +; IPRT - No-CRT fabsl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. 
+; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; Compute the absolute value of lrd (|lrd|). +; @returns st(0) +; @param lrd [xSP + xCB*2] +RT_NOCRT_BEGINPROC fabsl + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + SEH64_END_PROLOGUE + + fld tword [xBP + xCB*2] + fabs + + leave + ret +ENDPROC RT_NOCRT(fabsl) + diff --git a/src/VBox/Runtime/common/math/feclearexcept.asm b/src/VBox/Runtime/common/math/feclearexcept.asm new file mode 100644 index 00000000..5781d99b --- /dev/null +++ b/src/VBox/Runtime/common/math/feclearexcept.asm @@ -0,0 +1,121 @@ +; $Id: feclearexcept.asm $ +;; @file +; IPRT - No-CRT feclearexcept - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. 
+; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Clears the specified pending exception flags in the x87 FSW and (when SSE is available) the MXCSR. +; +; @returns eax = 0 on success, non-zero on failure. +; @param fXcpts 32-bit: [xBP+8]; msc64: ecx; gcc64: edi; -- Zero or more bits from X86_FSW_XCPT_MASK +; +RT_NOCRT_BEGINPROC feclearexcept + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 20h + SEH64_ALLOCATE_STACK 20h + SEH64_END_PROLOGUE + + ; + ; Load the parameter into ecx, validate and adjust it. + ; +%ifdef ASM_CALL64_GCC + mov ecx, edi +%elifdef RT_ARCH_X86 + mov ecx, [xBP + xCB*2] +%endif +%if 0 + and ecx, X86_FSW_XCPT_MASK +%else + or eax, -1 + test ecx, ~X86_FSW_XCPT_MASK + jnz .return +%endif + + ; #IE implies #SF + mov al, cl + and al, X86_FSW_IE + shl al, X86_FSW_SF_BIT - X86_FSW_IE_BIT + or cl, al + + ; Make it into an AND mask suitable for clearing the specified exceptions. + not ecx + + ; + ; Make the changes. + ; + + ; Modify the x87 flags first (ecx preserved). + cmp ecx, ~X86_FSW_XCPT_MASK ; This includes all the x87 exceptions, including stack error. + jne .partial_mask + fnclex + jmp .do_sse + +.partial_mask: + fnstenv [xBP - 20h] + and word [xBP - 20h + 4], cx ; The FSW is at offset 4 in the 32-bit prot mode layout (the FCW is at offset 0). + fldenv [xBP - 20h] ; Recalculates the FSW.ES flag. +.do_sse: + +%ifdef RT_ARCH_X86 + ; SSE supported (ecx preserved)? 
+ extern NAME(rtNoCrtHasSse) + call NAME(rtNoCrtHasSse) + test al, al + jz .return_ok +%endif + + ; Modify the SSE flags (modifies ecx). + stmxcsr [xBP - 10h] + or ecx, X86_FSW_XCPT_MASK & ~X86_MXCSR_XCPT_FLAGS ; Don't mix X86_FSW_SF with X86_MXCSR_DAZ. + and [xBP - 10h], ecx + ldmxcsr [xBP - 10h] + +.return_ok: + xor eax, eax +.return: + leave + ret +ENDPROC RT_NOCRT(feclearexcept) + diff --git a/src/VBox/Runtime/common/math/fedisableexcept.asm b/src/VBox/Runtime/common/math/fedisableexcept.asm new file mode 100644 index 00000000..3fb339c3 --- /dev/null +++ b/src/VBox/Runtime/common/math/fedisableexcept.asm @@ -0,0 +1,117 @@ +; $Id: fedisableexcept.asm $ +;; @file +; IPRT - No-CRT fedisableexcept - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. 
+; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Disables (masks) a set of exceptions (BSD/GNU extension). +; +; @returns eax = Previous enabled exceptions on success (not subject to fXcpt), +; -1 on failure. +; @param fXcpt 32-bit: [xBP+8]; msc64: ecx; gcc64: edi; -- Mask of exceptions to disable. +; +RT_NOCRT_BEGINPROC fedisableexcept + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h + SEH64_END_PROLOGUE + + ; + ; Load the parameter into ecx. + ; +%ifdef ASM_CALL64_GCC + mov ecx, edi +%elifdef RT_ARCH_X86 + mov ecx, [xBP + xCB*2] +%endif + or eax, -1 + test ecx, ~X86_FCW_XCPT_MASK +%ifndef RT_STRICT + jnz .return +%else + jz .input_ok + int3 + jmp .return +.input_ok: +%endif + + ; + ; Make the changes (old mask in eax). + ; + + ; Modify the x87 mask first (ecx preserved). + fstcw [xBP - 10h] +%ifdef RT_ARCH_X86 ; Return the inverted x87 mask in 32-bit mode. + movzx eax, word [xBP - 10h] +%endif + or word [xBP - 10h], cx + fldcw [xBP - 10h] + +%ifdef RT_ARCH_X86 + ; SSE supported (ecx preserved)? The helper hands back its answer in al, so park + ; the old x87 mask in the frame across the call; otherwise the return value is lost. + mov [xBP - 8h], eax + extern NAME(rtNoCrtHasSse) + call NAME(rtNoCrtHasSse) + test al, al + mov eax, [xBP - 8h] ; Reload the old mask; MOV leaves the flags set by TEST intact. + jz .return_ok +%endif + + ; Modify the SSE mask (modifies ecx). + stmxcsr [xBP - 10h] +%ifdef RT_ARCH_AMD64 ; Return the inverted MXCSR exception mask on AMD64 because windows doesn't necessarily set the x87 one. + mov eax, [xBP - 10h] + shr eax, X86_MXCSR_XCPT_MASK_SHIFT +%endif + shl ecx, X86_MXCSR_XCPT_MASK_SHIFT + or [xBP - 10h], ecx + ldmxcsr [xBP - 10h] + +.return_ok: + not eax ; Invert it as we return the enabled rather than masked exceptions. 
+ and eax, X86_FCW_XCPT_MASK +.return: + leave + ret +ENDPROC RT_NOCRT(fedisableexcept) + diff --git a/src/VBox/Runtime/common/math/feenableexcept.asm b/src/VBox/Runtime/common/math/feenableexcept.asm new file mode 100644 index 00000000..6f874ee7 --- /dev/null +++ b/src/VBox/Runtime/common/math/feenableexcept.asm @@ -0,0 +1,121 @@ +; $Id: feenableexcept.asm $ +;; @file +; IPRT - No-CRT feenableexcept - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Enables a set of exceptions (BSD/GNU extension). +; +; @returns eax = Previous enabled exceptions on success (not subject to fXcpt), +; -1 on failure. 
+; @param fXcpt 32-bit: [xBP+8] msc64: ecx gcc64: edi - Mask of exceptions to enable. +; +RT_NOCRT_BEGINPROC feenableexcept + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h + SEH64_END_PROLOGUE + + ; + ; Load the parameter into ecx. + ; +%ifdef ASM_CALL64_GCC + mov ecx, edi +%elifdef RT_ARCH_X86 + mov ecx, [xBP + xCB*2] +%endif + or eax, -1 + test ecx, ~X86_FCW_XCPT_MASK +%ifndef RT_STRICT + jnz .return +%else + jz .input_ok + int3 + jmp .return +.input_ok: +%endif + + ; Invert the mask as we're enabling the exceptions, not masking them. + not ecx + + ; + ; Make the changes (old mask in eax). + ; + + ; Modify the x87 mask first (ecx preserved). + fstcw [xBP - 10h] +%ifdef RT_ARCH_X86 ; Return the inverted x87 mask in 32-bit mode. + mov ax, word [xBP - 10h] + and eax, X86_FCW_XCPT_MASK +%endif + and word [xBP - 10h], cx + fldcw [xBP - 10h] + +%ifdef RT_ARCH_X86 + ; SSE supported (ecx preserved)? The helper hands back its answer in al, so park + ; the old x87 mask in the frame across the call; otherwise the return value is lost. + mov [xBP - 8h], eax + extern NAME(rtNoCrtHasSse) + call NAME(rtNoCrtHasSse) + test al, al + mov eax, [xBP - 8h] ; Reload the old mask; MOV leaves the flags set by TEST intact. + jz .return_ok +%endif + + ; Modify the SSE mask (modifies ecx). + stmxcsr [xBP - 10h] +%ifdef RT_ARCH_AMD64 ; Return the inverted MXCSR exception mask on AMD64 because windows doesn't necessarily set the x87 one. + mov eax, [xBP - 10h] + and eax, X86_MXCSR_XCPT_MASK + shr eax, X86_MXCSR_XCPT_MASK_SHIFT +%endif + rol ecx, X86_MXCSR_XCPT_MASK_SHIFT + and [xBP - 10h], ecx + ldmxcsr [xBP - 10h] + +.return_ok: + not eax ; Invert it as we return the enabled rather than masked exceptions. + and eax, X86_FCW_XCPT_MASK ; NOT sets every upper bit; keep only the exception bits (as fedisableexcept does). 
+.return: + leave + ret +ENDPROC RT_NOCRT(feenableexcept) + diff --git a/src/VBox/Runtime/common/math/fegetenv.asm b/src/VBox/Runtime/common/math/fegetenv.asm new file mode 100644 index 00000000..073e64f4 --- /dev/null +++ b/src/VBox/Runtime/common/math/fegetenv.asm @@ -0,0 +1,90 @@ +; $Id: fegetenv.asm $ +;; @file +; IPRT - No-CRT fegetenv - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. 
+; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Gets the FPU+SSE environment. +; +; The x87 environment (28 bytes) is stored at pEnv and the MXCSR at pEnv + 28; +; the MXCSR slot is zeroed on 32-bit hosts without SSE. +; +; @returns eax = 0 on success (-1 on failure), +; @param pEnv 32-bit: [xBP+8]; msc64: rcx; gcc64: rdi -- Pointer to where to store the environment. +; +RT_NOCRT_BEGINPROC fegetenv + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + SEH64_END_PROLOGUE + + ; + ; Load the parameter into rcx. + ; +%ifdef ASM_CALL64_GCC + mov rcx, rdi +%elifdef RT_ARCH_X86 + mov ecx, [xBP + xCB*2] +%endif + + ; + ; Save the FPU environment and MXCSR. 
+ ; + fnstenv [xCX] + fldenv [xCX] ; FNSTENV masks all x87 exceptions after storing; reload so querying has no side effect. + +%ifdef RT_ARCH_X86 + ; SSE supported (ecx preserved)? 
+ and dword [xCX + 28], 0h + extern NAME(rtNoCrtHasSse) + call NAME(rtNoCrtHasSse) + test al, al + jz .return_nosse +%endif + stmxcsr [xCX + 28] +.return_nosse: + + ; + ; Return success. + ; + xor eax, eax + leave + ret +ENDPROC RT_NOCRT(fegetenv) + diff --git a/src/VBox/Runtime/common/math/fegetexcept.asm b/src/VBox/Runtime/common/math/fegetexcept.asm new file mode 100644 index 00000000..e7cc35cd --- /dev/null +++ b/src/VBox/Runtime/common/math/fegetexcept.asm @@ -0,0 +1,82 @@ +; $Id: fegetexcept.asm $ +;; @file +; IPRT - No-CRT fegetexcept - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. 
+; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Gets the mask of enabled exceptions, e.g. unmasked (BSD/GNU extension). +; +; @returns eax = inverted x87/sse exception mask (X86_MXCSR_XCPT_FLAGS). +; Will not return X86_FSW_SF. +; +RT_NOCRT_BEGINPROC fegetexcept + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h + SEH64_END_PROLOGUE + + ; + ; Save control word and isolate the exception mask. + ; + ; On 64-bit we'll use the MXCSR since the windows compiler/CRT doesn't + ; necessarily keep them in sync. We'll still return the x87-style flags. + ; +%ifdef RT_ARCH_AMD64 + stmxcsr [xBP - 10h] + mov eax, [xBP - 10h] + shr eax, X86_MXCSR_XCPT_MASK_SHIFT +%else + fstcw [xBP - 10h] + movzx eax, word [xBP - 10h] +%endif + + not eax ; Invert it as we return the enabled rather than masked exceptions. + and eax, X86_MXCSR_XCPT_FLAGS ; Use the SSE mask so we don't return X86_FSW_SF here. + +.return_val: + leave + ret +ENDPROC RT_NOCRT(fegetexcept) + diff --git a/src/VBox/Runtime/common/math/fegetexceptflag.asm b/src/VBox/Runtime/common/math/fegetexceptflag.asm new file mode 100644 index 00000000..015565f2 --- /dev/null +++ b/src/VBox/Runtime/common/math/fegetexceptflag.asm @@ -0,0 +1,117 @@ +; $Id: fegetexceptflag.asm $ +;; @file +; IPRT - No-CRT fegetexceptflag - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. 
+; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Gets the pending exceptions (x87 FSW flags, OR'ed with the MXCSR flags when SSE is available). +; +; @returns eax = 0 on success, non-zero on failure. +; @param pfXcpts 32-bit: [xBP+8]; msc64: rcx; gcc64: rdi; -- Where to store the flags (pointer to fexcept_t (16-bit)). +; @param fXcptMask 32-bit: [xBP+0ch]; msc64: edx; gcc64: esi; -- The exception flags to get (X86_FSW_XCPT_MASK). +; Accepts X86_FSW_SF and will return it if given as input. +; +RT_NOCRT_BEGINPROC fegetexceptflag + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h + SEH64_END_PROLOGUE + + ; + ; Load the parameters into rcx (pfXcpts) and edx (fXcptMask). 
+ ; +%ifdef ASM_CALL64_GCC + mov rcx, rdi + mov edx, esi +%elifdef RT_ARCH_X86 + mov ecx, [xBP + xCB*2] + mov edx, [xBP + xCB*3] +%endif +%if 0 + and edx, X86_FSW_XCPT_MASK +%else + or eax, -1 + test edx, ~X86_FSW_XCPT_MASK + %ifndef RT_STRICT + jnz .return + %else + jz .input_ok + int3 + jmp .return +.input_ok: + %endif +%endif + + ; + ; Get the pending exceptions. + ; + + ; x87. + fnstsw ax + and ax, dx + mov [xCX], ax + +%ifdef RT_ARCH_X86 + ; SSE supported (ecx preserved)? NOTE(review): edx (fXcptMask) is read again below - confirm the helper preserves it too. + extern NAME(rtNoCrtHasSse) + call NAME(rtNoCrtHasSse) + test al, al + jz .return_ok +%endif + + ; Merge in the pending SSE (MXCSR) exception flags; the MXCSR itself is only read. + stmxcsr [xBP - 10h] + mov ax, [xBP - 10h] + and ax, dx + and ax, X86_MXCSR_XCPT_FLAGS ; Don't confuse X86_MXCSR_DAZ for X86_FSW_SF. + or [xCX], ax + +.return_ok: + xor eax, eax +.return: + leave + ret +ENDPROC RT_NOCRT(fegetexceptflag) + diff --git a/src/VBox/Runtime/common/math/fegetround.asm b/src/VBox/Runtime/common/math/fegetround.asm new file mode 100644 index 00000000..3da8da75 --- /dev/null +++ b/src/VBox/Runtime/common/math/fegetround.asm @@ -0,0 +1,79 @@ +; $Id: fegetround.asm $ +;; @file +; IPRT - No-CRT fegetround - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. 
+; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Gets the hardware rounding mode. +; @returns eax x87 rounding mask (X86_FCW_RC_MASK) +; +RT_NOCRT_BEGINPROC fegetround + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h + SEH64_END_PROLOGUE + + ; + ; Save control word and isolate the rounding mode. + ; + ; On 64-bit we'll use the MXCSR since the windows compiler/CRT doesn't + ; necessarily keep them in sync. We'll still return the x87-style flags. + ; +%ifdef RT_ARCH_AMD64 + stmxcsr [xBP - 10h] + mov eax, [xBP - 10h] + and eax, X86_MXCSR_RC_MASK + shr eax, X86_MXCSR_RC_SHIFT - X86_FCW_RC_SHIFT +%else + fstcw [xBP - 10h] + movzx eax, word [xBP - 10h] + and eax, X86_FCW_RC_MASK +%endif + +.return_val: + leave + ret +ENDPROC RT_NOCRT(fegetround) + diff --git a/src/VBox/Runtime/common/math/fegetx87precision.asm b/src/VBox/Runtime/common/math/fegetx87precision.asm new file mode 100644 index 00000000..66c13074 --- /dev/null +++ b/src/VBox/Runtime/common/math/fegetx87precision.asm @@ -0,0 +1,70 @@ +; $Id: fegetx87precision.asm $ +;; @file +; IPRT - No-CRT fegetx87precision - AMD64 & X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. 
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Gets the x87 hardware precision mode - IPRT extension.
;
; @returns  eax = precision control field of the x87 FCW (X86_FCW_PC_MASK).
;           The documented contract allows -1 on failure, but this
;           implementation has no failure path.
;
RT_NOCRT_BEGINPROC fegetx87precision
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 10h                        ; scratch slot for the FCW image
        SEH64_ALLOCATE_STACK 10h
        SEH64_END_PROLOGUE

        ;
        ; Store the x87 FCW and keep only the precision control bits.  The
        ; upper half of eax is not written by the 16-bit load; the AND with
        ; the mask is what clears it.
        ;
        fnstcw  [xBP - 10h]
        mov     ax, [xBP - 10h]
        and     eax, X86_FCW_PC_MASK

        leave
        ret
ENDPROC   RT_NOCRT(fegetx87precision)

;
; diff --git a/src/VBox/Runtime/common/math/feholdexcept.asm b/src/VBox/Runtime/common/math/feholdexcept.asm
; new file mode 100644
; index 00000000..106751b8
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/feholdexcept.asm
; @@ -0,0 +1,99 @@
;
; $Id: feholdexcept.asm $
;; @file
; IPRT - No-CRT feholdexcept - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Gets the FPU+SSE environment and disables (masks) all exceptions.
;
; @returns  eax = 0 on success (-1 on failure)
; @param    pEnv    32-bit: [xBP+8]     msc64: rcx      gcc64: rdi
;
; The x87 environment image is stored at [pEnv] and the MXCSR value at
; [pEnv + 28].  Both are left holding the values from before masking.
;
RT_NOCRT_BEGINPROC feholdexcept
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

        ;
        ; Get pEnv into xCX (already there for the msc64 convention).
        ;
%ifdef ASM_CALL64_GCC
        mov     rcx, rdi
%elifdef RT_ARCH_X86
        mov     ecx, [xBP + xCB*2]
%endif

        ;
        ; x87: Store the environment, then load a control word with every
        ; exception mask bit set.  The saved image is used as the scratch
        ; buffer for FLDCW, so its low FCW byte is parked in al meanwhile and
        ; put back afterwards.
        ;
        fnstenv [xCX]
        mov     al, [xCX]
        or      byte [xCX], X86_FCW_MASK_ALL
        fldcw   [xCX]
        mov     [xCX], al

%ifdef RT_ARCH_X86
        ;
        ; 32-bit only: Zero the MXCSR slot, then skip the SSE part when the
        ; helper reports no SSE (the helper preserves ecx).
        ;
        and     dword [xCX + 28], 0h
        extern  NAME(rtNoCrtHasSse)
        call    NAME(rtNoCrtHasSse)
        test    al, al
        jz      .done
%endif

        ;
        ; SSE: Same trick as above, with the caller's MXCSR kept in eax while
        ; the slot is used to load the fully masked value.
        ;
        stmxcsr [xCX + 28]
        mov     eax, [xCX + 28]
        or      dword [xCX + 28], X86_MXCSR_XCPT_MASK
        ldmxcsr [xCX + 28]
        mov     [xCX + 28], eax

.done:
        ;
        ; Always reports success.
        ;
        xor     eax, eax
        leave
        ret
ENDPROC   RT_NOCRT(feholdexcept)

;
; diff --git a/src/VBox/Runtime/common/math/feraiseexcept.asm b/src/VBox/Runtime/common/math/feraiseexcept.asm
; new file mode 100644
; index 00000000..12534848
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/feraiseexcept.asm
; @@ -0,0 +1,188 @@
;
; $Id: feraiseexcept.asm $
;; @file
; IPRT - No-CRT feraiseexcept - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


%ifdef RT_ARCH_AMD64
 %define RT_NOCRT_RAISE_FPU_EXCEPT_IN_SSE_MODE
%endif


BEGINCODE

;;
; Raises the given FPU/SSE exceptions.
;
; In SSE mode (AMD64) each exception is provoked by a scalar division with
; suitable operands.  Otherwise the corresponding flag is OR'ed into the x87
; FSW via FNSTENV/FLDENV followed by FWAIT.
;
; @returns  eax = 0 on success, -1 on failure.
; @param    fXcpt   32-bit: [xBP+8]; msc64: ecx; gcc64: edi; -- The exceptions to raise.
;                   Accepts X86_FSW_XCPT_MASK, but ignores X86_FSW_DE and X86_FSW_SF.
;
RT_NOCRT_BEGINPROC feraiseexcept
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifndef RT_NOCRT_RAISE_FPU_EXCEPT_IN_SSE_MODE
        ; Reserve room for the FNSTENV image at [xBP - 20h].  This must adjust
        ; xSP, not xBP: the frame pointer is needed intact for the parameter
        ; load below and for LEAVE.
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
%endif
        SEH64_END_PROLOGUE

        ;
        ; Load the parameter into rcx.
        ;
%ifdef ASM_CALL64_GCC
        mov     rcx, rdi
%elifdef RT_ARCH_X86
        mov     ecx, [xBP + xCB*2]
%endif
%ifdef RT_STRICT
        test    ecx, ~X86_FSW_XCPT_MASK
        jz      .input_ok
        int3
.input_ok:
%endif

        ;
        ; We have to raise these one-by-one, and the order is said to be important.
        ; We ASSUME that on x86 it is okay for the x87 unit to raise the exception.
        ;

        ; 1. Invalid operation.  Like +0.0 / +0.0.
        test    cl, X86_FSW_IE
        jz      .not_ie
%ifdef RT_NOCRT_RAISE_FPU_EXCEPT_IN_SSE_MODE
        movss   xmm0, [g_r32Zero xWrtRIP]
        divss   xmm0, xmm0
%else
        fnstenv [xBP - 20h]
        or      byte [xBP - 20h + X86FSTENV32P.FSW], X86_FSW_IE
        fldenv  [xBP - 20h]
        fwait
%endif
.not_ie:

        ; 2. Division by zero.
        test    cl, X86_FSW_ZE
        jz      .not_ze
%ifdef RT_NOCRT_RAISE_FPU_EXCEPT_IN_SSE_MODE
        movss   xmm0, [g_r32One  xWrtRIP]
        movss   xmm1, [g_r32Zero xWrtRIP]
        divss   xmm0, xmm1
%else
        fnstenv [xBP - 20h]
        or      byte [xBP - 20h + X86FSTENV32P.FSW], X86_FSW_ZE
        fldenv  [xBP - 20h]
        fwait
%endif
.not_ze:

        ; 3. Overflow.
        test    cl, X86_FSW_OE
        jz      .not_oe
%ifdef RT_NOCRT_RAISE_FPU_EXCEPT_IN_SSE_MODE
        ; Load the dividend with MOVSS.  XORPS would be a 16-byte memory access
        ; (with an alignment requirement) on a 4-byte constant, and would also
        ; mix in whatever xmm0 held before.
        movss   xmm0, [g_r32Large xWrtRIP]
        movss   xmm1, [g_r32Tiny  xWrtRIP]
        divss   xmm0, xmm1
%else
        fnstenv [xBP - 20h]
        or      byte [xBP - 20h + X86FSTENV32P.FSW], X86_FSW_OE
        fldenv  [xBP - 20h]
        fwait
%endif
.not_oe:

        ; 4. Underflow.
        test    cl, X86_FSW_UE
        jz      .not_ue
%ifdef RT_NOCRT_RAISE_FPU_EXCEPT_IN_SSE_MODE
        movss   xmm0, [g_r32Tiny  xWrtRIP]      ; MOVSS, not XORPS - see the overflow case.
        movss   xmm1, [g_r32Large xWrtRIP]
        divss   xmm0, xmm1
%else
        fnstenv [xBP - 20h]
        or      byte [xBP - 20h + X86FSTENV32P.FSW], X86_FSW_UE
        fldenv  [xBP - 20h]
        fwait
%endif
.not_ue:

        ; 5. Precision.
        test    cl, X86_FSW_PE
        jz      .not_pe
%ifdef RT_NOCRT_RAISE_FPU_EXCEPT_IN_SSE_MODE
        movss   xmm0, [g_r32Two   xWrtRIP]      ; MOVSS, not XORPS - see the overflow case.
        movss   xmm1, [g_r32Three xWrtRIP]
        divss   xmm0, xmm1
%else
        fnstenv [xBP - 20h]
        or      byte [xBP - 20h + X86FSTENV32P.FSW], X86_FSW_PE
        fldenv  [xBP - 20h]
        fwait
%endif
.not_pe:

        ; We currently do not raise X86_FSW_DE or X86_FSW_SF.

        ;
        ; Return success.
        ;
        xor     eax, eax
        leave
        ret
ENDPROC   RT_NOCRT(feraiseexcept)


%ifdef RT_NOCRT_RAISE_FPU_EXCEPT_IN_SSE_MODE
; Single precision operands for provoking the exceptions above.
g_r32Zero:
        dd      0.0
g_r32One:
        dd      1.0
g_r32Two:
        dd      2.0
g_r32Three:
        dd      3.0
g_r32Large:
        dd      1.0e+38
g_r32Tiny:
        dd      1.0e-37
%endif

;
; diff --git a/src/VBox/Runtime/common/math/fesetenv.asm b/src/VBox/Runtime/common/math/fesetenv.asm
; new file mode 100644
; index 00000000..394d3fce
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/fesetenv.asm
; @@ -0,0 +1,194 @@
;
; $Id: fesetenv.asm $
;; @file
; IPRT - No-CRT fesetenv - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"

; Special "pointer" values accepted in place of a saved environment.
%define RT_NOCRT_FE_DFL_ENV     1
%define RT_NOCRT_FE_NOMASK_ENV  2
%define RT_NOCRT_FE_PC53_ENV    3
%define RT_NOCRT_FE_PC64_ENV    4
%define RT_NOCRT_FE_LAST_ENV    4


BEGINCODE

;;
; Sets the FPU+SSE environment.
;
; @returns  eax = 0 on success, -1 on failure (NULL pEnv).
; @param    pEnv    32-bit: [xBP+8]     msc64: rcx      gcc64: rdi - Saved environment to restore,
;                   or one of the RT_NOCRT_FE_xxx_ENV special values.
;
RT_NOCRT_BEGINPROC fesetenv
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        ; Reserve the scratch area used below ([xBP - 20h] and [xBP - 10h]).
        ; This must adjust xSP, not xBP: the frame pointer is needed intact
        ; for the parameter load and for LEAVE/RET.
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

        ;
        ; Load the parameter into rcx.
        ;
%ifdef ASM_CALL64_GCC
        mov     rcx, rdi
%elifdef RT_ARCH_X86
        mov     ecx, [xBP + xCB*2]
%endif

        ;
        ; For the x87 state we only set FSW.XCPT, FCW.XCPT, FCW.RC and FCW.PC.
        ; So we save the current environment, merge those fields and load it.
        ;
        fnstenv [xBP - 20h]

        ; Check for special "pointer" values (1..RT_NOCRT_FE_LAST_ENV); NULL is an error.
        cmp     xCX, RT_NOCRT_FE_LAST_ENV
        ja      .x87_regular

        or      eax, -1
        test    xCX, xCX
        jnz     .x87_special
%ifdef RT_STRICT
        int3
%endif
        jmp     .return

        ;
        ; Special x87 state.  Clear all pending exceptions.
        ;
        ; The 4 special environments differ only in some FCW bits, so set up the
        ; FCW in AX, starting with the NOMASK environment as it has the fewest
        ; bits set.
        ;
.x87_special:
        and     word [xBP - 20h + X86FSTENV32P.FSW], ~X86_FSW_XCPT_ES_MASK
        mov     ax, [xBP - 20h + X86FSTENV32P.FCW]
        and     ax, ~(X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK | X86_FCW_IC_MASK)
%ifdef RT_OS_WINDOWS
        or      ax, X86_FCW_DM | X86_FCW_PC_53 | X86_FCW_RC_NEAREST | X86_FCW_IC_PROJECTIVE
%else
        or      ax, X86_FCW_DM | X86_FCW_PC_64 | X86_FCW_RC_NEAREST | X86_FCW_IC_PROJECTIVE
%endif
        cmp     xCX, RT_NOCRT_FE_NOMASK_ENV
        je      .x87_special_done
        or      ax, X86_FCW_MASK_ALL

%ifdef RT_OS_WINDOWS
        cmp     xCX, RT_NOCRT_FE_PC64_ENV
        jne     .x87_special_done
        or      ax, X86_FCW_PC_64               ; X86_FCW_PC_64 is a super set of X86_FCW_PC_53, so no need to clear bits
%else
        cmp     xCX, RT_NOCRT_FE_PC53_ENV
        jne     .x87_special_done
        ; X86_FCW_PC_64 is a super set of X86_FCW_PC_53, so clear the one bit
        ; that differs.  The mask has to be inverted so every other FCW bit
        ; set up above is kept.
        and     ax, ~(X86_FCW_PC_64 & ~X86_FCW_PC_53)
%endif

.x87_special_done:
        mov     [xBP - 20h + X86FSTENV32P.FCW], ax
        jmp     .x87_common

        ;
        ; Merge input and current.
        ;
.x87_regular:
        ; FCW: take the exception masks, rounding and precision from the input.
        mov     ax, [xCX + X86FSTENV32P.FCW]
        mov     dx, [xBP - 20h + X86FSTENV32P.FCW]
        and     ax, X86_FCW_MASK_ALL | X86_FCW_RC_MASK | X86_FCW_PC_MASK
        and     dx, ~(X86_FCW_MASK_ALL | X86_FCW_RC_MASK | X86_FCW_PC_MASK)
        or      dx, ax
        mov     [xBP - 20h + X86FSTENV32P.FCW], dx
        ; FSW: take the pending exception flags from the input.
        mov     ax, [xCX + X86FSTENV32P.FSW]
        mov     dx, [xBP - 20h + X86FSTENV32P.FSW]
        and     ax, X86_FSW_XCPT_MASK
        and     dx, ~(X86_FSW_XCPT_MASK)
        or      dx, ax
        mov     [xBP - 20h + X86FSTENV32P.FSW], dx

.x87_common:
        ; Clear the exception info.
        xor     eax, eax
        mov     [xBP - 20h + X86FSTENV32P.FPUIP], eax
        mov     [xBP - 20h + X86FSTENV32P.FPUCS], eax ; covers FOP too.
        mov     [xBP - 20h + X86FSTENV32P.FPUDP], eax
        mov     [xBP - 20h + X86FSTENV32P.FPUDS], eax

        ; Load the merged and cleaned up environment.
        fldenv  [xBP - 20h]


        ;
        ; Now for SSE, if supported, where we'll restore everything as is.
        ;
%ifdef RT_ARCH_X86
        ; SSE supported (ecx preserved)?
        extern  NAME(rtNoCrtHasSse)
        call    NAME(rtNoCrtHasSse)
        test    al, al
        jz      .return_okay
%endif

        ; Same special value range as the x87 check above, i.e. inclusive of
        ; RT_NOCRT_FE_LAST_ENV.  Otherwise RT_NOCRT_FE_PC64_ENV would be
        ; dereferenced as a pointer by the LDMXCSR below.
        cmp     xCX, RT_NOCRT_FE_LAST_ENV
        jbe     .sse_special_env
        ldmxcsr [xCX + 28]
        jmp     .return_okay

.sse_special_env:
        ; [xBP - 10h] lies inside the x87 image, which is no longer needed after the FLDENV.
        stmxcsr [xBP - 10h]
        mov     eax, [xBP - 10h]
        and     eax, ~(X86_MXCSR_XCPT_FLAGS | X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK | X86_MXCSR_DAZ | X86_MXCSR_FZ)
        or      eax, X86_MXCSR_RC_NEAREST | X86_MXCSR_DM
        cmp     xCX, RT_NOCRT_FE_NOMASK_ENV     ; Only the NOMASK one differs here.
        je      .sse_special_load_eax
        or      eax, X86_MXCSR_RC_NEAREST | X86_MXCSR_XCPT_MASK ; default environment masks all exceptions
.sse_special_load_eax:
        mov     [xBP - 10h], eax
        ldmxcsr [xBP - 10h]

        ;
        ; Return success.
        ;
.return_okay:
        xor     eax, eax
.return:
        leave
        ret
ENDPROC   RT_NOCRT(fesetenv)

;
; diff --git a/src/VBox/Runtime/common/math/fesetexceptflag.asm b/src/VBox/Runtime/common/math/fesetexceptflag.asm
; new file mode 100644
; index 00000000..548437a8
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/fesetexceptflag.asm
; @@ -0,0 +1,127 @@
;
; $Id: fesetexceptflag.asm $
;; @file
; IPRT - No-CRT fesetexceptflag - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Sets the pending exception flags selected by the mask.
;
; For every flag in @a fXcptMask, the pending state in the x87 FSW (and in
; MXCSR when SSE is available) is replaced by the corresponding bit of
; @a *pfXcpts.  Flags outside the mask are left unchanged.
;
; @returns  eax = 0 on success, non-zero on failure (-1 if either input has
;           bits outside X86_FSW_XCPT_MASK).
; @param    pfXcpts     32-bit: [xBP+8];  msc64: rcx; gcc64: rdi; -- pointer to fexcept_t (16-bit)
; @param    fXcptMask   32-bit: [xBP+c];  msc64: edx; gcc64: esi; -- X86_MXCSR_XCPT_FLAGS (X86_FSW_XCPT_MASK)
;                       Accepts X86_FSW_SF.
;
RT_NOCRT_BEGINPROC fesetexceptflag
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        ; 20h bytes are needed: the FNSTENV image lives at [xBP - 20h] and the
        ; MXCSR scratch slot at [xBP - 10h].  The allocation matches the SEH
        ; annotation below.
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

        ;
        ; Load the parameter into ecx (*pfXcpts) and edx (fXcptMask) and validate the latter.
        ;
%ifdef ASM_CALL64_GCC
        movzx   ecx, word [rdi]
        mov     edx, esi
%elifdef ASM_CALL64_MSC
        movzx   ecx, word [rcx]                 ; fXcptMask is already in edx.
%elifdef RT_ARCH_X86
        mov     ecx, [xBP + xCB*2]
        movzx   ecx, word [ecx]
        mov     edx, [xBP + xCB*3]
%endif
%if 0
        and     ecx, X86_FSW_XCPT_MASK
        and     edx, X86_FSW_XCPT_MASK
%else
        or      eax, -1
        test    edx, ~X86_FSW_XCPT_MASK
        jnz     .return
        test    ecx, ~X86_FSW_XCPT_MASK
        jnz     .return
%endif

        ;
        ; Apply the AND mask to ECX and invert it so we can use it to clear flags
        ; before OR'ing in the new values.
        ;
        and     ecx, edx
        not     edx

        ;
        ; Make the modifications
        ;

        ; Modify the pending x87 exceptions (FSW).  The image is reloaded from
        ; the same address it was stored to and modified at.
        fnstenv [xBP - 20h]
        and     [xBP - 20h + X86FSTENV32P.FSW], dx
        or      [xBP - 20h + X86FSTENV32P.FSW], cx
        fldenv  [xBP - 20h]

%ifdef RT_ARCH_X86
        ; SSE supported (ecx preserved)?
        extern  NAME(rtNoCrtHasSse)
        call    NAME(rtNoCrtHasSse)
        test    al, al
        jz      .return_ok
%endif

        ; Modify the pending SSE exceptions (same bit positions as in FSW).
        stmxcsr [xBP - 10h]
        mov     eax, [xBP - 10h]
        or      edx, X86_FSW_XCPT_MASK & ~X86_MXCSR_XCPT_FLAGS ; Don't mix X86_FSW_SF with X86_MXCSR_DAZ.
        and     ecx, X86_MXCSR_XCPT_FLAGS                      ; Ditto
        and     eax, edx
        or      eax, ecx
        mov     [xBP - 10h], eax
        ldmxcsr [xBP - 10h]

.return_ok:
        xor     eax, eax
.return:
        leave
        ret
ENDPROC   RT_NOCRT(fesetexceptflag)

;
; diff --git a/src/VBox/Runtime/common/math/fesetround.asm b/src/VBox/Runtime/common/math/fesetround.asm
; new file mode 100644
; index 00000000..e0aca7ab
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/fesetround.asm
; @@ -0,0 +1,107 @@
;
; $Id: fesetround.asm $
;; @file
; IPRT - No-CRT fesetround - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Sets the hardware rounding mode.
;
; The mode is given in x87 FCW bit positions and is applied to the x87 FCW
; and, when SSE is available, to MXCSR as well.
;
; @returns  eax = 0 on success, non-zero on failure (-1 when the input has
;           bits outside X86_FCW_RC_MASK).
; @param    iRoundingMode   32-bit: [xBP+8]     msc64: ecx      gcc64: edi
;
RT_NOCRT_BEGINPROC fesetround
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 10h                        ; scratch slot shared by the FCW and MXCSR images
        SEH64_ALLOCATE_STACK 10h
        SEH64_END_PROLOGUE

        ;
        ; Preset the failure status, fetch iRoundingMode into ecx and reject
        ; anything with bits outside the rounding control field.
        ;
        or      eax, -1
%ifdef ASM_CALL64_GCC
        mov     ecx, edi
%elifdef RT_ARCH_X86
        mov     ecx, [xBP + xCB*2]
%endif
        test    ecx, ~X86_FCW_RC_MASK
        jnz     .done

        ;
        ; x87: Replace the RC field of the control word (ecx is left untouched).
        ;
        fstcw   [xBP - 10h]
        mov     ax, word [xBP - 10h]
        and     ax, ~X86_FCW_RC_MASK
        or      ax, cx
        mov     [xBP - 10h], ax
        fldcw   [xBP - 10h]

%ifdef RT_ARCH_X86
        ;
        ; 32-bit only: Stop here if the helper reports no SSE (it preserves ecx).
        ;
        extern  NAME(rtNoCrtHasSse)
        call    NAME(rtNoCrtHasSse)
        test    al, al
        jz      .success
%endif

        ;
        ; SSE: Shift the mode up to the MXCSR.RC position (destroys ecx) and
        ; replace that field.
        ;
        stmxcsr [xBP - 10h]
        mov     eax, [xBP - 10h]
        and     eax, ~X86_MXCSR_RC_MASK
        shl     ecx, X86_MXCSR_RC_SHIFT - X86_FCW_RC_SHIFT
        or      eax, ecx
        mov     [xBP - 10h], eax
        ldmxcsr [xBP - 10h]

.success:
        xor     eax, eax
.done:
        leave
        ret
ENDPROC   RT_NOCRT(fesetround)

;
; diff --git a/src/VBox/Runtime/common/math/fesetx87precision.asm b/src/VBox/Runtime/common/math/fesetx87precision.asm
; new file mode 100644
; index 00000000..1d0c54c1
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/fesetx87precision.asm
; @@ -0,0 +1,88 @@
;
; $Id: fesetx87precision.asm $
;; @file
; IPRT - No-CRT fesetx87precision - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Sets the x87 hardware precision mode - IPRT extension.
;
; @returns  eax = previous precision mode, -1 on failure (input has bits
;           outside X86_FCW_PC_MASK).
; @param    iPrecisionMode  32-bit: [xBP+8]     msc64: ecx      gcc64: edi
;
RT_NOCRT_BEGINPROC fesetx87precision
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 10h                        ; scratch slot for the FCW image
        SEH64_ALLOCATE_STACK 10h
        SEH64_END_PROLOGUE

        ;
        ; Preset the failure status, fetch iPrecisionMode into ecx and reject
        ; anything with bits outside the precision control field.
        ;
        or      eax, -1
%ifdef ASM_CALL64_GCC
        mov     ecx, edi
%elifdef RT_ARCH_X86
        mov     ecx, [xBP + xCB*2]
%endif
        test    ecx, ~X86_FCW_PC_MASK
        jnz     .done

        ;
        ; Read the FCW, keep a copy of the old value in ax for the return
        ; value, and load a control word with the PC field replaced.
        ;
        fnstcw  [xBP - 10h]
        mov     dx, [xBP - 10h]
        mov     ax, dx
        and     dx, ~X86_FCW_PC_MASK
        or      dx, cx
        mov     [xBP - 10h], dx
        fldcw   [xBP - 10h]

        ; Reduce the old FCW to its precision field (this also clears the upper half of eax).
        and     eax, X86_FCW_PC_MASK
.done:
        leave
        ret
ENDPROC   RT_NOCRT(fesetx87precision)

;
; diff --git a/src/VBox/Runtime/common/math/fetestexcept.asm b/src/VBox/Runtime/common/math/fetestexcept.asm
; new file mode 100644
; index 00000000..93c6abf1
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/fetestexcept.asm
; @@ -0,0 +1,107 @@
;
; $Id: fetestexcept.asm $
;; @file
; IPRT - No-CRT fetestexcept - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Return the pending exceptions in the given mask.
;
; Basically a simpler fegetexceptflag function: the x87 FSW flags are read
; first and the MXCSR flags are OR'ed in when SSE is available.
;
; @returns  eax = pending exceptions (X86_FSW_XCPT_MASK) & fXcptMask, or -1
;           when fXcptMask has bits outside X86_FSW_XCPT_MASK.
; @param    fXcptMask   32-bit: [xBP+8]; msc64: ecx; gcc64: edi; -- Exceptions to test for (X86_FSW_XCPT_MASK).
;                       Accepts X86_FSW_SF and will return it if given as input.
;
RT_NOCRT_BEGINPROC fetestexcept
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 10h                        ; scratch slot for the MXCSR image
        SEH64_ALLOCATE_STACK 10h
        SEH64_END_PROLOGUE

        ;
        ; Fetch fXcptMask into ecx and validate it.
        ;
%ifdef ASM_CALL64_GCC
        mov     ecx, edi
%elifdef RT_ARCH_X86
        mov     ecx, [xBP + xCB*2]
%endif
%if 0
        and     ecx, X86_FSW_XCPT_MASK
%else
        or      eax, -1
        test    ecx, ~X86_FSW_XCPT_MASK
        jnz     .done
%endif

        ;
        ; x87: Pending flags from the status word, limited to the mask.
        ;
        fnstsw  ax
        and     eax, ecx

%ifdef RT_ARCH_X86
        ;
        ; 32-bit only: Ask the helper whether SSE is available (it preserves
        ; ecx).  The x87 result is parked in ch across the call since the
        ; helper answers in al.  MOV leaves the flags from the TEST alone.
        ;
        mov     ch, al                          ; Park the result - it's only the lower 6 bits.
        extern  NAME(rtNoCrtHasSse)
        call    NAME(rtNoCrtHasSse)
        test    al, al
        mov     al, ch                          ; Put the result back - no need for movzx here.
        jz      .done
%endif

        ;
        ; SSE: OR in the masked MXCSR flags (destroys ecx).
        ;
        stmxcsr [xBP - 10h]
        and     ecx, [xBP - 10h]
        and     ecx, X86_MXCSR_XCPT_FLAGS
        or      eax, ecx

.done:
        leave
        ret
ENDPROC   RT_NOCRT(fetestexcept)

;
; diff --git a/src/VBox/Runtime/common/math/feupdateenv.asm b/src/VBox/Runtime/common/math/feupdateenv.asm
; new file mode 100644
; index 00000000..2c66af41
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/feupdateenv.asm
; @@ -0,0 +1,128 @@
;
; $Id: feupdateenv.asm $
;; @file
; IPRT - No-CRT feupdateenv - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE
extern NAME(RT_NOCRT(fesetenv))
extern NAME(RT_NOCRT(feraiseexcept))


;;
; Updates the FPU+SSE environment.
;
; This will restore @a pEnv and merge in pending exception flags.
;
; @returns  eax = 0 on success, -1 on failure.
; @param    pEnv    32-bit: [xBP+8]     msc64: rcx      gcc64: rdi - Saved environment.
;
; The status returned by fesetenv is not examined; the function always
; returns 0.
;
RT_NOCRT_BEGINPROC feupdateenv
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 30h                        ; locals at [xBP - 10h] / [xBP - 8h], outgoing args at [xSP]
        SEH64_ALLOCATE_STACK 30h
        SEH64_END_PROLOGUE

        ;
        ; Get pEnv into xCX (already there for the msc64 convention).
        ;
%ifdef ASM_CALL64_GCC
        mov     rcx, rdi
%elifdef RT_ARCH_X86
        mov     ecx, [xBP + xCB*2]
%endif

        ;
        ; Collect the currently pending exception flags: MXCSR flags (when SSE
        ; is available) in edx, OR'ed with the x87 status word.
        ;
%ifdef RT_ARCH_X86
        extern  NAME(rtNoCrtHasSse)
        call    NAME(rtNoCrtHasSse)             ; Preserves all except xAX.
        xor     edx, edx
        test    al, al
        jz      .x87_flags
%endif
        stmxcsr [xBP - 10h]
        mov     edx, [xBP - 10h]
        and     edx, X86_MXCSR_XCPT_FLAGS
.x87_flags:
        fnstsw  ax
        or      edx, eax
        mov     [xBP - 8h], edx                 ; Kept across the call; X86_FSW_XCPT_MASK is applied later.

        ;
        ; Call fesetenv to update the environment.
        ; Note! The parameter registers have not been modified yet for the
        ;       calling conventions using them, so the parameter only needs
        ;       to be loaded for the stack based convention.
        ;
%ifdef RT_ARCH_X86
        mov     [xSP], ecx
%endif
        call    NAME(RT_NOCRT(fesetenv))

        ;
        ; Re-raise whatever was pending before the switch.  The AND sets ZF
        ; when there is nothing to raise; the MOV storing the stack argument
        ; does not alter the flags.
        ;
%ifdef ASM_CALL64_GCC
        mov     edi, [xBP - 8h]
        and     edi, X86_FSW_XCPT_MASK
%elifdef ASM_CALL64_MSC
        mov     ecx, [xBP - 8h]
        and     ecx, X86_FSW_XCPT_MASK
%else
        mov     ecx, [xBP - 8h]
        and     ecx, X86_FSW_XCPT_MASK
        mov     [xSP], ecx
%endif
        jz      .nothing_pending
        call    NAME(RT_NOCRT(feraiseexcept))
.nothing_pending:

        ;
        ; Return success.
        ;
        xor     eax, eax
        leave
        ret
ENDPROC   RT_NOCRT(feupdateenv)

;
; diff --git a/src/VBox/Runtime/common/math/floor.asm b/src/VBox/Runtime/common/math/floor.asm
; new file mode 100644
; index 00000000..5e8c267e
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/floor.asm
; @@ -0,0 +1,78 @@
;
; $Id: floor.asm $
;; @file
; IPRT - No-CRT floor - AMD64 & X86.
;

;
; Copyright (C) 2006-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;

%include "iprt/asmdefs.mac"

BEGINCODE

;;
; Compute the largest integral value not greater than rd.
; @returns 32-bit: st(0)   64-bit: xmm0
; @param    rd      32-bit: [ebp + 8]   64-bit: xmm0
RT_NOCRT_BEGINPROC floor
        push    xBP
        mov     xBP, xSP
        sub     xSP, 10h                        ; [xBP - 10h]: saved FCW / transfer slot, [xBP - 08h]: modified FCW

        ; Get the input onto the x87 stack.
%ifdef RT_ARCH_AMD64
        movsd   [xSP], xmm0
        fld     qword [xSP]
%else
        fld     qword [xBP + xCB*2]
%endif

        ; Save the control word and load a copy that rounds down
        ; (bit 10 set, bit 11 cleared).
        fstcw   [xBP - 10h]
        mov     eax, [xBP - 10h]
        or      eax, 00400h
        and     eax, 0f7ffh
        mov     [xBP - 08h], eax
        fldcw   [xBP - 08h]

        ; Round ST(0) to an integer using the mode just loaded.
        frndint

        ; Put the caller's control word back.
        fldcw   [xBP - 10h]

        ; 64-bit: The result goes back to xmm0 via the stack slot.
%ifdef RT_ARCH_AMD64
        fstp    qword [xSP]
        movsd   xmm0, [xSP]
%endif
        leave
        ret
ENDPROC   RT_NOCRT(floor)

;
; diff --git a/src/VBox/Runtime/common/math/floorf.asm b/src/VBox/Runtime/common/math/floorf.asm
; new file mode 100644
; index 00000000..df0a67cb
; --- /dev/null
; +++ b/src/VBox/Runtime/common/math/floorf.asm
; @@ -0,0 +1,78 @@
;
; $Id: floorf.asm $
;; @file
; IPRT - No-CRT floorf - AMD64 & X86.
;

;
; Copyright (C) 2006-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;

%include "iprt/asmdefs.mac"

BEGINCODE

;;
; Compute the largest integral value not greater than rf.
;;
; Compute the largest integral value not greater than rf.
;
; The argument is loaded onto the x87 stack and rounded with FRNDINT while the
; FPU control word is temporarily modified to round down.  The original
; control word is restored before returning.
;
; @returns 32-bit: st(0)   64-bit: xmm0
; @param   rf      32-bit: [ebp + 8]   64-bit: xmm0
RT_NOCRT_BEGINPROC floorf
        push    xBP
        mov     xBP, xSP
        sub     xSP, 10h                        ; scratch space for the value and the control words

%ifdef RT_ARCH_AMD64
        movss   [xSP], xmm0                     ; x87 cannot load from xmm0, go via the stack
        fld     dword [xSP]
%else
        fld     dword [xBP + xCB*2]
%endif

        ; Make it round down by modifying the fpu control word.
        fstcw   [xBP - 10h]                     ; saved copy, reloaded below
        mov     eax, [xBP - 10h]
        or      eax, 00400h                     ; set bit 10 ...
        and     eax, 0f7ffh                     ; ... clear bit 11 (also drops the upper 16 bits)
        mov     [xBP - 08h], eax
        fldcw   [xBP - 08h]

        ; Round ST(0) to integer.
        frndint

        ; Restore the fpu control word.
        fldcw   [xBP - 10h]

%ifdef RT_ARCH_AMD64
        ; The 64-bit variant hands the result back in xmm0, not in st(0).
        fstp    dword [xSP]
        movss   xmm0, [xSP]
%endif
        leave
        ret
ENDPROC   RT_NOCRT(floorf)
;;
; Compute the largest integral value not greater than lrd.
;
; The 80-bit argument is loaded from the caller's stack onto the x87 stack and
; rounded with FRNDINT while the FPU control word is temporarily modified to
; round down.  The original control word is restored before returning and the
; result is left in st(0).
;
; @returns st(0)
; @param   lrd     On the stack at [xBP + xCB*2] once the frame is set up
;                  (32-bit: [ebp + 8]).
RT_NOCRT_BEGINPROC floorl
        push    xBP
        mov     xBP, xSP
        sub     xSP, 10h                        ; scratch space for the two control words

        fld     tword [xBP + xCB*2]

        ; Make it round down by modifying the fpu control word.
        fstcw   [xBP - 10h]                     ; saved copy, reloaded below
        mov     eax, [xBP - 10h]
        or      eax, 00400h                     ; set bit 10 ...
        and     eax, 0f7ffh                     ; ... clear bit 11 (also drops the upper 16 bits)
        mov     [xBP - 08h], eax
        fldcw   [xBP - 08h]

        ; Round ST(0) to integer.
        frndint

        ; Restore the fpu control word.
        fldcw   [xBP - 10h]

        leave
        ret
ENDPROC   RT_NOCRT(floorl)
;;
; Fused multiplication and add, intel version (three operand VFMADD132SD).
;
; When RT_ARCH_X86 is defined the three doubles are fetched from the caller's
; stack and the result is handed back in st(0); otherwise the arguments arrive
; in xmm0..xmm2 and the result stays in xmm0.
;
; @returns RT_ARCH_X86: st(0)                      otherwise: xmm0
; @param   rdFactor1   RT_ARCH_X86: [xBP + xCB*2 + 00h]   otherwise: xmm0
; @param   rdFactor2   RT_ARCH_X86: [xBP + xCB*2 + 08h]   otherwise: xmm1
; @param   rdAddend    RT_ARCH_X86: [xBP + xCB*2 + 10h]   otherwise: xmm2
BEGINPROC rtNoCrtMathFma3
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; Pull the stack arguments into the registers used by the common code.
        movsd   xmm2, qword [xBP + xCB*2 + 10h]
        movsd   xmm1, qword [xBP + xCB*2 + 08h]
        movsd   xmm0, qword [xBP + xCB*2 + 00h]
%endif

        ; 132 form: operand 1 is multiplied with operand 3, operand 2 is added.
        vfmadd132sd xmm0, xmm2, xmm1            ; xmm0 = xmm0 * xmm1 + xmm2

%ifdef RT_ARCH_X86
        ; Hand the result back on the x87 stack, going via temporary stack space.
        sub     xSP, 10h
        movsd   [xSP], xmm0
        fld     qword [xSP]
%endif
        leave
        ret
ENDPROC   rtNoCrtMathFma3
;;
; Fused multiplication and add, amd version (four operand VFMADDSD).
;
; When RT_ARCH_X86 is defined the three doubles are fetched from the caller's
; stack and the result is handed back in st(0); otherwise the arguments arrive
; in xmm0..xmm2 and the result stays in xmm0.
;
; @returns RT_ARCH_X86: st(0)                      otherwise: xmm0
; @param   rdFactor1   RT_ARCH_X86: [xBP + xCB*2 + 00h]   otherwise: xmm0
; @param   rdFactor2   RT_ARCH_X86: [xBP + xCB*2 + 08h]   otherwise: xmm1
; @param   rdAddend    RT_ARCH_X86: [xBP + xCB*2 + 10h]   otherwise: xmm2
BEGINPROC rtNoCrtMathFma4
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; Pull the stack arguments into the registers used by the common code.
        movsd   xmm2, qword [xBP + xCB*2 + 10h]
        movsd   xmm1, qword [xBP + xCB*2 + 08h]
        movsd   xmm0, qword [xBP + xCB*2 + 00h]
%endif

        vfmaddsd xmm0, xmm0, xmm1, xmm2         ; xmm0 = xmm0 * xmm1 + xmm2

%ifdef RT_ARCH_X86
        ; Hand the result back on the x87 stack, going via temporary stack space.
        sub     xSP, 10h
        movsd   [xSP], xmm0
        fld     qword [xSP]
%endif
        leave
        ret
ENDPROC   rtNoCrtMathFma4
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) +# include <iprt/asm-amd64-x86.h> +# include <iprt/x86.h> +#endif +#include <softfloat.h> + + +/********************************************************************************************************************************* +* External Symbols * +*********************************************************************************************************************************/ +DECLASM(double) rtNoCrtMathFma3(double rdFactor1, double rdFactor2, double rdAddend); +DECLASM(double) rtNoCrtMathFma4(double rdFactor1, double rdFactor2, double rdAddend); + + +#undef fma +double RT_NOCRT(fma)(double rdFactor1, double rdFactor2, double rdAddend) +{ + /* + * We prefer using native FMA instructions when available. 
+ */ +#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) + typedef enum { kCpuDetect = 0, kCpuWithFma3, kCpuWithFma4, kCpuWithoutFma } CPUFMASUPPORT; + static CPUFMASUPPORT volatile s_enmSup = kCpuDetect; + CPUFMASUPPORT enmSup = s_enmSup; + if (enmSup != kCpuDetect) + { } + else + { + if (ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_FMA) + enmSup = kCpuWithFma3; + else if (ASMCpuId_ECX(UINT32_C(0x80000001)) & X86_CPUID_AMD_FEATURE_ECX_FMA4) + enmSup = kCpuWithFma4; + else + enmSup = kCpuWithoutFma; + s_enmSup = enmSup; + } + if (enmSup == kCpuWithFma3) + return rtNoCrtMathFma3(rdFactor1, rdFactor2, rdAddend); + if (enmSup == kCpuWithFma4) + return rtNoCrtMathFma4(rdFactor1, rdFactor2, rdAddend); +#endif + + /* + * Fall back on SoftFloat. + */ + AssertCompile(sizeof(rdFactor1) == sizeof(RTFLOAT64U)); + softfloat_state_t State = SOFTFLOAT_STATE_INIT_DEFAULTS(); /** @todo init from MXCSR/FCW */ + union { RTFLOAT64U Iprt; float64_t SoftFloat; } uFactor1, uFactor2, uAddend, uResult; + uFactor1.Iprt.rd = rdFactor1; + uFactor2.Iprt.rd = rdFactor2; + uAddend.Iprt.rd = rdAddend; + uResult.SoftFloat = f64_mulAdd(uFactor1.SoftFloat, uFactor2.SoftFloat, uAddend.SoftFloat, &State); + return uResult.Iprt.rd; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fma); + diff --git a/src/VBox/Runtime/common/math/fmaf-asm.asm b/src/VBox/Runtime/common/math/fmaf-asm.asm new file mode 100644 index 00000000..105044e1 --- /dev/null +++ b/src/VBox/Runtime/common/math/fmaf-asm.asm @@ -0,0 +1,104 @@ +; $Id: fmaf-asm.asm $ +;; @file +; IPRT - No-CRT fmaf alternatives - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. 
;;
; Fused multiplication and add, intel version (three operand VFMADD132SS).
;
; When RT_ARCH_X86 is defined the three floats are fetched from the caller's
; stack and the result is handed back in st(0); otherwise the arguments arrive
; in xmm0..xmm2 and the result stays in xmm0.
;
; @returns RT_ARCH_X86: st(0)                      otherwise: xmm0
; @param   r32Factor1  RT_ARCH_X86: [xBP + xCB*2 + 00h]   otherwise: xmm0
; @param   r32Factor2  RT_ARCH_X86: [xBP + xCB*2 + 04h]   otherwise: xmm1
; @param   r32Addend   RT_ARCH_X86: [xBP + xCB*2 + 08h]   otherwise: xmm2
BEGINPROC rtNoCrtMathFma3f
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; Pull the stack arguments into the registers used by the common code.
        movss   xmm2, dword [xBP + xCB*2 + 08h]
        movss   xmm1, dword [xBP + xCB*2 + 04h]
        movss   xmm0, dword [xBP + xCB*2 + 00h]
%endif

        ; 132 form: operand 1 is multiplied with operand 3, operand 2 is added.
        vfmadd132ss xmm0, xmm2, xmm1            ; xmm0 = xmm0 * xmm1 + xmm2

%ifdef RT_ARCH_X86
        ; Hand the result back on the x87 stack, going via temporary stack space.
        sub     xSP, 10h
        movss   [xSP], xmm0
        fld     dword [xSP]
%endif
        leave
        ret
ENDPROC   rtNoCrtMathFma3f
;;
; Fused multiplication and add, amd version.
;
; When RT_ARCH_X86 is defined the three single precision arguments are read
; from the caller's stack, 4 bytes apart, and the result is returned in st(0).
; Otherwise the arguments arrive in xmm0..xmm2 and the result stays in xmm0.
;
; @returns st(0) / xmm0
; @param   r32Factor1  [xBP + xCB*2 + 00h] / xmm0
; @param   r32Factor2  [xBP + xCB*2 + 04h] / xmm1
; @param   r32Addend   [xBP + xCB*2 + 08h] / xmm2
BEGINPROC rtNoCrtMathFma4f
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; Floats occupy one dword each on the stack, hence the 4 byte stride.
        movss   xmm0, dword [xBP + xCB*2 + 00h]
        movss   xmm1, dword [xBP + xCB*2 + 04h]
        movss   xmm2, dword [xBP + xCB*2 + 08h]
%endif

        vfmaddss xmm0, xmm0, xmm1, xmm2         ; xmm0 = xmm0 * xmm1 + xmm2

%ifdef RT_ARCH_X86
        ; Return the result in st(0), going via temporary stack space.
        sub     xSP, 10h
        movss   [xSP], xmm0
        fld     dword [xSP]
%endif
        leave
        ret
ENDPROC   rtNoCrtMathFma4f
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) +# include <iprt/asm-amd64-x86.h> +# include <iprt/x86.h> +#endif +#include <softfloat.h> + + +/********************************************************************************************************************************* +* External Symbols * +*********************************************************************************************************************************/ +DECLASM(float) rtNoCrtMathFma3f(float r32Factor1, float r32Factor2, float r32Addend); +DECLASM(float) rtNoCrtMathFma4f(float r32Factor1, float r32Factor2, float r32Addend); + + +#undef fmaf +float RT_NOCRT(fmaf)(float r32Factor1, float r32Factor2, float r32Addend) +{ + /* + * We prefer using native FMA instructions when available. 
+ */ +#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) + typedef enum { kCpuDetect = 0, kCpuWithFma3, kCpuWithFma4, kCpuWithoutFma } CPUFMASUPPORT; + static CPUFMASUPPORT volatile s_enmSup = kCpuDetect; + CPUFMASUPPORT enmSup = s_enmSup; + if (enmSup != kCpuDetect) + { } + else + { + if (ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_FMA) + enmSup = kCpuWithFma3; + else if (ASMCpuId_ECX(UINT32_C(0x80000001)) & X86_CPUID_AMD_FEATURE_ECX_FMA4) + enmSup = kCpuWithFma4; + else + enmSup = kCpuWithoutFma; + s_enmSup = enmSup; + } + if (enmSup == kCpuWithFma3) + return rtNoCrtMathFma3f(r32Factor1, r32Factor2, r32Addend); + if (enmSup == kCpuWithFma4) + return rtNoCrtMathFma4f(r32Factor1, r32Factor2, r32Addend); +#endif + + /* + * Fall back on SoftFloat. + */ + /** @todo couldn't we just use double as a fallback here? */ + AssertCompile(sizeof(r32Factor1) == sizeof(RTFLOAT32U)); + softfloat_state_t State = SOFTFLOAT_STATE_INIT_DEFAULTS(); /** @todo init from MXCSR/FCW */ + union { RTFLOAT32U Iprt; float32_t SoftFloat; } uFactor1, uFactor2, uAddend, uResult; + uFactor1.Iprt.r = r32Factor1; + uFactor2.Iprt.r = r32Factor2; + uAddend.Iprt.r = r32Addend; + uResult.SoftFloat = f32_mulAdd(uFactor1.SoftFloat, uFactor2.SoftFloat, uAddend.SoftFloat, &State); + return uResult.Iprt.r; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fmaf); + diff --git a/src/VBox/Runtime/common/math/fmax.cpp b/src/VBox/Runtime/common/math/fmax.cpp new file mode 100644 index 00000000..f7cb87c9 --- /dev/null +++ b/src/VBox/Runtime/common/math/fmax.cpp @@ -0,0 +1,64 @@ +/* $Id: fmax.cpp $ */ +/** @file + * IPRT - No-CRT - fmax(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef fmax +double RT_NOCRT(fmax)(double rdLeft, double rdRight) +{ + if (!isnan(rdLeft)) + { + if (!isnan(rdRight)) + { + /* We don't trust the hw with comparing signed zeros, thus + the 0.0 test and signbit fun here. */ + if (rdLeft != rdRight || rdLeft != 0.0) + return rdLeft >= rdRight ? rdLeft : rdRight; + return signbit(rdLeft) <= signbit(rdRight) ? 
rdLeft : rdRight; + } + return rdLeft; + } + return rdRight; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fmax); + diff --git a/src/VBox/Runtime/common/math/fmaxf.cpp b/src/VBox/Runtime/common/math/fmaxf.cpp new file mode 100644 index 00000000..2e6c0222 --- /dev/null +++ b/src/VBox/Runtime/common/math/fmaxf.cpp @@ -0,0 +1,64 @@ +/* $Id: fmaxf.cpp $ */ +/** @file + * IPRT - No-CRT - fmaxf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef fmaxf +float RT_NOCRT(fmaxf)(float r32Left, float r32Right) +{ + if (!isnan(r32Left)) + { + if (!isnan(r32Right)) + { + /* We don't trust the hw with comparing signed zeros, thus + the 0.0 test and signbit fun here. */ + if (r32Left != r32Right || r32Left != 0.0) + return r32Left >= r32Right ? r32Left : r32Right; + return signbit(r32Left) <= signbit(r32Right) ? r32Left : r32Right; + } + return r32Left; + } + return r32Right; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fmaxf); + diff --git a/src/VBox/Runtime/common/math/fmaxl.cpp b/src/VBox/Runtime/common/math/fmaxl.cpp new file mode 100644 index 00000000..8eccc9ba --- /dev/null +++ b/src/VBox/Runtime/common/math/fmaxl.cpp @@ -0,0 +1,64 @@ +/* $Id: fmaxl.cpp $ */ +/** @file + * IPRT - No-CRT - fmaxl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef fmaxl +long double RT_NOCRT(fmaxl)(long double lrdLeft, long double lrdRight) +{ + if (!isnan(lrdLeft)) + { + if (!isnan(lrdRight)) + { + /* We don't trust the hw with comparing signed zeros, thus + the 0.0 test and signbit fun here. */ + if (lrdLeft != lrdRight || lrdLeft != 0.0) + return lrdLeft >= lrdRight ? lrdLeft : lrdRight; + return signbit(lrdLeft) <= signbit(lrdRight) ? lrdLeft : lrdRight; + } + return lrdLeft; + } + return lrdRight; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fmaxl); + diff --git a/src/VBox/Runtime/common/math/fmin.cpp b/src/VBox/Runtime/common/math/fmin.cpp new file mode 100644 index 00000000..5d104e42 --- /dev/null +++ b/src/VBox/Runtime/common/math/fmin.cpp @@ -0,0 +1,64 @@ +/* $Id: fmin.cpp $ */ +/** @file + * IPRT - No-CRT - fmin(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef fmin +double RT_NOCRT(fmin)(double rdLeft, double rdRight) +{ + if (!isnan(rdLeft)) + { + if (!isnan(rdRight)) + { + /* We don't trust the hw with comparing signed zeros, thus + the 0.0 test and signbit fun here. */ + if (rdLeft != rdRight || rdLeft != 0.0) + return rdLeft <= rdRight ? 
rdLeft : rdRight; + return signbit(rdLeft) >= signbit(rdRight) ? rdLeft : rdRight; + } + return rdLeft; + } + return rdRight; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fmin); + diff --git a/src/VBox/Runtime/common/math/fminf.cpp b/src/VBox/Runtime/common/math/fminf.cpp new file mode 100644 index 00000000..ed3aa90d --- /dev/null +++ b/src/VBox/Runtime/common/math/fminf.cpp @@ -0,0 +1,64 @@ +/* $Id: fminf.cpp $ */ +/** @file + * IPRT - No-CRT - fminf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef fminf +float RT_NOCRT(fminf)(float r32Left, float r32Right) +{ + if (!isnan(r32Left)) + { + if (!isnan(r32Right)) + { + /* We don't trust the hw with comparing signed zeros, thus + the 0.0 test and signbit fun here. */ + if (r32Left != r32Right || r32Left != 0.0) + return r32Left <= r32Right ? r32Left : r32Right; + return signbit(r32Left) >= signbit(r32Right) ? r32Left : r32Right; + } + return r32Left; + } + return r32Right; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fminf); + diff --git a/src/VBox/Runtime/common/math/fminl.cpp b/src/VBox/Runtime/common/math/fminl.cpp new file mode 100644 index 00000000..ceb37669 --- /dev/null +++ b/src/VBox/Runtime/common/math/fminl.cpp @@ -0,0 +1,64 @@ +/* $Id: fminl.cpp $ */ +/** @file + * IPRT - No-CRT - fminl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef fminl +long double RT_NOCRT(fminl)(long double lrdLeft, long double lrdRight) +{ + if (!isnan(lrdLeft)) + { + if (!isnan(lrdRight)) + { + /* We don't trust the hw with comparing signed zeros, thus + the 0.0 test and signbit fun here. */ + if (lrdLeft != lrdRight || lrdLeft != 0.0) + return lrdLeft <= lrdRight ? lrdLeft : lrdRight; + return signbit(lrdLeft) >= signbit(lrdRight) ? lrdLeft : lrdRight; + } + return lrdLeft; + } + return lrdRight; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fminl); + diff --git a/src/VBox/Runtime/common/math/frexp.cpp b/src/VBox/Runtime/common/math/frexp.cpp new file mode 100644 index 00000000..46c2f3dc --- /dev/null +++ b/src/VBox/Runtime/common/math/frexp.cpp @@ -0,0 +1,88 @@ +/* $Id: frexp.cpp $ */ +/** @file + * IPRT - No-CRT - frexp(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. 
+ * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/nocrt/limits.h> + + +/* Similar to the fxtract instruction. 
*/ +#undef frexp +double RT_NOCRT(frexp)(double rdValue, int *piExp) +{ + RTFLOAT64U Value; + AssertCompile(sizeof(Value) == sizeof(rdValue)); + Value.r = rdValue; + + if (RTFLOAT64U_IS_NORMAL(&Value)) + { + *piExp = (int)Value.s.uExponent - RTFLOAT64U_EXP_BIAS + 1; + Value.s.uExponent = RTFLOAT64U_EXP_BIAS - 1; + } + else if (RTFLOAT64U_IS_ZERO(&Value)) + { + *piExp = 0; + return rdValue; + } + else if (RTFLOAT64U_IS_SUBNORMAL(&Value)) + { + int iExp = -RTFLOAT64U_EXP_BIAS + 1; + uint64_t uFraction = Value.s64.uFraction; + while (!(uFraction & RT_BIT_64(RTFLOAT64U_FRACTION_BITS))) + { + iExp--; + uFraction <<= 1; + } + Value.s64.uFraction = uFraction; + Value.s64.uExponent = RTFLOAT64U_EXP_BIAS - 1; + *piExp = iExp + 1; + } + else + { + /* NaN, Inf */ + *piExp = Value.s.fSign ? INT_MIN : INT_MAX; + return rdValue; + } + return Value.r; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(frexp); + diff --git a/src/VBox/Runtime/common/math/frexpf.cpp b/src/VBox/Runtime/common/math/frexpf.cpp new file mode 100644 index 00000000..f7d63173 --- /dev/null +++ b/src/VBox/Runtime/common/math/frexpf.cpp @@ -0,0 +1,87 @@ +/* $Id: frexpf.cpp $ */ +/** @file + * IPRT - No-CRT - frexpf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/nocrt/limits.h> + + +/* Similar to the fxtract instruction. */ +#undef frexpf +float RT_NOCRT(frexpf)(float rfValue, int *piExp) +{ + RTFLOAT32U Value; + AssertCompile(sizeof(Value) == sizeof(rfValue)); + Value.r = rfValue; + + if (RTFLOAT32U_IS_NORMAL(&Value)) + { + *piExp = (int)Value.s.uExponent - RTFLOAT32U_EXP_BIAS + 1; + Value.s.uExponent = RTFLOAT32U_EXP_BIAS - 1; + } + else if (RTFLOAT32U_IS_ZERO(&Value)) + { + *piExp = 0; + return rfValue; + } + else if (RTFLOAT32U_IS_SUBNORMAL(&Value)) + { + int iExp = -RTFLOAT32U_EXP_BIAS + 1; + uint32_t uFraction = Value.s.uFraction; + while (!(uFraction & RT_BIT_32(RTFLOAT32U_FRACTION_BITS))) + { + iExp--; + uFraction <<= 1; + } + Value.s.uFraction = uFraction; + Value.s.uExponent = RTFLOAT32U_EXP_BIAS - 1; + *piExp = iExp + 1; + } + else /* NaN, Inf */ + { + *piExp = Value.s.fSign ? 
INT_MIN : INT_MAX; + return rfValue; + } + return Value.r; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(frexpf); + diff --git a/src/VBox/Runtime/common/math/frexpl.cpp b/src/VBox/Runtime/common/math/frexpl.cpp new file mode 100644 index 00000000..161d9b35 --- /dev/null +++ b/src/VBox/Runtime/common/math/frexpl.cpp @@ -0,0 +1,167 @@ +/* $Id: frexpl.cpp $ */ +/** @file + * IPRT - No-CRT - frexpl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/nocrt/limits.h> +#ifdef RT_COMPILER_WITH_128BIT_LONG_DOUBLE +# include <iprt/uint128.h> +#endif + + +/* Similar to the fxtract instruction. */ +#undef frexpl +long double RT_NOCRT(frexpl)(long double lrdValue, int *piExp) +{ +#ifdef RT_COMPILER_WITH_64BIT_LONG_DOUBLE + RTFLOAT64U Value; + AssertCompile(sizeof(Value) == sizeof(lrdValue)); + Value.lrd = lrdValue; + + if (RTFLOAT64U_IS_NORMAL(&Value)) + { + *piExp = (int)Value.s.uExponent - RTFLOAT64U_EXP_BIAS + 1; + Value.s.uExponent = RTFLOAT64U_EXP_BIAS - 1; + } + else if (RTFLOAT64U_IS_ZERO(&Value)) + { + *piExp = 0; + return lrdValue; + } + else if (RTFLOAT64U_IS_SUBNORMAL(&Value)) + { + int iExp = -RTFLOAT64U_EXP_BIAS + 1; + uint64_t uFraction = Value.s64.uFraction; + while (!(uFraction & RT_BIT_64(RTFLOAT64U_FRACTION_BITS))) + { + iExp--; + uFraction <<= 1; + } + Value.s64.uFraction = uFraction; + Value.s64.uExponent = RTFLOAT64U_EXP_BIAS - 1; + *piExp = iExp + 1; + } + else + { + /* NaN, Inf */ + *piExp = Value.s.fSign ? 
INT_MIN : INT_MAX; + return lrdValue; + } + return Value.lrd; + +#elif defined(RT_COMPILER_WITH_80BIT_LONG_DOUBLE) + RTFLOAT80U2 Value; + Value.r = lrdValue; + + if (RTFLOAT80U_IS_NORMAL(&Value)) + { + *piExp = (int)Value.s.uExponent - RTFLOAT80U_EXP_BIAS + 1; + Value.s.uExponent = RTFLOAT80U_EXP_BIAS - 1; + } + else if (RTFLOAT80U_IS_ZERO(&Value)) + { + *piExp = 0; + return lrdValue; + } + else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(&Value)) + { + int iExp = -RTFLOAT80U_EXP_BIAS + 1; + while (!(Value.s.uMantissa & RT_BIT_64(RTFLOAT80U_FRACTION_BITS))) + { + iExp--; + Value.s.uMantissa <<= 1; + } + Value.s.uExponent = RTFLOAT80U_EXP_BIAS - 1; + *piExp = iExp + 1; + } + else /* NaN, Inf */ + { + *piExp = Value.s.fSign ? INT_MIN : INT_MAX; + return lrdValue; + } + return Value.r; + + +#elif defined(RT_COMPILER_WITH_128BIT_LONG_DOUBLE) + RTFLOAT128U Value; + AssertCompile(sizeof(Value) == sizeof(lrdValue)); + Value.r = lrdValue; + + if (RTFLOAT128U_IS_NORMAL(&Value)) + { + *piExp = (int)Value.s.uExponent - RTFLOAT128U_EXP_BIAS + 1; + Value.s.uExponent = RTFLOAT128U_EXP_BIAS - 1; + } + else if (RTFLOAT128U_IS_ZERO(&Value)) + { + *piExp = 0; + return lrdValue; + } + else if (RTFLOAT128U_IS_SUBNORMAL(&Value)) + { + int iExp = -RTFLOAT128U_EXP_BIAS + 1; + RTUINT128U uFraction; + uFraction.s.Hi = Value.s64.uFractionHi; + uFraction.s.Lo = Value.s64.uFractionLo; + while (!(uFraction.s.Hi & RT_BIT_64(RTFLOAT128U_FRACTION_BITS - 64))) + { + iExp--; + RTUInt128AssignShiftLeft(&uFraction, 1); + } + Value.s64.uFractionHi = uFraction.s.Hi; + Value.s64.uFractionLo = uFraction.s.Lo; + Value.s64.uExponent = RTFLOAT64U_EXP_BIAS - 1; + *piExp = iExp + 1; + } + else + { + /* NaN, Inf */ + *piExp = Value.s.fSign ? INT_MIN : INT_MAX; + return lrdValue; + } + return Value.r; +#else +# error "Port ME!" 
+#endif +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(frexpl); + diff --git a/src/VBox/Runtime/common/math/gcc/Makefile.kup b/src/VBox/Runtime/common/math/gcc/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/Makefile.kup diff --git a/src/VBox/Runtime/common/math/gcc/adddi3.c b/src/VBox/Runtime/common/math/gcc/adddi3.c new file mode 100644 index 00000000..ce2b1d85 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/adddi3.c @@ -0,0 +1,63 @@ +/* $NetBSD: adddi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)adddi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: adddi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Add two quads. This is trivial since a one-bit carry from a single + * u_int addition x+y occurs if and only if the sum x+y is less than + * either x or y (the choice to compare with x or y is arbitrary). + */ +quad_t +__adddi3(a, b) + quad_t a, b; +{ + union uu aa, bb, sum; + + aa.q = a; + bb.q = b; + sum.ul[L] = aa.ul[L] + bb.ul[L]; + sum.ul[H] = aa.ul[H] + bb.ul[H] + (sum.ul[L] < bb.ul[L]); + return (sum.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/anddi3.c b/src/VBox/Runtime/common/math/gcc/anddi3.c new file mode 100644 index 00000000..2f35ced8 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/anddi3.c @@ -0,0 +1,61 @@ +/* $NetBSD: anddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)anddi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: anddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return a & b, in quad. 
+ */ +quad_t +__anddi3(a, b) + quad_t a, b; +{ + union uu aa, bb; + + aa.q = a; + bb.q = b; + aa.ul[0] &= bb.ul[0]; + aa.ul[1] &= bb.ul[1]; + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/ashldi3.c b/src/VBox/Runtime/common/math/gcc/ashldi3.c new file mode 100644 index 00000000..e7df3c18 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/ashldi3.c @@ -0,0 +1,70 @@ +/* $NetBSD: ashldi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)ashldi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: ashldi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Shift a (signed) quad value left (arithmetic shift left). + * This is the same as logical shift left! + */ +quad_t +__ashldi3(a, shift) + quad_t a; + qshift_t shift; +{ + union uu aa; + + if (shift == 0) + return(a); + aa.q = a; + if (shift >= INT_BITS) { + aa.ul[H] = aa.ul[L] << (shift - INT_BITS); + aa.ul[L] = 0; + } else { + aa.ul[H] = (aa.ul[H] << shift) | + (aa.ul[L] >> (INT_BITS - shift)); + aa.ul[L] <<= shift; + } + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/ashrdi3.c b/src/VBox/Runtime/common/math/gcc/ashrdi3.c new file mode 100644 index 00000000..aaa1c71b --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/ashrdi3.c @@ -0,0 +1,82 @@ +/* $NetBSD: ashrdi3.c,v 1.10 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)ashrdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: ashrdi3.c,v 1.10 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Shift a (signed) quad value right (arithmetic shift right). 
+ */ +quad_t +__ashrdi3(a, shift) + quad_t a; + qshift_t shift; +{ + union uu aa; + + if (shift == 0) + return(a); + aa.q = a; + if (shift >= INT_BITS) { + int s; + + /* + * Smear bits rightward using the machine's right-shift + * method, whether that is sign extension or zero fill, + * to get the `sign word' s. Note that shifting by + * INT_BITS is undefined, so we shift (INT_BITS-1), + * then 1 more, to get our answer. + */ + /* LINTED inherits machine dependency */ + s = (aa.sl[H] >> (INT_BITS - 1)) >> 1; + /* LINTED inherits machine dependency*/ + aa.ul[L] = aa.sl[H] >> (shift - INT_BITS); + aa.ul[H] = s; + } else { + aa.ul[L] = (aa.ul[L] >> shift) | + (aa.ul[H] << (INT_BITS - shift)); + /* LINTED inherits machine dependency */ + aa.sl[H] >>= shift; + } + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/cmpdi2.c b/src/VBox/Runtime/common/math/gcc/cmpdi2.c new file mode 100644 index 00000000..c876eb4e --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/cmpdi2.c @@ -0,0 +1,62 @@ +/* $NetBSD: cmpdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)cmpdi2.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: cmpdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return 0, 1, or 2 as a <, =, > b respectively. + * Both a and b are considered signed---which means only the high word is + * signed. + */ +int +__cmpdi2(a, b) + quad_t a, b; +{ + union uu aa, bb; + + aa.q = a; + bb.q = b; + return (aa.sl[H] < bb.sl[H] ? 0 : aa.sl[H] > bb.sl[H] ? 2 : + aa.ul[L] < bb.ul[L] ? 0 : aa.ul[L] > bb.ul[L] ? 2 : 1); +} diff --git a/src/VBox/Runtime/common/math/gcc/divdi3.c b/src/VBox/Runtime/common/math/gcc/divdi3.c new file mode 100644 index 00000000..eecc17ad --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/divdi3.c @@ -0,0 +1,70 @@ +/* $NetBSD: divdi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. 
All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)divdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: divdi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Divide two signed quads. + * ??? if -1/2 should produce -1 on this machine, this code is wrong + */ +quad_t +__divdi3(a, b) + quad_t a, b; +{ + u_quad_t ua, ub, uq; + int neg = 0; + + ua = a; + ub = b; + + if (a < 0) + ua = -ua, neg ^= 1; + if (b < 0) + ub = -ub, neg ^= 1; + + uq = __qdivrem(ua, ub, (u_quad_t *)0); + if (neg) + uq = - uq; + return uq; +} diff --git a/src/VBox/Runtime/common/math/gcc/divmoddi4.c b/src/VBox/Runtime/common/math/gcc/divmoddi4.c new file mode 100644 index 00000000..c85498d3 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/divmoddi4.c @@ -0,0 +1,84 @@ +/* $Id: divmoddi4.c $ */ +/** @file + * IPRT - __divmoddi4 implementation + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
int64_t __divmoddi4(int64_t i64A, int64_t i64B, int64_t *pi64R);
uint64_t __udivmoddi4(uint64_t u64A, uint64_t u64B, uint64_t *pu64R);

/**
 * __divmoddi4() implementation to satisfy external references from 32-bit code
 * generated by gcc-7 or later (more likely with gcc-11).
 *
 * The work is done by __udivmoddi4() on the magnitudes of the operands.  The
 * quotient is negated when the operand signs differ and the remainder takes
 * the sign of the dividend.  All negations are performed in unsigned
 * arithmetic so that INT64_MIN operands and a 2^63 quotient do not cause
 * signed overflow.
 *
 * @param   i64A        The dividend value.
 * @param   i64B        The divisor value.
 * @param   pi64R       A pointer to the remainder. May be NULL.
 * @returns i64A / i64B
 */
int64_t __divmoddi4(int64_t i64A, int64_t i64B, int64_t *pi64R)
{
    /* Magnitudes; 0 - (uint64_t)x is well defined even for INT64_MIN. */
    uint64_t const u64AbsA = i64A < 0 ? (uint64_t)0 - (uint64_t)i64A : (uint64_t)i64A;
    uint64_t const u64AbsB = i64B < 0 ? (uint64_t)0 - (uint64_t)i64B : (uint64_t)i64B;

    /* Always hand the worker a valid remainder pointer, so a NULL pi64R is
       never forwarded. */
    uint64_t u64R = 0;
    uint64_t u64Q = __udivmoddi4(u64AbsA, u64AbsB, &u64R);

    /* Operands of opposite sign yield a negative quotient. */
    if ((i64A < 0) != (i64B < 0))
        u64Q = (uint64_t)0 - u64Q;

    /* The remainder has the sign of the dividend. */
    if (pi64R)
        *pi64R = (int64_t)(i64A < 0 ? (uint64_t)0 - u64R : u64R);

    return (int64_t)u64Q;
}
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)iordi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: iordi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return a | b, in quad. + * + * Both operands are viewed through union uu and the two u_int halves are + * OR-ed independently. The halves are indexed 0 and 1 directly rather than + * through H and L, because the operation is the same for either word order. + */ +quad_t +__iordi3(a, b) + quad_t a, b; +{ + union uu aa, bb; + + aa.q = a; + bb.q = b; + aa.ul[0] |= bb.ul[0]; + aa.ul[1] |= bb.ul[1]; + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/lshldi3.c b/src/VBox/Runtime/common/math/gcc/lshldi3.c new file mode 100644 index 00000000..611cb08d --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/lshldi3.c @@ -0,0 +1,70 @@ +/* $NetBSD: lshldi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)lshldi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: lshldi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Shift an (unsigned) quad value left (logical shift left). + * This is the same as arithmetic shift left! 
+ */ +quad_t +__lshldi3(a, shift) + quad_t a; + qshift_t shift; +{ + union uu aa; + + if (shift == 0) /* nothing to do; this also keeps the general path below from shifting by INT_BITS - 0, i.e. by a full word */ + return(a); + aa.q = a; /* NOTE(review): shift is not checked against 2 * INT_BITS here; presumably callers never pass that much - confirm */ + if (shift >= INT_BITS) { /* the low word alone supplies the high word and the low word becomes 0 */ + aa.ul[H] = aa.ul[L] << (shift - INT_BITS); + aa.ul[L] = 0; + } else { /* 0 < shift < INT_BITS: the bits leaving the low word are carried into the high word */ + aa.ul[H] = (aa.ul[H] << shift) | + (aa.ul[L] >> (INT_BITS - shift)); + aa.ul[L] <<= shift; + } + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/lshrdi3.c b/src/VBox/Runtime/common/math/gcc/lshrdi3.c new file mode 100644 index 00000000..3dba60c3 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/lshrdi3.c @@ -0,0 +1,69 @@ +/* $NetBSD: lshrdi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)lshrdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: lshrdi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Shift an (unsigned) quad value right (logical shift right). + * + * The value is shifted as two unsigned words (union uu), so zeros enter + * from the top even though the parameter type is the signed quad_t. + * A zero shift returns the argument untouched, which also keeps the + * general path from shifting by INT_BITS - 0, i.e. by a full word. + * For shift >= INT_BITS the high word alone supplies the low word. + * NOTE(review): shift is not checked against 2 * INT_BITS; presumably + * callers never pass that much - confirm. + */ +quad_t +__lshrdi3(a, shift) + quad_t a; + qshift_t shift; +{ + union uu aa; + + if (shift == 0) + return(a); + aa.q = a; + if (shift >= INT_BITS) { + aa.ul[L] = aa.ul[H] >> (shift - INT_BITS); + aa.ul[H] = 0; + } else { + aa.ul[L] = (aa.ul[L] >> shift) | + (aa.ul[H] << (INT_BITS - shift)); + aa.ul[H] >>= shift; + } + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/moddi3.c b/src/VBox/Runtime/common/math/gcc/moddi3.c new file mode 100644 index 00000000..764ea01d --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/moddi3.c @@ -0,0 +1,70 @@ +/* $NetBSD: moddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)moddi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: moddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return remainder after dividing two signed quads. + * + * XXX we assume a % b < 0 iff a < 0, but this is actually machine-dependent. 
+ */ +quad_t +__moddi3(a, b) + quad_t a, b; +{ + u_quad_t ua, ub, ur; + int neg = 0; /* set when the dividend is negative; the remainder is negated in that case */ + + ua = a; + ub = b; + + if (a < 0) + ua = -ua, neg ^= 1; + if (b < 0) /* only the magnitude of the divisor is used; its sign does not affect the result's sign */ + ub = -ub; + (void)__qdivrem(ua, ub, &ur); /* the quotient is discarded, only the remainder is wanted */ + if (neg) + ur = -ur; + return (ur); +} diff --git a/src/VBox/Runtime/common/math/gcc/muldi3.c b/src/VBox/Runtime/common/math/gcc/muldi3.c new file mode 100644 index 00000000..370ef3d2 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/muldi3.c @@ -0,0 +1,249 @@ +/* $NetBSD: muldi3.c,v 1.10 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)muldi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: muldi3.c,v 1.10 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Multiply two quads. + * + * Our algorithm is based on the following. Split incoming quad values + * u and v (where u,v >= 0) into + * + * u = 2^n u1 + u0 (n = number of bits in `u_int', usu. 32) + * + * and + * + * v = 2^n v1 + v0 + * + * Then + * + * uv = 2^2n u1 v1 + 2^n u1 v0 + 2^n v1 u0 + u0 v0 + * = 2^2n u1 v1 + 2^n (u1 v0 + v1 u0) + u0 v0 + * + * Now add 2^n u1 v1 to the first term and subtract it from the middle, + * and add 2^n u0 v0 to the last term and subtract it from the middle. + * This gives: + * + * uv = (2^2n + 2^n) (u1 v1) + + * (2^n) (u1 v0 - u1 v1 + u0 v1 - u0 v0) + + * (2^n + 1) (u0 v0) + * + * Factoring the middle a bit gives us: + * + * uv = (2^2n + 2^n) (u1 v1) + [u1v1 = high] + * (2^n) (u1 - u0) (v0 - v1) + [(u1-u0)... = mid] + * (2^n + 1) (u0 v0) [u0v0 = low] + * + * The terms (u1 v1), (u1 - u0) (v0 - v1), and (u0 v0) can all be done + * in just half the precision of the original. (Note that either or both + * of (u1 - u0) or (v0 - v1) may be negative.) + * + * This algorithm is from Knuth vol. 2 (2nd ed), section 4.3.3, p. 278.
+ * + * Since C does not give us a `int * int = quad' operator, we split + * our input quads into two ints, then split the two ints into two + * shorts. We can then calculate `short * short = int' in native + * arithmetic. + * + * Our product should, strictly speaking, be a `long quad', with 128 + * bits, but we are going to discard the upper 64. In other words, + * we are not interested in uv, but rather in (uv mod 2^2n). This + * makes some of the terms above vanish, and we get: + * + * (2^n)(high) + (2^n)(mid) + (2^n + 1)(low) + * + * or + * + * (2^n)(high + mid + low) + low + * + * Furthermore, `high' and `mid' can be computed mod 2^n, as any factor + * of 2^n in either one will also vanish. Only `low' need be computed + * mod 2^2n, and only because of the final term above. + */ +static quad_t __lmulq(u_int, u_int); + +quad_t +__muldi3(a, b) + quad_t a, b; +{ + union uu u, v, low, prod; + u_int high, mid, udiff, vdiff; + int negall, negmid; +#define u1 u.ul[H] +#define u0 u.ul[L] +#define v1 v.ul[H] +#define v0 v.ul[L] + + /* + * Get u and v such that u, v >= 0. When this is finished, + * u1, u0, v1, and v0 will be directly accessible through the + * int fields. negall records whether exactly one operand was + * negative, i.e. whether the final product has to be negated. + */ + if (a >= 0) + u.q = a, negall = 0; + else + u.q = -a, negall = 1; + if (b >= 0) + v.q = b; + else + v.q = -b, negall ^= 1; + + if (u1 == 0 && v1 == 0) { + /* + * An (I hope) important optimization occurs when u1 and v1 + * are both 0. This should be common since most numbers + * are small. Here the product is just u0*v0. + */ + prod.q = __lmulq(u0, v0); + } else { + /* + * Compute the three intermediate products, remembering + * whether the middle term is negative. We can discard + * any upper bits in high and mid, so we can use native + * u_int * u_int => u_int arithmetic.
+ */ + low.q = __lmulq(u0, v0); + + if (u1 >= u0) + negmid = 0, udiff = u1 - u0; + else + negmid = 1, udiff = u0 - u1; + if (v0 >= v1) + vdiff = v0 - v1; + else + vdiff = v1 - v0, negmid ^= 1; + mid = udiff * vdiff; + + high = u1 * v1; + + /* + * Assemble the final product. + * This is (2^n)(high + mid + low) + low taken mod 2^2n: the high + * word sums high, the signed mid and both words of low, while the + * low word is just the low word of low. + */ + prod.ul[H] = high + (negmid ? -mid : mid) + low.ul[L] + + low.ul[H]; + prod.ul[L] = low.ul[L]; + } + return (negall ? -prod.q : prod.q); +#undef u1 +#undef u0 +#undef v1 +#undef v0 +} + +/* + * Multiply two 2N-bit ints to produce a 4N-bit quad, where N is half + * the number of bits in an int (whatever that is---the code below + * does not care as long as quad.h does its part of the bargain---but + * typically N==16). + * + * We use the same algorithm from Knuth, but this time the modulo refinement + * does not apply. On the other hand, since N is half the size of an int, + * we can get away with native multiplication---none of our input terms + * exceeds (UINT_MAX >> 1). + * + * Note that, for u_int l, the quad-precision result + * + * l << N + * + * splits into high and low ints as HHALF(l) and LHUP(l) respectively. + */ +static quad_t +__lmulq(u_int u, u_int v) +{ + u_int u1, u0, v1, v0, udiff, vdiff, high, mid, low; + u_int prodh, prodl, was; + union uu prod; + int neg; + + u1 = HHALF(u); + u0 = LHALF(u); + v1 = HHALF(v); + v0 = LHALF(v); + + low = u0 * v0; + + /* This is the same small-number optimization as before. Both high halves are zero, so u0 * v0 is already the whole product.
*/ + if (u1 == 0 && v1 == 0) + return (low); + + if (u1 >= u0) + udiff = u1 - u0, neg = 0; + else + udiff = u0 - u1, neg = 1; + if (v0 >= v1) + vdiff = v0 - v1; + else + vdiff = v1 - v0, neg ^= 1; + mid = udiff * vdiff; + + high = u1 * v1; + + /* prod = (high << 2N) + (high << N); */ + prodh = high + HHALF(high); + prodl = LHUP(high); + + /* if (neg) prod -= mid << N; else prod += mid << N; the (prodl > was) and (prodl < was) tests detect wrap-around of the low word, i.e. the borrow or carry that has to go into prodh */ + if (neg) { + was = prodl; + prodl -= LHUP(mid); + prodh -= HHALF(mid) + (prodl > was); + } else { + was = prodl; + prodl += LHUP(mid); + prodh += HHALF(mid) + (prodl < was); + } + + /* prod += low << N */ + was = prodl; + prodl += LHUP(low); + prodh += HHALF(low) + (prodl < was); + /* ... + low; */ + if ((prodl += low) < low) + prodh++; + + /* return 4N-bit product */ + prod.ul[H] = prodh; + prod.ul[L] = prodl; + return (prod.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/negdi2.c b/src/VBox/Runtime/common/math/gcc/negdi2.c new file mode 100644 index 00000000..2eafcffa --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/negdi2.c @@ -0,0 +1,60 @@ +/* $NetBSD: negdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)negdi2.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: negdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return -a (or, equivalently, 0 - a), in quad. See subdi3.c. + * + * The negation is done on the two unsigned halves: the low half is + * negated first, then the high half is negated and additionally + * decremented by one when the negated low half is non-zero, which is + * the borrow out of 0 - low. + */ +quad_t +__negdi2(a) + quad_t a; +{ + union uu aa, res; + + aa.q = a; + res.ul[L] = -aa.ul[L]; + res.ul[H] = -aa.ul[H] - (res.ul[L] > 0); + return (res.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/notdi2.c b/src/VBox/Runtime/common/math/gcc/notdi2.c new file mode 100644 index 00000000..c671e037 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/notdi2.c @@ -0,0 +1,61 @@ +/* $NetBSD: notdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved.
+ * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)notdi2.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: notdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return ~a.
For some reason gcc calls this `one's complement' rather + * than `not'. + * + * The value is viewed through union uu and each u_int half is + * complemented on its own; the halves are indexed 0 and 1 directly + * because both receive the same treatment. + */ +quad_t +__one_cmpldi2(a) + quad_t a; +{ + union uu aa; + + aa.q = a; + aa.ul[0] = ~aa.ul[0]; + aa.ul[1] = ~aa.ul[1]; + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/qdivrem.c b/src/VBox/Runtime/common/math/gcc/qdivrem.c new file mode 100644 index 00000000..7ca2d38c --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/qdivrem.c @@ -0,0 +1,285 @@ +/* $NetBSD: qdivrem.c,v 1.12 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)qdivrem.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: qdivrem.c,v 1.12 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ + +#include "quad.h" + +#define B ((int)1 << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((u_int)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B: use unsigned short if they fit */ +#if UINT_MAX == 0xffffffffU && USHRT_MAX >= 0xffff +typedef unsigned short digit; +#else +typedef u_int digit; +#endif + +static void shl __P((digit *p, int len, int sh)); + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within u_int. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +u_quad_t +__qdivrem(uq, vq, arq) + u_quad_t uq, vq, *arq; +{ + union uu tmp; + digit *u, *v, *q; + digit v1, v2; + u_int qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + * For vq == 0 an integer division by zero is performed on purpose + * (presumably so that the platform's own divide error is raised) + * and the dividend is handed back as the remainder. For u < v the + * quotient is 0 and the remainder is u itself. + */ + if (vq == 0) { + /* divide by zero.
*/ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return (tmp.q); + } + if (uq < vq) { + if (arq) + *arq = uq; + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = (digit)HHALF(tmp.ul[H]); + u[2] = (digit)LHALF(tmp.ul[H]); + u[3] = (digit)HHALF(tmp.ul[L]); + u[4] = (digit)LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = (digit)HHALF(tmp.ul[H]); + v[2] = (digit)LHALF(tmp.ul[H]); + v[3] = (digit)HHALF(tmp.ul[L]); + v[4] = (digit)LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + u_int rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = (digit)(u[1] / t); + rbj = COMBINE(u[1] % t, u[2]); + q2 = (digit)(rbj / t); + rbj = COMBINE(rbj % t, u[3]); + q3 = (digit)(rbj / t); + rbj = COMBINE(rbj % t, u[4]); + q4 = (digit)(rbj / t); + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return (tmp.q); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes.
+ * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + u_int nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { + qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = (digit)LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = (digit)LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = (digit)LHALF(t); + t = HHALF(t); + } + u[j] = (digit)LHALF(u[j] + t); + } + q[j] = (digit)qhat; + } while (++j <= m); /* D7: loop on j.
*/ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (digit)(((u_int)u[i] >> d) | + LHALF((u_int)u[i - 1] << (HALF_BITS - d))); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.q); +} + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + * Both calls in __qdivrem are made only when the shift count is + * greater than zero, so (HALF_BITS - sh) stays below HALF_BITS there. + */ +static void +shl(digit *p, int len, int sh) +{ + int i; + + for (i = 0; i < len; i++) + p[i] = (digit)(LHALF((u_int)p[i] << sh) | + ((u_int)p[i + 1] >> (HALF_BITS - sh))); + p[i] = (digit)(LHALF((u_int)p[i] << sh)); +} diff --git a/src/VBox/Runtime/common/math/gcc/quad.h b/src/VBox/Runtime/common/math/gcc/quad.h new file mode 100644 index 00000000..c4197795 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/quad.h @@ -0,0 +1,174 @@ +/* $NetBSD: quad.h,v 1.17 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)quad.h 8.1 (Berkeley) 6/4/93 + */ + +#ifndef IPRT_INCLUDED_COMMON_MATH_quad_h +#define IPRT_INCLUDED_COMMON_MATH_quad_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +/* + * Quad arithmetic. + * + * This library makes the following assumptions: + * + * - The type long long (aka quad_t) exists. + * + * - A quad variable is exactly twice as long as `int'. + * + * - The machine's arithmetic is two's complement. + * + * This library can provide 128-bit arithmetic on a machine with 128-bit + * quads and 64-bit ints, for instance, or 96-bit arithmetic on machines + * with 48-bit ints. 
+ */
+
+/*
+ * The original NetBSD includes are compiled out; the IPRT build takes its
+ * types and limits from the IPRT headers in the #else branch instead.
+ */
+#if 0 /* iprt */
+#include <sys/types.h>
+#if !defined(_KERNEL) && !defined(_STANDALONE)
+#include <limits.h>
+#else
+#include <machine/limits.h>
+#endif
+#else /* iprt */
+# include <iprt/types.h>
+# include <iprt/nocrt/limits.h>
+/* __P(a) expands to its argument, so the declarations below are full prototypes. */
+# undef __P
+# define __P(a) a
+/* Every __GNUC_PREREQ__() test evaluates to true in this configuration. */
+# undef __GNUC_PREREQ__
+# define __GNUC_PREREQ__(m1,m2) 1
+/*
+ * Array index of the high and low int inside union uu.
+ * NOTE(review): hard-wired to the little endian layout by "#if 1"; the #else
+ * branch is never selected -- confirm that no big endian target builds this.
+ */
+# if 1 /* ASSUMES: little endian */
+# define _QUAD_HIGHWORD 1
+# define _QUAD_LOWWORD 0
+# else
+# define _QUAD_HIGHWORD 0
+# define _QUAD_LOWWORD 1
+# endif
+# if !defined(RT_OS_LINUX) || !defined(__KERNEL__) /* (linux/types.h defines u_int) */
+ typedef unsigned int u_int;
+# endif
+/* quad_t is a typedef everywhere except Solaris, where it is a macro for int64_t. */
+# if !defined(RT_OS_SOLARIS)
+ typedef int64_t quad_t;
+# else
+# define quad_t int64_t
+# endif
+ typedef uint64_t u_quad_t;
+ typedef quad_t *qaddr_t;
+#endif /* iprt */
+
+/*
+ * Depending on the desired operation, we view a `long long' (aka quad_t) in
+ * one or more of the following formats.
+ *
+ * The two-element arrays are indexed with H and L (defined below) to pick
+ * the high or the low int of the quad.
+ */
+union uu {
+    quad_t q; /* as a (signed) quad */
+    u_quad_t uq; /* as an unsigned quad */
+    int sl[2]; /* as two signed ints */
+    u_int ul[2]; /* as two unsigned ints */
+};
+
+/*
+ * Define high and low parts of a quad_t.
+ */
+#define H _QUAD_HIGHWORD
+#define L _QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.
+ */
+#define QUAD_BITS (sizeof(quad_t) * CHAR_BIT)
+#define INT_BITS (sizeof(int) * CHAR_BIT)
+#define HALF_BITS (sizeof(int) * CHAR_BIT / 2)
+
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the upper longword of
+ * ((quad_t)(x) << (number_of_bits_in_int/2)). (`x' must actually be u_int.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(int)*CHAR_BIT/2).
+ */
+/* Upper half of x: x shifted right by HALF_BITS. */
+#define HHALF(x) ((u_int)(x) >> HALF_BITS)
+/* Lower half of x: x masked down to its low HALF_BITS bits. */
+#define LHALF(x) ((u_int)(x) & (((int)1 << HALF_BITS) - 1))
+/* x shifted left by HALF_BITS, i.e. its lower half moved into the upper half. */
+#define LHUP(x) ((u_int)(x) << HALF_BITS)
+
+/*
+ * XXX
+ * Compensate for gcc 1 vs gcc 2. Gcc 1 defines ?sh?di3's second argument
+ * as u_quad_t, while gcc 2 correctly uses int. Unfortunately, we still use
+ * both compilers.
+ *
+ * In the IPRT configuration of this header __GNUC_PREREQ__() is defined
+ * to 1, so the first branch (unsigned int) is the one that is compiled.
+ */
+#if __GNUC_PREREQ__(2, 0) || defined(lint)
+typedef unsigned int qshift_t;
+#else
+typedef u_quad_t qshift_t;
+#endif
+
+/* Prototypes of the quad helper routines; __P() is what supplies the parameter lists. */
+RT_C_DECLS_BEGIN
+quad_t __adddi3 __P((quad_t, quad_t));
+quad_t __anddi3 __P((quad_t, quad_t));
+quad_t __ashldi3 __P((quad_t, qshift_t));
+quad_t __ashrdi3 __P((quad_t, qshift_t));
+int __cmpdi2 __P((quad_t, quad_t ));
+quad_t __divdi3 __P((quad_t, quad_t));
+quad_t __fixdfdi __P((double));
+quad_t __fixsfdi __P((float));
+u_quad_t __fixunsdfdi __P((double));
+u_quad_t __fixunssfdi __P((float));
+double __floatdidf __P((quad_t));
+float __floatdisf __P((quad_t));
+double __floatunsdidf __P((u_quad_t));
+quad_t __iordi3 __P((quad_t, quad_t));
+quad_t __lshldi3 __P((quad_t, qshift_t));
+quad_t __lshrdi3 __P((quad_t, qshift_t));
+quad_t __moddi3 __P((quad_t, quad_t));
+quad_t __muldi3 __P((quad_t, quad_t));
+quad_t __negdi2 __P((quad_t));
+quad_t __one_cmpldi2 __P((quad_t));
+u_quad_t __qdivrem __P((u_quad_t, u_quad_t, u_quad_t *));
+quad_t __subdi3 __P((quad_t, quad_t));
+int __ucmpdi2 __P((u_quad_t, u_quad_t));
+u_quad_t __udivdi3 __P((u_quad_t, u_quad_t ));
+u_quad_t __umoddi3 __P((u_quad_t, u_quad_t ));
+quad_t __xordi3 __P((quad_t, quad_t));
+RT_C_DECLS_END
+
+#endif /* !IPRT_INCLUDED_COMMON_MATH_quad_h */
+
diff --git a/src/VBox/Runtime/common/math/gcc/subdi3.c b/src/VBox/Runtime/common/math/gcc/subdi3.c
new file mode 100644
index 00000000..2751acc2
--- /dev/null
+++ b/src/VBox/Runtime/common/math/gcc/subdi3.c
@@ -0,0 +1,62 @@
+/* $NetBSD: subdi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $ */
+
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)subdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: subdi3.c,v 1.9 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Subtract two quad values. This is trivial since a one-bit carry + * from a single u_int difference x-y occurs if and only if (x-y) > x. + */ +quad_t +__subdi3(a, b) + quad_t a, b; +{ + union uu aa, bb, diff; + + aa.q = a; + bb.q = b; + diff.ul[L] = aa.ul[L] - bb.ul[L]; + diff.ul[H] = aa.ul[H] - bb.ul[H] - (diff.ul[L] > aa.ul[L]); + return (diff.q); +} diff --git a/src/VBox/Runtime/common/math/gcc/ucmpdi2.c b/src/VBox/Runtime/common/math/gcc/ucmpdi2.c new file mode 100644 index 00000000..47d79164 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/ucmpdi2.c @@ -0,0 +1,61 @@ +/* $NetBSD: ucmpdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)ucmpdi2.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: ucmpdi2.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return 0, 1, or 2 as a <, =, > b respectively. + * Neither a nor b are considered signed. + */ +int +__ucmpdi2(a, b) + u_quad_t a, b; +{ + union uu aa, bb; + + aa.uq = a; + bb.uq = b; + return (aa.ul[H] < bb.ul[H] ? 0 : aa.ul[H] > bb.ul[H] ? 2 : + aa.ul[L] < bb.ul[L] ? 0 : aa.ul[L] > bb.ul[L] ? 2 : 1); +} diff --git a/src/VBox/Runtime/common/math/gcc/udivdi3.c b/src/VBox/Runtime/common/math/gcc/udivdi3.c new file mode 100644 index 00000000..9069f4d2 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/udivdi3.c @@ -0,0 +1,56 @@ +/* $NetBSD: udivdi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)udivdi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: udivdi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Divide two unsigned quads. 
+ */ +u_quad_t +__udivdi3(a, b) + u_quad_t a, b; +{ + + return (__qdivrem(a, b, (u_quad_t *)0)); +} diff --git a/src/VBox/Runtime/common/math/gcc/udivmoddi4.c b/src/VBox/Runtime/common/math/gcc/udivmoddi4.c new file mode 100644 index 00000000..b6173b46 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/udivmoddi4.c @@ -0,0 +1,65 @@ +/* $Id: udivmoddi4.c $ */ +/** @file + * IPRT - __udivmoddi4 implementation + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + +#include <iprt/stdint.h> +#include <iprt/uint64.h> + +uint64_t __udivmoddi4(uint64_t u64A, uint64_t u64B, uint64_t *pu64R); + +/** + * __udivmoddi4() implementation to satisfy external references from 32-bit + * code generated by gcc-7 or later. + * + * @param u64A The divident value. + * @param u64B The divisor value. + * @param pu64R A pointer to the reminder. May be NULL. + * @returns u64A / u64B + */ +uint64_t __udivmoddi4(uint64_t u64A, uint64_t u64B, uint64_t *pu64R) +{ + RTUINT64U Divident; + RTUINT64U Divisor; + RTUINT64U Quotient; + RTUINT64U Reminder; + Divident.u = u64A; + Divisor.u = u64B; + Quotient.u = 0; /* shut up gcc 10 */ + Reminder.u = 0; /* shut up gcc 10 */ + RTUInt64DivRem(&Quotient, &Reminder, &Divident, &Divisor); + if (pu64R) + *pu64R = Reminder.u; + return Quotient.u; +} diff --git a/src/VBox/Runtime/common/math/gcc/umoddi3.c b/src/VBox/Runtime/common/math/gcc/umoddi3.c new file mode 100644 index 00000000..2e65ecab --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/umoddi3.c @@ -0,0 +1,58 @@ +/* $NetBSD: umoddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)umoddi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: umoddi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return remainder after dividing two unsigned quads. + */ +u_quad_t +__umoddi3(a, b) + u_quad_t a, b; +{ + u_quad_t r; + + (void)__qdivrem(a, b, &r); + return (r); +} diff --git a/src/VBox/Runtime/common/math/gcc/xordi3.c b/src/VBox/Runtime/common/math/gcc/xordi3.c new file mode 100644 index 00000000..aa5db229 --- /dev/null +++ b/src/VBox/Runtime/common/math/gcc/xordi3.c @@ -0,0 +1,61 @@ +/* $NetBSD: xordi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)xordi3.c 8.1 (Berkeley) 6/4/93"; +#else +__RCSID("$NetBSD: xordi3.c,v 1.8 2005/12/11 12:24:37 christos Exp $"); +#endif +#endif*/ /* LIBC_SCCS and not lint */ + +#include "quad.h" + +/* + * Return a ^ b, in quad. 
+ */ +quad_t +__xordi3(a, b) + quad_t a, b; +{ + union uu aa, bb; + + aa.q = a; + bb.q = b; + aa.ul[0] ^= bb.ul[0]; + aa.ul[1] ^= bb.ul[1]; + return (aa.q); +} diff --git a/src/VBox/Runtime/common/math/isinf.cpp b/src/VBox/Runtime/common/math/isinf.cpp new file mode 100644 index 00000000..6632c888 --- /dev/null +++ b/src/VBox/Runtime/common/math/isinf.cpp @@ -0,0 +1,57 @@ +/* $Id: isinf.cpp $ */ +/** @file + * IPRT - No-CRT - isinf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef isinf +int RT_NOCRT(isinf)(double rd) +{ + AssertCompile(sizeof(rd) == sizeof(RTFLOAT64U)); + RTFLOAT64U u; + u.rd = rd; + return RTFLOAT64U_IS_INF(&u); +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(isinf); + diff --git a/src/VBox/Runtime/common/math/isnan.cpp b/src/VBox/Runtime/common/math/isnan.cpp new file mode 100644 index 00000000..acd0d0a4 --- /dev/null +++ b/src/VBox/Runtime/common/math/isnan.cpp @@ -0,0 +1,57 @@ +/* $Id: isnan.cpp $ */ +/** @file + * IPRT - No-CRT - isnan(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef isnan +int RT_NOCRT(isnan)(double rd) +{ + AssertCompile(sizeof(rd) == sizeof(RTFLOAT64U)); + RTFLOAT64U u; + u.rd = rd; + return RTFLOAT64U_IS_NAN(&u); +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(isnan); + diff --git a/src/VBox/Runtime/common/math/isnanf.cpp b/src/VBox/Runtime/common/math/isnanf.cpp new file mode 100644 index 00000000..cb6f9e8a --- /dev/null +++ b/src/VBox/Runtime/common/math/isnanf.cpp @@ -0,0 +1,57 @@ +/* $Id: isnanf.cpp $ */ +/** @file + * IPRT - No-CRT - isnanf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/assertcompile.h> +#include <iprt/assert.h> + + +#undef isnanf +int RT_NOCRT(isnanf)(float r32) +{ + AssertCompile(sizeof(r32) == sizeof(RTFLOAT32U)); + RTFLOAT32U u; + u.r = r32; + return RTFLOAT32U_IS_NAN(&u); +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(isnanf); + diff --git a/src/VBox/Runtime/common/math/ldexp.asm b/src/VBox/Runtime/common/math/ldexp.asm new file mode 100644 index 00000000..257c4172 --- /dev/null +++ b/src/VBox/Runtime/common/math/ldexp.asm @@ -0,0 +1,89 @@ +; $Id: ldexp.asm $ +;; @file +; IPRT - No-CRT ldexp - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. 
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+
+
+BEGINCODE
+
+;;
+; Computes rd * 2^exp with the x87 FSCALE instruction.
+;
+; On AMD64 the 10h bytes of locals reserved by the prologue are scratch space:
+; the dword at [rbp - 8h] carries exp to the FPU and the qword at [rbp - 10h]
+; carries rd to the FPU and the result back to xmm0.  The x86 code loads both
+; arguments straight from the caller's stack and leaves the result in st(0).
+;
+; @returns x86: st(0)  AMD64: xmm0
+; @param   rd   x86: [xBP + xCB*2]  AMD64: xmm0
+; @param   exp  x86: [xBP + xCB*2 + 8] (= [ebp + 10h])  AMD64: gcc:edi msc:edx
+RT_NOCRT_BEGINPROC ldexp
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        sub     xSP, 10h
+        SEH64_ALLOCATE_STACK 10h
+        SEH64_END_PROLOGUE
+
+        ;
+        ; Load the value and scaling factor.
+        ; exp is pushed first so that it ends up in st1 with rd in st0,
+        ; which is the operand layout FSCALE works on.
+        ;
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+        mov     [rbp - 8h], edi         ; exp as passed by the gcc convention
+ %else
+        mov     [rbp - 8h], edx         ; exp as passed by the msc convention
+ %endif
+        fild    dword [rbp - 8h]        ; st0 = exp
+        movsd   [rbp - 10h], xmm0       ; bounce rd through memory to reach the FPU
+        fld     qword [rbp - 10h]       ; st0 = rd, st1 = exp
+%else
+        fild    dword [xBP + xCB*2 + 8] ; st0 = exp
+        fld     qword [xBP + xCB*2]     ; st0 = rd, st1 = exp
+%endif
+
+        ;
+        ; Do the scaling and return the result.
+        ;
+        fscale                          ; st0 = st0 * 2^st1 (st1 truncated to an integer)
+
+        fstp    st1                     ; drop the exponent; the result is now alone in st0
+%ifdef RT_ARCH_AMD64
+        fstp    qword [rbp - 10h]       ; pop the result and return it in xmm0
+        movsd   xmm0, [rbp - 10h]
+%endif
+
+        leave
+        ret
+ENDPROC RT_NOCRT(ldexp)
+
diff --git a/src/VBox/Runtime/common/math/ldexpf.asm b/src/VBox/Runtime/common/math/ldexpf.asm
new file mode 100644
index 00000000..68cce655
--- /dev/null
+++ b/src/VBox/Runtime/common/math/ldexpf.asm
@@ -0,0 +1,89 @@
+; $Id: ldexpf.asm $
+;; @file
+; IPRT - No-CRT ldexpf - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+
+
+BEGINCODE
+
+;;
+; Computes r32 * 2^exp with the x87 FSCALE instruction.
+;
+; On AMD64 the 10h bytes of locals reserved by the prologue are scratch space:
+; the dword at [rbp - 8h] carries exp to the FPU and the dword at [rbp - 10h]
+; carries r32 to the FPU and the result back to xmm0.  The x86 code loads both
+; arguments straight from the caller's stack and leaves the result in st(0).
+;
+; @returns x86: st(0)  AMD64: xmm0
+; @param   r32  x86: [xBP + xCB*2]  AMD64: xmm0
+; @param   exp  x86: [xBP + xCB*2 + 4] (= [ebp + 0ch])  AMD64: gcc:edi msc:edx
+RT_NOCRT_BEGINPROC ldexpf
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        sub     xSP, 10h
+        SEH64_ALLOCATE_STACK 10h
+        SEH64_END_PROLOGUE
+
+        ;
+        ; Load the value and scaling factor.
+        ; exp is pushed first so that it ends up in st1 with r32 in st0,
+        ; which is the operand layout FSCALE works on.
+        ;
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+        mov     [rbp - 8h], edi         ; exp as passed by the gcc convention
+ %else
+        mov     [rbp - 8h], edx         ; exp as passed by the msc convention
+ %endif
+        fild    dword [rbp - 8h]        ; st0 = exp
+        movss   [rbp - 10h], xmm0       ; bounce r32 through memory to reach the FPU
+        fld     dword [rbp - 10h]       ; st0 = r32, st1 = exp
+%else
+        fild    dword [xBP + xCB*2 + 4] ; st0 = exp (same frame register spelling as the next line)
+        fld     dword [xBP + xCB*2]     ; st0 = r32, st1 = exp
+%endif
+
+        ;
+        ; Do the scaling and return the result.
+        ;
+        fscale                          ; st0 = st0 * 2^st1 (st1 truncated to an integer)
+
+        fstp    st1                     ; drop the exponent; the result is now alone in st0
+%ifdef RT_ARCH_AMD64
+        fstp    dword [rbp - 10h]       ; pop the result and return it in xmm0
+        movss   xmm0, [rbp - 10h]
+%endif
+
+        leave
+        ret
+ENDPROC RT_NOCRT(ldexpf)
+
diff --git a/src/VBox/Runtime/common/math/ldexpl.asm b/src/VBox/Runtime/common/math/ldexpl.asm
new file mode 100644
index 00000000..6ad3591e
--- /dev/null
+++ b/src/VBox/Runtime/common/math/ldexpl.asm
@@ -0,0 +1,80 @@
+; $Id: ldexpl.asm $
+;; @file
+; IPRT - No-CRT ldexpl - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+
+BEGINCODE
+
+;;
+; Computes lrd * 2^exp with the x87 FSCALE instruction.
+;
+; The result is left in st(0) on both architectures.  lrd is always loaded
+; as a tword from the caller's stack at [xBP + xCB*2]; on AMD64 exp is first
+; spilled to the local at [rbp - 10h] so that FILD can read it.
+;
+; NOTE(review): lrd is read from the stack on AMD64 as well, including in the
+; msc:edx variant -- confirm that this matches how a long double argument is
+; passed by every AMD64 convention this file is built for.
+;
+; @returns st(0)
+; @param   lrd  [xBP + xCB*2]
+; @param   exp  x86: [ebp + xCB*2 + RTLRD_CB] (= [ebp + 14h])  AMD64: gcc:edi msc:edx
+RT_NOCRT_BEGINPROC ldexpl
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        sub     xSP, 10h
+        SEH64_ALLOCATE_STACK 10h
+        SEH64_END_PROLOGUE
+
+        ;
+        ; Load the value and scaling factor.
+        ; exp is pushed first so that it ends up in st1 with lrd in st0.
+        ;
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+        mov     [rbp - 10h], edi        ; exp as passed by the gcc convention
+ %else
+        mov     [rbp - 10h], edx        ; exp as passed by the msc convention
+ %endif
+        fild    dword [rbp - 10h]       ; st0 = exp
+%else
+        fild    dword [ebp + xCB*2 + RTLRD_CB] ; st0 = exp, stored right after lrd
+%endif
+        fld     tword [xBP + xCB*2]     ; st0 = lrd, st1 = exp
+
+        ;
+        ; Do the scaling and return the result.
+        ;
+        fscale                          ; st0 = st0 * 2^st1 (st1 truncated to an integer)
+        fstp    st1                     ; drop the exponent; the result is now alone in st0
+
+        leave
+        ret
+ENDPROC RT_NOCRT(ldexpl)
+
diff --git a/src/VBox/Runtime/common/math/llrint.asm b/src/VBox/Runtime/common/math/llrint.asm
new file mode 100644
index 00000000..bcc7d071
--- /dev/null
+++ b/src/VBox/Runtime/common/math/llrint.asm
@@ -0,0 +1,72 @@
+; $Id: llrint.asm $
+;; @file
+; IPRT - No-CRT llrint - AMD64 & X86.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"


BEGINCODE

;;
; No-CRT llrint: converts a double to a 64-bit integer using the current
; rounding direction.
;
; AMD64 uses a single CVTSD2SI. 32-bit x86 rounds with FISTP into a stack
; temporary and returns the two halves in edx:eax.
;
; @returns  32-bit: edx:eax     64-bit: rax
; @param    rd      32-bit: [esp + 4h]      64-bit: xmm0
;
RT_NOCRT_BEGINPROC llrint
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_X86
        ; Only the x87 path needs a temporary for the 64-bit result.
        sub     xSP, 10h
        SEH64_ALLOCATE_STACK 10h
%endif
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
        cvtsd2si rax, xmm0
%else
        fld     qword [xBP + xCB*2]         ; st0=rd
        fistp   qword [xSP]                 ; store as rounded 64-bit integer, popping st0
        fwait
        mov     edx, [xSP + 4]              ; high dword
        mov     eax, [xSP]                  ; low dword
%endif

        leave
        ret
ENDPROC   RT_NOCRT(llrint)
+; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; Round rf to the nearest integer value, rounding according to the current rounding direction. 
; @returns  32-bit: edx:eax     64-bit: rax
; @param    rf      32-bit: [esp + 4h]      64-bit: xmm0
;
; AMD64 uses a single CVTSS2SI. 32-bit x86 rounds with FISTP into a stack
; temporary and returns the two halves in edx:eax.
;
RT_NOCRT_BEGINPROC llrintf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_X86
        ; Only the x87 path needs a temporary for the 64-bit result.
        sub     xSP, 10h
        SEH64_ALLOCATE_STACK 10h
%endif
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
        cvtss2si rax, xmm0
%else
        fld     dword [xBP + xCB*2]         ; st0=rf
        fistp   qword [xSP]                 ; store as rounded 64-bit integer, popping st0
        fwait
        mov     edx, [xSP + 4]              ; high dword
        mov     eax, [xSP]                  ; low dword
%endif

        leave
        ret
ENDPROC   RT_NOCRT(llrintf)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"


BEGINCODE

;;
; No-CRT llrintl: converts an extended precision value to a 64-bit integer
; using the current rounding direction.
;
; The value is loaded from the caller's stack as a tword, stored as a 64-bit
; integer with FISTP into a local temporary, and then moved into the return
; register(s).
;
; @returns  32-bit: edx:eax     64-bit: rax
; @param    lrd     [xBP + xCB*2]
;
RT_NOCRT_BEGINPROC llrintl
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 10h
        SEH64_ALLOCATE_STACK 10h
        SEH64_END_PROLOGUE

        fld     tword [xBP + xCB*2]         ; st0=lrd
        fistp   qword [xSP]                 ; store as rounded 64-bit integer, popping st0
        fwait

%ifdef RT_ARCH_AMD64
        mov     rax, [xSP]
%else
        mov     edx, [xSP + 4]              ; high dword
        mov     eax, [xSP]                  ; low dword
%endif

        leave
        ret
ENDPROC   RT_NOCRT(llrintl)
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/nocrt/limits.h> +#include <iprt/nocrt/fenv.h> + + +#undef llround +long long RT_NOCRT(llround)(double rd) +{ + if (isfinite(rd)) + { + rd = RT_NOCRT(round)(rd); + if (rd >= (double)LLONG_MIN && rd <= (double)LLONG_MAX) + return (long long)rd; + RT_NOCRT(feraiseexcept)(FE_INVALID); + return rd > 0.0 ? LLONG_MAX : LLONG_MIN; + } + RT_NOCRT(feraiseexcept)(FE_INVALID); + if (RT_NOCRT(isinf)(rd) && rd < 0.0) + return LLONG_MIN; + return LLONG_MAX; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(llround); + diff --git a/src/VBox/Runtime/common/math/llroundf.cpp b/src/VBox/Runtime/common/math/llroundf.cpp new file mode 100644 index 00000000..ae58186f --- /dev/null +++ b/src/VBox/Runtime/common/math/llroundf.cpp @@ -0,0 +1,65 @@ +/* $Id: llroundf.cpp $ */ +/** @file + * IPRT - No-CRT - llroundf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/nocrt/limits.h> +#include <iprt/nocrt/fenv.h> + + +#undef llroundf +long long RT_NOCRT(llroundf)(float r32) +{ + if (isfinite(r32)) + { + r32 = RT_NOCRT(roundf)(r32); + if (r32 >= (float)LLONG_MIN && r32 <= (float)LLONG_MAX) + return (long long)r32; + RT_NOCRT(feraiseexcept)(FE_INVALID); + return r32 > 0.0f ? 
LLONG_MAX : LLONG_MIN; + } + RT_NOCRT(feraiseexcept)(FE_INVALID); + if (RT_NOCRT(__isinff)(r32) && r32 < 0.0) + return LLONG_MIN; + return LLONG_MAX; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(llroundf); + diff --git a/src/VBox/Runtime/common/math/llroundl.cpp b/src/VBox/Runtime/common/math/llroundl.cpp new file mode 100644 index 00000000..439fad8f --- /dev/null +++ b/src/VBox/Runtime/common/math/llroundl.cpp @@ -0,0 +1,65 @@ +/* $Id: llroundl.cpp $ */ +/** @file + * IPRT - No-CRT - llroundl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/nocrt/limits.h> +#include <iprt/nocrt/fenv.h> + + +#undef llroundl +long long RT_NOCRT(llroundl)(long double lrd) +{ + if (isfinite(lrd)) + { + lrd = RT_NOCRT(roundl)(lrd); + if (lrd >= (long double)LLONG_MIN && lrd <= (long double)LLONG_MAX) + return (long long)lrd; + RT_NOCRT(feraiseexcept)(FE_INVALID); + return lrd > 0.0L ? LLONG_MAX : LLONG_MIN; + } + RT_NOCRT(feraiseexcept)(FE_INVALID); + if (RT_NOCRT(__isinfl)(lrd) && lrd < 0.0) + return LLONG_MIN; + return LLONG_MAX; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(llroundl); + diff --git a/src/VBox/Runtime/common/math/log.asm b/src/VBox/Runtime/common/math/log.asm new file mode 100644 index 00000000..b94f3ba6 --- /dev/null +++ b/src/VBox/Runtime/common/math/log.asm @@ -0,0 +1,97 @@ +; $Id: log.asm $ +;; @file +; IPRT - No-CRT log - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. 
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; No-CRT log: natural logarithm of a double precision value.
;
; ln(2) is loaded as the multiplier, so the x87 base-2 logarithm instructions
; yield a natural logarithm.  When |rd - 1.0| <= 0.29 (or the comparison is
; unordered) FYL2XP1 is applied to rd - 1.0, otherwise FYL2X is applied to rd.
;
; @returns  32-bit: st(0)                   64-bit: xmm0
; @param    rd      32-bit: [ebp + 08h]     64-bit: xmm0
;
RT_NOCRT_BEGINPROC log
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_AMD64
        ; Scratch space for moving the value between xmm0 and the FPU.
        sub     xSP, 10h
        SEH64_ALLOCATE_STACK 10h
%endif
        SEH64_END_PROLOGUE

        fldln2                                  ; st0=ln(2)
%ifdef RT_ARCH_AMD64
        movsd   [xBP - 10h], xmm0
        fld     qword [xBP - 10h]               ; st1=ln(2) st0=rd
%else
        fld     qword [xBP + xCB*2]             ; st1=ln(2) st0=rd
%endif
        fld     st0                             ; st2=ln(2) st1=rd st0=rd
        fsub    qword [.s_r64One xWrtRIP]       ; st2=ln(2) st1=rd st0=rd-1.0
        fld     st0                             ; st3=ln(2) st2=rd st1=rd-1.0 st0=rd-1.0

        fabs                                    ; st3=ln(2) st2=rd st1=rd-1.0 st0=|rd-1.0|
        fcomp   qword [.s_r64Limit xWrtRIP]     ; st2=ln(2) st1=rd st0=rd-1.0
        fnstsw  ax
        and     eax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        jnz     .near_one                       ; below, equal or unordered

        ; Away from 1.0: drop rd-1.0 and take the plain logarithm.
        fstp    st0                             ; st1=ln(2) st0=rd
        fyl2x                                   ; st0=ln(2) * log2(rd)
        jmp     .return

        ; Close to 1.0: drop rd and work on rd-1.0 instead.
.near_one:
        fstp    st1                             ; st1=ln(2) st0=rd-1.0
        fyl2xp1                                 ; st0=ln(2) * log2((rd-1.0) + 1.0)

.return:
%ifdef RT_ARCH_AMD64
        ; The 64-bit ABIs return doubles in xmm0.
        fstp    qword [xBP - 10h]
        movsd   xmm0, [xBP - 10h]
%endif
        leave
        ret

ALIGNCODE(8)
.s_r64One:      dq 1.0
.s_r64Limit:    dq 0.29
ENDPROC   RT_NOCRT(log)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

extern NAME(RT_NOCRT(feraiseexcept))

;;
; Compute the log2 of rd
;
; Special cases handled here:
;   - +1.0:                 returns +0.0.
;   - negative (incl -Inf): calls feraiseexcept(X86_FSW_IE) and returns NaN.
;   - +/-0.0:               calls feraiseexcept(X86_FSW_ZE) and returns -Inf.
;   - +Inf and NaN:         the input is returned as is.
;
; @returns st(0) / xmm0
; @param    rd      [xSP + xCB*2] / xmm0
RT_NOCRT_BEGINPROC log2
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movsd   [xBP - 10h], xmm0
        fld     qword [xBP - 10h]
%else
        fld     qword [xBP + xCB*2]
%endif

        ;
        ; Weed out non-normal values.
        ;
        fxam
        fnstsw  ax
        mov     cx, ax                          ; keep a copy for the sign (C1) checks below
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2                  ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3                  ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2     ; Denormals
        je      .finite
        cmp     ax, X86_FSW_C0 | X86_FSW_C2     ; Infinity.
        je      .inf
        jmp     .nan

.finite:
        ; Negative number?
        test    cx, X86_FSW_C1
        jnz     .negative

        ; Is it +1.0?
        fld1
        fcomip  st1
        jz      .plus_one

        ;
        ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
        ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both
        ; sides of zero.  We try use it if we can.
        ;
.above_one:
        ; For both fyl2x and fyl2xp1 we need st1=1.0.
        fld1
        fxch    st0, st1                        ; -> st0=input; st1=1.0

        ; Check if the input is within the fyl2xp1 range.
        fld     qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
        fcomip  st0, st1
        jbe     .cannot_use_fyl2xp1             ; max <= input

        fld     qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
        fcomip  st0, st1
        jae     .cannot_use_fyl2xp1             ; min >= input

        ; Do the calculation.
.use_fyl2xp1:
        fsub    st0, st1                        ; -> st0=input-1; st1=1.0
        fyl2xp1                                 ; -> st0=1.0*log2(st0+1.0)
        jmp     .return_val

.cannot_use_fyl2xp1:
        fyl2x                                   ; -> st0=1.0*log2(st0)

        ;
        ; Return st0.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    qword [xBP - 10h]
        movsd   xmm0, [xBP - 10h]
%endif
.return:
        leave
        ret


        ;
        ; +1.0: Return +0.0.
        ;
.plus_one:
        ffreep  st0
        fldz
        jmp     .return_val

        ;
        ; Negative numbers: Return NaN and raise invalid operation.
        ;
.negative:
.minus_inf:
        ; Drop the input so it isn't left behind on the FPU stack when we
        ; return (the .zero and .nan paths do the same).
        ffreep  st0

        ; Raise invalid operation
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_IE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_IE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_IE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load NaN
%ifdef RT_ARCH_AMD64
        movsd   xmm0, [.s_r64NaN xWrtRIP]
%else
        fld     qword [.s_r64NaN xWrtRIP]
%endif
        jmp     .return

        ;
        ; +/-0.0: Return -Inf and raise divide by zero error.
        ;
.zero:
        ffreep  st0

        ; Raise div/0
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_ZE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_ZE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_ZE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load -Inf
%ifdef RT_ARCH_AMD64
        movsd   xmm0, [.s_r64MinusInf xWrtRIP]
%else
        fld     qword [.s_r64MinusInf xWrtRIP]
%endif
        jmp     .return

        ;
        ; -Inf: Same as other negative numbers
        ; +Inf: return +Inf.  Join path with NaN.
        ;
.inf:
        test    cx, X86_FSW_C1                  ; sign bit
        jnz     .minus_inf

        ;
        ; NaN: Return the input NaN value as is, if we can.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ; xmm0 still holds the input, so only the FPU copy needs dropping.
        ffreep  st0
%endif
        jmp     .return

ALIGNCODE(8)
        ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)).
        ; These two variables is that range + 1.0, so we can compare directly
        ; with the input w/o any extra fsub and fabs work.
.s_r64AbsFyL2xP1InputMin:
        dq      0.708                           ; -0.292 + 1.0
.s_r64AbsFyL2xP1InputMax:
        dq      1.292
;.s_r64AbsFyL2xP1Range:
;        dq      0.292
.s_r64MinusInf:
        dq      RTFLOAT64U_INF_MINUS
.s_r64NaN:
        dq      RTFLOAT64U_QNAN_MINUS
ENDPROC   RT_NOCRT(log2)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

extern NAME(RT_NOCRT(feraiseexcept))

;;
; Compute the log2f of rf
;
; Special cases handled here:
;   - +1.0:                 returns +0.0.
;   - negative (incl -Inf): calls feraiseexcept(X86_FSW_IE) and returns NaN.
;   - +/-0.0:               calls feraiseexcept(X86_FSW_ZE) and returns -Inf.
;   - +Inf and NaN:         the input is returned as is.
;
; @returns st(0) / xmm0
; @param    rf      [xSP + xCB*2] / xmm0
RT_NOCRT_BEGINPROC log2f
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; Weed out non-normal values.
        ;
        fxam
        fnstsw  ax
        mov     cx, ax                          ; keep a copy for the sign (C1) checks below
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2                  ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3                  ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2     ; Denormals
        je      .finite
        cmp     ax, X86_FSW_C0 | X86_FSW_C2     ; Infinity.
        je      .inf
        jmp     .nan

.finite:
        ; Negative number?
        test    cx, X86_FSW_C1
        jnz     .negative

        ; Is it +1.0?
        fld1
        fcomip  st1
        jz      .plus_one

        ;
        ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
        ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both
        ; sides of zero.  We try use it if we can.
        ;
.above_one:
        ; For both fyl2x and fyl2xp1 we need st1=1.0.
        fld1
        fxch    st0, st1                        ; -> st0=input; st1=1.0

        ; Check if the input is within the fyl2xp1 range.
        fld     qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
        fcomip  st0, st1
        jbe     .cannot_use_fyl2xp1             ; max <= input

        fld     qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
        fcomip  st0, st1
        jae     .cannot_use_fyl2xp1             ; min >= input

        ; Do the calculation.
.use_fyl2xp1:
        fsub    st0, st1                        ; -> st0=input-1; st1=1.0
        fyl2xp1                                 ; -> st0=1.0*log2(st0+1.0)
        jmp     .return_val

.cannot_use_fyl2xp1:
        fyl2x                                   ; -> st0=1.0*log2(st0)

        ;
        ; Return st0.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
.return:
        leave
        ret


        ;
        ; +1.0: Return +0.0.
        ;
.plus_one:
        ffreep  st0
        fldz
        jmp     .return_val

        ;
        ; Negative numbers: Return NaN and raise invalid operation.
        ;
.negative:
.minus_inf:
        ; Drop the input so it isn't left behind on the FPU stack when we
        ; return (the .zero and .nan paths do the same).
        ffreep  st0

        ; Raise invalid operation
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_IE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_IE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_IE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load NaN
%ifdef RT_ARCH_AMD64
        movss   xmm0, [.s_r32NaN xWrtRIP]
%else
        fld     dword [.s_r32NaN xWrtRIP]
%endif
        jmp     .return

        ;
        ; +/-0.0: Return -Inf and raise divide by zero error.
        ;
.zero:
        ffreep  st0

        ; Raise div/0
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_ZE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_ZE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_ZE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load -Inf
%ifdef RT_ARCH_AMD64
        movss   xmm0, [.s_r32MinusInf xWrtRIP]
%else
        fld     dword [.s_r32MinusInf xWrtRIP]
%endif
        jmp     .return

        ;
        ; -Inf: Same as other negative numbers
        ; +Inf: return +Inf.  Join path with NaN.
        ;
.inf:
        test    cx, X86_FSW_C1                  ; sign bit
        jnz     .minus_inf

        ;
        ; NaN: Return the input NaN value as is, if we can.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ; xmm0 still holds the input, so only the FPU copy needs dropping.
        ffreep  st0
%endif
        jmp     .return

ALIGNCODE(8)
        ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)).
        ; These two variables is that range + 1.0, so we can compare directly
        ; with the input w/o any extra fsub and fabs work.
.s_r64AbsFyL2xP1InputMin:
        dq      0.708                           ; -0.292 + 1.0
.s_r64AbsFyL2xP1InputMax:
        dq      1.292
.s_r32MinusInf:
        dd      RTFLOAT32U_INF_MINUS
.s_r32NaN:
        dd      RTFLOAT32U_QNAN_MINUS
ENDPROC   RT_NOCRT(log2f)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; No-CRT logf: natural logarithm of a single precision value.
;
; ln(2) is loaded as the multiplier, so the x87 base-2 logarithm instructions
; yield a natural logarithm.  When |r32 - 1.0| <= 0.29 (or the comparison is
; unordered) FYL2XP1 is applied to r32 - 1.0, otherwise FYL2X is applied to r32.
;
; @returns  32-bit: st(0)                   64-bit: xmm0
; @param    r32     32-bit: [ebp + 08h]     64-bit: xmm0
;
RT_NOCRT_BEGINPROC logf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_AMD64
        ; Scratch space for moving the value between xmm0 and the FPU.
        sub     xSP, 10h
        SEH64_ALLOCATE_STACK 10h
%endif
        SEH64_END_PROLOGUE

        fldln2                                  ; st0=ln(2)
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0
        fld     dword [xBP - 10h]               ; st1=ln(2) st0=r32
%else
        fld     dword [xBP + xCB*2]             ; st1=ln(2) st0=r32
%endif
        fld     st0                             ; st2=ln(2) st1=r32 st0=r32
        fsub    qword [.s_r64One xWrtRIP]       ; st2=ln(2) st1=r32 st0=r32-1.0
        fld     st0                             ; st3=ln(2) st2=r32 st1=r32-1.0 st0=r32-1.0

        fabs                                    ; st3=ln(2) st2=r32 st1=r32-1.0 st0=|r32-1.0|
        fcomp   qword [.s_r64Limit xWrtRIP]     ; st2=ln(2) st1=r32 st0=r32-1.0
        fnstsw  ax
        and     eax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        jnz     .near_one                       ; below, equal or unordered

        ; Away from 1.0: drop r32-1.0 and take the plain logarithm.
        fstp    st0                             ; st1=ln(2) st0=r32
        fyl2x                                   ; st0=ln(2) * log2(r32)
        jmp     .return

        ; Close to 1.0: drop r32 and work on r32-1.0 instead.
.near_one:
        fstp    st1                             ; st1=ln(2) st0=r32-1.0
        fyl2xp1                                 ; st0=ln(2) * log2((r32-1.0) + 1.0)

.return:
%ifdef RT_ARCH_AMD64
        ; The 64-bit ABIs return floats in xmm0.
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
        leave
        ret

ALIGNCODE(8)
.s_r64One:      dq 1.0
.s_r64Limit:    dq 0.29
ENDPROC   RT_NOCRT(logf)
b/src/VBox/Runtime/common/math/logl.asm new file mode 100644 index 00000000..6c9127c3 --- /dev/null +++ b/src/VBox/Runtime/common/math/logl.asm @@ -0,0 +1,84 @@ +; $Id: logl.asm $ +;; @file +; IPRT - No-CRT logl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. 
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; No-CRT logl: natural logarithm of an extended precision value.
;
; ln(2) is loaded as the multiplier, so the x87 base-2 logarithm instructions
; yield a natural logarithm.  When |lrd - 1.0| <= 0.29 (or the comparison is
; unordered) FYL2XP1 is applied to lrd - 1.0, otherwise FYL2X is applied to lrd.
; Argument and result both live on the stack / in st(0), so no scratch space
; is allocated.
;
; @returns  st(0)
; @param    lrd     [xBP + xCB*2]
;
RT_NOCRT_BEGINPROC logl
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

        fldln2                                  ; st0=ln(2)
        fld     tword [xBP + xCB*2]             ; st1=ln(2) st0=lrd
        fld     st0                             ; st2=ln(2) st1=lrd st0=lrd
        fsub    qword [.s_r64One xWrtRIP]       ; st2=ln(2) st1=lrd st0=lrd-1.0
        fld     st0                             ; st3=ln(2) st2=lrd st1=lrd-1.0 st0=lrd-1.0

        fabs                                    ; st3=ln(2) st2=lrd st1=lrd-1.0 st0=|lrd-1.0|
        fcomp   qword [.s_r64Limit xWrtRIP]     ; st2=ln(2) st1=lrd st0=lrd-1.0
        fnstsw  ax
        and     eax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        jnz     .near_one                       ; below, equal or unordered

        ; Away from 1.0: drop lrd-1.0 and take the plain logarithm.
        fstp    st0                             ; st1=ln(2) st0=lrd
        fyl2x                                   ; st0=ln(2) * log2(lrd)
        jmp     .return

        ; Close to 1.0: drop lrd and work on lrd-1.0 instead.
.near_one:
        fstp    st1                             ; st1=ln(2) st0=lrd-1.0
        fyl2xp1                                 ; st0=ln(2) * log2((lrd-1.0) + 1.0)

.return:
        leave
        ret

ALIGNCODE(8)
.s_r64One:      dq 1.0
.s_r64Limit:    dq 0.29
ENDPROC   RT_NOCRT(logl)
;;
; Converts a double to a long integer using the current rounding direction.
;
; AMD64 uses cvtsd2si straight from xmm0 (32-bit result on Windows, 64-bit
; elsewhere); 32-bit x86 goes via the x87 unit and a stack scratch slot.
;
; @returns  32-bit: eax     64-bit: rax (non-windows) or eax (windows)
; @param    rd      32-bit: [esp + 4h]      64-bit: xmm0
RT_NOCRT_BEGINPROC lrint
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_X86
        sub     xSP, 10h                    ; scratch space for the fistp result
        SEH64_ALLOCATE_STACK 10h
%endif
        SEH64_END_PROLOGUE

%ifndef RT_ARCH_AMD64
        ; x87 path: load the stack argument, store it as int32 and fetch it.
        fld     qword [xBP + xCB*2]
        fistp   dword [xSP]
        fwait
        mov     eax, [xSP]
%elifdef RT_OS_WINDOWS
        cvtsd2si eax, xmm0                  ; long is 32-bit here
%else
        cvtsd2si rax, xmm0                  ; long is 64-bit here
%endif

        leave
        ret
ENDPROC   RT_NOCRT(lrint)
+; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; Round rd to the nearest integer value, rounding according to the current rounding direction. 
; @returns  32-bit: eax     64-bit: rax (non-windows) or eax (windows)
; @param    rf      32-bit: [esp + 4h]      64-bit: xmm0
;
; AMD64 uses cvtss2si straight from xmm0 (32-bit result on Windows, 64-bit
; elsewhere); 32-bit x86 goes via the x87 unit and a stack scratch slot.
RT_NOCRT_BEGINPROC lrintf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_X86
        sub     xSP, 10h                    ; scratch space for the fistp result
        SEH64_ALLOCATE_STACK 10h
%endif
        SEH64_END_PROLOGUE

%ifndef RT_ARCH_AMD64
        ; x87 path: load the float stack argument, store it as int32, fetch it.
        fld     dword [xBP + xCB*2]
        fistp   dword [xSP]
        fwait
        mov     eax, [xSP]
%elifdef RT_OS_WINDOWS
        cvtss2si eax, xmm0                  ; long is 32-bit here
%else
        cvtss2si rax, xmm0                  ; long is 64-bit here
%endif

        leave
        ret
ENDPROC   RT_NOCRT(lrintf)
;;
; Converts a long double to a long integer using the current rounding direction.
;
; The value is always loaded onto the x87 stack and stored with fistp into a
; scratch slot at the bottom of the frame; only the store width differs
; (64-bit on non-Windows AMD64, 32-bit everywhere else).
;
; @returns  32-bit: eax     64-bit: rax (non-windows) or eax (windows)
; @param    lrd     [rbp + xCB*2]
RT_NOCRT_BEGINPROC lrintl
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 10h                    ; scratch slot, addressed as [xBP - 10h] below
        SEH64_ALLOCATE_STACK 10h
        SEH64_END_PROLOGUE

        fld     tword [xBP + xCB*2]
%ifndef RT_ARCH_AMD64
        ; 32-bit x86: 32-bit long.
        fistp   dword [xBP - 10h]
        fwait
        mov     eax, [xBP - 10h]
%elifdef RT_OS_WINDOWS
        ; AMD64 Windows: 32-bit long.
        fistp   dword [xBP - 10h]
        fwait
        mov     eax, [xBP - 10h]
%else
        ; AMD64 elsewhere: 64-bit long.
        fistp   qword [xBP - 10h]
        fwait
        mov     rax, [xBP - 10h]
%endif

        leave
        ret
ENDPROC   RT_NOCRT(lrintl)
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/nocrt/limits.h> +#include <iprt/nocrt/fenv.h> + + +#undef lround +long RT_NOCRT(lround)(double rd) +{ + if (isfinite(rd)) + { + rd = RT_NOCRT(round)(rd); + if (rd >= (double)LONG_MIN && rd <= (double)LONG_MAX) + return (long)rd; + RT_NOCRT(feraiseexcept)(FE_INVALID); + return rd > 0.0 ? LONG_MAX : LONG_MIN; + } + RT_NOCRT(feraiseexcept)(FE_INVALID); + if (RT_NOCRT(isinf)(rd) && rd < 0.0) + return LONG_MIN; + return LONG_MAX; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(lround); + diff --git a/src/VBox/Runtime/common/math/lroundf.cpp b/src/VBox/Runtime/common/math/lroundf.cpp new file mode 100644 index 00000000..c753504d --- /dev/null +++ b/src/VBox/Runtime/common/math/lroundf.cpp @@ -0,0 +1,65 @@ +/* $Id: lroundf.cpp $ */ +/** @file + * IPRT - No-CRT - lroundf(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/nocrt/limits.h> +#include <iprt/nocrt/fenv.h> + + +#undef lroundf +long RT_NOCRT(lroundf)(float r32) +{ + if (isfinite(r32)) + { + r32 = RT_NOCRT(roundf)(r32); + if (r32 >= (float)LONG_MIN && r32 <= (float)LONG_MAX) + return (long)r32; + RT_NOCRT(feraiseexcept)(FE_INVALID); + return r32 > 0.0f ? 
LONG_MAX : LONG_MIN; + } + RT_NOCRT(feraiseexcept)(FE_INVALID); + if (RT_NOCRT(__isinff)(r32) && r32 < 0.0) + return LONG_MIN; + return LONG_MAX; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(lroundf); + diff --git a/src/VBox/Runtime/common/math/lroundl.cpp b/src/VBox/Runtime/common/math/lroundl.cpp new file mode 100644 index 00000000..c5618396 --- /dev/null +++ b/src/VBox/Runtime/common/math/lroundl.cpp @@ -0,0 +1,65 @@ +/* $Id: lroundl.cpp $ */ +/** @file + * IPRT - No-CRT - lroundl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> +#include <iprt/nocrt/limits.h> +#include <iprt/nocrt/fenv.h> + + +#undef lroundl +long RT_NOCRT(lroundl)(long double lrd) +{ + if (isfinite(lrd)) + { + lrd = RT_NOCRT(roundl)(lrd); + if (lrd >= (long double)LONG_MIN && lrd <= (long double)LONG_MAX) + return (long)lrd; + RT_NOCRT(feraiseexcept)(FE_INVALID); + return lrd > 0.0L ? LONG_MAX : LONG_MIN; + } + RT_NOCRT(feraiseexcept)(FE_INVALID); + if (RT_NOCRT(__isinfl)(lrd) && lrd < 0.0) + return LONG_MIN; + return LONG_MAX; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(lroundl); + diff --git a/src/VBox/Runtime/common/math/nocrt-abs.cpp b/src/VBox/Runtime/common/math/nocrt-abs.cpp new file mode 100644 index 00000000..bfa3e3ea --- /dev/null +++ b/src/VBox/Runtime/common/math/nocrt-abs.cpp @@ -0,0 +1,52 @@ +/* $Id: nocrt-abs.cpp $ */ +/** @file + * IPRT - No-CRT - abs(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/stdlib.h> + + +#undef abs +int RT_NOCRT(abs)(int iValue) RT_NOEXCEPT +{ + return iValue >= 0 ? iValue : -iValue; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(abs); + diff --git a/src/VBox/Runtime/common/math/nocrt-labs.cpp b/src/VBox/Runtime/common/math/nocrt-labs.cpp new file mode 100644 index 00000000..e8a28ff4 --- /dev/null +++ b/src/VBox/Runtime/common/math/nocrt-labs.cpp @@ -0,0 +1,52 @@ +/* $Id: nocrt-labs.cpp $ */ +/** @file + * IPRT - No-CRT - labs(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/stdlib.h> + + +#undef labs +long RT_NOCRT(labs)(long iValue) RT_NOEXCEPT +{ + return iValue >= 0 ? iValue : -iValue; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(labs); + diff --git a/src/VBox/Runtime/common/math/nocrt-llabs.cpp b/src/VBox/Runtime/common/math/nocrt-llabs.cpp new file mode 100644 index 00000000..b404ddb2 --- /dev/null +++ b/src/VBox/Runtime/common/math/nocrt-llabs.cpp @@ -0,0 +1,52 @@ +/* $Id: nocrt-llabs.cpp $ */ +/** @file + * IPRT - No-CRT - llabs(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/stdlib.h> + + +#undef llabs +long long RT_NOCRT(llabs)(long long iValue) RT_NOEXCEPT +{ + return iValue >= 0 ? iValue : -iValue; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(llabs); + diff --git a/src/VBox/Runtime/common/math/pow.asm b/src/VBox/Runtime/common/math/pow.asm new file mode 100644 index 00000000..c9e760ff --- /dev/null +++ b/src/VBox/Runtime/common/math/pow.asm @@ -0,0 +1,127 @@ +; $Id: pow.asm $ +;; @file +; IPRT - No-CRT pow - AMD64 & X86. 
;;
; Compute the rdBase to the power of rdExp.
;
; Thin wrapper around rtNoCrtMathPowCore: it puts the base in st1 and the
; exponent in st0, passes fxam(base) in dx and the operand type in ebx, and
; converts the worker's st(0) result to the return convention of the target.
;
; Frame layout (relative to xBP after the prologue):
;       [xBP - xCB]     saved xBX
;       [xBP - 20h]     AMD64: spill slot for rdBase
;       [xBP - 30h]     AMD64: spill slot for rdExp, reused for the result
;
; @returns  x86: st(0)                      AMD64: xmm0
; @param    rdBase  x86: [xBP + xCB*2]      AMD64: xmm0
; @param    rdExp   x86: [xBP + xCB*2 + 8]  AMD64: xmm1
;
RT_NOCRT_BEGINPROC pow
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        push    xBX
        SEH64_PUSH_GREG rbx
        sub     xSP, 30h - xCB
        SEH64_ALLOCATE_STACK 30h - xCB
        SEH64_END_PROLOGUE

        ;
        ; Load rdBase into st1 and rdExp into st0.
        ;
%ifdef RT_ARCH_AMD64
        movsd   [xBP - 20h], xmm0
        fld     qword [xBP - 20h]
        fxam
        fnstsw  ax
        mov     dx, ax                      ; dx=fxam(base)

        movsd   [xBP - 30h], xmm1
        fld     qword [xBP - 30h]
%else
        fld     qword [xBP + xCB*2]
        fxam
        fnstsw  ax
        mov     dx, ax                      ; dx=fxam(base)

        fld     qword [xBP + xCB*2 + 8]     ; rdExp follows the 8 byte double rdBase (not RTLRD_CB).
%endif

        ;
        ; Call common worker for the calculation.
        ;
        mov     ebx, 1                      ; enmType: 1 = 64-bit / double
        call    NAME(rtNoCrtMathPowCore)

%ifdef RT_ARCH_AMD64
        ;
        ; Normally, we return with eax==0 and we have to load the result
        ; from st0 and into xmm0.  The store goes to the frame slot at
        ; [xBP - 30h]; nothing may be written below xSP.
        ;
        test    eax, eax
        jnz     .return_input_reg

        fstp    qword [xBP - 30h]
        movsd   xmm0, [xBP - 30h]
%endif
        ; On x86 the worker's st(0) already is the return value, also when
        ; eax says it is one of the inputs, so there is nothing to convert.

.return:
        lea     xSP, [xBP - xCB]
        pop     xBX
        leave
        ret

%ifdef RT_ARCH_AMD64
        ;
        ; But sometimes, like if we have NaN or other special inputs, we should
        ; return the input as-is and ditch the st0 value.  The inputs are
        ; reloaded from their spill slots rather than relying on the volatile
        ; xmm0/xmm1 registers having survived the worker call.
        ;
.return_input_reg:
        ffreep  st0
        cmp     eax, 2
        je      .return_exp
 %ifdef RT_STRICT
        cmp     eax, 1
        je      .return_base
        int3
 %endif
.return_base:
        movsd   xmm0, [xBP - 20h]
        jmp     .return

.return_exp:
        movsd   xmm0, [xBP - 30h]
        jmp     .return
%endif
ENDPROC   RT_NOCRT(pow)
+; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +extern NAME(RT_NOCRT(feraiseexcept)) + +;; +; Call feraiseexcept(%1) +%macro CALL_feraiseexcept_WITH 1 + %ifdef RT_ARCH_X86 + mov dword [xSP], X86_FSW_IE + %elifdef ASM_CALL64_GCC + mov edi, X86_FSW_IE + %elifdef ASM_CALL64_MSC + mov ecx, X86_FSW_IE + %else + %error calling conv. + %endif + call NAME(RT_NOCRT(feraiseexcept)) +%endmacro + + +;; +; Compute the st1 to the power of st0. +; +; @returns st(0) = result +; eax = what's being returned: +; 0 - Just a value. +; 1 - The rBase value. Caller may take steps to ensure it's exactly the same. +; 2 - The rExp value. Caller may take steps to ensure it's exactly the same. +; @param rBase/st1 The base. +; @param rExp/st0 The exponent +; @param fFxamBase/dx The status flags after fxam(rBase). +; @param enmType/ebx The original parameter and return types: +; 0 - 32-bit / float +; 1 - 64-bit / double +; 2 - 80-bit / long double +; +BEGINPROC rtNoCrtMathPowCore + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 30h + SEH64_ALLOCATE_STACK 30h + SEH64_END_PROLOGUE + + ; + ; Weed out special values, starting with the exponent. 
+ ; + fxam + fnstsw ax + mov cx, ax ; cx=fxam(exp) + + and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 + cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) + je .exp_finite + cmp ax, X86_FSW_C3 ; Zero + je .exp_zero + cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals + je .exp_finite + cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. + je .exp_inf + jmp .exp_nan + +.exp_finite: + ; + ; Detect special base values. + ; + mov ax, dx ; ax=fxam(base) + and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 + cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) + je .base_finite + cmp ax, X86_FSW_C3 ; Zero + je .base_zero + cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals + je .base_finite + cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. + je .base_inf + jmp .base_nan + +.base_finite: + ; + ; 1 in the base is also special. + ; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0 + ; + fld1 + fcomip st0, st2 + je .return_base_value + + ; + ; Check if the exponent is an integer value we can handle in a 64-bit + ; GRP as that is simpler to handle accurately. + ; + ; In 64-bit integer range? + fld tword [.s_r80MaxInt xWrtRIP] + fcomip st0, st1 + jb .not_integer_exp + + fld tword [.s_r80MinInt xWrtRIP] + fcomip st0, st1 + ja .not_integer_exp + + ; Convert it to integer. + fld st0 ; -> st0=exp; st1=exp; st2=base + fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction). + + fild qword [xBP - 8] ; Load it again for comparison. + fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop. + jne .not_integer_exp + + + ; + ; + ; Ok, we've got an integer exponent value in that fits into a 64-bit. + ; We'll multiply the base exponention bit by exponention bit, applying + ; it as a factor for bits that are set. + ; + ; +.integer_exp: + ; Load the integer value into edx:exx / rdx and ditch the floating point exponent. 
+ mov xDX, [xBP - 8] +%ifdef RT_ARCH_X86 + mov eax, [xBP - 8 + 4] +%endif + ffreep st0 ; -> st0=base; + + ; Load a 1 onto the stack, we'll need it below as well as for converting + ; a negative exponent to a positive one. + fld1 ; -> st0=1.0; st1=base; + + ; If the exponent is negative, negate it and change base to 1/base. + or xDX, xDX + jns .integer_exp_positive + neg xDX +%ifdef RT_ARCH_X86 + neg eax + sbb edx, 0 +%endif + fdivr st1, st0 ; -> st0=1.0; st1=1/base +.integer_exp_positive: + + ; + ; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for + ; the multiplication factor corresponding to the current exponent bit + ; and st1 as the result. + ; + fxch ; -> st0=base; st1=1.0; +.integer_exp_loop: +%ifdef RT_ARCH_X86 + shrd eax, edx, 1 +%else + shr rdx, 1 +%endif + jnc .integer_exp_loop_advance + fmul st1, st0 + +.integer_exp_loop_advance: + ; Check if we're done. +%ifdef RT_ARCH_AMD64 + jz .integer_exp_return ; (we will have the flags for the shr rdx above) +%else + shr edx, 1 ; complete the above shift operation + + mov ecx, edx ; check if edx:eax is zero. + or ecx, eax + jz .integer_exp_return +%endif + ; Calculate the factor for the next bit. + fmul st0, st0 + jmp .integer_exp_loop + +.integer_exp_return: + ffreep st0 ; drop the factor -> st0=result; no st1. + jmp .return_val + + + ; + ; + ; Non-integer or value was out of range for an int64_t. + ; + ; The approach here is the same as in exp.asm, only we have to do the + ; log2(base) calculation first as it's a parameter and not a constant. + ; + ; +.not_integer_exp: + + ; First reject negative numbers. We still have the fxam(base) status in dx. + test dx, X86_FSW_C1 + jnz .base_negative_non_integer_exp + + ; Swap the items on the stack, so we can process the base first. 
+ fxch st0, st1 ; -> st0=base; st1=exponent; + + ; + ; From log2.asm: + ; + ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a + ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both + ; sides of zero. We try use it if we can. + ; +.above_one: + ; For both fyl2xp1 and fyl2xp1 we need st1=1.0. + fld1 + fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent + + ; Check if the input is within the fyl2xp1 range. + fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP] + fcomip st0, st1 + jbe .cannot_use_fyl2xp1 + + fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP] + fcomip st0, st1 + jae .cannot_use_fyl2xp1 + + ; Do the calculation. +.use_fyl2xp1: + fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent + fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent + jmp .done_log2 + +.cannot_use_fyl2xp1: + fyl2x ; -> st0=1.0*log2(base); st1=exponent +.done_log2: + + ; + ; From exp.asm: + ; + ; Convert to power of 2 and it'll be the same as exp2. + ; + fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp + + ; + ; Split the job in two on the fraction and integer l2base parts. + ; + fld st0 ; Push a copy of the pow2exp on the stack. + frndint ; st0 = (int)pow2exp + fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer. + fxch ; st0 = fraction, st1 = integer. + + ; 1. Calculate on the fraction. + f2xm1 ; st0 = 2**fraction - 1.0 + fld1 + faddp ; st0 = 2**fraction + + ; 2. Apply the integer power of two. + fscale ; st0 = result; st1 = integer part of pow2exp. + fstp st1 ; st0 = result; no st1. + + ; + ; Return st0. + ; +.return_val: + xor eax, eax +.return: + leave + ret + + + ; + ; + ; pow() has a lot of defined behavior for special values, which is why + ; this is the largest and most difficult part of the code. :-) + ; + ; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html + ; there are 21 error conditions listed in the return value section. + ; The code below refers to this by number. 
+ ; + ; When we get here: + ; dx=fxam(base) + ; cx=fxam(exponent) + ; st1=base + ; st0=exponent + ; + + ; + ; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN. + ; + ; The non-integer exponent claim might be wrong, as we only check if it + ; fits into a int64_t register. But, I don't see how we can calculate + ; it right now. + ; +.base_negative_non_integer_exp: + CALL_feraiseexcept_WITH X86_FSW_IE + jmp .return_nan + + ; + ; 7. Exponent = +/-0.0, any base value including NaN: return +1.0 + ; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a + ; domain error (#IE) occur if base=+/-0. Not implemented. +.exp_zero: +.return_plus_one: + fld1 + jmp .return_pop_pop_val + + ; + ; 6. Exponent = whatever and base = 1: Return 1.0 + ; 10. Exponent = +/-Inf and base = -1: Return 1.0 + ;6+10 => Exponent = +/-Inf and |base| = 1: Return 1.0 + ; 11. Exponent = -Inf and |base| < 1: Return +Inf + ; 12. Exponent = -Inf and |base| > 1: Return +0 + ; 13. Exponent = +Inf and |base| < 1: Return +0 + ; 14. Exponent = +Inf and |base| > 1: Return +Inf + ; + ; Note! Rule 4 would trigger for the same conditions as 11 when base == 0, + ; but it's optional to raise div/0 and it's apparently marked as + ; obsolete in C23, so not implemented. + ; +.exp_inf: + ; Check if base is NaN or unsupported. + and dx, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 ; fxam(base) + cmp dx, X86_FSW_C0 + jbe .return_base_nan + + ; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here. + ffreep st0 + fabs + fld1 ; st0=1.0; st1=|rdBase| + fcomi st0, st1 + je .return_plus_one ; Matches rule 6 + 10 (base is +/-1). + ja .exp_inf_base_smaller_than_one +.exp_inf_base_larger_than_one: + test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign + jz .return_plus_inf ; Matches rule 14 (exponent is +Inf). + jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf). 
+ +.exp_inf_base_smaller_than_one: + test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign + jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf). + jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf). + + ; + ; 6. Exponent = whatever and base = 1: Return 1.0 + ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. + ; +.exp_nan: + ; Check if base is a number and possible 1. + test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals. + jz .return_exp_nan + fld1 + fcomip st0, st2 + jne .return_exp_nan + jmp .return_plus_one + + ; + ; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0. + ; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0. + ; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0 + ; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0 + ; + ; Note! Exponent must be finite and non-zero if we get here. + ; +.base_zero: + fldz + fcomip st0, st1 + jbe .base_zero_plus_exp +.base_zero_minus_exp: + mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer + call .is_exp_odd_integer ; trashes EDX but no ECX. + or eax, eax + jz .base_zero_minus_exp_not_odd_int + + ; Matching 4a. +.base_zero_minus_exp_odd_int: + test cx, X86_FSW_C1 ; base sign + jz .raise_de_and_return_plus_inf +.raise_de_and_return_minus_inf: + CALL_feraiseexcept_WITH X86_FSW_DE + jmp .return_minus_inf +.raise_de_and_return_plus_inf: + CALL_feraiseexcept_WITH X86_FSW_DE + jmp .return_plus_inf + + ; Matching 4b. +.base_zero_minus_exp_not_odd_int: + CALL_feraiseexcept_WITH X86_FSW_DE + jmp .return_plus_inf + +.base_zero_plus_exp: + call .is_exp_odd_integer + or eax, eax + jnz .return_base_value ; Matching 8 +.return_plus_zero: ; Matching 9 + fldz + jmp .return_pop_pop_val + + ; + ; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0 + ; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0 + ; 17. 
base == -Inf and exp > 0 and exp is odd integer: Return -Inf + ; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf + ; 19. base == +Inf and exp < 0: Return +0 + ; 20. base == +Inf and exp > 0: Return +Inf + ; + ; Note! Exponent must be finite and non-zero if we get here. + ; +.base_inf: + fldz + fcomip st0, st1 + jbe .base_inf_plus_exp +.base_inf_minus_exp: + test dx, X86_FSW_C1 + jz .return_plus_zero ; Matches 19 (base == +Inf). +.base_minus_inf_minus_exp: + call .is_exp_odd_integer + or eax, eax + jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf) +.return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf) + fldz + fchs + jmp .return_pop_pop_val + +.base_inf_plus_exp: + test dx, X86_FSW_C1 + jz .return_plus_inf ; Matches 20 (base == +Inf). +.base_minus_inf_plus_exp: + call .is_exp_odd_integer + or eax, eax + jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf) + jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf) + + ; + ; Return the exponent NaN (or whatever) value. + ; +.return_exp_nan: + fld st0 + mov eax, 2 ; return param 2 + jmp .return_pop_pop_val_with_eax + + ; + ; Return the base NaN (or whatever) value. + ; +.return_base_nan: +.return_base_value: +.base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. + fld st1 + mov eax, 1 ; return param 1 + jmp .return_pop_pop_val_with_eax + + ; + ; Pops the two values off the FPU stack and returns NaN. + ; +.return_nan: + fld qword [.s_r64QNan xWrtRIP] + jmp .return_pop_pop_val + + ; + ; Pops the two values off the FPU stack and returns +Inf. + ; +.return_plus_inf: + fld qword [.s_r64PlusInf xWrtRIP] + jmp .return_pop_pop_val + + ; + ; Pops the two values off the FPU stack and returns -Inf. + ; +.return_minus_inf: + fld qword [.s_r64MinusInf xWrtRIP] + jmp .return_pop_pop_val + + ; + ; Return st0, remove st1 and st2. 
;
.return_pop_pop_val:
        xor     eax, eax
.return_pop_pop_val_with_eax:
        fstp    st2
        ffreep  st0
        jmp     .return


ALIGNCODE(8)
.s_r80MaxInt:
        dt +9223372036854775807.0

ALIGNCODE(8)
.s_r80MinInt:
        dt -9223372036854775807.0

ALIGNCODE(8)
        ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)).
        ; These two variables are that range + 1.0, so we can compare directly
        ; with the input w/o any extra fsub and fabs work.
.s_r64AbsFyL2xP1InputMin:
        dq 0.708 ; -0.292 + 1.0
.s_r64AbsFyL2xP1InputMax:
        dq 1.292

.s_r64QNan:
        dq RTFLOAT64U_QNAN_MINUS
.s_r64PlusInf:
        dq RTFLOAT64U_INF_PLUS
.s_r64MinusInf:
        dq RTFLOAT64U_INF_MINUS

        ;;
        ; Sub-function that checks if the exponent (st0) is an odd integer or not.
        ;
        ; The FPU environment is saved on entry and reloaded on exit, so the
        ; register stack looks the same to the caller afterwards.
        ;
        ; @returns eax = 1 if odd, 0 if even or not integer.
        ; @param   st0      The exponent.
        ; @param   bl       Input type selector: 0 selects the 24-bit limit,
        ;                   3 skips the limit check (80-bit input), anything
        ;                   else selects the 53-bit limit.
        ; @uses    eax, edx, eflags, [xBP - 30h .. xBP - 08h] as scratch.
        ;
.is_exp_odd_integer:
        ;
        ; Save the FPU environment and mask all exceptions.
        ;
        fnstenv [xBP - 30h]
        mov     ax, [xBP - 30h + X86FSTENV32P.FCW]
        or      word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL
        fldcw   [xBP - 30h + X86FSTENV32P.FCW]
        mov     [xBP - 30h + X86FSTENV32P.FCW], ax  ; put the original FCW back into the saved image.

        ;
        ; Convert to 64-bit integer (probably not 100% correct).
        ;
        fld     st0                         ; -> st0=exponent st1=exponent; st2=base;
        fistp   qword [xBP - 10h]
        fild    qword [xBP - 10h]           ; -> st0=int(exponent) st1=exponent; st2=base;
        fcomip  st0, st1                    ; -> st0=exponent; st1=base;
        jne     .is_exp_odd_integer__return_false ; jump if not integer.
        mov     xAX, [xBP - 10h]
%ifndef RT_ARCH_AMD64
        ; 32-bit build: xAX only holds the low dword, fetch the high one too.
        mov     edx, [xBP - 10h + 4]
%endif

        ;
        ; Check the lowest bit if it might be odd.
        ; This works both for positive and negative numbers.
        ;
        test    al, 1
        jz      .is_exp_odd_integer__return_false ; jump if even.

        ;
        ; If the result is negative, convert to positive.
        ;
%ifdef RT_ARCH_AMD64
        bt      rax, 63
%else
        bt      edx, 31
%endif
        jnc     .is_exp_odd_integer__positive
%ifdef RT_ARCH_AMD64
        neg     xAX
%else
        neg     edx                         ; 64-bit negate of edx:eax.
        neg     eax
        sbb     edx, 0
%endif
.is_exp_odd_integer__positive:

        ;
        ; Now find the most significant bit in the value so we can verify that
        ; the odd bit was part of the mantissa/fraction of the input.
        ;
        cmp     bl, 3                       ; Skip if 80-bit input, as it has a 64-bit mantissa which
        je      .is_exp_odd_integer__return_true ; makes it 1 bit more precise than our integer reg(s).

%ifdef RT_ARCH_AMD64
        bsr     rax, rax
%else
        ; BSR sets ZF (and leaves the destination undefined) when the source
        ; is zero, so ZF=1 is the "high dword is zero" case.
        bsr     edx, edx
        jz      .is_exp_odd_integer__high_dword_is_zero
        lea     eax, [edx + 20h]            ; bit index within the high dword + 32.
        jmp     .is_exp_odd_integer__first_bit_in_eax
.is_exp_odd_integer__high_dword_is_zero:
        bsr     eax, eax
.is_exp_odd_integer__first_bit_in_eax:
%endif
        ;
        ; The limit is 53 for double precision (one implicit bit + 52 bits fraction),
        ; and 24 for single precision types.
        ;
        mov     ah, 53                      ; RTFLOAT64U_FRACTION_BITS + 1
        cmp     bl, 0
        jne     .is_exp_odd_integer__is_double_limit
        mov     ah, 24                      ; RTFLOAT32U_FRACTION_BITS + 1
.is_exp_odd_integer__is_double_limit:

        cmp     al, ah
        jae     .is_exp_odd_integer__return_false

        ; Return.  eax is loaded after the label so the 80-bit shortcut above
        ; returns the documented 1 as well.
.is_exp_odd_integer__return_true:
        mov     eax, 1
        jmp     .is_exp_odd_integer__return
.is_exp_odd_integer__return_false:
        xor     eax, eax
.is_exp_odd_integer__return:
        ffreep  st0
        fldenv  [xBP - 30h]
        ret

ENDPROC   rtNoCrtMathPowCore

diff --git a/src/VBox/Runtime/common/math/powf.asm b/src/VBox/Runtime/common/math/powf.asm
new file mode 100644
index 00000000..19aba29e
--- /dev/null
+++ b/src/VBox/Runtime/common/math/powf.asm
@@ -0,0 +1,127 @@
; $Id: powf.asm $
;; @file
; IPRT - No-CRT powf - AMD64 & X86.
;

;
; Copyright (C) 2006-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

extern NAME(rtNoCrtMathPowCore)

;;
; Compute the rfBase to the power of rfExp.
;
; Both inputs are pushed onto the x87 stack (base first, exponent on top) and
; the shared rtNoCrtMathPowCore worker does the calculation.
;
; @returns st(0) / xmm0
; @param    rfBase      [xBP + xCB*2]     / xmm0
; @param    rfExp       [xBP + xCB*2 + 4] / xmm1
;
RT_NOCRT_BEGINPROC powf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        push    xBX
        SEH64_PUSH_GREG rbx
        sub     xSP, 30h - xCB
        SEH64_ALLOCATE_STACK 30h - xCB
        SEH64_END_PROLOGUE

        ;
        ; Load rfBase into st1 and rfExp into st0.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 20h], xmm0
        fld     dword [xBP - 20h]
        fxam
        fnstsw  ax
        mov     dx, ax                      ; dx=fxam(base)

        movss   [xBP - 30h], xmm1
        fld     dword [xBP - 30h]
%else
        fld     dword [xBP + xCB*2]
        fxam
        fnstsw  ax
        mov     dx, ax                      ; dx=fxam(base)

        fld     dword [xBP + xCB*2 + 4]     ; rfExp follows the 4 byte rfBase (not a long double).
%endif

        ;
        ; Call common worker for the calculation.
        ;
        ; NOTE(review): the worker's odd-integer helper treats bl == 0 as the
        ;               single precision selector - confirm that 1 is the
        ;               intended value here.
        ;
        mov     ebx, 1                      ; float
        call    NAME(rtNoCrtMathPowCore)

%ifdef RT_ARCH_AMD64
        ;
        ; Normally, we return with eax==0 and we have to load the result
        ; from st0 and into xmm0.  The bounce slot is [xBP - 30h], the lowest
        ; slot of our own frame; nothing may be stored below xSP.
        ;
        ; (On x86 the value is returned in st0 and the worker leaves it there
        ; in all cases, so none of this applies.)
        ;
        test    eax, eax
        jnz     .return_input_reg

        fstp    dword [xBP - 30h]
        movss   xmm0, [xBP - 30h]
%endif

.return:
        lea     xSP, [xBP - xCB]
        pop     xBX
        leave
        ret

%ifdef RT_ARCH_AMD64
        ;
        ; But sometimes, like if we have NaN or other special inputs, we should
        ; return the input as-is and ditch the st0 value.
        ; eax=1 means return the base (still in xmm0), eax=2 the exponent (xmm1).
        ;
.return_input_reg:
        ffreep  st0
        cmp     eax, 2
        je      .return_exp
 %ifdef RT_STRICT
        cmp     eax, 1
        je      .return_base
        int3
 %endif
.return_base:
        jmp     .return

.return_exp:
        movss   xmm0, xmm1
        jmp     .return
%endif
ENDPROC   RT_NOCRT(powf)

diff --git a/src/VBox/Runtime/common/math/remainder.asm b/src/VBox/Runtime/common/math/remainder.asm
new file mode 100644
index 00000000..65666390
--- /dev/null
+++ b/src/VBox/Runtime/common/math/remainder.asm
@@ -0,0 +1,104 @@
; $Id: remainder.asm $
;; @file
; IPRT - No-CRT remainder - AMD64 & X86.
;

;
; Copyright (C) 2006-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Double precision remainder, computed by iterating FPREM1.  See SUS.
;
; @returns st(0) / xmm0
; @param    rd1    [ebp + 8h]  xmm0    Dividend.
; @param    rd2    [ebp + 10h] xmm1    Divisor.
RT_NOCRT_BEGINPROC remainder
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_AMD64
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
%endif
        SEH64_END_PROLOGUE

        ;
        ; x87 operands: divisor is pushed first (ends up in st1), dividend
        ; last (st0).  On AMD64 they are bounced from xmm via the stack frame.
        ;
%ifdef RT_ARCH_AMD64
        movsd   [xBP - 10h], xmm0           ; dividend
        movsd   [xBP - 20h], xmm1           ; divisor
        fld     qword [xBP - 20h]
        fld     qword [xBP - 10h]
%else
        fld     qword [ebp + 10h]
        fld     qword [ebp + 08h]
%endif

        ;
        ; Each fprem1 only does a partial reduction (between 32 and 64 rounds),
        ; signalling "not finished" via C2.  Repeat until C2 clears, with ECX
        ; as a bounded retry budget so we can never spin forever.
        ;
        mov     ecx, 2048 / 32 + 4
.reduce:
        fprem1
        fstsw   ax
        test    ah, (X86_FSW_C2 >> 8)
        jz      .reduced
        dec     ecx
        jnz     .reduce
%ifdef RT_STRICT
        int3                                ; budget exhausted - should not happen.
%endif

        ;
        ; st0 holds the remainder; drop the divisor and hand the value back.
        ;
.reduced:
        fstp    st1
%ifdef RT_ARCH_AMD64
        fstp    qword [rsp]
        movsd   xmm0, [rsp]
%endif

        leave
        ret
ENDPROC   RT_NOCRT(remainder)

diff --git a/src/VBox/Runtime/common/math/remainderf.asm b/src/VBox/Runtime/common/math/remainderf.asm
new file mode 100644
index 00000000..f2c79ce7
--- /dev/null
+++ b/src/VBox/Runtime/common/math/remainderf.asm
@@ -0,0 +1,104 @@
; $Id: remainderf.asm $
;; @file
; IPRT - No-CRT remainderf - AMD64 & X86.
;

;
; Copyright (C) 2006-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; See SUS.
; @returns st(0) / xmm0
; @param    rf1    [ebp + 8h]  xmm0    Dividend.
; @param    rf2    [ebp + 0ch] xmm1    Divisor.
RT_NOCRT_BEGINPROC remainderf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_AMD64
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
%endif
        SEH64_END_PROLOGUE

        ;
        ; Get the single precision operands onto the x87 stack: divisor
        ; first (st1), dividend on top (st0).
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0           ; dividend
        movss   [xBP - 20h], xmm1           ; divisor
        fld     dword [xBP - 20h]
        fld     dword [xBP - 10h]
%else
        fld     dword [ebp + 0ch]
        fld     dword [ebp + 08h]
%endif

        ;
        ; fprem1 reduces only partially per invocation (32 to 64 rounds) and
        ; keeps C2 set while more work remains.  ECX caps the number of
        ; attempts so a misbehaving FPU cannot hang us.
        ;
        mov     ecx, 256 / 32 + 4
.reduce:
        fprem1
        fstsw   ax
        test    ah, (X86_FSW_C2 >> 8)
        jz      .reduced
        dec     ecx
        jnz     .reduce
%ifdef RT_STRICT
        int3                                ; ran out of attempts.
%endif

        ;
        ; Discard the divisor and return the remainder (st0, and xmm0 on AMD64).
        ;
.reduced:
        fstp    st1
%ifdef RT_ARCH_AMD64
        fstp    dword [rsp]
        movss   xmm0, [rsp]
%endif

        leave
        ret
ENDPROC   RT_NOCRT(remainderf)

diff --git a/src/VBox/Runtime/common/math/remainderl.asm b/src/VBox/Runtime/common/math/remainderl.asm
new file mode 100644
index 00000000..3eade8df
--- /dev/null
+++ b/src/VBox/Runtime/common/math/remainderl.asm
@@ -0,0 +1,88 @@
; $Id: remainderl.asm $
;; @file
; IPRT - No-CRT remainderl - AMD64 & X86.
;

;
; Copyright (C) 2006-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Extended precision remainder, computed by iterating FPREM1.  See SUS.
;
; @returns st(0)
; @param    lrd1    [xBP + 2*xCB]               Dividend.
; @param    lrd2    [xBP + 2*xCB + RTLRD_CB]    Divisor.
RT_NOCRT_BEGINPROC remainderl
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

        ;
        ; Both operands live on the caller's stack: push the divisor first so
        ; it lands in st1, then the dividend into st0.
        ;
        fld     tword [xBP + 2*xCB + RTLRD_CB]
        fld     tword [xBP + 2*xCB]

        ;
        ; One fprem1 is only a partial reduction (32 to 64 rounds); C2 stays
        ; set until the final result is in st0.  ECX bounds the retries.
        ;
        mov     ecx, 16384 / 32 + 4
.reduce:
        fprem1
        fstsw   ax
        test    ah, (X86_FSW_C2 >> 8)
        jz      .reduced
        dec     ecx
        jnz     .reduce
%ifdef RT_STRICT
        int3                                ; retry budget exhausted.
%endif

        ;
        ; Drop the divisor; the remainder stays in st0 as the return value.
        ;
.reduced:
        fstp    st1
        leave
        ret
ENDPROC   RT_NOCRT(remainderl)

diff --git a/src/VBox/Runtime/common/math/rint.asm b/src/VBox/Runtime/common/math/rint.asm
new file mode 100644
index 00000000..2ef8edac
--- /dev/null
+++ b/src/VBox/Runtime/common/math/rint.asm
@@ -0,0 +1,99 @@
; $Id: rint.asm $
;; @file
; IPRT - No-CRT rint - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Round to integer value according to current rounding mode.
;
; ASSUME FCW and MXCSR are in sync for AMD64.
;
; @returns st(0) / xmm0
; @param    rd      [xBP + xCB*2] / xmm0
RT_NOCRT_BEGINPROC rint
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_AMD64
        sub     xSP, 10h
        SEH64_ALLOCATE_STACK 10h
%endif
        SEH64_END_PROLOGUE

        ;
        ; Get the input into st(0).  Note that the load quiets SNaN values.
        ;
%ifdef RT_ARCH_AMD64
        movsd   qword [xSP], xmm0
        fld     qword [xSP]
%else
        fld     qword [xBP + xCB*2]
%endif

        ;
        ; NaN and infinity are passed through untouched.  FXAM sets C0 for
        ; NaN, Infinity and Empty; the register cannot be empty here.
        ;
        fxam
        fstsw   ax
        test    ax, X86_FSW_C0
        jnz     .return_input

        ;
        ; Ordinary value: round using the current rounding mode.
        ;
        frndint
%ifdef RT_ARCH_AMD64
        fstp    qword [xSP]
        movsd   xmm0, qword [xSP]
%endif
        leave
        ret

        ;
        ; Pass-through.  On AMD64 xmm0 still holds the caller's bits (FLD would
        ; have turned an SNaN into a QNaN), so just drop st0.  On x86 the
        ; loaded value in st0 is the return value.
        ;
.return_input:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        leave
        ret
ENDPROC   RT_NOCRT(rint)

diff --git a/src/VBox/Runtime/common/math/rintf.asm b/src/VBox/Runtime/common/math/rintf.asm
new file mode 100644
index 00000000..10bbda44
--- /dev/null
+++ b/src/VBox/Runtime/common/math/rintf.asm
@@ -0,0 +1,99 @@
; $Id: rintf.asm $
;; @file
; IPRT - No-CRT rintf - AMD64 & X86.
;

;
; Copyright (C) 2022-2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Round to integer value according to current rounding mode.
;
; ASSUME FCW and MXCSR are in sync for AMD64.
;
; @returns st(0) / xmm0
; @param    rf      [xBP + xCB*2] / xmm0
RT_NOCRT_BEGINPROC rintf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
%ifdef RT_ARCH_AMD64
        sub     xSP, 10h
        SEH64_ALLOCATE_STACK 10h
%endif
        SEH64_END_PROLOGUE

        ;
        ; Get the input into st(0).  Note that the load quiets SNaN values.
        ;
%ifdef RT_ARCH_AMD64
        movss   dword [xSP], xmm0
        fld     dword [xSP]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; NaN and infinity are passed through untouched.  FXAM sets C0 for
        ; NaN, Infinity and Empty; the register cannot be empty here.
        ;
        fxam
        fstsw   ax
        test    ax, X86_FSW_C0
        jnz     .return_input

        ;
        ; Ordinary value: round using the current rounding mode.
        ;
        frndint
%ifdef RT_ARCH_AMD64
        fstp    dword [xSP]
        movss   xmm0, dword [xSP]
%endif
        leave
        ret

        ;
        ; Pass-through.  On AMD64 xmm0 still holds the caller's bits (FLD would
        ; have turned an SNaN into a QNaN), so just drop st0.  On x86 the
        ; loaded value in st0 is the return value.
        ;
.return_input:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        leave
        ret
ENDPROC   RT_NOCRT(rintf)

diff --git a/src/VBox/Runtime/common/math/round.cpp b/src/VBox/Runtime/common/math/round.cpp
new file mode 100644
index 00000000..f9771250
--- /dev/null
+++ b/src/VBox/Runtime/common/math/round.cpp
@@ -0,0 +1,69 @@
/* $Id: round.cpp $ */
/** @file
 * IPRT - No-CRT - round().
 */

/*
 * Copyright (C) 2022-2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef round +double RT_NOCRT(round)(double rd) +{ + if (isfinite(rd)) + { + double const rdIn = rd; + if (rd >= 0.0) + { + rd = RT_NOCRT(ceil)(rd); + if (rd - rdIn > 0.5) + rd -= 1.0; + } + else + { + rd = RT_NOCRT(ceil)(-rd); + if (rd + rdIn > 0.5) + rd -= 1.0; + rd = -rd; + } + } + return rd; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(round); + diff --git a/src/VBox/Runtime/common/math/roundf.cpp b/src/VBox/Runtime/common/math/roundf.cpp new file mode 100644 index 00000000..12a8915c --- /dev/null +++ b/src/VBox/Runtime/common/math/roundf.cpp @@ -0,0 +1,69 @@ +/* $Id: roundf.cpp $ */ +/** @file + * IPRT - No-CRT - roundf(). 
+ */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. 
+ * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef roundf +float RT_NOCRT(roundf)(float r32) +{ + if (isfinite(r32)) + { + float const r32In = r32; + if (r32 >= 0.0) + { + r32 = RT_NOCRT(ceilf)(r32); + if (r32 - r32In > 0.5) + r32 -= 1.0; + } + else + { + r32 = RT_NOCRT(ceilf)(-r32); + if (r32 + r32In > 0.5) + r32 -= 1.0; + r32 = -r32; + } + } + return r32; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(roundf); + diff --git a/src/VBox/Runtime/common/math/roundl.cpp b/src/VBox/Runtime/common/math/roundl.cpp new file mode 100644 index 00000000..49344cf0 --- /dev/null +++ b/src/VBox/Runtime/common/math/roundl.cpp @@ -0,0 +1,69 @@ +/* $Id: roundl.cpp $ */ +/** @file + * IPRT - No-CRT - roundl(). + */ + +/* + * Copyright (C) 2022-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define IPRT_NO_CRT_FOR_3RD_PARTY +#include "internal/nocrt.h" +#include <iprt/nocrt/math.h> + + +#undef roundl +long double RT_NOCRT(roundl)(long double lrd) +{ + if (isfinite(lrd)) + { + long double const lrdIn = lrd; + if (lrd >= 0.0) + { + lrd = RT_NOCRT(ceill)(lrd); + if (lrd - lrdIn > 0.5) + lrd -= 1.0; + } + else + { + lrd = RT_NOCRT(ceill)(-lrd); + if (lrd + lrdIn > 0.5) + lrd -= 1.0; + lrd = -lrd; + } + } + return lrd; +} +RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(roundl); + diff --git a/src/VBox/Runtime/common/math/rtNoCrtHasSse.asm b/src/VBox/Runtime/common/math/rtNoCrtHasSse.asm new file mode 100644 index 00000000..15c1bf4f --- /dev/null +++ b/src/VBox/Runtime/common/math/rtNoCrtHasSse.asm @@ -0,0 +1,78 @@ +; $Id: rtNoCrtHasSse.asm $ +;; @file +; IPRT - No-CRT rtNoCrtHasSse - X86. +; + +; +; Copyright (C) 2022-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. 
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINDATA
;; Cached detection result: 0x80 = not yet detected, otherwise 0 (no SSE) or 1 (SSE).
g_frtNoCrtHasSse: db 0x80


BEGINCODE

;;
; Checks if SSE is supported.
;
; The CPUID query is done on the first call only; the answer is stored in
; g_frtNoCrtHasSse and returned straight from there afterwards.
;
; @returns  1 if supported, 0 if not.  Entire eax/rax is set.
; @uses     rax only
;
BEGINPROC   rtNoCrtHasSse
        movzx   eax, byte [g_frtNoCrtHasSse] ; zero-extend so the whole of eax is valid.
        test    al, 0x80
        jnz     .detect_sse
        ret

.detect_sse:
        push    ebx                         ; cpuid trashes eax, ebx, ecx and edx.
        push    ecx
        push    edx

        mov     eax, 1
        cpuid

        xor     eax, eax                    ; must come before the test, xor modifies eflags.
        test    edx, X86_CPUID_FEATURE_EDX_SSE
        setnz   al                          ; eax = 1 if the SSE feature bit is set, otherwise 0.
        mov     [g_frtNoCrtHasSse], al      ; remember it; also clears the 0x80 marker.

        pop     edx
        pop     ecx
        pop     ebx
        ret
ENDPROC     rtNoCrtHasSse

diff --git a/src/VBox/Runtime/common/math/sin.asm b/src/VBox/Runtime/common/math/sin.asm
new file mode 100644
index 00000000..d0c478a3
--- /dev/null
+++ b/src/VBox/Runtime/common/math/sin.asm
@@ -0,0 +1,185 @@
; $Id: sin.asm $
;; @file
; IPRT - No-CRT sin - AMD64 & X86.
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE


;;
; Compute the sine of rd, measured in radians.
;
; Stack frame layout:
;       [xBP - 20h]  Original FPU control word (Windows only).
;       [xBP - 1ch]  Modified FPU control word (Windows only).
;       [xBP - 10h]  Scratch qword for moving values between xmm0 and st0 (AMD64).
;
; @returns  st(0) / xmm0
; @param    rd      [xBP + xCB*2] / xmm0
;
RT_NOCRT_BEGINPROC sin
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

%ifdef RT_OS_WINDOWS
        ;
        ; Make sure we use full precision and not the windows default of 53 bits.
        ;
        fnstcw  [xBP - 20h]
        mov     ax, [xBP - 20h]
        or      ax, X86_FCW_PC_64           ; includes both bits, so no need to clear the mask.
        mov     [xBP - 1ch], ax
        fldcw   [xBP - 1ch]
%endif

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movsd   [xBP - 10h], xmm0
        fld     qword [xBP - 10h]
%else
        fld     qword [xBP + xCB*2]
%endif

        ;
        ; We examine the input and weed out non-finite numbers first.
        ;
        fxam
        fnstsw  ax
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2              ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3              ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
        je      .zero
        cmp     ax, X86_FSW_C0              ; NaN - must handle it special,
        je      .nan

        ; Pass infinities and unsupported inputs to fsin, assuming it does the right thing.
.do_sin:
        fsin
        jmp     .return_val

        ;
        ; Finite number.
        ;
.finite:
        ; For very tiny numbers, 0 < abs(input) < 2**-26, we can return the
        ; input value directly.
        fld     st0                         ; duplicate st0
        fabs                                ; make it an absolute (positive) value.
        fld     qword [.s_r64Tiny xWrtRIP]
        fcomip  st1                         ; compare s_r64Tiny and fabs(input)
        ja      .return_tiny_number_as_is   ; jump if fabs(input) is smaller

        ; FSIN is documented to be reasonable for the range ]-3pi/4,3pi/4[, so
        ; while we have fabs(input) loaded already, check for that here and
        ; allow rtNoCrtMathSinCore to assume it won't see values very close to
        ; zero, except by cos -> sin conversion where they won't be relevant to
        ; any assumptions about precision approximation.
        fld     qword [.s_r64FSinOkay xWrtRIP]
        fcomip  st1
        ffreep  st0                         ; drop the fabs(input) value (EFLAGS are unaffected)
        ja      .do_sin

        ;
        ; Call common sine/cos worker.
        ;
        mov     ecx, 1                      ; double
        extern  NAME(rtNoCrtMathSinCore)
        call    NAME(rtNoCrtMathSinCore)

        ;
        ; Return st0, moving it to xmm0 on AMD64.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    qword [xBP - 10h]
        movsd   xmm0, [xBP - 10h]
%endif
        ; Every exit must pass through here so that the caller's FPU control
        ; word is restored on Windows, also for the zero/NaN/tiny shortcuts.
.return_restore_fcw:
%ifdef RT_OS_WINDOWS
        fldcw   [xBP - 20h]                 ; restore original
%endif
        leave
        ret

        ;
        ; As explained already, we can return tiny numbers directly too as the
        ; output from sin(input) = input given our precision.
        ; We can skip the st0 -> xmm0 translation here, so follow the same path
        ; as .zero & .nan, after we've removed the fabs(input) value.
        ;
.return_tiny_number_as_is:
        ffreep  st0

        ;
        ; sin(+/-0.0) = +/-0.0 (preserve the sign)
        ; We can skip the st0 -> xmm0 translation here, so follow the .nan code path.
        ;
.zero:

        ;
        ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
        ; to QNaN when masked).  On AMD64 xmm0 still holds the untouched input,
        ; so we only have to drop the copy in st0.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        jmp     .return_restore_fcw

ALIGNCODE(8)
        ; 2**-26, absolute value.  For inputs closer to zero than this the
        ; x**3/6 term of the sine series is below half an ULP of a double, so
        ; the input can be returned directly as sin(input).
.s_r64Tiny:
        dq      1.490116119384765625e-8
        ; The absolute limit of FSIN "good" range.
.s_r64FSinOkay:
        dq      2.356194490192344928845     ; 3pi/4
        ;dq      1.57079632679489661923     ; pi/2 - alternative.

ENDPROC   RT_NOCRT(sin)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Internal sine and cosine worker that calculates the sine of st0 returning
; it in st0.
;
; When called by a sine function, fabs(st0) >= pi/2.
; When called by a cosine function, fabs(original input value) >= 3pi/8.
;
; That the input isn't a tiny number close to zero, means that we can do a bit
; cruder rounding when operating close to a pi/2 boundary.  The value in the
; ecx register indicates the input precision and controls the crudeness of the
; rounding.
;
; @returns  st0 = sine
; @param    st0     A finite number to calculate sine of.
; @param    ecx     Set to 0 if original input was a 32-bit float.
;                   Set to 1 if original input was a 64-bit double.
;                   Set to 2 if original input was a 80-bit long double.
; @uses     xAX, xDX, FPU status flags.
;
BEGINPROC   rtNoCrtMathSinCore
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

        ;
        ; Load the pointer to the rounding crudeness factor into xDX.
        ;
        ; The table entries are 64-bit doubles, so the index must be scaled by
        ; 8 regardless of the pointer size (xCB is only 4 on 32-bit x86).
        ;
        lea     xDX, [.s_ar64NearZero xWrtRIP]
        lea     xDX, [xDX + xCX * 8]

        ;
        ; Finite number.  We want it in the range [0,2pi] and will perform
        ; a remainder division if it isn't.
        ;
        fcom    qword [.s_r64Max xWrtRIP]   ; compares st0 and 2*pi
        fnstsw  ax
        test    ax, X86_FSW_C3 | X86_FSW_C0 | X86_FSW_C2 ; C3 := st0 == mem; C0 := st0 < mem; C2 := unordered (should not be the case);
        jz      .reduce_st0                 ; Jump if st0 > mem

        fcom    qword [.s_r64Min xWrtRIP]   ; compares st0 and 0.0
        fnstsw  ax
        test    ax, X86_FSW_C3 | X86_FSW_C0
        jnz     .reduce_st0                 ; Jump if st0 <= mem

        ;
        ; We get here if st0 is in the [0,2pi] range.
        ;
        ; Now, FSIN is documented to be reasonably accurate for the range
        ; -3pi/4 to +3pi/4, so we have to make some more effort to calculate
        ; in that range only.
        ;
.in_range:
        ; if (st0 < pi)
        fldpi
        fcom    st1                         ; compares st0 (pi) with st1 (the normalized value)
        fnstsw  ax
        test    ax, X86_FSW_C0              ; st1 > pi
        jnz     .larger_than_pi
        test    ax, X86_FSW_C3
        jnz     .equals_pi

        ;
        ; input in the range [0,pi[
        ;
.smaller_than_pi:
        fdiv    qword [.s_r64Two xWrtRIP]   ; st0 = pi/2

        ; if (st0 < pi/2)
        fcom    st1                         ; compares st0 (pi/2) with st1
        fnstsw  ax
        test    ax, X86_FSW_C0              ; st1 > pi/2
        jnz     .between_half_pi_and_pi
        test    ax, X86_FSW_C3
        jnz     .equals_half_pi

        ;
        ; The value is between zero and half pi, including the zero value.
        ;
        ; This is in range where FSIN works reasonably reliably. So drop the
        ; half pi in st0 and do the calculation.
        ;
.between_zero_and_half_pi:
        ; Check if we're so close to pi/2 that it makes no difference.
        fsub    st0, st1                    ; st0 = pi/2 - st1
        fcom    qword [xDX]
        fnstsw  ax
        test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
        jnz     .equals_half_pi
        ffreep  st0

        ; Check if we're so close to zero that it makes no difference given the
        ; internal accuracy of the FPU.
        fcom    qword [xDX]
        fnstsw  ax
        test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
        jnz     .equals_zero_popped_one

        ; Ok, calculate sine.
        fsin
        jmp     .return

        ;
        ; The value is in the range ]pi/2,pi[
        ;
        ; This is outside the comfortable FSIN range, but if we subtract PI and
        ; move to the ]-pi/2,0[ range we just have to change the sign to get
        ; the value we want.
        ;
.between_half_pi_and_pi:
        ; Check if we're so close to pi/2 that it makes no difference.
        fsubr   st0, st1                    ; st0 = st1 - st0
        fcom    qword [xDX]
        fnstsw  ax
        test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
        jnz     .equals_half_pi
        ffreep  st0

        ; Check if we're so close to pi that it makes no difference.
        fldpi
        fsub    st0, st1                    ; st0 = st0 - st1
        fcom    qword [xDX]
        fnstsw  ax
        test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
        jnz     .equals_pi
        ffreep  st0

        ; Ok, transform the value and calculate sine.
        fldpi
        fsubp   st1, st0                    ; st0 = value - pi (after the pop)

        fsin
        fchs
        jmp     .return

        ;
        ; input in the range ]pi,2pi[
        ;
.larger_than_pi:
        fsub    st1, st0                    ; st1 -= pi
        fdiv    qword [.s_r64Two xWrtRIP]   ; st0 = pi/2

        ; if (st0 < pi/2)
        fcom    st1                         ; compares st0 (pi/2) with reduced st1
        fnstsw  ax
        test    ax, X86_FSW_C0              ; reduced st1 > pi/2
        jnz     .between_3_half_pi_and_2pi
        test    ax, X86_FSW_C3
        jnz     .equals_3_half_pi

        ;
        ; The value is in the range: ]pi,3pi/2[
        ;
        ; The reduced value in st1 is in the range ]0,pi/2[ where FSIN is
        ; performing okay and we can get the desired result by changing the
        ; sign (-FSIN).
        ;
.between_pi_and_3_half_pi:
        ; Check if we're so close to pi/2 that it makes no difference.
        fsub    st0, st1                    ; st0 = pi/2 - st1
        fcom    qword [xDX]
        fnstsw  ax
        test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
        jnz     .equals_3_half_pi
        ffreep  st0

        ; Check if we're so close to zero that it makes no difference given the
        ; internal accuracy of the FPU.
        fcom    qword [xDX]
        fnstsw  ax
        test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
        jnz     .equals_pi_popped

        ; Ok, calculate sine and flip the sign.
        fsin
        fchs
        jmp     .return

        ;
        ; The value is in the last pi/2 of the range: ]3pi/2,2pi[
        ;
        ; Since FSIN should work reasonably well for ]-pi/2,pi], we can just
        ; subtract pi again (we subtracted pi at .larger_than_pi above) and
        ; run FSIN on it.  (st1 is currently in the range ]pi/2,pi[.)
        ;
.between_3_half_pi_and_2pi:
        ; Check if we're so close to pi/2 that it makes no difference.
        fsubr   st0, st1                    ; st0 = st1 - st0
        fcom    qword [xDX]
        fnstsw  ax
        test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
        jnz     .equals_3_half_pi
        ffreep  st0

        ; Check if we're so close to pi that it makes no difference.
        fldpi
        fsub    st0, st1                    ; st0 = st0 - st1
        fcom    qword [xDX]
        fnstsw  ax
        test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
        jnz     .equals_2pi
        ffreep  st0

        ; Ok, adjust input and calculate sine.
        fldpi
        fsubp   st1, st0                    ; st0 = reduced value - pi (after the pop)
        fsin
        jmp     .return

        ;
        ; sin(0) = 0
        ; sin(pi) = 0
        ;
        ; The first three labels are reached with two values on the FPU stack,
        ; the '_popped' ones with only the input left.
        ;
.equals_zero:
.equals_pi:
.equals_2pi:
        ffreep  st0
.equals_zero_popped_one:
.equals_pi_popped:
        ffreep  st0
        fldz
        jmp     .return

        ;
        ; sin(pi/2) = 1
        ;
.equals_half_pi:
        ffreep  st0
        ffreep  st0
        fld1
        jmp     .return

        ;
        ; sin(3*pi/2) = -1
        ;
.equals_3_half_pi:
        ffreep  st0
        ffreep  st0
        fld1
        fchs
        jmp     .return

        ;
        ; Return.
        ;
.return:
        leave
        ret

        ;
        ; Reduce st0 by remainder division by PI*2.  The result should be positive here.
        ;
        ;; @todo this is one of our weak spots (really any calculation involving PI is).
.reduce_st0:
        fldpi
        fadd    st0, st0
        fxch    st1                         ; st0=input (dividend) st1=2pi (divisor)
.again:
        fprem1
        fnstsw  ax
        test    ah, (X86_FSW_C2 >> 8)       ; C2 is set if partial result.
        jnz     .again                      ; Loop till C2 == 0 and we have a final result.

        ;
        ; Make sure the result is positive.
        ;
        fxam
        fnstsw  ax
        test    ax, X86_FSW_C1              ; The sign bit
        jz      .reduced_to_positive

        fadd    st0, st1                    ; st0 += 2pi, which should make it positive

%ifdef RT_STRICT
        fxam
        fnstsw  ax
        test    ax, X86_FSW_C1
        jz      .reduced_to_positive
        int3
%endif

.reduced_to_positive:
        fstp    st1                         ; Get rid of the 2pi value.
        jmp     .in_range

ALIGNCODE(8)
.s_r64Max:
        dq      +6.28318530717958647692     ; 2*pi
.s_r64Min:
        dq      0.0
.s_r64Two:
        dq      2.0
        ;;
        ; Close to pi/2 rounding limits for 32-bit, 64-bit and 80-bit floating point operations.
        ; Given that the original input is at least +/-3pi/8 (1.178) and that precision of the
        ; PI constant used during reduction/whatever, I think we can round to a whole pi/2
        ; step when we get close enough.
        ;
        ; Look to RTFLOAT64U for the format details, but 52 is the shift for the exponent field
        ; and 1023 is the exponent bias.  Since the format uses an implied 1 in the mantissa,
        ; we only have to set the exponent to get a valid number.
        ;
        ; Each entry is 8 bytes; the lookup at the top of the function depends on that.
        ;
.s_ar64NearZero:
;; @todo check how sensible these really are...
        dq      (-18 + 1023) << 52          ; float / 32-bit / single precision input
        dq      (-40 + 1023) << 52          ; double / 64-bit / double precision input
        dq      (-52 + 1023) << 52          ; long double / 80-bit / extended precision input
ENDPROC     rtNoCrtMathSinCore
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE


;;
; Compute the sine of rf, measured in radians.
;
; Stack frame layout:
;       [xBP - 20h]  Original FPU control word (Windows only).
;       [xBP - 1ch]  Modified FPU control word (Windows only).
;       [xBP - 10h]  Scratch dword for moving values between xmm0 and st0 (AMD64).
;
; @returns  st(0) / xmm0
; @param    rf      [xBP + xCB*2] / xmm0
;
RT_NOCRT_BEGINPROC sinf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

%ifdef RT_OS_WINDOWS
        ;
        ; Make sure we use full precision and not the windows default of 53 bits.
        ;
        fnstcw  [xBP - 20h]
        mov     ax, [xBP - 20h]
        or      ax, X86_FCW_PC_64           ; includes both bits, so no need to clear the mask.
        mov     [xBP - 1ch], ax
        fldcw   [xBP - 1ch]
%endif

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; We examine the input and weed out non-finite numbers first.
        ;
        fxam
        fnstsw  ax
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2              ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3              ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
        je      .zero
        cmp     ax, X86_FSW_C0              ; NaN - must handle it special,
        je      .nan

        ; Pass infinities and unsupported inputs to fsin, assuming it does the right thing.
.do_sin:
        fsin
        jmp     .return_val

        ;
        ; Finite number.
        ;
.finite:
        ; For very tiny numbers, 0 < abs(input) < ca. 2**-26, we can return the
        ; input value directly.
        fld     st0                         ; duplicate st0
        fabs                                ; make it an absolute (positive) value.
        fld     qword [.s_r64Tiny xWrtRIP]
        fcomip  st1                         ; compare s_r64Tiny and fabs(input)
        ja      .return_tiny_number_as_is   ; jump if fabs(input) is smaller

        ; FSIN is documented to be reasonable for the range ]-3pi/4,3pi/4[, so
        ; while we have fabs(input) loaded already, check for that here and
        ; allow rtNoCrtMathSinCore to assume it won't see values very close to
        ; zero, except by cos -> sin conversion where they won't be relevant to
        ; any assumptions about precision approximation.
        fld     qword [.s_r64FSinOkay xWrtRIP]
        fcomip  st1
        ffreep  st0                         ; drop the fabs(input) value (EFLAGS are unaffected)
        ja      .do_sin

        ;
        ; Call common sine/cos worker.
        ;
        mov     ecx, 0                      ; float
        extern  NAME(rtNoCrtMathSinCore)
        call    NAME(rtNoCrtMathSinCore)

        ;
        ; Return st0, moving it to xmm0 on AMD64.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
        ; Every exit must pass through here so that the caller's FPU control
        ; word is restored on Windows, also for the zero/NaN/tiny shortcuts.
.return_restore_fcw:
%ifdef RT_OS_WINDOWS
        fldcw   [xBP - 20h]                 ; restore original
%endif
        leave
        ret

        ;
        ; As explained already, we can return tiny numbers directly too as the
        ; output from sinf(input) = input given our precision.
        ; We can skip the st0 -> xmm0 translation here, so follow the same path
        ; as .zero & .nan, after we've removed the fabs(input) value.
        ;
.return_tiny_number_as_is:
        ffreep  st0

        ;
        ; sinf(+/-0.0) = +/-0.0 (preserve the sign)
        ; We can skip the st0 -> xmm0 translation here, so follow the .nan code path.
        ;
.zero:

        ;
        ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
        ; to QNaN when masked).  On AMD64 xmm0 still holds the untouched input,
        ; so we only have to drop the copy in st0.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        jmp     .return_restore_fcw

ALIGNCODE(8)
        ; Ca. 2**-26, absolute value.  Inputs closer to zero than this can be
        ; returned directly as the sinf(input) value should be basically the same
        ; given the precision we're working with and FSIN probably won't even
        ; manage that.
        ;; @todo experiment when FSIN gets better than this.
.s_r64Tiny:
        dq      1.49011612e-8
        ; The absolute limit of FSIN "good" range.
.s_r64FSinOkay:
        dq      2.356194490192344928845     ; 3pi/4
        ;dq      1.57079632679489661923     ; pi/2 - alternative.

ENDPROC   RT_NOCRT(sinf)
%include "iprt/asmdefs.mac"

BEGINCODE

;;
; Compute the sine of lrd, measured in radians.
;
; FSIN is tried first.  If it rejects the operand (C2 set), the operand is
; reduced by partial remainder against 2*pi and FSIN is run once more.
;
; @returns  st(0)
; @param    lrd     [xBP + xCB*2]
;
RT_NOCRT_BEGINPROC sinl
        push    xBP
        mov     xBP, xSP
        sub     xSP, 10h

        fld     tword [xBP + xCB*2]         ; st0 = lrd
        fsin
        fnstsw  ax
        test    ah, 04h                     ; C2 (bit 10 of the status word), tested in the high byte.
        jz      .return                     ; C2 clear: st0 already holds the result.

        ;
        ; Out of range, st0 is still the unmodified input.
        ;
        fldpi
        fadd    st0, st0                    ; st0 = 2*pi, st1 = input
        fxch    st1                         ; st0 = input, st1 = 2*pi
.reduce_more:
        fprem1
        fnstsw  ax
        test    ah, 04h                     ; C2 set: the reduction is only partial.
        jnz     .reduce_more
        fstp    st1                         ; Drop 2*pi, keeping the remainder in st0.
        fsin

.return:
        leave
        ret
ENDPROC   RT_NOCRT(sinl)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"

BEGINCODE

;;
; Square root of a double - the fast, not entirely C standard conforming version.
;
; @returns  st(0) / xmm0
; @param    rd      [xBP + xCB*2] / xmm0
;
RT_NOCRT_BEGINPROC sqrt
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifndef RT_ARCH_AMD64
        ; Non-AMD64 builds: the argument is on the stack and the result is left in st0.
        fld     qword [xBP + xCB*2]
        fsqrt
        ;; @todo rounding?
%else
        ; AMD64: argument and result are both in xmm0.
        sqrtsd  xmm0, xmm0
%endif

        leave
        ret
ENDPROC   RT_NOCRT(sqrt)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"

BEGINCODE

;;
; Square root of a float - the fast, not entirely C standard conforming version.
;
; @returns  st(0) / xmm0
; @param    r32     [xBP + xCB*2] / xmm0
;
RT_NOCRT_BEGINPROC sqrtf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifndef RT_ARCH_AMD64
        ; Non-AMD64 builds: the argument is on the stack and the result is left in st0.
        fld     dword [xBP + xCB*2]
        fsqrt
        ;; @todo rounding?
%else
        ; AMD64: argument and result are both in xmm0.
        sqrtss  xmm0, xmm0
%endif

        leave
        ret
ENDPROC   RT_NOCRT(sqrtf)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Compute the tangent of rd, measured in radians.
;
; Stack frame layout:
;       [xBP - 20h]  Original FPU control word (Windows only).
;       [xBP - 1ch]  Modified FPU control word (Windows only).
;       [xBP - 10h]  Scratch qword for moving values between xmm0 and st0 (AMD64).
;
; @returns  st(0) / xmm0
; @param    rd      [xBP + xCB*2] / xmm0
;
RT_NOCRT_BEGINPROC tan
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

%ifdef RT_OS_WINDOWS
        ; Switch the FPU to full 64-bit precision, as Windows defaults to 53 bits.
        ; The original control word is kept at [xBP - 20h] for the exit path.
        fnstcw  [xBP - 20h]
        mov     ax, [xBP - 20h]
        or      ax, X86_FCW_PC_64           ; Sets both precision bits, so no masking is needed.
        mov     [xBP - 1ch], ax
        fldcw   [xBP - 1ch]
%endif

        ; st0 = input.
%ifdef RT_ARCH_AMD64
        movsd   [xBP - 10h], xmm0
        fld     qword [xBP - 10h]
%else
        fld     qword [xBP + xCB*2]
%endif

        ; First attempt: FPTAN directly on the input.
        fptan
        fnstsw  ax
        test    ah, (X86_FSW_C2 >> 8)       ; C2 set: operand was out of range, nothing was computed.
        jz      .have_tangent

        ; Out of range: take the remainder against 2*pi and try again.
        fldpi
        fadd    st0, st0                    ; st0 = 2*pi, st1 = input
        fxch    st1                         ; st0 = input, st1 = 2*pi
.reduce_more:
        fprem1
        fnstsw  ax
        test    ah, (X86_FSW_C2 >> 8)       ; C2 set: the reduction is only partial.
        jnz     .reduce_more

        fstp    st1                         ; Drop 2*pi, keeping the remainder in st0.

        fptan

        ; st0 = the 1.0 pushed by FPTAN, st1 = the tangent.
.have_tangent:
        ffreep  st0                         ; Discard the 1.0.
%ifdef RT_ARCH_AMD64
        fstp    qword [xBP - 10h]
        movsd   xmm0, [xBP - 10h]
%endif
%ifdef RT_OS_WINDOWS
        fldcw   [xBP - 20h]                 ; Restore the caller's control word.
%endif
        leave
        ret
ENDPROC   RT_NOCRT(tan)
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

;;
; Compute the tangent of rf, measured in radians.
;
; Stack frame layout:
;       [xBP - 20h]  Original FPU control word (Windows only).
;       [xBP - 1ch]  Modified FPU control word (Windows only).
;       [xBP - 10h]  Scratch dword for moving values between xmm0 and st0 (AMD64).
;
; @returns  st(0) / xmm0
; @param    rf      [xBP + xCB*2] / xmm0
;
RT_NOCRT_BEGINPROC tanf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

%ifdef RT_OS_WINDOWS
        ; Switch the FPU to full 64-bit precision, as Windows defaults to 53 bits.
        ; The original control word is kept at [xBP - 20h] for the exit path.
        fnstcw  [xBP - 20h]
        mov     ax, [xBP - 20h]
        or      ax, X86_FCW_PC_64           ; Sets both precision bits, so no masking is needed.
        mov     [xBP - 1ch], ax
        fldcw   [xBP - 1ch]
%endif

        ; st0 = input.
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ; First attempt: FPTAN directly on the input.
        fptan
        fnstsw  ax
        test    ah, (X86_FSW_C2 >> 8)       ; C2 set: operand was out of range, nothing was computed.
        jz      .have_tangent

        ; Out of range: take the remainder against 2*pi and try again.
        fldpi
        fadd    st0, st0                    ; st0 = 2*pi, st1 = input
        fxch    st1                         ; st0 = input, st1 = 2*pi
.reduce_more:
        fprem1
        fnstsw  ax
        test    ah, (X86_FSW_C2 >> 8)       ; C2 set: the reduction is only partial.
        jnz     .reduce_more

        fstp    st1                         ; Drop 2*pi, keeping the remainder in st0.

        fptan

        ; st0 = the 1.0 pushed by FPTAN, st1 = the tangent.
.have_tangent:
        ffreep  st0                         ; Discard the 1.0.
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
%ifdef RT_OS_WINDOWS
        fldcw   [xBP - 20h]                 ; Restore the caller's control word.
%endif
        leave
        ret
ENDPROC   RT_NOCRT(tanf)
+; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%include "iprt/asmdefs.mac" + +BEGINCODE + +;; +; Compute the tangent of lrd +; @returns st(0) +; @param lrd [xSP + xCB*2] +RT_NOCRT_BEGINPROC tanl + push xBP + mov xBP, xSP + sub xSP, 10h + + fld tword [xBP + xCB*2] + fptan + fnstsw ax + test ah, 04h ; check for C2 + jz .done + + fldpi + fadd st0 + fxch st1 +.again: + fprem1 + fnstsw ax + test ah, 04h + jnz .again + fstp st1 + fptan + +.done: + fstp st0 + leave + ret +ENDPROC RT_NOCRT(tanl) + diff --git a/src/VBox/Runtime/common/math/trunc.asm b/src/VBox/Runtime/common/math/trunc.asm new file mode 100644 index 00000000..4234f5b6 --- /dev/null +++ b/src/VBox/Runtime/common/math/trunc.asm @@ -0,0 +1,108 @@ +; $Id: trunc.asm $ +;; @file +; IPRT - No-CRT trunc - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. 
+; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Round to truncated integer value. +; @returns 32-bit: st(0) 64-bit: xmm0 +; @param rd 32-bit: [ebp + 8] 64-bit: xmm0 +RT_NOCRT_BEGINPROC trunc + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h + SEH64_END_PROLOGUE + + ; + ; Load the value into st(0). This messes up SNaN values. + ; +%ifdef RT_ARCH_AMD64 + movsd [xSP], xmm0 + fld qword [xSP] +%else + fld qword [xBP + xCB*2] +%endif + + ; + ; Return immediately if NaN or infinity. + ; + fxam + fstsw ax + test ax, X86_FSW_C0 ; C0 is set for NaN, Infinity and Empty register. The latter is not the case. + jz .input_ok +%ifdef RT_ARCH_AMD64 + ffreep st0 ; return the xmm0 register value unchanged, as FLD changes SNaN to QNaN. +%endif + jmp .return_val +.input_ok: + + ; + ; Make it truncate up by modifying the fpu control word. + ; + fstcw [xBP - 10h] + mov eax, [xBP - 10h] + or eax, X86_FCW_RC_ZERO ; both bits set, so no need to clear anything first. + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; + ; Round ST(0) to integer. + ; + frndint + + ; + ; Restore the fpu control word and return. + ; + fldcw [xBP - 10h] + +%ifdef RT_ARCH_AMD64 + fstp qword [xSP] + movsd xmm0, [xSP] +%endif +.return_val: + leave + ret +ENDPROC RT_NOCRT(trunc) + diff --git a/src/VBox/Runtime/common/math/truncf.asm b/src/VBox/Runtime/common/math/truncf.asm new file mode 100644 index 00000000..677b7fd3 --- /dev/null +++ b/src/VBox/Runtime/common/math/truncf.asm @@ -0,0 +1,108 @@ +; $Id: truncf.asm $ +;; @file +; IPRT - No-CRT truncf - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. 
+; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Round to truncated integer value. +; @returns 32-bit: st(0) 64-bit: xmm0 +; @param rf 32-bit: [ebp + 8] 64-bit: xmm0 +RT_NOCRT_BEGINPROC truncf + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h + SEH64_END_PROLOGUE + + ; + ; Load the value into st(0). This messes up SNaN values. + ; +%ifdef RT_ARCH_AMD64 + movss [xSP], xmm0 + fld dword [xSP] +%else + fld dword [xBP + xCB*2] +%endif + + ; + ; Return immediately if NaN or infinity. + ; + fxam + fstsw ax + test ax, X86_FSW_C0 ; C0 is set for NaN, Infinity and Empty register. The latter is not the case. 
+ jz .input_ok +%ifdef RT_ARCH_AMD64 + ffreep st0 ; return the xmm0 register value unchanged, as FLD changes SNaN to QNaN. +%endif + jmp .return_val +.input_ok: + + ; + ; Make it truncate up by modifying the fpu control word. + ; + fstcw [xBP - 10h] + mov eax, [xBP - 10h] + or eax, X86_FCW_RC_ZERO ; both bits set, so no need to clear anything first. + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; + ; Round ST(0) to integer. + ; + frndint + + ; + ; Restore the fpu control word and return. + ; + fldcw [xBP - 10h] + +%ifdef RT_ARCH_AMD64 + fstp dword [xSP] + movss xmm0, [xSP] +%endif +.return_val: + leave + ret +ENDPROC RT_NOCRT(truncf) + diff --git a/src/VBox/Runtime/common/math/truncl.asm b/src/VBox/Runtime/common/math/truncl.asm new file mode 100644 index 00000000..e037cab5 --- /dev/null +++ b/src/VBox/Runtime/common/math/truncl.asm @@ -0,0 +1,76 @@ +; $Id: truncl.asm $ +;; @file +; IPRT - No-CRT truncl - AMD64 & X86. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. 
+; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +%define RT_ASM_WITH_SEH64 +%include "iprt/asmdefs.mac" +%include "iprt/x86.mac" + + +BEGINCODE + +;; +; Round to truncated integer value. +; @returns st(0) +; @param lrd [xBP + xCB*2] +RT_NOCRT_BEGINPROC truncl + push xBP + SEH64_PUSH_xBP + mov xBP, xSP + SEH64_SET_FRAME_xBP 0 + sub xSP, 10h + SEH64_ALLOCATE_STACK 10h + SEH64_END_PROLOGUE + + fld tword [xBP + xCB*2] + + ; Make it truncate up by modifying the fpu control word. + fstcw [xBP - 10h] + mov eax, [xBP - 10h] + or eax, X86_FCW_RC_ZERO ; both bits set, so no need to clear anything first. + mov [xBP - 08h], eax + fldcw [xBP - 08h] + + ; Round ST(0) to integer. + frndint + + ; Restore the fpu control word. + fldcw [xBP - 10h] + + leave + ret +ENDPROC RT_NOCRT(truncl) + diff --git a/src/VBox/Runtime/common/math/watcom/I8D-x86-32.asm b/src/VBox/Runtime/common/math/watcom/I8D-x86-32.asm new file mode 100644 index 00000000..2aa02520 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/I8D-x86-32.asm @@ -0,0 +1,108 @@ +; $Id: I8D-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit signed integer division. +; + +; +; Copyright (C) 2007-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. 
+; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + +extern __U8D + + +;; +; 64-bit signed integer division. +; +; @returns EDX:EAX Quotient, ECX:EBX Remainder. +; @param EDX:EAX Dividend. +; @param ECX:EBX Divisor +; +global __I8D +__I8D: + ; + ; We use __U8D to do the work, we take care of the signedness. + ; + or edx, edx + js .negative_dividend + + or ecx, ecx + js .negative_divisor_positive_dividend + jmp __U8D + + +.negative_divisor_positive_dividend: + ; negate the divisor, do unsigned division, and negate the quotient. 
+ neg ecx + neg ebx + sbb ecx, 0 + + call __U8D + + neg edx + neg eax + sbb edx, 0 + ret + +.negative_dividend: + neg edx + neg eax + sbb edx, 0 + + or ecx, ecx + js .negative_dividend_negative_divisor + +.negative_dividend_positive_divisor: + ; negate the dividend (above), do unsigned division, and negate both quotient and remainder + call __U8D + + neg edx + neg eax + sbb edx, 0 + +.return_negated_remainder: + neg ecx + neg ebx + sbb ecx, 0 + ret + +.negative_dividend_negative_divisor: + ; negate both dividend (above) and divisor, do unsigned division, and negate the remainder. + neg ecx + neg ebx + sbb ecx, 0 + + call __U8D + jmp .return_negated_remainder + diff --git a/src/VBox/Runtime/common/math/watcom/RTWatcomUInt64Div.c b/src/VBox/Runtime/common/math/watcom/RTWatcomUInt64Div.c new file mode 100644 index 00000000..fda3c972 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/RTWatcomUInt64Div.c @@ -0,0 +1,48 @@ +/* $Id: RTWatcomUInt64Div.c $ */ +/** @file + * BS3Kit - Unsigned 64-bit division (compiler support routine helper). + */ + +/* + * Copyright (C) 2007-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. 
+ * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include <iprt/uint64.h> + + +DECLASM(void) RTWatcomUInt64Div(RTUINT64U uDividend, RTUINT64U uDivisor, RTUINT64U RT_FAR *paQuotientReminder) +{ + RTUInt64DivRem(&paQuotientReminder[0], &paQuotientReminder[1], &uDividend, &uDivisor); +} + diff --git a/src/VBox/Runtime/common/math/watcom/U8D-x86-32.asm b/src/VBox/Runtime/common/math/watcom/U8D-x86-32.asm new file mode 100644 index 00000000..4eb22411 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/U8D-x86-32.asm @@ -0,0 +1,84 @@ +; $Id: U8D-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit unsigned integer division. +; + +; +; Copyright (C) 2007-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + +extern NAME(RTWatcomUInt64Div) + + +;; +; 64-bit unsigned integer division. +; +; @returns EDX:EAX Quotient, ECX:EBX Remainder. +; @param EDX:EAX Dividend. +; @param ECX:EBX Divisor +; +global __U8D +__U8D: + ; + ; Convert to a C __cdecl call - not doing this in assembly. + ; + + ; Set up a frame, allocating 16 bytes for the result buffer. + push ebp + mov ebp, esp + sub esp, 10h + + ; Pointer to the return buffer. + push esp + + ; The divisor. + push ecx + push ebx + + ; The dividend. + push edx + push eax + + call NAME(RTWatcomUInt64Div) + + ; Load the result. + mov ecx, [ebp - 10h + 12] + mov ebx, [ebp - 10h + 8] + mov edx, [ebp - 10h + 4] + mov eax, [ebp - 10h] + + leave + ret + diff --git a/src/VBox/Runtime/common/math/watcom/U8LS-x86-32.asm b/src/VBox/Runtime/common/math/watcom/U8LS-x86-32.asm new file mode 100644 index 00000000..ea81565a --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/U8LS-x86-32.asm @@ -0,0 +1,74 @@ +; $Id: U8LS-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit integer left shift. +; + +; +; Copyright (C) 2007-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. 
+; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + +;; +; 64-bit integer left shift. +; +; @returns EDX:EAX +; @param EDX:EAX Value to shift. +; @param BL Shift count (it's specified as ECX:EBX, but we only use BL). +; +global __U8LS +__U8LS: +global __I8LS +__I8LS: + push ecx ; We're allowed to trash ECX, but why bother. + + mov cl, bl + and cl, 3fh + test cl, 20h + jnz .big_shift + + ; Shifting less than 32. + shld edx, eax, cl + shl eax, cl + +.return: + pop ecx + ret + +.big_shift: + ; Shifting 32 or more. + mov edx, eax + shl edx, cl ; Only uses lower 5 bits. 
+ xor eax, eax + jmp .return + diff --git a/src/VBox/Runtime/common/math/watcom/U8M-I8M-x86-32.asm b/src/VBox/Runtime/common/math/watcom/U8M-I8M-x86-32.asm new file mode 100644 index 00000000..3b01cd5c --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/U8M-I8M-x86-32.asm @@ -0,0 +1,87 @@ +; $Id: U8M-I8M-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit signed & unsigned integer multiplication. +; + +; +; Copyright (C) 2007-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + + +;; +; 64-bit signed & unsigned integer multiplication. +; +; @returns EDX:EAX product +; @param EDX:EAX Factor #1. +; @param ECX:EBX Factor #2. 
+; @uses ECX, EBX +; +global __U8M +__U8M: +global __I8M +__I8M: + ; + ; See if this is a pure 32-bit multiplication. We might get lucky. + ; + test edx, edx + jnz .complicated + test ecx, ecx + jnz .complicated + + mul ebx ; eax * ebx -> edx:eax + ret + +.complicated: + push eax + push edx + + ; ecx = F1.lo * F2.hi (edx contains the overflow here, which can be ignored) + mul ecx + mov ecx, eax + + ; ecx += F1.hi * F2.lo (edx can be ignored again) + pop eax + mul ebx + add ecx, eax + + ; edx:eax = F1.lo * F2.lo + pop eax + mul ebx + + ; Add ecx to the high part (edx). + add edx, ecx + + ret + diff --git a/src/VBox/Runtime/common/math/watcom/U8RS-x86-32.asm b/src/VBox/Runtime/common/math/watcom/U8RS-x86-32.asm new file mode 100644 index 00000000..8be43d13 --- /dev/null +++ b/src/VBox/Runtime/common/math/watcom/U8RS-x86-32.asm @@ -0,0 +1,73 @@ +; $Id: U8RS-x86-32.asm $ +;; @file +; BS3Kit - 32-bit Watcom C/C++, 64-bit unsigned integer right shift. +; + +; +; Copyright (C) 2007-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. 
+; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%include "iprt/asmdefs.mac" + + +BEGINCODE + + +;; +; 64-bit unsigned integer right shift. +; +; @returns EDX:EAX +; @param EDX:EAX Value to shift. +; @param BL Shift count (it's specified as ECX:EBX, but we only use BL). +; +global __U8RS +__U8RS: + push ecx ; We're allowed to trash ECX, but why bother. + + mov cl, bl + and cl, 3fh + test cl, 20h + jnz .big_shift + + ; Shifting less than 32. + shrd eax, edx, cl + shr edx, cl + +.return: + pop ecx + ret + +.big_shift: + ; Shifting 32 or more. + mov eax, edx + shr eax, cl ; Only uses lower 5 bits. + xor edx, edx + jmp .return + |