diff options
Diffstat (limited to 'src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm')
-rw-r--r-- | src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm b/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm new file mode 100644 index 00000000..bf2f284d --- /dev/null +++ b/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm @@ -0,0 +1,141 @@ +; $Id: ASMMultU64ByU32DivByU32.asm $ +;; @file +; IPRT - Assembly Functions, ASMMultU64ByU32DivByU32. +; + +; +; Copyright (C) 2006-2023 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + +%include "iprt/asmdefs.mac" + + +;; +; Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer +; using a 96 bit intermediate result. +; +; @returns (u64A * u32B) / u32C. +; @param u64A/rcx/rdi The 64-bit value. +; @param u32B/edx/esi The 32-bit value to multiply by A. 
; @param   u32C/r8d/edx    The 32-bit value to divide A*B by.
;
; @cproto  DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
;
; @remarks The quotient is produced with the DIV instruction, which raises a
;          divide error (#DE) when u32C is zero or when the quotient does not
;          fit the destination (i.e. the overall result exceeds 64 bits).
;
; Implementation overview:
;   - 64-bit:    one 64x64->128 MUL followed by one 128/64 DIV.
;   - 16/32-bit: two 32x32->64 MULs build the 96-bit product, then two
;                chained 64/32 DIVs produce the high and low result dwords.
;                The remainder of the first DIV is left in EDX and becomes
;                the high half of the dividend for the second DIV.
;
RT_BEGINPROC ASMMultU64ByU32DivByU32
%if ARCH_BITS == 64

 %ifdef ASM_CALL64_MSC
        mov     rax, rcx                ; rax = u64A
        mov     r9d, edx                ; r9  = u32B, zero extended (should check the specs wrt to the high bits one day...)
        mov     r8d, r8d                ; r8  = u32C, zero extended (be paranoid for the time being).
 %else
        mov     rax, rdi                ; rax = u64A
        mov     r9d, esi                ; r9  = u32B, zero extended
        mov     r8d, edx                ; r8  = u32C, zero extended
 %endif
        mul     r9                      ; rdx:rax = u64A * u32B
        div     r8                      ; rax     = rdx:rax / u32C

%else ; 16 or 32 bit
        ;
        ; This implementation is converted from the GCC inline
        ; version of the code.  Nothing additional has been done
        ; performance wise.
        ;
 %if ARCH_BITS == 16
        push    bp
        mov     bp, sp
        push    eax                     ; push all return registers to preserve high value (paranoia)
        push    ebx                     ; The four slots end up at [bp - 1*4] (eax), [bp - 2*4] (ebx),
        push    ecx                     ; [bp - 3*4] (ecx) and [bp - 4*4] (edx); the epilogue patches
        push    edx                     ; their low words with the result before popping them.
 %endif
        push    esi
        push    edi

 %if ARCH_BITS == 16
        ; NOTE(review): these offsets are bp relative ([bp] = saved bp); confirm the
        ;               start offset of 8 against the 16-bit calling convention in use.
  %define u64A_Lo [bp + 4 + 04h]
  %define u64A_Hi [bp + 4 + 08h]
  %define u32B    [bp + 4 + 0ch]
  %define u32C    [bp + 4 + 10h]
 %else
        ; esp relative: 04h skips the return address, 08h the esi and edi pushes above.
  %define u64A_Lo [esp + 04h + 08h]
  %define u64A_Hi [esp + 08h + 08h]
  %define u32B    [esp + 0ch + 08h]
  %define u32C    [esp + 10h + 08h]
 %endif

        ; Load parameters into registers.
        mov     eax, u64A_Lo
        mov     esi, u64A_Hi
        mov     ecx, u32B
        mov     edi, u32C

        ; The body, just like the GCC inline version.
        mul     ecx                     ; eax = u64Lo.lo = (u64A.lo * u32B).lo
                                        ; edx = u64Lo.hi = (u64A.lo * u32B).hi
        xchg    eax, esi                ; esi = u64Lo.lo
                                        ; eax = u64A.hi
        xchg    edx, edi                ; edi = u64Lo.hi
                                        ; edx = u32C
        xchg    edx, ecx                ; ecx = u32C
                                        ; edx = u32B
        mul     edx                     ; eax = u64Hi.lo = (u64A.hi * u32B).lo
                                        ; edx = u64Hi.hi = (u64A.hi * u32B).hi
        add     eax, edi                ; u64Hi.lo += u64Lo.hi
        adc     edx, 0                  ; u64Hi.hi += carry
        div     ecx                     ; eax = u64Hi / u32C
                                        ; edx = u64Hi % u32C
        mov     edi, eax                ; edi = u64Result.hi = u64Hi / u32C
        mov     eax, esi                ; eax = u64Lo.lo (edx still holds the remainder = high dividend half)
        div     ecx                     ; eax = u64Result.lo
        mov     edx, edi                ; edx = u64Result.hi

        ; epilogue
        pop     edi
        pop     esi
 %if ARCH_BITS == 16
        ; DX:CX:BX:AX, where DX holds bits 15:0, CX bits 31:16, BX bits 47:32, and AX bits 63:48.
        ;
        ; The result currently lives in edx:eax.  Store its four words into the
        ; low halves of the register slots saved by the prologue, so that the
        ; pops below deliver the result while keeping the callers' high halves.
        mov     [bp - 4*4], ax          ; edx slot: dx = bits 15:0
        shr     eax, 16
        mov     [bp - 3*4], ax          ; ecx slot: cx = bits 31:16
        mov     [bp - 2*4], dx          ; ebx slot: bx = bits 47:32
        shr     edx, 16
        mov     [bp - 1*4], dx          ; eax slot: ax = bits 63:48
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        leave
 %endif
%endif
        ret
ENDPROC ASMMultU64ByU32DivByU32
