diff options
Diffstat (limited to 'src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm')
-rw-r--r-- | src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm b/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm new file mode 100644 index 00000000..e764b389 --- /dev/null +++ b/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm @@ -0,0 +1,102 @@ +; $Id: ASMMultU64ByU32DivByU32.asm $ +;; @file +; IPRT - Assembly Functions, ASMMultU64ByU32DivByU32. +; + +; +; Copyright (C) 2006-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "iprt/asmdefs.mac" + + +;; +; Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer +; using a 96 bit intermediate result. +; +; @returns (u64A * u32B) / u32C. +; @param u64A/rcx/rdi The 64-bit value. +; @param u32B/edx/esi The 32-bit value to multiple by A. +; @param u32C/r8d/edx The 32-bit value to divide A*B by. +; +; @cproto DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C); +; +BEGINPROC_EXPORTED ASMMultU64ByU32DivByU32 +%ifdef RT_ARCH_AMD64 + + %ifdef ASM_CALL64_MSC + mov rax, rcx ; rax = u64A + mov r9d, edx ; should check the specs wrt to the high bits one day... + mov r8d, r8d ; be paranoid for the time being. + %else + mov rax, rdi ; rax = u64A + mov r9d, esi ; r9d = u32B + mov r8d, edx ; r8d = u32C + %endif + mul r9 + div r8 + +%else ; X86 + ; + ; This implementation is converted from the GCC inline + ; version of the code. Nothing additional has been done + ; performance wise. + ; + push esi + push edi + +%define u64A_Lo [esp + 04h + 08h] +%define u64A_Hi [esp + 08h + 08h] +%define u32B [esp + 0ch + 08h] +%define u32C [esp + 10h + 08h] + + ; Load parameters into registers. + mov eax, u64A_Lo + mov esi, u64A_Hi + mov ecx, u32B + mov edi, u32C + + ; The body, just like the in + mul ecx ; eax = u64Lo.lo = (u64A.lo * u32B).lo + ; edx = u64Lo.hi = (u64A.lo * u32B).hi + xchg eax, esi ; esi = u64Lo.lo + ; eax = u64A.hi + xchg edx, edi ; edi = u64Low.hi + ; edx = u32C + xchg edx, ecx ; ecx = u32C + ; edx = u32B + mul edx ; eax = u64Hi.lo = (u64A.hi * u32B).lo + ; edx = u64Hi.hi = (u64A.hi * u32B).hi + add eax, edi ; u64Hi.lo += u64Lo.hi + adc edx, 0 ; u64Hi.hi += carry + div ecx ; eax = u64Hi / u32C + ; edx = u64Hi % u32C + mov edi, eax ; edi = u64Result.hi = u64Hi / u32C + mov eax, esi ; eax = u64Lo.lo + div ecx ; u64Result.lo + mov edx, edi ; u64Result.hi + + ; epilogue + pop edi + pop esi +%endif + ret +ENDPROC ASMMultU64ByU32DivByU32 + |