Diffstat (limited to 'src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm')
-rw-r--r--  src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm  |  102
1 file changed, 102 insertions(+), 0 deletions(-)
diff --git a/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm b/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm
new file mode 100644
index 00000000..e764b389
--- /dev/null
+++ b/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm
@@ -0,0 +1,102 @@
+; $Id: ASMMultU64ByU32DivByU32.asm $
+;; @file
+; IPRT - Assembly Functions, ASMMultU64ByU32DivByU32.
+;
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+%include "iprt/asmdefs.mac"
+
+
+;;
+; Multiply a 64-bit integer by a 32-bit integer and divide the result by a
+; 32-bit integer, using a 96-bit intermediate result.
+;
+; @returns (u64A * u32B) / u32C.
+; @param u64A/rcx/rdi The 64-bit value.
+; @param u32B/edx/esi The 32-bit value to multiply A by.
+; @param u32C/r8d/edx The 32-bit value to divide A*B by.
+;
+; @cproto DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
+;
+BEGINPROC_EXPORTED ASMMultU64ByU32DivByU32
+%ifdef RT_ARCH_AMD64
+
+ %ifdef ASM_CALL64_MSC
+ mov rax, rcx ; rax = u64A
+ mov r9d, edx ; r9d = u32B; should check the spec wrt the high bits one day...
+ mov r8d, r8d ; r8d = u32C; be paranoid about the high bits for the time being.
+ %else
+ mov rax, rdi ; rax = u64A
+ mov r9d, esi ; r9d = u32B
+ mov r8d, edx ; r8d = u32C
+ %endif
+ mul r9
+ div r8
+
+%else ; X86
+ ;
+ ; This implementation is converted from the GCC inline
+ ; version of the code. Nothing additional has been done
+ ; performance-wise.
+ ;
+ push esi
+ push edi
+
+%define u64A_Lo [esp + 04h + 08h]
+%define u64A_Hi [esp + 08h + 08h]
+%define u32B [esp + 0ch + 08h]
+%define u32C [esp + 10h + 08h]
+
+ ; Load parameters into registers.
+ mov eax, u64A_Lo
+ mov esi, u64A_Hi
+ mov ecx, u32B
+ mov edi, u32C
+
+ ; The body, just like the inline version.
+ mul ecx ; eax = u64Lo.lo = (u64A.lo * u32B).lo
+ ; edx = u64Lo.hi = (u64A.lo * u32B).hi
+ xchg eax, esi ; esi = u64Lo.lo
+ ; eax = u64A.hi
+ xchg edx, edi ; edi = u64Lo.hi
+ ; edx = u32C
+ xchg edx, ecx ; ecx = u32C
+ ; edx = u32B
+ mul edx ; eax = u64Hi.lo = (u64A.hi * u32B).lo
+ ; edx = u64Hi.hi = (u64A.hi * u32B).hi
+ add eax, edi ; u64Hi.lo += u64Lo.hi
+ adc edx, 0 ; u64Hi.hi += carry
+ div ecx ; eax = u64Hi / u32C
+ ; edx = u64Hi % u32C
+ mov edi, eax ; edi = u64Result.hi = u64Hi / u32C
+ mov eax, esi ; eax = u64Lo.lo
+ div ecx ; u64Result.lo
+ mov edx, edi ; u64Result.hi
+
+ ; epilogue
+ pop edi
+ pop esi
+%endif
+ ret
+ENDPROC ASMMultU64ByU32DivByU32
+
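For reference, here is a minimal C sketch of what the routine computes, mirroring the 32-bit x86 path above: the 96-bit product u64A * u32B is split into a high and a low 64-bit part and then divided by u32C in two steps, exactly as the two mul/div pairs do in the assembly. The function name MultU64ByU32DivByU32_Ref is illustrative and not part of IPRT; like the assembly, it assumes u32C is non-zero and the final quotient fits in 64 bits.

    #include <stdint.h>

    /* Illustrative reference only -- the authoritative implementation is the
     * assembly above. Computes (u64A * u32B) / u32C with a 96-bit
     * intermediate: split u64A into 32-bit halves, multiply each by u32B,
     * then long-divide the two-"digit" result by u32C. */
    static uint64_t MultU64ByU32DivByU32_Ref(uint64_t u64A, uint32_t u32B, uint32_t u32C)
    {
        uint64_t u64Lo = (uint64_t)(uint32_t)u64A         * u32B; /* bits  0..63 of the product  */
        uint64_t u64Hi = (uint64_t)(uint32_t)(u64A >> 32) * u32B; /* bits 32..95 of the product  */

        u64Hi += u64Lo >> 32;                    /* carry the upper half of u64Lo into u64Hi
                                                    (the add/adc pair in the assembly)        */

        uint64_t uResultHi  = u64Hi / u32C;      /* first division: upper 32 bits of quotient */
        uint64_t uRemainder = u64Hi % u32C;

        /* second division: remainder:u64Lo.lo divided by u32C gives the low 32 bits */
        uint64_t uResultLo  = ((uRemainder << 32) | (uint32_t)u64Lo) / u32C;

        return (uResultHi << 32) | (uint32_t)uResultLo;
    }

A typical (hypothetical) use is frequency scaling, e.g. MultU64ByU32DivByU32_Ref(cTicks, 1000000000, uFreqHz) to convert a tick count into nanoseconds when the frequency fits in 32 bits. The AMD64 path above reaches the same result more directly with a single 64x64->128 mul followed by a 128/64 div.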