1 files changed, 141 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm b/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm
new file mode 100644
index 00000000..bf2f284d
--- /dev/null
+++ b/src/VBox/Runtime/common/asm/ASMMultU64ByU32DivByU32.asm
@@ -0,0 +1,141 @@
+; $Id: ASMMultU64ByU32DivByU32.asm $
+;; @file
+; IPRT - Assembly Functions, ASMMultU64ByU32DivByU32.
+;
+
+;
+; Copyright (C) 2006-2023 Oracle and/or its affiliates.
+;
+; This file is part of VirtualBox base platform packages, as
+; available from https://www.virtualbox.org.
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation, in version 3 of the
+; License.
+;
+; This program is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, see <https://www.gnu.org/licenses>.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+; in the VirtualBox distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+;
+
+%include "iprt/asmdefs.mac"
+
+
+;;
+; Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
+; using a 96 bit intermediate result.
+;
+; @returns (u64A * u32B) / u32C.
+; @param   u64A/rcx/rdi     The 64-bit value.
+; @param   u32B/edx/esi     The 32-bit value to multiply A by.
+; @param   u32C/r8d/edx     The 32-bit value to divide A*B by.
+;
+; @cproto  DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
+;
+RT_BEGINPROC ASMMultU64ByU32DivByU32
+%if ARCH_BITS == 64
+
+ %ifdef ASM_CALL64_MSC
+    mov     rax, rcx                    ; rax = u64A
+    mov     r9d, edx                    ; r9  = u32B, zero-extended (high bits of rdx are not trusted)
+    mov     r8d, r8d                    ; r8  = u32C, zero-extended (paranoia, same reason)
+ %else
+    mov     rax, rdi                    ; rax = u64A
+    mov     r9d, esi                    ; r9  = u32B, zero-extended
+    mov     r8d, edx                    ; r8  = u32C, zero-extended
+ %endif
+    mul     r9                          ; rdx:rax = u64A * u32B (at most 96 significant bits)
+    div     r8                          ; rax = rdx:rax / u32C; rdx = remainder (not returned)
+
+%else ; 16 or 32 bit
+    ;
+    ; Converted from the GCC inline assembly version; not tuned any further.
+    ; Two 32x32 multiplies build the 96-bit product, then two chained 64/32
+    ; divides (upper part first, its remainder feeding the second) give the quotient.
+    ;
+ %if ARCH_BITS == 16
+    push    bp
+    mov     bp, sp
+    push    eax                         ; [bp - 1*4]  save all return registers so that
+    push    ebx                         ; [bp - 2*4]  their high words survive the call
+    push    ecx                         ; [bp - 3*4]  (paranoia); the epilogue patches the
+    push    edx                         ; [bp - 4*4]  low words of these slots with the result
+ %endif
+    push    esi                         ; esi and edi are used as scratch registers below
+    push    edi
+
+ %if ARCH_BITS == 16
+  %define u64A_Lo     [bp + 4 + 04h]
+  %define u64A_Hi     [bp + 4 + 08h]
+  %define u32B        [bp + 4 + 0ch]
+  %define u32C        [bp + 4 + 10h]
+ %else
+  %define u64A_Lo     [esp + 04h + 08h] ; the extra 08h skips the saved esi and edi
+  %define u64A_Hi     [esp + 08h + 08h]
+  %define u32B        [esp + 0ch + 08h]
+  %define u32C        [esp + 10h + 08h]
+ %endif
+
+    ; Load parameters into registers.
+    mov     eax, u64A_Lo
+    mov     esi, u64A_Hi
+    mov     ecx, u32B
+    mov     edi, u32C
+
+    ; The body, just like in the GCC inline assembly version.
+    mul     ecx                         ; eax = u64Lo.lo = (u64A.lo * u32B).lo
+                                        ; edx = u64Lo.hi = (u64A.lo * u32B).hi
+    xchg    eax, esi                    ; esi = u64Lo.lo
+                                        ; eax = u64A.hi
+    xchg    edx, edi                    ; edi = u64Lo.hi
+                                        ; edx = u32C
+    xchg    edx, ecx                    ; ecx = u32C
+                                        ; edx = u32B
+    mul     edx                         ; eax = u64Hi.lo = (u64A.hi * u32B).lo
+                                        ; edx = u64Hi.hi = (u64A.hi * u32B).hi
+    add     eax, edi                    ; u64Hi.lo += u64Lo.hi
+    adc     edx, 0                      ; u64Hi.hi += carry
+    div     ecx                         ; eax = u64Hi / u32C
+                                        ; edx = u64Hi % u32C
+    mov     edi, eax                    ; edi = u64Result.hi = u64Hi / u32C
+    mov     eax, esi                    ; eax = u64Lo.lo; edx still holds u64Hi % u32C
+    div     ecx                         ; eax = u64Result.lo = (edx:eax) / u32C
+    mov     edx, edi                    ; edx = u64Result.hi
+
+    ; epilogue
+    pop     edi
+    pop     esi
+ %if ARCH_BITS == 16
+    ;  DX:CX:BX:AX, where DX holds bits 15:0, CX bits 31:16, BX bits 47:32, and AX bits 63:48.
+    mov     [bp - 4*4], ax              ; saved dx slot = result bits 15:0
+    shr     eax, 16
+    mov     [bp - 3*4], ax              ; saved cx slot = result bits 31:16
+    mov     [bp - 2*4], dx              ; saved bx slot = result bits 47:32
+    shr     edx, 16
+    mov     [bp - 1*4], dx              ; saved ax slot = result bits 63:48
+    pop     edx                         ; the pops load the result into the low words and
+    pop     ecx                         ; restore the caller's original high words
+    pop     ebx
+    pop     eax
+    leave
+ %endif
+%endif
+    ret
+ENDPROC ASMMultU64ByU32DivByU32
+