1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
; $Id: ASMMultU64ByU32DivByU32.asm $
;; @file
; IPRT - Assembly Functions, ASMMultU64ByU32DivByU32.
;
;
; Copyright (C) 2006-2022 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.virtualbox.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;
%include "iprt/asmdefs.mac"
;;
; Multiply a 64-bit integer by a 32-bit integer and divide the result by a 32-bit integer
; using a 96 bit intermediate result.
;
; @returns (u64A * u32B) / u32C.
; @param u64A/rcx/rdi The 64-bit value.
; @param u32B/edx/esi The 32-bit value to multiply A by.
; @param u32C/r8d/edx The 32-bit value to divide A*B by.
;
; @cproto DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
;
RT_BEGINPROC ASMMultU64ByU32DivByU32
%if ARCH_BITS == 64
        ;
        ; 64-bit: a single widening multiply leaves the full product in
        ; rdx:rax, and a single divide reduces it back to 64 bits.
        ;
 %ifdef ASM_CALL64_MSC
        mov     rax, rcx                ; rax = u64A
        mov     r9d, edx                ; r9  = u32B, zero-extended (should check the specs wrt the high bits one day...)
        mov     r8d, r8d                ; r8  = u32C, zero-extended (be paranoid for the time being)
 %else
        mov     rax, rdi                ; rax = u64A
        mov     r9d, esi                ; r9  = u32B, zero-extended
        mov     r8d, edx                ; r8  = u32C, zero-extended
 %endif
        mul     r9                      ; rdx:rax = u64A * u32B
        div     r8                      ; rax = rdx:rax / u32C (faults if u32C is 0 or the quotient needs more than 64 bits)

%else ; 16 or 32 bit
        ;
        ; This implementation is converted from the GCC inline
        ; version of the code. Nothing additional has been done
        ; performance wise.
        ;
 %if ARCH_BITS == 16
        push    bp
        mov     bp, sp
        ; Save all return registers so their high halves can be preserved (paranoia).
        ; The slots are rewritten with the result words in the epilogue:
        ;       [bp - 1*4] = eax, [bp - 2*4] = ebx, [bp - 3*4] = ecx, [bp - 4*4] = edx
        push    eax
        push    ebx
        push    ecx
        push    edx
 %endif
        push    esi
        push    edi

 %if ARCH_BITS == 16
        ; NOTE(review): these offsets place the first argument at bp+8; confirm
        ; against the 16-bit calling convention this function is built for.
  %define u64A_Lo     [bp + 4 + 04h]
  %define u64A_Hi     [bp + 4 + 08h]
  %define u32B        [bp + 4 + 0ch]
  %define u32C        [bp + 4 + 10h]
 %else
        ; The extra 08h skips the esi and edi values pushed above.
  %define u64A_Lo     [esp + 04h + 08h]
  %define u64A_Hi     [esp + 08h + 08h]
  %define u32B        [esp + 0ch + 08h]
  %define u32C        [esp + 10h + 08h]
 %endif

        ; Load parameters into registers.
        mov     eax, u64A_Lo
        mov     esi, u64A_Hi
        mov     ecx, u32B
        mov     edi, u32C

        ; The body, a straight conversion of the GCC inline assembly version:
        ; two 32x32 multiplies build the 96-bit product, then two chained
        ; divides (the first remainder feeds the second) produce the 64-bit quotient.
        mul     ecx                     ; eax = u64Lo.lo = (u64A.lo * u32B).lo
                                        ; edx = u64Lo.hi = (u64A.lo * u32B).hi
        xchg    eax, esi                ; esi = u64Lo.lo
                                        ; eax = u64A.hi
        xchg    edx, edi                ; edi = u64Lo.hi
                                        ; edx = u32C
        xchg    edx, ecx                ; ecx = u32C
                                        ; edx = u32B
        mul     edx                     ; eax = u64Hi.lo = (u64A.hi * u32B).lo
                                        ; edx = u64Hi.hi = (u64A.hi * u32B).hi
        add     eax, edi                ; u64Hi.lo += u64Lo.hi
        adc     edx, 0                  ; u64Hi.hi += carry
        div     ecx                     ; eax = u64Hi / u32C
                                        ; edx = u64Hi % u32C
        mov     edi, eax                ; edi = u64Result.hi = u64Hi / u32C
        mov     eax, esi                ; eax = u64Lo.lo
        div     ecx                     ; eax = u64Result.lo = (remainder:u64Lo.lo) / u32C
        mov     edx, edi                ; edx = u64Result.hi

        ; epilogue
        pop     edi
        pop     esi
 %if ARCH_BITS == 16
        ; The result is returned in DX:CX:BX:AX, where DX holds bits 15:0, CX bits 31:16,
        ; BX bits 47:32, and AX bits 63:48.  Store each 16-bit result word into the low
        ; half of the matching save slot so the pops below load the result while the
        ; upper 16 bits of each register keep their original value.
        mov     [bp - 4*4], ax          ; dx = bits 15:0
        shr     eax, 16
        mov     [bp - 3*4], ax          ; cx = bits 31:16
        mov     [bp - 2*4], dx          ; bx = bits 47:32
        shr     edx, 16
        mov     [bp - 1*4], dx          ; ax = bits 63:48

        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        leave
 %endif
%endif
        ret
ENDPROC ASMMultU64ByU32DivByU32
|