# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifdef DARWIN
#define s_mpv_mul_d _s_mpv_mul_d
#define s_mpv_mul_d_add _s_mpv_mul_d_add
#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop
#define s_mpv_div_2dx1d _s_mpv_div_2dx1d
#define TYPE_FUNCTION(x)
#else
#define TYPE_FUNCTION(x) .type x, @function
#endif

.text

 #
 # s_mpv_mul_d
 #
 # Multiplies each 32-bit word of a by the 32-bit value b, writes the low
 # word of every (product + carry) to c, and writes the last carry to the
 # word that follows. With a_len == 0 only a zero carry is written to c[0].
 #
 # Stack frame (all arguments are passed on the stack):
 #  ebp - 8:    saved esi
 #  ebp - 4:    saved edi
 #  ebp + 0:    saved ebp
 #  ebp + 4:    return address
 #  ebp + 8:    a argument
 #  ebp + 12:   a_len argument
 #  ebp + 16:   b argument
 #  ebp + 20:   c argument
 #
 # Register use:
 #  ecx:        number of words of a still to process
 #  esi:        read cursor into a
 #  edi:        write cursor into c
 #  mm0:        current word of a, then its product with b
 #  mm1:        b
 #  mm2:        64-bit running sum; its upper half is the next carry
 #
.globl  s_mpv_mul_d
.private_extern s_mpv_mul_d
TYPE_FUNCTION(s_mpv_mul_d)
s_mpv_mul_d:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pxor    %mm2, %mm2              # carry starts at zero
        movl    20(%ebp), %edi          # edi = c
        movd    16(%ebp), %mm1          # mm1 = b
        movl    12(%ebp), %ecx          # ecx = a_len
        testl   %ecx, %ecx
        jz      2f                      # empty input: only the carry word is stored
        movl    8(%ebp), %esi           # esi = a
        cld
1:
        movd    (%esi), %mm0            # mm0 = next word of a
        addl    $4, %esi
        pmuludq %mm1, %mm0              # mm0 = 64-bit product with b
        paddq   %mm0, %mm2              # fold the product into the carry
        movd    %mm2, (%edi)            # low 32 bits are the result word
        addl    $4, %edi
        psrlq   $32, %mm2               # high 32 bits carry into the next word
        decl    %ecx
        jnz     1b                      # repeat until a is exhausted
2:
        movd    %mm2, (%edi)            # word after the products = final carry
        emms                            # leave the MMX state clean
        popl    %esi
        popl    %edi
        leave
        ret
        nop

 #
 # s_mpv_mul_d_add
 #
 # Same walk as the plain multiply, except that every result word also
 # includes the value already stored in c. The word after the last result
 # is overwritten with the final carry (it is not added to).
 #
 # Stack frame (all arguments are passed on the stack):
 #  ebp - 8:    saved esi
 #  ebp - 4:    saved edi
 #  ebp + 0:    saved ebp
 #  ebp + 4:    return address
 #  ebp + 8:    a argument
 #  ebp + 12:   a_len argument
 #  ebp + 16:   b argument
 #  ebp + 20:   c argument
 #
 # Register use:
 #  ecx:        number of words of a still to process
 #  esi:        read cursor into a
 #  edi:        read/write cursor into c
 #  mm0:        current word of a and its product, then the old word of c
 #  mm1:        b
 #  mm2:        64-bit running sum; its upper half is the next carry
 #
.globl  s_mpv_mul_d_add
.private_extern s_mpv_mul_d_add
TYPE_FUNCTION(s_mpv_mul_d_add)
s_mpv_mul_d_add:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pxor    %mm2, %mm2              # carry starts at zero
        movl    20(%ebp), %edi          # edi = c
        movd    16(%ebp), %mm1          # mm1 = b
        movl    12(%ebp), %ecx          # ecx = a_len
        testl   %ecx, %ecx
        jz      2f                      # empty input: only the carry word is stored
        movl    8(%ebp), %esi           # esi = a
        cld
1:
        movd    (%esi), %mm0            # mm0 = next word of a
        addl    $4, %esi
        pmuludq %mm1, %mm0              # mm0 = 64-bit product with b
        paddq   %mm0, %mm2              # fold the product into the carry
        movd    (%edi), %mm0            # mm0 = word currently stored in c
        paddq   %mm0, %mm2              # accumulate it as well
        movd    %mm2, (%edi)            # low 32 bits replace that word
        addl    $4, %edi
        psrlq   $32, %mm2               # high 32 bits carry into the next word
        decl    %ecx
        jnz     1b                      # repeat until a is exhausted
2:
        movd    %mm2, (%edi)            # word after the sums = final carry
        emms                            # leave the MMX state clean
        popl    %esi
        popl    %edi
        leave
        ret
        nop

 #
 # s_mpv_mul_d_add_prop
 #
 # Multiply-accumulate as above, but the final carry is added into the
 # following word of c and any carry out of that addition keeps rippling
 # into higher words until an addition produces no carry.
 #
 # NOTE(review): the ripple has no length limit of its own; presumably the
 # caller guarantees that c is long enough to absorb it. Confirm at the
 # call sites.
 #
 # Stack frame (all arguments are passed on the stack):
 #  ebp - 12:   saved ebx
 #  ebp - 8:    saved esi
 #  ebp - 4:    saved edi
 #  ebp + 0:    saved ebp
 #  ebp + 4:    return address
 #  ebp + 8:    a argument
 #  ebp + 12:   a_len argument
 #  ebp + 16:   b argument
 #  ebp + 20:   c argument
 #
 # Register use:
 #  eax:        scratch word while the carry ripples through c
 #  ebx:        carry left over after the multiply loop
 #  ecx:        number of words of a still to process
 #  esi:        read cursor into a
 #  edi:        read/write cursor into c
 #  mm0-mm3:    word of a / product, b, running sum, old word of c
 #
.globl  s_mpv_mul_d_add_prop
.private_extern s_mpv_mul_d_add_prop
TYPE_FUNCTION(s_mpv_mul_d_add_prop)
s_mpv_mul_d_add_prop:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pxor    %mm2, %mm2              # carry starts at zero
        movl    20(%ebp), %edi          # edi = c
        movd    16(%ebp), %mm1          # mm1 = b
        movl    12(%ebp), %ecx          # ecx = a_len
        testl   %ecx, %ecx
        jz      2f                      # empty input: carry stays zero
        movl    8(%ebp), %esi           # esi = a
        cld                             # the stosl stores below must step edi upward
1:
        movd    (%esi), %mm0            # mm0 = next word of a
        movd    (%edi), %mm3            # mm3 = word currently stored in c
        addl    $4, %esi
        pmuludq %mm1, %mm0              # mm0 = 64-bit product with b
        paddq   %mm0, %mm2              # fold the product into the carry
        paddq   %mm3, %mm2              # plus the old word of c
        movd    %mm2, (%edi)            # low 32 bits replace that word
        addl    $4, %edi
        psrlq   $32, %mm2               # high 32 bits carry into the next word
        decl    %ecx
        jnz     1b                      # repeat until a is exhausted
2:
        movd    %mm2, %ebx              # ebx = carry out of the loop
        testl   %ebx, %ebx
        jz      4f                      # zero carry: c needs no further change
        movl    (%edi), %eax
        addl    %ebx, %eax              # add the carry to the next word of c
        stosl                           # store eax at (edi), edi += 4; flags untouched
        jnc     4f                      # done unless that addition overflowed
3:
        movl    (%edi), %eax            # next higher word of c
        adcl    $0, %eax                # absorb the pending carry bit
        stosl                           # store eax at (edi), edi += 4; flags untouched
        jc      3b                      # keep rippling while words overflow
4:
        emms                            # leave the MMX state clean
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop

 #
 # s_mpv_sqr_add_prop
 #
 # For every word of pa, adds its 64-bit square into the matching pair of
 # words of ps (two words of ps are consumed per word of pa), carrying from
 # one pair into the next. The carry left after the last pair is added to
 # the following word of ps and ripples upward until no carry remains.
 #
 # NOTE(review): as with the multiply variant, the ripple has no length
 # limit of its own; presumably the caller sizes ps accordingly.
 #
 # Stack frame (all arguments are passed on the stack):
 #  ebp - 12:   saved ebx
 #  ebp - 8:    saved esi
 #  ebp - 4:    saved edi
 #  ebp + 0:    saved ebp
 #  ebp + 4:    return address
 #  ebp + 8:    pa argument
 #  ebp + 12:   a_len argument
 #  ebp + 16:   ps argument
 #
 # Register use:
 #  eax:        scratch word while the carry ripples through ps
 #  ebx:        carry left over after the squaring loop
 #  ecx:        number of words of pa still to process
 #  esi:        read cursor into pa
 #  edi:        read/write cursor into ps
 #  mm0:        current word of pa, then its square
 #  mm2:        64-bit running sum; its upper half is the next carry
 #  mm3:        old low word, then old high word, of the current ps pair
 #
.globl  s_mpv_sqr_add_prop
.private_extern s_mpv_sqr_add_prop
TYPE_FUNCTION(s_mpv_sqr_add_prop)
s_mpv_sqr_add_prop:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pxor    %mm2, %mm2              # carry starts at zero
        movl    16(%ebp), %edi          # edi = ps
        movl    12(%ebp), %ecx          # ecx = a_len
        testl   %ecx, %ecx
        jz      2f                      # empty input: carry stays zero
        movl    8(%ebp), %esi           # esi = pa
        cld                             # the stosl stores below must step edi upward
1:
        movd    (%esi), %mm0            # mm0 = next word of pa
        movd    (%edi), %mm3            # mm3 = old low word of the pair
        addl    $4, %esi
        pmuludq %mm0, %mm0              # mm0 = 64-bit square
        paddq   %mm0, %mm2              # fold the square into the carry
        paddq   %mm3, %mm2              # plus the old low word
        movd    4(%edi), %mm3           # mm3 = old high word of the pair
        movd    %mm2, (%edi)            # store the new low word
        psrlq   $32, %mm2
        paddq   %mm3, %mm2              # upper half of the sum plus the old high word
        movd    %mm2, 4(%edi)           # store the new high word
        psrlq   $32, %mm2               # what is left carries into the next pair
        addl    $8, %edi
        decl    %ecx
        jnz     1b                      # repeat until pa is exhausted
2:
        movd    %mm2, %ebx              # ebx = carry out of the loop
        testl   %ebx, %ebx
        jz      4f                      # zero carry: ps needs no further change
        movl    (%edi), %eax
        addl    %ebx, %eax              # add the carry to the next word of ps
        stosl                           # store eax at (edi), edi += 4; flags untouched
        jnc     4f                      # done unless that addition overflowed
3:
        movl    (%edi), %eax            # next higher word of ps
        adcl    $0, %eax                # absorb the pending carry bit
        stosl                           # store eax at (edi), edi += 4; flags untouched
        jc      3b                      # keep rippling while words overflow
4:
        emms                            # leave the MMX state clean
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop

 #
 # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
 # so its high bit is 1.   This code is from NSPR.
#
 # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
 #                        mp_digit *qp, mp_digit *rp)
 #
 # Loads the 64-bit dividend Nhi:Nlo into edx:eax, performs one unsigned
 # 32-bit divide by divisor, stores the quotient through qp and the
 # remainder through rp, and returns zero in eax.
 #
 # NOTE(review): nothing in this routine checks that Nhi < divisor or that
 # divisor is nonzero. The divide instruction raises a divide error in
 # either case, so callers presumably guarantee both. Confirm at the call
 # sites.
 #
 # Stack layout once ebx has been pushed:
 #  esp + 0:    saved ebx
 #  esp + 4:    return address
 #  esp + 8:    Nhi argument
 #  esp + 12:   Nlo argument
 #  esp + 16:   divisor argument
 #  esp + 20:   qp argument
 #  esp + 24:   rp argument
 #
 # Register use:
 #  eax:        Nlo, then the quotient, finally the zero return value
 #  edx:        Nhi, then the remainder
 #  ebx:        destination pointer (qp, then rp); saved and restored
 #
.globl  s_mpv_div_2dx1d
.private_extern s_mpv_div_2dx1d
TYPE_FUNCTION(s_mpv_div_2dx1d)
s_mpv_div_2dx1d:
        pushl   %ebx
        movl    8(%esp), %edx           # edx = Nhi (upper half of the dividend)
        movl    12(%esp), %eax          # eax = Nlo (lower half of the dividend)
        divl    16(%esp)                # eax = quotient, edx = remainder
        movl    20(%esp), %ebx          # ebx = qp
        movl    %eax, (%ebx)            # store the quotient
        movl    24(%esp), %ebx          # ebx = rp
        movl    %edx, (%ebx)            # store the remainder
        xorl    %eax, %eax              # return zero
        popl    %ebx
        ret
        nop

#ifndef DARWIN
 # Magic indicating no need for an executable stack
.section .note.GNU-stack, "", @progbits
.previous
#endif