From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- security/nss/lib/freebl/mpi/montmulf.s | 1938 ++++++++++++++++++++++++++++++++ 1 file changed, 1938 insertions(+) create mode 100644 security/nss/lib/freebl/mpi/montmulf.s (limited to 'security/nss/lib/freebl/mpi/montmulf.s') diff --git a/security/nss/lib/freebl/mpi/montmulf.s b/security/nss/lib/freebl/mpi/montmulf.s new file mode 100644 index 0000000000..69d2a3c51b --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.s @@ -0,0 +1,1938 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .section ".text",#alloc,#execinstr + .file "montmulf.c" + + .section ".data",#alloc,#write + .align 8 +TwoTo16: /* frequency 1.0 confidence 0.0 */ + .word 1089470464 + .word 0 + .type TwoTo16,#object + .size TwoTo16,8 +TwoToMinus16: /* frequency 1.0 confidence 0.0 */ + .word 1055916032 + .word 0 + .type TwoToMinus16,#object + .size TwoToMinus16,8 +Zero: /* frequency 1.0 confidence 0.0 */ + .word 0 + .word 0 + .type Zero,#object + .size Zero,8 +TwoTo32: /* frequency 1.0 confidence 0.0 */ + .word 1106247680 + .word 0 + .type TwoTo32,#object + .size TwoTo32,8 +TwoToMinus32: /* frequency 1.0 confidence 0.0 */ + .word 1039138816 + .word 0 + .type TwoToMinus32,#object + .size TwoToMinus32,8 + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE cleanup +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global cleanup + cleanup: /* frequency 1.0 confidence 0.0 */ +! FILE montmulf.c + +! 1 !#define RF_INLINE_MACROS +! 3 !static double TwoTo16=65536.0; +! 4 !static double TwoToMinus16=1.0/65536.0; +! 5 !static double Zero=0.0; +! 
6 !static double TwoTo32=65536.0*65536.0; +! 7 !static double TwoToMinus32=1.0/(65536.0*65536.0); +! 9 !#ifdef RF_INLINE_MACROS +! 11 !double upper32(double); +! 12 !double lower32(double, double); +! 13 !double mod(double, double, double); +! 15 !#else +! 17 !static double upper32(double x) +! 18 !{ +! 19 ! return floor(x*TwoToMinus32); +! 20 !} +! 22 !static double lower32(double x, double y) +! 23 !{ +! 24 ! return x-TwoTo32*floor(x*TwoToMinus32); +! 25 !} +! 27 !static double mod(double x, double oneoverm, double m) +! 28 !{ +! 29 ! return x-m*floor(x*oneoverm); +! 30 !} +! 32 !#endif +! 35 !void cleanup(double *dt, int from, int tlen) +! 36 !{ +! 37 ! int i; +! 38 ! double tmp,tmp1,x,x1; +! 40 ! tmp=tmp1=Zero; + +/* 000000 40 ( 0 1) */ sethi %hi(Zero),%g2 + +! 41 ! /* original code ** +! 42 ! for(i=2*from;i<2*tlen-2;i++) +! 43 ! { +! 44 ! x=dt[i]; +! 45 ! dt[i]=lower32(x,Zero)+tmp1; +! 46 ! tmp1=tmp; +! 47 ! tmp=upper32(x); +! 48 ! } +! 49 ! dt[tlen-2]+=tmp1; +! 50 ! dt[tlen-1]+=tmp; +! 51 ! **end original code ***/ +! 52 ! /* new code ***/ +! 53 ! for(i=2*from;i<2*tlen;i+=2) + +/* 0x0004 53 ( 1 2) */ sll %o2,1,%g3 +/* 0x0008 40 ( 1 4) */ ldd [%g2+%lo(Zero)],%f0 +/* 0x000c ( 1 2) */ add %g2,%lo(Zero),%g2 +/* 0x0010 53 ( 2 3) */ sll %o1,1,%g4 +/* 0x0014 36 ( 3 4) */ sll %o1,4,%g1 +/* 0x0018 40 ( 3 4) */ fmovd %f0,%f4 +/* 0x001c 53 ( 3 4) */ cmp %g4,%g3 +/* 0x0020 ( 3 4) */ bge,pt %icc,.L77000116 ! tprob=0.56 +/* 0x0024 ( 4 5) */ fmovd %f0,%f2 +/* 0x0028 36 ( 4 5) */ add %o0,%g1,%g1 +/* 0x002c ( 4 5) */ sub %g3,1,%g3 + +! 54 ! { +! 55 ! x=dt[i]; + +/* 0x0030 55 ( 5 8) */ ldd [%g1],%f8 + .L900000114: /* frequency 6.4 confidence 0.0 */ +/* 0x0034 ( 0 3) */ fdtox %f8,%f6 + +! 56 ! x1=dt[i+1]; + +/* 0x0038 56 ( 0 3) */ ldd [%g1+8],%f10 + +! 57 ! dt[i]=lower32(x,Zero)+tmp; +! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1; +! 59 ! tmp=upper32(x); +! 60 ! 
tmp1=upper32(x1); + +/* 0x003c 60 ( 0 1) */ add %g4,2,%g4 +/* 0x0040 ( 1 4) */ fdtox %f8,%f8 +/* 0x0044 ( 1 2) */ cmp %g4,%g3 +/* 0x0048 ( 5 6) */ fmovs %f0,%f6 +/* 0x004c ( 7 10) */ fxtod %f6,%f6 +/* 0x0050 ( 8 11) */ fdtox %f10,%f0 +/* 0x0054 57 (10 13) */ faddd %f6,%f2,%f2 +/* 0x0058 (10 11) */ std %f2,[%g1] +/* 0x005c (12 15) */ ldd [%g2],%f2 +/* 0x0060 (14 15) */ fmovs %f2,%f0 +/* 0x0064 (16 19) */ fxtod %f0,%f6 +/* 0x0068 (17 20) */ fdtox %f10,%f0 +/* 0x006c (18 21) */ fitod %f8,%f2 +/* 0x0070 58 (19 22) */ faddd %f6,%f4,%f4 +/* 0x0074 (19 20) */ std %f4,[%g1+8] +/* 0x0078 60 (19 20) */ add %g1,16,%g1 +/* 0x007c (20 23) */ fitod %f0,%f4 +/* 0x0080 (20 23) */ ldd [%g2],%f0 +/* 0x0084 (20 21) */ ble,a,pt %icc,.L900000114 ! tprob=0.86 +/* 0x0088 (21 24) */ ldd [%g1],%f8 + .L77000116: /* frequency 1.0 confidence 0.0 */ +/* 0x008c ( 0 2) */ retl ! Result = +/* 0x0090 ( 1 2) */ nop +/* 0x0094 0 ( 0 0) */ .type cleanup,2 +/* 0x0094 ( 0 0) */ .size cleanup,(.-cleanup) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_d16_to_i32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_d16_to_i32 + conv_d16_to_i32: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-136,%sp + +! 61 ! } +! 62 ! /** end new code **/ +! 63 !} +! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) +! 67 !{ +! 68 !int i; +! 69 !long long t, t1, a, b, c, d; +! 71 ! t1=0; +! 72 ! a=(long long)d16[0]; + +/* 0x0004 72 ( 1 4) */ ldd [%i1],%f0 + +! 73 ! b=(long long)d16[1]; +! 74 ! for(i=0; i>32); +! 79 ! d=(long long)d16[2*i+3]; +! 80 ! t1+=(b&0xffff)<<16; + +/* 0x0070 80 (15 16) */ and %g1,%o1,%o0 + +! 81 ! t+=(b>>16)+(t1>>32); +! 82 ! i32[i]=t1&0xffffffff; +! 83 ! t1=t; +! 84 ! a=c; +! 85 ! 
b=d; + +/* 0x0074 85 (15 16) */ add %g2,16,%g2 +/* 0x0078 80 (16 17) */ sllx %o0,16,%g3 +/* 0x007c 77 (16 17) */ and %g4,%o3,%o0 +/* 0x0080 76 (17 20) */ fdtox %f0,%f0 +/* 0x0084 (17 18) */ std %f0,[%sp+104] +/* 0x0088 74 (17 18) */ add %o0,%g3,%o4 +/* 0x008c 79 (18 21) */ ldd [%g2+8],%f2 +/* 0x0090 81 (18 19) */ srax %g1,16,%o0 +/* 0x0094 82 (18 19) */ and %o4,%o3,%o7 +/* 0x0098 81 (19 20) */ stx %o0,[%sp+112] +/* 0x009c (19 20) */ srax %o4,32,%o0 +/* 0x00a0 85 (19 20) */ add %g5,4,%o5 +/* 0x00a4 81 (20 21) */ stx %o0,[%sp+120] +/* 0x00a8 78 (20 21) */ srax %g4,32,%o4 +/* 0x00ac 79 (20 23) */ fdtox %f2,%f0 +/* 0x00b0 (21 22) */ std %f0,[%sp+96] +/* 0x00b4 81 (22 24) */ ldx [%sp+112],%o0 +/* 0x00b8 (23 25) */ ldx [%sp+120],%g4 +/* 0x00bc 76 (25 27) */ ldx [%sp+104],%g3 +/* 0x00c0 81 (25 26) */ add %o0,%g4,%g4 +/* 0x00c4 79 (26 28) */ ldx [%sp+96],%g1 +/* 0x00c8 81 (26 27) */ add %o4,%g4,%o4 +/* 0x00cc 82 (27 28) */ st %o7,[%g5] +/* 0x00d0 (27 28) */ or %g0,1,%o7 +/* 0x00d4 84 (27 28) */ or %g0,%g3,%g4 + .L900000209: /* frequency 64.0 confidence 0.0 */ +/* 0x00d8 76 (17 19) */ ldd [%g2+16],%f0 +/* 0x00dc 85 (17 18) */ add %o7,1,%o7 +/* 0x00e0 (17 18) */ add %o5,4,%o5 +/* 0x00e4 (18 18) */ cmp %o7,%o2 +/* 0x00e8 (18 19) */ add %g2,16,%g2 +/* 0x00ec 76 (19 22) */ fdtox %f0,%f0 +/* 0x00f0 (20 21) */ std %f0,[%sp+104] +/* 0x00f4 79 (21 23) */ ldd [%g2+8],%f0 +/* 0x00f8 (23 26) */ fdtox %f0,%f0 +/* 0x00fc (24 25) */ std %f0,[%sp+96] +/* 0x0100 80 (25 26) */ and %g1,%o1,%g3 +/* 0x0104 (26 27) */ sllx %g3,16,%g3 +/* 0x0108 ( 0 0) */ stx %g3,[%sp+120] +/* 0x010c 77 (26 27) */ and %g4,%o3,%g3 +/* 0x0110 74 ( 0 0) */ stx %o7,[%sp+128] +/* 0x0114 ( 0 0) */ ldx [%sp+120],%o7 +/* 0x0118 (27 27) */ add %g3,%o7,%g3 +/* 0x011c ( 0 0) */ ldx [%sp+128],%o7 +/* 0x0120 81 (28 29) */ srax %g1,16,%g1 +/* 0x0124 74 (28 28) */ add %g3,%o4,%g3 +/* 0x0128 81 (29 30) */ srax %g3,32,%o4 +/* 0x012c ( 0 0) */ stx %o4,[%sp+112] +/* 0x0130 78 (30 31) */ srax %g4,32,%o4 +/* 0x0134 81 ( 0 0) */ ldx 
[%sp+112],%g4 +/* 0x0138 (30 31) */ add %g1,%g4,%g4 +/* 0x013c 79 (31 33) */ ldx [%sp+96],%g1 +/* 0x0140 81 (31 32) */ add %o4,%g4,%o4 +/* 0x0144 82 (32 33) */ and %g3,%o3,%g3 +/* 0x0148 84 ( 0 0) */ ldx [%sp+104],%g4 +/* 0x014c 85 (33 34) */ ble,pt %icc,.L900000209 ! tprob=0.50 +/* 0x0150 (33 34) */ st %g3,[%o5-4] + .L900000212: /* frequency 8.0 confidence 0.0 */ +/* 0x0154 85 ( 0 1) */ ba .L900000214 ! tprob=1.00 +/* 0x0158 ( 0 1) */ sethi %hi(0xfc00),%g2 + .L77000134: /* frequency 0.7 confidence 0.0 */ + .L900000213: /* frequency 6.4 confidence 0.0 */ +/* 0x015c 77 ( 0 1) */ and %g4,%o3,%o0 +/* 0x0160 80 ( 0 1) */ and %g1,%o1,%g3 +/* 0x0164 76 ( 0 3) */ fdtox %f0,%f0 +/* 0x0168 77 ( 1 2) */ add %o4,%o0,%o0 +/* 0x016c 76 ( 1 2) */ std %f0,[%sp+104] +/* 0x0170 85 ( 1 2) */ add %o7,1,%o7 +/* 0x0174 80 ( 2 3) */ sllx %g3,16,%o4 +/* 0x0178 79 ( 2 5) */ ldd [%g2+24],%f2 +/* 0x017c 85 ( 2 3) */ add %g2,16,%g2 +/* 0x0180 80 ( 3 4) */ add %o0,%o4,%o4 +/* 0x0184 81 ( 3 4) */ stx %o7,[%sp+128] +/* 0x0188 ( 4 5) */ srax %g1,16,%o0 +/* 0x018c ( 4 5) */ stx %o0,[%sp+112] +/* 0x0190 82 ( 4 5) */ and %o4,%o3,%g3 +/* 0x0194 81 ( 5 6) */ srax %o4,32,%o0 +/* 0x0198 ( 5 6) */ stx %o0,[%sp+120] +/* 0x019c 79 ( 5 8) */ fdtox %f2,%f0 +/* 0x01a0 ( 6 7) */ std %f0,[%sp+96] +/* 0x01a4 78 ( 6 7) */ srax %g4,32,%o4 +/* 0x01a8 81 ( 7 9) */ ldx [%sp+120],%o7 +/* 0x01ac ( 8 10) */ ldx [%sp+112],%g4 +/* 0x01b0 76 (10 12) */ ldx [%sp+104],%g1 +/* 0x01b4 81 (10 11) */ add %g4,%o7,%g4 +/* 0x01b8 (11 13) */ ldx [%sp+128],%o7 +/* 0x01bc (11 12) */ add %o4,%g4,%o4 +/* 0x01c0 79 (12 14) */ ldx [%sp+96],%o0 +/* 0x01c4 84 (12 13) */ or %g0,%g1,%g4 +/* 0x01c8 82 (13 14) */ st %g3,[%o5] +/* 0x01cc 85 (13 14) */ add %o5,4,%o5 +/* 0x01d0 (13 14) */ cmp %o7,%o2 +/* 0x01d4 (14 15) */ or %g0,%o0,%g1 +/* 0x01d8 (14 15) */ ble,a,pt %icc,.L900000213 ! tprob=0.86 +/* 0x01dc (14 17) */ ldd [%g2+16],%f0 + .L77000127: /* frequency 1.0 confidence 0.0 */ + +! 86 ! } +! 87 ! t1+=a&0xffffffff; +! 88 ! t=(a>>32); +! 89 ! 
t1+=(b&0xffff)<<16; +! 90 ! i32[i]=t1&0xffffffff; + +/* 0x01e0 90 ( 0 1) */ sethi %hi(0xfc00),%g2 + .L900000214: /* frequency 1.0 confidence 0.0 */ +/* 0x01e4 90 ( 0 1) */ or %g0,-1,%g3 +/* 0x01e8 ( 0 1) */ add %g2,1023,%g2 +/* 0x01ec ( 1 2) */ srl %g3,0,%g3 +/* 0x01f0 ( 1 2) */ and %g1,%g2,%g2 +/* 0x01f4 ( 2 3) */ and %g4,%g3,%g4 +/* 0x01f8 ( 3 4) */ sllx %g2,16,%g2 +/* 0x01fc ( 3 4) */ add %o4,%g4,%g4 +/* 0x0200 ( 4 5) */ add %g4,%g2,%g2 +/* 0x0204 ( 5 6) */ sll %o7,2,%g4 +/* 0x0208 ( 5 6) */ and %g2,%g3,%g2 +/* 0x020c ( 6 7) */ st %g2,[%g5+%g4] +/* 0x0210 ( 7 9) */ ret ! Result = +/* 0x0214 ( 9 10) */ restore %g0,%g0,%g0 +/* 0x0218 0 ( 0 0) */ .type conv_d16_to_i32,2 +/* 0x0218 ( 0 0) */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000301: /* frequency 1.0 confidence 0.0 */ +/* 000000 0 ( 0 0) */ .word 1127219200,0 +/* 0x0008 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_i32_to_d32 + conv_i32_to_d32: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ orcc %g0,%o2,%g1 + +! 92 !} +! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 95 !{ +! 96 !int i; +! 98 !#pragma pipeloop(0) +! 99 ! for(i=0;i>16); + +/* 0x0018 113 ( 3 4) */ sethi %hi(.L_const_seg_900000401),%o0 +/* 0x001c ( 3 4) */ add %o5,1,%g3 +/* 0x0020 ( 4 5) */ add %g2,1023,%o4 +/* 0x0024 109 ( 4 5) */ or %g0,0,%g1 +/* 0x0028 ( 5 6) */ cmp %g3,3 +/* 0x002c ( 5 6) */ or %g0,%i1,%o7 +/* 0x0030 ( 6 7) */ add %o0,%lo(.L_const_seg_900000401),%o3 +/* 0x0034 ( 6 7) */ or %g0,%i0,%g2 +/* 0x0038 ( 6 7) */ bl,pn %icc,.L77000154 ! 
tprob=0.44 +/* 0x003c ( 7 8) */ add %o7,4,%o0 +/* 0x0040 112 ( 7 10) */ ldd [%o3],%f0 +/* 0x0044 113 ( 7 8) */ or %g0,1,%g1 +/* 0x0048 111 ( 8 11) */ ld [%o0-4],%o1 +/* 0x004c 0 ( 8 9) */ or %g0,%o0,%o7 +/* 0x0050 112 (10 11) */ and %o1,%o4,%o0 + .L900000406: /* frequency 64.0 confidence 0.0 */ +/* 0x0054 112 (22 23) */ st %o0,[%sp+96] +/* 0x0058 113 (22 23) */ add %g1,1,%g1 +/* 0x005c (22 23) */ add %g2,16,%g2 +/* 0x0060 (23 23) */ cmp %g1,%o5 +/* 0x0064 (23 24) */ add %o7,4,%o7 +/* 0x0068 112 (29 31) */ ld [%sp+96],%f3 +/* 0x006c ( 0 0) */ fmovs %f0,%f2 +/* 0x0070 (31 34) */ fsubd %f2,%f0,%f2 +/* 0x0074 113 (32 33) */ srl %o1,16,%o0 +/* 0x0078 112 (32 33) */ std %f2,[%g2-16] +/* 0x007c 113 (33 34) */ st %o0,[%sp+92] +/* 0x0080 (40 42) */ ld [%sp+92],%f3 +/* 0x0084 111 (41 43) */ ld [%o7-4],%o1 +/* 0x0088 113 ( 0 0) */ fmovs %f0,%f2 +/* 0x008c (42 45) */ fsubd %f2,%f0,%f2 +/* 0x0090 112 (43 44) */ and %o1,%o4,%o0 +/* 0x0094 113 (43 44) */ ble,pt %icc,.L900000406 ! tprob=0.50 +/* 0x0098 (43 44) */ std %f2,[%g2-8] + .L900000409: /* frequency 8.0 confidence 0.0 */ +/* 0x009c 112 ( 0 1) */ st %o0,[%sp+96] +/* 0x00a0 ( 0 1) */ fmovs %f0,%f2 +/* 0x00a4 113 ( 0 1) */ add %g2,16,%g2 +/* 0x00a8 ( 1 2) */ srl %o1,16,%o0 +/* 0x00ac 112 ( 4 7) */ ld [%sp+96],%f3 +/* 0x00b0 ( 6 9) */ fsubd %f2,%f0,%f2 +/* 0x00b4 ( 6 7) */ std %f2,[%g2-16] +/* 0x00b8 113 ( 7 8) */ st %o0,[%sp+92] +/* 0x00bc (10 11) */ fmovs %f0,%f2 +/* 0x00c0 (11 14) */ ld [%sp+92],%f3 +/* 0x00c4 (13 16) */ fsubd %f2,%f0,%f0 +/* 0x00c8 (13 14) */ std %f0,[%g2-8] +/* 0x00cc (14 16) */ ret ! 
Result = +/* 0x00d0 (16 17) */ restore %g0,%g0,%g0 + .L77000154: /* frequency 0.7 confidence 0.0 */ +/* 0x00d4 111 ( 0 3) */ ld [%o7],%o0 + .L900000410: /* frequency 6.4 confidence 0.0 */ +/* 0x00d8 112 ( 0 1) */ and %o0,%o4,%o1 +/* 0x00dc ( 0 1) */ st %o1,[%sp+96] +/* 0x00e0 113 ( 0 1) */ add %g1,1,%g1 +/* 0x00e4 112 ( 1 4) */ ldd [%o3],%f0 +/* 0x00e8 113 ( 1 2) */ srl %o0,16,%o0 +/* 0x00ec ( 1 2) */ add %o7,4,%o7 +/* 0x00f0 ( 2 3) */ cmp %g1,%o5 +/* 0x00f4 112 ( 3 4) */ fmovs %f0,%f2 +/* 0x00f8 ( 4 7) */ ld [%sp+96],%f3 +/* 0x00fc ( 6 9) */ fsubd %f2,%f0,%f2 +/* 0x0100 ( 6 7) */ std %f2,[%g2] +/* 0x0104 113 ( 7 8) */ st %o0,[%sp+92] +/* 0x0108 (10 11) */ fmovs %f0,%f2 +/* 0x010c (11 14) */ ld [%sp+92],%f3 +/* 0x0110 (13 16) */ fsubd %f2,%f0,%f0 +/* 0x0114 (13 14) */ std %f0,[%g2+8] +/* 0x0118 (13 14) */ add %g2,16,%g2 +/* 0x011c (13 14) */ ble,a,pt %icc,.L900000410 ! tprob=0.86 +/* 0x0120 (14 17) */ ld [%o7],%o0 + .L77000150: /* frequency 1.0 confidence 0.0 */ +/* 0x0124 ( 0 2) */ ret ! Result = +/* 0x0128 ( 2 3) */ restore %g0,%g0,%g0 +/* 0x012c 0 ( 0 0) */ .type conv_i32_to_d16,2 +/* 0x012c ( 0 0) */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000501: /* frequency 1.0 confidence 0.0 */ +/* 000000 0 ( 0 0) */ .word 1127219200,0 +/* 0x0008 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-104,%sp +/* 0x0004 ( 1 2) */ or %g0,%i3,%i4 +/* 0x0008 ( 1 2) */ or %g0,%i2,%g1 + +! 114 ! } +! 115 !} +! 118 !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/, +! 119 ! double * /* 0 */, +! 120 ! double * /*result16*/, double * /* result32 */, +! 121 ! float * /*source - should be unsigned int* +! 122 ! converted to float* */); +! 
126 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 127 ! unsigned int *i32, int len) +! 128 !{ +! 129 !int i; +! 130 !unsigned int a; +! 132 !#pragma pipeloop(0) +! 133 ! for(i=0;i>16); + +/* 0x0128 143 ( 0 1) */ sethi %hi(.L_const_seg_900000501),%o1 +/* 0x012c 138 ( 1 2) */ sethi %hi(0xfc00),%o0 +/* 0x0130 141 ( 1 4) */ ldd [%o1+%lo(.L_const_seg_900000501)],%f0 +/* 0x0134 138 ( 1 2) */ sub %i4,%o7,%g3 +/* 0x0138 ( 2 3) */ sll %o7,2,%g2 +/* 0x013c ( 2 3) */ add %o0,1023,%o3 +/* 0x0140 ( 3 4) */ sll %o7,3,%g4 +/* 0x0144 ( 3 4) */ cmp %g3,3 +/* 0x0148 ( 4 5) */ add %g1,%g2,%o0 +/* 0x014c ( 4 5) */ add %o1,%lo(.L_const_seg_900000501),%o2 +/* 0x0150 ( 5 6) */ add %i3,%g4,%o4 +/* 0x0154 ( 5 6) */ sub %i4,1,%o1 +/* 0x0158 ( 6 7) */ sll %o7,4,%g5 +/* 0x015c ( 6 7) */ bl,pn %icc,.L77000161 ! tprob=0.44 +/* 0x0160 ( 7 8) */ add %i1,%g5,%o5 +/* 0x0164 141 ( 7 10) */ ld [%g1+%g2],%f3 +/* 0x0168 143 ( 7 8) */ add %o4,8,%o4 +/* 0x016c 140 ( 8 11) */ ld [%g1+%g2],%g1 +/* 0x0170 143 ( 8 9) */ add %o5,16,%o5 +/* 0x0174 ( 8 9) */ add %o7,1,%o7 +/* 0x0178 141 ( 9 10) */ fmovs %f0,%f2 +/* 0x017c 143 ( 9 10) */ add %o0,4,%o0 +/* 0x0180 142 (10 11) */ and %g1,%o3,%g2 +/* 0x0184 141 (11 14) */ fsubd %f2,%f0,%f2 +/* 0x0188 (11 12) */ std %f2,[%o4-8] +/* 0x018c 143 (11 12) */ srl %g1,16,%g1 +/* 0x0190 142 (12 13) */ st %g2,[%sp+96] +/* 0x0194 (15 16) */ fmovs %f0,%f2 +/* 0x0198 (16 19) */ ld [%sp+96],%f3 +/* 0x019c (18 21) */ fsubd %f2,%f0,%f2 +/* 0x01a0 (18 19) */ std %f2,[%o5-16] +/* 0x01a4 143 (19 20) */ st %g1,[%sp+92] +/* 0x01a8 (22 23) */ fmovs %f0,%f2 +/* 0x01ac (23 26) */ ld [%sp+92],%f3 +/* 0x01b0 (25 28) */ fsubd %f2,%f0,%f2 +/* 0x01b4 (25 26) */ std %f2,[%o5-8] + .L900000509: /* frequency 64.0 confidence 0.0 */ +/* 0x01b8 141 (26 28) */ ld [%o0],%f3 +/* 0x01bc 143 (26 27) */ add %o7,2,%o7 +/* 0x01c0 (26 27) */ add %o5,32,%o5 +/* 0x01c4 140 (27 29) */ ld [%o0],%g1 +/* 0x01c8 143 (27 27) */ cmp %o7,%o1 +/* 0x01cc (27 28) */ add %o4,16,%o4 +/* 0x01d0 141 ( 0 0) */ 
fmovs %f0,%f2 +/* 0x01d4 (28 31) */ fsubd %f2,%f0,%f2 +/* 0x01d8 (29 30) */ std %f2,[%o4-16] +/* 0x01dc 142 (29 30) */ and %g1,%o3,%g2 +/* 0x01e0 (30 31) */ st %g2,[%sp+96] +/* 0x01e4 (37 39) */ ld [%sp+96],%f3 +/* 0x01e8 ( 0 0) */ fmovs %f0,%f2 +/* 0x01ec (39 42) */ fsubd %f2,%f0,%f2 +/* 0x01f0 143 (40 41) */ srl %g1,16,%g1 +/* 0x01f4 142 (40 41) */ std %f2,[%o5-32] +/* 0x01f8 143 (41 42) */ st %g1,[%sp+92] +/* 0x01fc (48 50) */ ld [%sp+92],%f3 +/* 0x0200 ( 0 0) */ fmovs %f0,%f2 +/* 0x0204 (50 53) */ fsubd %f2,%f0,%f2 +/* 0x0208 (51 52) */ std %f2,[%o5-24] +/* 0x020c (51 52) */ add %o0,4,%o0 +/* 0x0210 141 (52 54) */ ld [%o0],%f3 +/* 0x0214 140 (53 55) */ ld [%o0],%g1 +/* 0x0218 141 ( 0 0) */ fmovs %f0,%f2 +/* 0x021c (54 57) */ fsubd %f2,%f0,%f2 +/* 0x0220 (55 56) */ std %f2,[%o4-8] +/* 0x0224 142 (55 56) */ and %g1,%o3,%g2 +/* 0x0228 (56 57) */ st %g2,[%sp+96] +/* 0x022c (63 65) */ ld [%sp+96],%f3 +/* 0x0230 ( 0 0) */ fmovs %f0,%f2 +/* 0x0234 (65 68) */ fsubd %f2,%f0,%f2 +/* 0x0238 143 (66 67) */ srl %g1,16,%g1 +/* 0x023c 142 (66 67) */ std %f2,[%o5-16] +/* 0x0240 143 (67 68) */ st %g1,[%sp+92] +/* 0x0244 (74 76) */ ld [%sp+92],%f3 +/* 0x0248 ( 0 0) */ fmovs %f0,%f2 +/* 0x024c (76 79) */ fsubd %f2,%f0,%f2 +/* 0x0250 (77 78) */ std %f2,[%o5-8] +/* 0x0254 (77 78) */ bl,pt %icc,.L900000509 ! tprob=0.50 +/* 0x0258 (77 78) */ add %o0,4,%o0 + .L900000512: /* frequency 8.0 confidence 0.0 */ +/* 0x025c 143 ( 0 1) */ cmp %o7,%i4 +/* 0x0260 ( 0 1) */ bge,pn %icc,.L77000164 ! 
tprob=0.14 +/* 0x0264 ( 0 1) */ nop + .L77000161: /* frequency 0.7 confidence 0.0 */ +/* 0x0268 141 ( 0 3) */ ld [%o0],%f3 + .L900000513: /* frequency 6.4 confidence 0.0 */ +/* 0x026c 141 ( 0 3) */ ldd [%o2],%f0 +/* 0x0270 143 ( 0 1) */ add %o7,1,%o7 +/* 0x0274 140 ( 1 4) */ ld [%o0],%o1 +/* 0x0278 143 ( 1 2) */ add %o0,4,%o0 +/* 0x027c ( 1 2) */ cmp %o7,%i4 +/* 0x0280 141 ( 2 3) */ fmovs %f0,%f2 +/* 0x0284 142 ( 3 4) */ and %o1,%o3,%g1 +/* 0x0288 141 ( 4 7) */ fsubd %f2,%f0,%f2 +/* 0x028c ( 4 5) */ std %f2,[%o4] +/* 0x0290 143 ( 4 5) */ srl %o1,16,%o1 +/* 0x0294 142 ( 5 6) */ st %g1,[%sp+96] +/* 0x0298 143 ( 5 6) */ add %o4,8,%o4 +/* 0x029c 142 ( 8 9) */ fmovs %f0,%f2 +/* 0x02a0 ( 9 12) */ ld [%sp+96],%f3 +/* 0x02a4 (11 14) */ fsubd %f2,%f0,%f2 +/* 0x02a8 (11 12) */ std %f2,[%o5] +/* 0x02ac 143 (12 13) */ st %o1,[%sp+92] +/* 0x02b0 (15 16) */ fmovs %f0,%f2 +/* 0x02b4 (16 19) */ ld [%sp+92],%f3 +/* 0x02b8 (18 21) */ fsubd %f2,%f0,%f0 +/* 0x02bc (18 19) */ std %f0,[%o5+8] +/* 0x02c0 (18 19) */ add %o5,16,%o5 +/* 0x02c4 (18 19) */ bl,a,pt %icc,.L900000513 ! tprob=0.86 +/* 0x02c8 (19 22) */ ld [%o0],%f3 + .L77000164: /* frequency 1.0 confidence 0.0 */ +/* 0x02cc ( 0 2) */ ret ! Result = +/* 0x02d0 ( 2 3) */ restore %g0,%g0,%g0 +/* 0x02d4 0 ( 0 0) */ .type conv_i32_to_d32_and_d16,2 +/* 0x02d4 ( 0 0) */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global adjust_montf_result + adjust_montf_result: /* frequency 1.0 confidence 0.0 */ + +! 144 ! } +! 145 !} +! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 149 !{ +! 150 !long long acc; +! 151 !int i; +! 153 ! 
if(i32[len]>0) i=-1;
+
+! Compare phase of adjust_montf_result(i32, nint, len).
+! Registers, as used by the code below:
+!   %o0 = i32   %o1 = nint   %o2 = len
+!   %g3 = i     (-1 means "subtract unconditionally")
+!   %o3 = copy of nint, set in the first branch delay slot
+! Exits: falls through to .L77000198 when the subtraction is needed,
+! branches to .L77000191 when i32 <= nint at the deciding word.
+
+/* 000000 153 ( 0 1) */ sll %o2,2,%g1
+/* 0x0004 ( 0 1) */ or %g0,-1,%g3
+/* 0x0008 ( 1 4) */ ld [%o0+%g1],%g1
+/* 0x000c ( 3 4) */ cmp %g1,0
+/* 0x0010 ( 3 4) */ bleu,pn %icc,.L77000175 ! tprob=0.50
+/* 0x0014 ( 3 4) */ or %g0,%o1,%o3
+/* 0x0018 ( 4 5) */ ba .L900000611 ! tprob=1.00
+/* 0x001c ( 4 5) */ cmp %g3,0
+ .L77000175: /* frequency 0.8 confidence 0.0 */
+
+! 154 ! else
+! 155 ! {
+! 156 ! for(i=len-1; i>=0; i--)
+
+! FIX: the listing this was generated from read "i++", and the loop
+! stepped the index and both pointers upward. After an equal top word
+! it therefore compared i32[len]/nint[len] and beyond instead of the
+! next lower word. The scan now walks from word len-1 down to word 0.
+
+/* 0x0020 156 ( 0 1) */ subcc %o2,1,%g3
+/* 0x0024 ( 0 1) */ bneg,pt %icc,.L900000611 ! tprob=0.60
+/* 0x0028 ( 1 2) */ cmp %g3,0
+/* 0x002c ( 1 2) */ sll %g3,2,%g1
+/* 0x0030 ( 2 3) */ add %o0,%g1,%g2
+/* 0x0034 ( 2 3) */ add %o1,%g1,%g1
+
+! 157 ! {
+! 158 ! if(i32[i]!=nint[i]) break;
+
+! %g1 -> nint[i], %g2 -> i32[i]. %g3 is only updated after the bne,
+! so on a mismatch it still holds the index of the differing word.
+
+/* 0x0038 158 ( 3 6) */ ld [%g1],%g5
+ .L900000610: /* frequency 5.3 confidence 0.0 */
+/* 0x003c 158 ( 0 3) */ ld [%g2],%o5
+/* 0x0040 ( 0 1) */ sub %g1,4,%g1
+/* 0x0044 ( 0 1) */ sub %g2,4,%g2
+/* 0x0048 ( 2 3) */ cmp %o5,%g5
+/* 0x004c ( 2 3) */ bne,pn %icc,.L77000182 ! tprob=0.16
+/* 0x0050 ( 2 3) */ nop
+/* 0x0054 ( 3 4) */ subcc %g3,1,%g3
+/* 0x0058 ( 3 4) */ bpos,a,pt %icc,.L900000610 ! tprob=0.84
+/* 0x005c ( 3 6) */ ld [%g1],%g5
+ .L77000182: /* frequency 1.0 confidence 0.0 */
+
+! The delay-slot load above is annulled when the loop ends (i == -1),
+! so the pointer that has stepped below word 0 is never dereferenced.
+
+! 159 ! }
+! 160 ! }
+! 161 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0060 161 ( 0 1) */ cmp %g3,0
+ .L900000611: /* frequency 1.0 confidence 0.0 */
+/* 0x0064 161 ( 0 1) */ bl,pn %icc,.L77000198 ! tprob=0.50
+/* 0x0068 ( 0 1) */ sll %g3,2,%g2
+/* 0x006c ( 1 4) */ ld [%o1+%g2],%g1
+/* 0x0070 ( 2 5) */ ld [%o0+%g2],%g2
+/* 0x0074 ( 4 5) */ cmp %g2,%g1
+/* 0x0078 ( 4 5) */ bleu,pt %icc,.L77000191 ! tprob=0.56
+/* 0x007c ( 4 5) */ nop
+ .L77000198: /* frequency 0.8 confidence 0.0 */
+
+! 162 ! {
+! 163 ! acc=0;
+! 164 ! 
for(i=0;i>32; + +/* 0x00c4 168 ( 6 7) */ or %g0,2,%o5 +/* 0x00c8 166 ( 7 10) */ ld [%o0+4],%g1 +/* 0x00cc 164 ( 8 9) */ sub %o2,%o1,%o2 +/* 0x00d0 ( 9 10) */ or %g0,%o2,%g5 +/* 0x00d4 167 ( 9 10) */ and %o2,%g3,%o2 +/* 0x00d8 ( 9 10) */ st %o2,[%o0] +/* 0x00dc 168 (10 11) */ srax %g5,32,%g5 + .L900000605: /* frequency 64.0 confidence 0.0 */ +/* 0x00e0 166 (12 20) */ ld [%o3],%o2 +/* 0x00e4 168 (12 13) */ add %o5,1,%o5 +/* 0x00e8 (12 13) */ add %o3,4,%o3 +/* 0x00ec (13 13) */ cmp %o5,%g4 +/* 0x00f0 (13 14) */ add %o4,4,%o4 +/* 0x00f4 164 (14 14) */ sub %g1,%o2,%g1 +/* 0x00f8 (15 15) */ add %g1,%g5,%g5 +/* 0x00fc 167 (16 17) */ and %g5,%g3,%o2 +/* 0x0100 166 (16 24) */ ld [%o4-4],%g1 +/* 0x0104 167 (17 18) */ st %o2,[%o4-8] +/* 0x0108 168 (17 18) */ ble,pt %icc,.L900000605 ! tprob=0.50 +/* 0x010c (17 18) */ srax %g5,32,%g5 + .L900000608: /* frequency 8.0 confidence 0.0 */ +/* 0x0110 166 ( 0 3) */ ld [%o3],%g2 +/* 0x0114 164 ( 2 3) */ sub %g1,%g2,%g1 +/* 0x0118 ( 3 4) */ add %g1,%g5,%g1 +/* 0x011c 167 ( 4 5) */ and %g1,%g3,%g2 +/* 0x0120 ( 5 7) */ retl ! Result = +/* 0x0124 ( 6 7) */ st %g2,[%o4-4] + .L77000199: /* frequency 0.6 confidence 0.0 */ +/* 0x0128 166 ( 0 3) */ ld [%o4],%g1 + .L900000609: /* frequency 5.3 confidence 0.0 */ +/* 0x012c 166 ( 0 3) */ ld [%o3],%g2 +/* 0x0130 ( 0 1) */ add %g5,%g1,%g1 +/* 0x0134 168 ( 0 1) */ add %o5,1,%o5 +/* 0x0138 ( 1 2) */ add %o3,4,%o3 +/* 0x013c ( 1 2) */ cmp %o5,%g4 +/* 0x0140 166 ( 2 3) */ sub %g1,%g2,%g1 +/* 0x0144 167 ( 3 4) */ and %g1,%g3,%g2 +/* 0x0148 ( 3 4) */ st %g2,[%o4] +/* 0x014c 168 ( 3 4) */ add %o4,4,%o4 +/* 0x0150 ( 4 5) */ srax %g1,32,%g5 +/* 0x0154 ( 4 5) */ ble,a,pt %icc,.L900000609 ! tprob=0.84 +/* 0x0158 ( 4 7) */ ld [%o4],%g1 + .L77000191: /* frequency 1.0 confidence 0.0 */ +/* 0x015c ( 0 2) */ retl ! 
Result = +/* 0x0160 ( 1 2) */ nop +/* 0x0164 0 ( 0 0) */ .type adjust_montf_result,2 +/* 0x0164 ( 0 0) */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 32 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global mont_mulf_noconv + mont_mulf_noconv: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-144,%sp +/* 0x0004 ( 1 2) */ st %i0,[%fp+68] + +! 169 ! } +! 170 ! } +! 171 !} +! 175 !void cleanup(double *dt, int from, int tlen); +! 177 !/* +! 178 !** the lengths of the input arrays should be at least the following: +! 179 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 180 !** all of them should be different from one another +! 181 !** +! 182 !*/ +! 183 !void mont_mulf_noconv(unsigned int *result, +! 184 ! double *dm1, double *dm2, double *dt, +! 185 ! double *dn, unsigned int *nint, +! 186 ! int nlen, double dn0) +! 187 !{ +! 188 ! int i, j, jj; +! 189 ! int tmp; +! 190 ! double digit, m2j, nextm2j, a, b; +! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 193 ! pdm1=&(dm1[0]); +! 194 ! pdm2=&(dm2[0]); +! 195 ! pdn=&(dn[0]); +! 196 ! pdm2[2*nlen]=Zero; + +/* 0x0008 196 ( 1 2) */ sethi %hi(Zero),%g2 +/* 0x000c 187 ( 1 2) */ or %g0,%i2,%o1 +/* 0x0010 ( 2 3) */ st %i5,[%fp+88] +/* 0x0014 ( 2 3) */ or %g0,%i3,%o2 +/* 0x0018 196 ( 2 3) */ add %g2,%lo(Zero),%g4 +/* 0x001c ( 3 6) */ ldd [%g2+%lo(Zero)],%f2 +/* 0x0020 187 ( 3 4) */ or %g0,%o2,%g5 +/* 0x0024 196 ( 3 4) */ or %g0,%o1,%i0 +/* 0x0028 187 ( 4 5) */ or %g0,%i4,%i2 + +! 198 ! if (nlen!=16) +! 199 ! { +! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 202 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); +! 205 ! pdtj=&(dt[0]); +! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 207 ! { +! 208 ! m2j=pdm2[j]; +! 209 ! a=pdtj[0]+pdn[0]*digit; +! 210 ! 
b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 211 ! pdtj[1]=b; +! 213 !#pragma pipeloop(0) +! 214 ! for(i=1;i