diff options
Diffstat (limited to 'security/nss/lib/freebl/mpi/vis_32.il')
-rw-r--r-- | security/nss/lib/freebl/mpi/vis_32.il | 1291 |
1 files changed, 1291 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/vis_32.il b/security/nss/lib/freebl/mpi/vis_32.il new file mode 100644 index 0000000000..d2e8024ac2 --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_32.il @@ -0,0 +1,1291 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! The interface to the VIS instructions as declared below (and in the VIS +! User's Manual) will not change, but the macro implementation might change +! in the future. + +!-------------------------------------------------------------------- +! Pure edge handling instructions +! +! int vis_edge8(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8,8 + edge8 %o0,%o1,%o0 + .end +! +! int vis_edge8l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8l,8 + edge8l %o0,%o1,%o0 + .end +! +! int vis_edge16(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16,8 + edge16 %o0,%o1,%o0 + .end +! +! int vis_edge16l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16l,8 + edge16l %o0,%o1,%o0 + .end +! +! int vis_edge32(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32,8 + edge32 %o0,%o1,%o0 + .end +! +! int vis_edge32l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32l,8 + edge32l %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Edge handling instructions with negative return values if cc set +! +! int vis_edge8cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8cc,8 + edge8 %o0,%o1,%o0 + mov 0,%o1 ! %o1 = 0: no flag bits by default + movgu %icc,-1024,%o1 ! %o1 = -1024 when %icc satisfies "greater unsigned" + or %o1,%o0,%o0 ! OR the flag into the edge8 result, making it negative + .end +! +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8lcc,8 + edge8l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16cc,8 + edge16 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! 
int vis_edge16lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16lcc,8 + edge16l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32cc,8 + edge32 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32lcc,8 + edge32l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end + +!-------------------------------------------------------------------- +! Alignment instructions +! +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddr,8 + alignaddr %o0,%o1,%o0 + .end +! +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddrl,8 + alignaddrl %o0,%o1,%o0 + .end +! +! double vis_faligndata(double /*frs1*/, double /*frs2*/); +! + .inline vis_faligndata,16 + std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to the stack scratch slot + ldd [%sp+0x48],%f4 ! reload it as the FP pair %f4:%f5 + std %o2,[%sp+0x48] ! same slot reused for frs2 (%o2:%o3) + ldd [%sp+0x48],%f10 ! frs2 -> %f10:%f11 + faligndata %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Partitioned comparison instructions +! +! int vis_fcmple16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmple16 %f4,%f10,%o0 + .end +! +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpne16 %f4,%f10,%o0 + .end +! +! int vis_fcmple32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmple32 %f4,%f10,%o0 + .end +! +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpne32 %f4,%f10,%o0 + .end +! +! 
int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpgt16 %f4,%f10,%o0 + .end +! +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpeq16 %f4,%f10,%o0 + .end +! +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpgt32 %f4,%f10,%o0 + .end +! +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpeq32 %f4,%f10,%o0 + .end + +!-------------------------------------------------------------------- +! Partitioned arithmetic +! +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8x16,12 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + st %o1,[%sp+0x48] ! frs2 sits in the odd pair %o1:%o2, so it is spilled word by word + st %o2,[%sp+0x4c] ! second word of frs2 + ldd [%sp+0x48],%f10 + fmul8x16 %f4,%f10,%f0 + .end +! +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); +! + .inline vis_fmul8x16_dummy,16 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + std %o2,[%sp+0x48] ! the dummy int moves frs2 to the even pair %o2:%o3, so one std suffices + ldd [%sp+0x48],%f10 + fmul8x16 %f4,%f10,%f0 + .end +! +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16au,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmul8x16au %f4,%f10,%f0 + .end +! +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16al,8 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmul8x16al %f4,%f10,%f0 + .end +! +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8sux16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8sux16 %f4,%f10,%f0 + .end +! +! 
double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8ulx16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8ulx16 %f4,%f10,%f0 + .end +! +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8sux16,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmuld8sux16 %f4,%f10,%f0 + .end +! +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8ulx16,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmuld8ulx16 %f4,%f10,%f0 + .end +! +! double vis_fpadd16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd16,16 + std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> scratch slot shared by all templates; 0x40 would overlap the struct-return word + ldd [%sp+0x48],%f4 ! frs1 -> %f4:%f5 + std %o2,[%sp+0x48] ! slot reused for frs2 (%o2:%o3) + ldd [%sp+0x48],%f10 ! frs2 -> %f10:%f11 + fpadd16 %f4,%f10,%f0 + .end +! +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd16s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpadd16s %f4,%f10,%f0 + .end +! +! double vis_fpadd32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpadd32 %f4,%f10,%f0 + .end +! +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd32s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpadd32s %f4,%f10,%f0 + .end +! +! double vis_fpsub16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpsub16 %f4,%f10,%f0 + .end +! +! float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub16s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpsub16s %f4,%f10,%f0 + .end +! +! double vis_fpsub32(double /*frs1*/, double /*frs2*/); +! 
+ .inline vis_fpsub32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpsub32 %f4,%f10,%f0 + .end +! +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub32s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpsub32s %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel packing +! +! float vis_fpack16(double /*frs2*/); +! + .inline vis_fpack16,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f0 + .end + +! +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpack16_pair,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 ! packed first argument -> %f0 + fpack16 %f10,%f1 ! packed second argument -> %f1; the pair %f0:%f1 is the returned double + .end +! +! void vis_st2_fpack16(double, double, double *) +! + .inline vis_st2_fpack16,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + st %f0,[%o4+0] ! third argument (the pointer) arrives in %o4 + st %f1,[%o4+4] ! two word stores instead of one std + .end +! +! void vis_std_fpack16(double, double, double *) +! + .inline vis_std_fpack16,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + std %f0,[%o4] + .end +! +! void vis_st2_fpackfix(double, double, double *) +! + .inline vis_st2_fpackfix,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpackfix %f4,%f0 + fpackfix %f10,%f1 + st %f0,[%o4+0] + st %f1,[%o4+4] + .end +! +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_hi,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f0 ! overwrites %f0 only; %f1 keeps the second word of frs1 + .end + +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); +! 
+ .inline vis_fpack16_to_lo,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f3 ! pack into the temporary %f3 first + fmovs %f3,%f1 /* without this, optimizer goes wrong */ + .end + +! +! double vis_fpack32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack32 %f4,%f10,%f0 + .end +! +! float vis_fpackfix(double /*frs2*/); +! + .inline vis_fpackfix,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpackfix %f4,%f0 + .end +! +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpackfix_pair,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f6 + fpackfix %f4,%f0 + fpackfix %f6,%f1 + .end + +!-------------------------------------------------------------------- +! Motion estimation +! +! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/); +! + .inline vis_pdist,24 + std %o4,[%sp+0x48] ! third argument (frd) arrives in %o4:%o5 + ldd [%sp+0x48],%f0 ! frd is loaded into %f0:%f1, the destination operand of pdist below + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + pdist %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Channel merging +! +! double vis_fpmerge(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpmerge,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpmerge %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel expansion +! +! double vis_fexpand(float /*frs2*/); +! + .inline vis_fexpand,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fexpand %f4,%f0 + .end + +! double vis_fexpand_hi(double /*frs2*/); +! + .inline vis_fexpand_hi,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fexpand %f4,%f0 ! expands %f4, the first word of the loaded pair %f4:%f5 + .end + +! double vis_fexpand_lo(double /*frs2*/); +! 
+ .inline vis_fexpand_lo,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fmovs %f5, %f2 ! copy the second word of the loaded pair %f4:%f5 into %f2 + fexpand %f2,%f0 ! expand that word into the result pair %f0:%f1 + .end + +!-------------------------------------------------------------------- +! Bitwise logical operations +! +! double vis_fnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fnor %f4,%f10,%f0 + .end +! +! float vis_fnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fnors %f4,%f10,%f0 + .end +! +! double vis_fandnot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fandnot,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fandnot1 %f4,%f10,%f0 + .end +! +! float vis_fandnots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fandnots,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fandnot1s %f4,%f10,%f0 + .end +! +! double vis_fnot(double /*frs1*/); +! + .inline vis_fnot,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fnot1 %f4,%f0 + .end +! +! float vis_fnots(float /*frs1*/); +! + .inline vis_fnots,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fnot1s %f4,%f0 + .end +! +! double vis_fxor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fxor %f4,%f10,%f0 + .end +! +! float vis_fxors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fxors %f4,%f10,%f0 + .end +! +! double vis_fnand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnand,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fnand %f4,%f10,%f0 + .end +! +! float vis_fnands(float /*frs1*/, float /*frs2*/); +! 
+ .inline vis_fnands,8 + st %o0,[%sp+0x48] ! frs1 bits travel %o0 -> stack word -> %f4 + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] ! same stack word reused for frs2 once frs1 is in %f4 + ld [%sp+0x48],%f10 + fnands %f4,%f10,%f0 + .end +! +! double vis_fand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fand,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fand %f4,%f10,%f0 + .end +! +! float vis_fands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fands,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fands %f4,%f10,%f0 + .end +! +! double vis_fxnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxnor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fxnor %f4,%f10,%f0 + .end +! +! float vis_fxnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxnors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fxnors %f4,%f10,%f0 + .end +! +! double vis_fsrc(double /*frs1*/); +! + .inline vis_fsrc,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fsrc1 %f4,%f0 + .end +! +! float vis_fsrcs(float /*frs1*/); +! + .inline vis_fsrcs,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fsrc1s %f4,%f0 + .end +! +! double vis_fornot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fornot,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fornot1 %f4,%f10,%f0 + .end +! +! float vis_fornots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fornots,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fornot1s %f4,%f10,%f0 + .end +! +! double vis_for(double /*frs1*/, double /*frs2*/); +! + .inline vis_for,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + for %f4,%f10,%f0 + .end +! +! float vis_fors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fors %f4,%f10,%f0 + .end +! +! double vis_fzero(/* void */) +! 
+ .inline vis_fzero,0 + fzero %f0 + .end +! +! float vis_fzeros(/* void */) +! + .inline vis_fzeros,0 + fzeros %f0 + .end +! +! double vis_fone(/* void */) +! + .inline vis_fone,0 + fone %f0 + .end +! +! float vis_fones(/* void */) +! + .inline vis_fones,0 + fones %f0 + .end + +!-------------------------------------------------------------------- +! Partial store instructions +! +! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8P,16 + std %o0,[%sp+0x48] ! frd arrives in %o0:%o1; rs1 is in %o2 and rmask in %o3 + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8PL,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc8 ! ASI_PST8_PL + .end +! +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask); +! + .inline vis_stdfa_ASI_PST8P_int_pair,16 + ld [%o0],%f4 ! word at rs1 -> first half of the stored pair + ld [%o1],%f5 ! word at rs2 -> second half of the stored pair + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P; store address is rs3 (%o2), mask is rmask (%o3) + .end +! +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc1 ! ASI_PST8_S + .end +! +! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc2 ! ASI_PST16_P + .end +! +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc3 ! ASI_PST16_S + .end +! +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc4 ! ASI_PST32_P + .end +! +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc5 ! ASI_PST32_S + .end + +!-------------------------------------------------------------------- +! Short store instructions +! +! 
vis_stdfa_ASI_FL8P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8P,12 + std %o0,[%sp+0x48] ! frd arrives in %o0:%o1; rs1 is in %o2 + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL8P_index,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+%o3]0xd0 ! ASI_FL8_P; address = rs1 (%o2) + index (%o3) + .end +! +! vis_stdfa_ASI_FL8S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8S,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd1 ! ASI_FL8_S + .end +! +! vis_stdfa_ASI_FL16P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL16P_index,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+%o3]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16S,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd3 ! ASI_FL16_S + .end +! +! vis_stdfa_ASI_FL8PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd8 ! ASI_FL8_PL + .end +! +! vis_stdfa_ASI_FL8SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8SL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd9 ! ASI_FL8_SL + .end +! +! vis_stdfa_ASI_FL16PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xda ! ASI_FL16_PL + .end +! +! vis_stdfa_ASI_FL16SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16SL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xdb ! ASI_FL16_SL + .end + +!-------------------------------------------------------------------- +! Short load instructions +! +! double vis_lddfa_ASI_FL8P(void *rs1) +! + .inline vis_lddfa_ASI_FL8P,4 + ldda [%o0]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! 
double vis_lddfa_ASI_FL8P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8P_index,8 + ldda [%o0+%o1]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_hi,8 + sra %o1,16,%o1 ! offset = index shifted right 16 bits (arithmetic shift) + ldda [%o0+%o1]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_lo,8 + sll %o1,16,%o1 ! discard the upper 16 bits of index + sra %o1,16,%o1 ! bring the lower 16 bits back down, sign-extended + ldda [%o0+%o1]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8S(void *rs1) +! + .inline vis_lddfa_ASI_FL8S,4 + ldda [%o0]0xd1,%f4 ! ASI_FL8_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P(void *rs1) +! + .inline vis_lddfa_ASI_FL16P,4 + ldda [%o0]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16P_index,8 + ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16S(void *rs1) +! + .inline vis_lddfa_ASI_FL16S,4 + ldda [%o0]0xd3,%f4 ! ASI_FL16_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL(void *rs1) +! + .inline vis_lddfa_ASI_FL8PL,4 + ldda [%o0]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8PL_index,8 + ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8SL(void *rs1) +! + .inline vis_lddfa_ASI_FL8SL,4 + ldda [%o0]0xd9,%f4 ! ASI_FL8_SL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL(void *rs1) +! + .inline vis_lddfa_ASI_FL16PL,4 + ldda [%o0]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16PL_index,8 + ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16SL(void *rs1) +! + .inline vis_lddfa_ASI_FL16SL,4 + ldda [%o0]0xdb,%f4 ! 
ASI_FL16_SL + fmovd %f4,%f0 + .end + +!-------------------------------------------------------------------- +! Graphics status register +! +! unsigned int vis_read_gsr(void) +! + .inline vis_read_gsr,0 + rd %gsr,%o0 + .end +! +! void vis_write_gsr(unsigned int /* GSR */) +! + .inline vis_write_gsr,4 + wr %g0,%o0,%gsr + .end + +!-------------------------------------------------------------------- +! Voxel texture mapping +! +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/) +! + .inline vis_array8,12 + sllx %o0,32,%o0 ! move the first word of rs1 (%o0) into bits 63..32 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array8 %o3,%o2,%o0 + .end +! +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array16,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array16 %o3,%o2,%o0 + .end +! +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array32,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array32 %o3,%o2,%o0 + .end + +!-------------------------------------------------------------------- +! Register aliasing and type casts +! +! float vis_read_hi(double /* frs1 */); +! + .inline vis_read_hi,8 + std %o0,[%sp+0x48] ! store double frs1 + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; return %f0; + .end +! +! float vis_read_lo(double /* frs1 */); +! + .inline vis_read_lo,8 + std %o0,[%sp+0x48] ! store double frs1 + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0; + .end +! +! double vis_write_hi(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_hi,12 + std %o0,[%sp+0x48] ! store double frs1; + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + st %o2,[%sp+0x44] ! store float frs2; + ld [%sp+0x44],%f2 ! %f2 = float frs2; + fmovs %f2,%f0 ! 
%f0 = float frs2; return %f0:%f1; + .end +! +! double vis_write_lo(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_lo,12 + std %o0,[%sp+0x48] ! store double frs1; + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + st %o2,[%sp+0x44] ! store float frs2; + ld [%sp+0x44],%f2 ! %f2 = float frs2; + fmovs %f2,%f1 ! %f1 = float frs2; return %f0:%f1; + .end +! +! double vis_freg_pair(float /* frs1 */, float /* frs2 */); +! + .inline vis_freg_pair,8 + st %o0,[%sp+0x48] ! store float frs1 + ld [%sp+0x48],%f0 ! %f0 = float frs1 + st %o1,[%sp+0x48] ! store float frs2 + ld [%sp+0x48],%f1 ! %f1 = float frs2; return %f0:%f1 + .end +! +! float vis_to_float(unsigned int /*value*/); +! + .inline vis_to_float,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f0 + .end +! +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); +! + .inline vis_to_double,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end +! +! double vis_to_double_dup(unsigned int /*value*/); +! + .inline vis_to_double_dup,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f1 + fmovs %f1,%f0 ! duplicate value + .end +! +! double vis_ll_to_double(unsigned long long /*value*/); +! + .inline vis_ll_to_double,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + +!-------------------------------------------------------------------- +! Address space identifier (ASI) register +! +! unsigned int vis_read_asi(void) +! + .inline vis_read_asi,0 + rd %asi,%o0 + .end +! +! void vis_write_asi(unsigned int /* ASI */) +! + .inline vis_write_asi,4 + wr %g0,%o0,%asi + .end + +!-------------------------------------------------------------------- +! Load/store from/into alternate space +! +! float vis_ldfa_ASI_REG(void *rs1) +! + .inline vis_ldfa_ASI_REG,4 + lda [%o0+0]%asi,%f4 ! ASI comes from the %asi register + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_P(void *rs1) +! + .inline vis_ldfa_ASI_P,4 + lda [%o0]0x80,%f4 ! ASI_P + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_PL(void *rs1) +! + .inline vis_ldfa_ASI_PL,4 + lda [%o0]0x88,%f4 ! ASI_PL + fmovs %f4,%f0 ! 
Compiler can clean this up + .end +! +! double vis_lddfa_ASI_REG(void *rs1) +! + .inline vis_lddfa_ASI_REG,4 + ldda [%o0+0]%asi,%f4 ! ASI comes from the %asi register + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_P(void *rs1) +! + .inline vis_lddfa_ASI_P,4 + ldda [%o0]0x80,%f4 ! ASI_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_PL(void *rs1) +! + .inline vis_lddfa_ASI_PL,4 + ldda [%o0]0x88,%f4 ! ASI_PL + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! vis_stfa_ASI_REG(float frs, void *rs1) +! + .inline vis_stfa_ASI_REG,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1+0]%asi ! ASI comes from the %asi register; rs1 is in %o1 + .end +! +! vis_stfa_ASI_P(float frs, void *rs1) +! + .inline vis_stfa_ASI_P,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1]0x80 ! ASI_P + .end +! +! vis_stfa_ASI_PL(float frs, void *rs1) +! + .inline vis_stfa_ASI_PL,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1]0x88 ! ASI_PL + .end +! +! vis_stdfa_ASI_REG(double frd, void *rs1) +! + .inline vis_stdfa_ASI_REG,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+0]%asi ! ASI comes from the %asi register; rs1 is in %o2 + .end +! +! vis_stdfa_ASI_P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0x80 ! ASI_P + .end +! +! vis_stdfa_ASI_PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0x88 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_REG(void *rs1) +! + .inline vis_lduha_ASI_REG,4 + lduha [%o0+0]%asi,%o0 + .end +! +! unsigned short vis_lduha_ASI_P(void *rs1) +! + .inline vis_lduha_ASI_P,4 + lduha [%o0]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL(void *rs1) +! + .inline vis_lduha_ASI_PL,4 + lduha [%o0]0x88,%o0 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index) +! + .inline vis_lduha_ASI_P_index,8 + lduha [%o0+%o1]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index) +! 
+ .inline vis_lduha_ASI_PL_index,8 + lduha [%o0+%o1]0x88,%o0 ! ASI_PL; address = rs1 (%o0) + index (%o1) + .end + +!-------------------------------------------------------------------- +! Prefetch +! +! void vis_prefetch_read(void * /*address*/); +! + .inline vis_prefetch_read,4 + prefetch [%o0+0],0 ! fcn 0: prefetch for several reads (SPARC V9) + .end +! +! void vis_prefetch_write(void * /*address*/); +! + .inline vis_prefetch_write,4 + prefetch [%o0+0],2 ! fcn 2: prefetch for several writes (SPARC V9) + .end |