diff options
Diffstat (limited to '')
-rw-r--r-- | web/server/h2o/libh2o/deps/picotls/deps/cifra/src/arm/unacl/sqr.s | 777 |
1 files changed, 777 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/picotls/deps/cifra/src/arm/unacl/sqr.s b/web/server/h2o/libh2o/deps/picotls/deps/cifra/src/arm/unacl/sqr.s new file mode 100644 index 00000000..3b190c92 --- /dev/null +++ b/web/server/h2o/libh2o/deps/picotls/deps/cifra/src/arm/unacl/sqr.s @@ -0,0 +1,777 @@ + .align 2 + .global square256_asm + .type square256_asm, %function +square256_asm: + push {r4-r7,lr} + mov r2, r8 + mov r3, r9 + mov r4, r10 + mov r5, r11 + push {r0-r5} + + mov r12, r0 + mov r4, r1 + ldm r4!, {r0-r3} + push {r4} + /////////BEGIN LOW PART ////////////////////// + ///SQR 128, in r0-r3 + mov r8, r2 + mov r9, r3 + eor r4, r4 + sub r2, r0 + sbc r3, r1 + sbc r4, r4 + eor r2, r4 + eor r3, r4 + sub r2, r4 + sbc r3, r4 + mov r10, r2 + mov r11, r3 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r7, r7 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r7, r7 + add r1, r0 + adc r2, r3 + adc r7, r3 + mov r3, r12 + stm r3!, {r0-r1} + push {r3} + + mov r12, r0 + mov r0, r8 + mov r8, r1 + mov r1, r9 + mov r9, r2 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r4, r4 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r4, r4 + add r1, r0 + adc r2, r3 + adc r3, r4 + eor r4, r4 + mov r6, r9 + add r0, r6 + adc r7, r1 + adc r2, r4 + adc r3, r4 + mov r1, r11 + mov r11, r0 + mov r0, r10 + mov r9, r2 + mov r10,r3 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r4, r4 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r4, r4 + add r1, r0 + adc r2, r3 + adc r3, r4 + mov r6, r11 + mov r4, r11 + mov r5, r7 + sub r6, r0 + sbc r7, r1 + sbc r4, r2 + sbc r5, r3 + eor r1, r1 + sbc r1, r1 + mov r2, r12 + mov r3, r8 + add r2, r6 + adc r3, r7 + mov r6, r9 + mov r7, r10 + adc r4, r6 + adc r5, r7 + adc r6, r1 + adc r7, r1 + //results r12, r8, r2-r7 + /////////END LOW PART //////////////////////// + pop {r0,r1} + stm r0!, {r2, r3} + push {r0, r4-r7} + ldm r1, {r0-r3} + /////////BEGIN HIGH PART ////////////////////// + ///SQR 128, in r0-r3 + mov r8, r2 + mov r9, r3 + eor r4, r4 + sub r2, r0 + sbc r3, r1 + sbc r4, r4 + eor r2, r4 + eor r3, r4 + sub r2, r4 + sbc r3, r4 + mov r10, r2 + mov r11, r3 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r7, r7 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r7, r7 + add r1, r0 + adc r2, r3 + adc r7, r3 + mov r12, r0 + mov r0, r8 + mov r8, r1 + mov r1, r9 + mov r9, r2 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r4, r4 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r4, r4 + add r1, r0 + adc r2, r3 + adc r3, r4 + eor r4, r4 + mov r6, r9 + add r0, r6 + adc r7, r1 + adc r2, r4 + adc r3, r4 + mov r1, r11 + mov r11, r0 + mov r0, r10 + mov r9, r2 + mov r10,r3 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r4, r4 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r4, r4 + add r1, r0 + adc r2, r3 + adc r3, r4 + mov r6, r11 + mov r4, r11 + mov r5, r7 + sub r6, r0 + sbc r7, r1 + sbc r4, r2 + sbc r5, r3 + eor r1, r1 + sbc r1, r1 + mov r2, r12 + mov r3, r8 + add r2, r6 + adc r3, r7 + mov r6, r9 + mov r7, r10 + adc r4, r6 + adc r5, r7 + adc r6, r1 + adc r7, r1 + //results r12, r8, r2-r7 + /////////END HIGH PART //////////////////////// + mov r0, r12 + mov r1, r8 + mov r8, r4 + mov r9, r5 + mov r10, r6 + mov r11, r7 + pop {r4} + mov r12, r4//str + pop {r4-r7} + add r0, r4 + adc r1, r5 + adc r2, r6 + adc r3, r7 + mov r4, r12 + stm r4!, {r0-r3}//low part + mov r4, r8 + mov r5, r9 + mov r6, r10 + mov r7, r11 + eor r0, r0 + adc r4, r0 + adc r5, r0 + adc r6, r0 + adc r7, r0 + pop {r0, r1} //r0->out, r1, in + push {r0,r4-r7} + ldm r1, {r0-r7} + sub r0, r4 + sbc r1, r5 + sbc r2, r6 + sbc r3, r7 + sbc r4, r4 + eor r0, r4 + eor r1, r4 + eor r2, r4 + eor r3, r4 + sub r0, r4 + sbc r1, r4 + sbc r2, r4 + sbc r3, r4 + //////////BEGIN MIDDLE PART//////////////// + ///SQR 128, in r0-r3 + mov r8, r2 + mov r9, r3 + eor r4, r4 + sub r2, r0 + sbc r3, r1 + sbc r4, r4 + eor r2, r4 + eor r3, r4 + sub r2, r4 + sbc r3, r4 + mov r10, r2 + mov r11, r3 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r7, r7 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r7, r7 + add r1, r0 + adc r2, r3 + adc r7, r3 + mov r12, r0 + mov r0, r8 + mov r8, r1 + mov r1, r9 + mov r9, r2 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r4, r4 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r4, r4 + add r1, r0 + adc r2, r3 + adc r3, r4 + eor r4, r4 + mov r6, r9 + add r0, r6 + adc r7, r1 + adc r2, r4 + adc r3, r4 + mov r1, r11 + mov r11, r0 + mov r0, r10 + mov r9, r2 + mov r10,r3 + //SQR64, in: r0, r1, out: r0-r3, used: r0-r6 + mov r2, r0 + eor r3, r3 + sub r2, r1 + sbc r3, r3 + eor r2, r3 + sub r2, r3 + lsr r3, r0, #16 + uxth r0, r0 + mov r4, r0 + mul r4, r3 + mul r0, r0 + mul r3, r3 + lsr r5, r4, #16 + lsl r4, #16 + add r0, r4 + adc r3, r5 + add r0, r4 + adc r3, r5 + lsr r4, r1, #16 + uxth r1, r1 + mov r5, r1 + mul r5, r4 + mul r1, r1 + mul r4, r4 + eor r6, r6 + add r1, r3 + adc r4, r6 + lsr r3, r5, #16 + lsl r5, r5, #16 + add r1, r5 + adc r4, r3 + add r1, r5 + adc r3, r4 + lsr r4, r2, #16 + uxth r2, r2 + mov r5, r2 + mul r5, r4 + mul r2, r2 + mul r4, r4 + lsr r6, r5, #16 + lsl r5, #16 + add r2, r5 + adc r4, r6 + add r5, r2 + adc r6, r4 + eor r4, r4 + mov r2, r1 + sub r1, r5 + sbc r2, r6 + sbc r4, r4 + add r1, r0 + adc r2, r3 + adc r3, r4 + mov r6, r11 + mov r4, r11 + mov r5, r7 + sub r6, r0 + sbc r7, r1 + sbc r4, r2 + sbc r5, r3 + eor r1, r1 + sbc r1, r1 + mov r2, r12 + mov r3, r8 + add r2, r6 + adc r3, r7 + mov r6, r9 + mov r7, r10 + adc r4, r6 + adc r5, r7 + adc r6, r1 + adc r7, r1 + //results r12, r8, r2-r7 + //////////END MIDDLE PART////////////////// + mvn r2, r2 + mvn r3, r3 + mvn r4, r4 + mvn r5, r5 + mvn r6, r6 + mvn r7, r7 + pop {r1} + push {r4-r7} + mov r4, #1 + asr r4, #1 + ldm r1!, {r4-r7} + mov r0, r12 + mov r12, r1 ////////ref + mov r1, r8 + mvn r0, r0 + mvn r1, r1 + adc r0, r4 + adc r1, r5 + adc r2, r6 + adc r3, r7 + eor r4, r4 + adc r4, r4 + mov r8, r4 //carry A --ini + mov r4, r12 + ldm r4, {r4-r7} + add r0, r4 + adc r1, r5 + adc r2, r6 + adc r3, r7 + mov r9, r4 + mov r4, r12 + stm r4!, {r0-r3} + mov r12, r4 + mov r4, r9 + pop {r0-r3} + adc r4, r0 + adc r5, r1 + adc r6, r2 + adc r7, r3 + eor r0, r0 + adc r0, r0 + mov r9, r0 //carry B --ini + mov r0, r8 + asr r0, #1 //carry A --end + pop {r0-r3} + adc r4, r0 + adc r5, r1 + adc r6, r2 + adc r7, r3 + mov r8, r0 + mov r0, r12 + stm r0!, {r4-r7} + mov r11, r0 + mov r0, r8 + eor r4, r4 + mov r5, r9 + adc r5, r4 //carry B --end + mvn r6, r4 + add r5, r6 + adc r6, r4 + add r0, r5 + adc r1, r6 + adc r2, r6 + adc r3, r6 + mov r7, r11 + stm r7!, {r0-r3} + + pop {r3-r6} + mov r8, r3 + mov r9, r4 + mov r10, r5 + mov r11, r6 + pop {r4-r7,pc} + bx lr + .size square256_asm, .-square256_asm |