Diffstat (limited to 'comm/third_party/libgcrypt/cipher/blowfish-arm.S')
-rw-r--r-- | comm/third_party/libgcrypt/cipher/blowfish-arm.S | 743
1 file changed, 743 insertions, 0 deletions
diff --git a/comm/third_party/libgcrypt/cipher/blowfish-arm.S b/comm/third_party/libgcrypt/cipher/blowfish-arm.S
new file mode 100644
index 0000000000..b30aa31f1d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blowfish-arm.S
@@ -0,0 +1,743 @@
+/* blowfish-arm.S - ARM assembly implementation of Blowfish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* structure of crypto context */
+#define s0	0
+#define s1	(s0 + (1 * 256) * 4)
+#define s2	(s0 + (2 * 256) * 4)
+#define s3	(s0 + (3 * 256) * 4)
+#define p	(s3 + (1 * 256) * 4)
+
+/* register macros */
+#define CTXs0 %r0
+#define CTXs1 %r9
+#define CTXs2 %r8
+#define CTXs3 %r10
+#define RMASK %lr
+#define RKEYL %r2
+#define RKEYR %ip
+
+#define RL0 %r3
+#define RR0 %r4
+
+#define RL1 %r9
+#define RR1 %r10
+
+#define RT0 %r11
+#define RT1 %r7
+#define RT2 %r5
+#define RT3 %r6
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+	ldrb rout, [rsrc, #((offs) + 0)]; \
+	ldrb rtmp, [rsrc, #((offs) + 1)]; \
+	orr rout, rout, rtmp, lsl #8; \
+	ldrb rtmp, [rsrc, #((offs) + 2)]; \
+	orr rout, rout, rtmp, lsl #16; \
+	ldrb rtmp, [rsrc, #((offs) + 3)]; \
+	orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+	mov rtmp0, rin, lsr #8; \
+	strb rin, [rdst, #((offs) + 0)]; \
+	mov rtmp1, rin, lsr #16; \
+	strb rtmp0, [rdst, #((offs) + 1)]; \
+	mov rtmp0, rin, lsr #24; \
+	strb rtmp1, [rdst, #((offs) + 2)]; \
+	strb rtmp0, [rdst, #((offs) + 3)];
+
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+	ldrb rout, [rsrc, #((offs) + 3)]; \
+	ldrb rtmp, [rsrc, #((offs) + 2)]; \
+	orr rout, rout, rtmp, lsl #8; \
+	ldrb rtmp, [rsrc, #((offs) + 1)]; \
+	orr rout, rout, rtmp, lsl #16; \
+	ldrb rtmp, [rsrc, #((offs) + 0)]; \
+	orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+	mov rtmp0, rin, lsr #8; \
+	strb rin, [rdst, #((offs) + 3)]; \
+	mov rtmp1, rin, lsr #16; \
+	strb rtmp0, [rdst, #((offs) + 2)]; \
+	mov rtmp0, rin, lsr #24; \
+	strb rtmp1, [rdst, #((offs) + 1)]; \
+	strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+	#define ldr_unaligned_host ldr_unaligned_le
+	#define str_unaligned_host str_unaligned_le
+
+	/* bswap on little-endian */
+#ifdef HAVE_ARM_ARCH_V6
+	#define host_to_be(reg, rtmp) \
+		rev reg, reg;
+	#define be_to_host(reg, rtmp) \
+		rev reg, reg;
+#else
+	#define host_to_be(reg, rtmp) \
+		eor	rtmp, reg, reg, ror #16; \
+		mov	rtmp, rtmp, lsr #8; \
+		bic	rtmp, rtmp, #65280; \
+		eor	reg, rtmp, reg, ror #8;
+	#define be_to_host(reg, rtmp) \
+		eor	rtmp, reg, reg, ror #16; \
+		mov	rtmp, rtmp, lsr #8; \
+		bic	rtmp, rtmp, #65280; \
+		eor	reg, rtmp, reg, ror #8;
+#endif
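The pre-ARMv6 host_to_be/be_to_host sequence above is the classic four-instruction ARM byte swap (ARMv6 and later get the single rev instruction instead). A C model of the eor/lsr/bic/eor pattern, as an illustrative sketch (the function name bswap32_armv5 is mine, not libgcrypt's):

    #include <stdint.h>

    static uint32_t bswap32_armv5(uint32_t x)
    {
      uint32_t t = x ^ ((x >> 16) | (x << 16)); /* eor rtmp, reg, reg, ror #16 */
      t >>= 8;                                  /* mov rtmp, rtmp, lsr #8      */
      t &= ~(uint32_t)0xff00;                   /* bic rtmp, rtmp, #65280      */
      return t ^ ((x >> 8) | (x << 24));        /* eor reg, rtmp, reg, ror #8  */
    }

For x = 0xAABBCCDD this yields 0xDDCCBBAA, the same result rev produces on ARMv6 and later.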
+#else
+	#define ldr_unaligned_host ldr_unaligned_be
+	#define str_unaligned_host str_unaligned_be
+
+	/* nop on big-endian */
+	#define host_to_be(reg, rtmp) /*_*/
+	#define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define host_to_host(x, y) /*_*/
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F(l, r) \
+	and RT0, RMASK, l, lsr#(24 - 2); \
+	and RT1, RMASK, l, lsr#(16 - 2); \
+	ldr RT0, [CTXs0, RT0]; \
+	and RT2, RMASK, l, lsr#(8 - 2); \
+	ldr RT1, [CTXs1, RT1]; \
+	and RT3, RMASK, l, lsl#2; \
+	ldr RT2, [CTXs2, RT2]; \
+	add RT0, RT1; \
+	ldr RT3, [CTXs3, RT3]; \
+	eor RT0, RT2; \
+	add RT0, RT3; \
+	eor r, RT0;
+
+#define load_roundkey_enc(n) \
+	ldr RKEYL, [CTXs2, #((p - s2) + (4 * (n) + 0))]; \
+	ldr RKEYR, [CTXs2, #((p - s2) + (4 * (n) + 4))];
+
+#define add_roundkey_enc() \
+	eor RL0, RKEYL; \
+	eor RR0, RKEYR;
+
+#define round_enc(n) \
+	add_roundkey_enc(); \
+	load_roundkey_enc(n); \
+	\
+	F(RL0, RR0); \
+	F(RR0, RL0);
+
+#define load_roundkey_dec(n) \
+	ldr RKEYL, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 4))]; \
+	ldr RKEYR, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 0))];
+
+#define add_roundkey_dec() \
+	eor RL0, RKEYL; \
+	eor RR0, RKEYR;
+
+#define round_dec(n) \
+	add_roundkey_dec(); \
+	load_roundkey_dec(n); \
+	\
+	F(RL0, RR0); \
+	F(RR0, RL0);
+
+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \
+	ldr l0, [rin, #((offs) + 0)]; \
+	ldr r0, [rin, #((offs) + 4)]; \
+	convert(l0, rtmp); \
+	convert(r0, rtmp);
+
+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \
+	convert(l0, rtmp); \
+	convert(r0, rtmp); \
+	str l0, [rout, #((offs) + 0)]; \
+	str r0, [rout, #((offs) + 4)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+	/* unaligned word reads allowed */
+	#define read_block(rin, offs, l0, r0, rtmp0) \
+		read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0)
+
+	#define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \
+		write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0)
+
+	#define read_block_host(rin, offs, l0, r0, rtmp0) \
+		read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0)
+
+	#define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \
+		write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0)
+#else
+	/* need to handle unaligned reads by byte reads */
+	#define read_block(rin, offs, l0, r0, rtmp0) \
+		tst rin, #3; \
+		beq 1f; \
+		ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \
+		ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \
+		b 2f; \
+	1:;\
+		read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \
+	2:;
+
+	#define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \
+		tst rout, #3; \
+		beq 1f; \
+		str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+		str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+		b 2f; \
+	1:;\
+		write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \
+	2:;
+
+	#define read_block_host(rin, offs, l0, r0, rtmp0) \
+		tst rin, #3; \
+		beq 1f; \
+		ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \
+		ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \
+		b 2f; \
+	1:;\
+		read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \
+	2:;
+
+	#define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \
+		tst rout, #3; \
+		beq 1f; \
+		str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+		str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+		b 2f; \
+	1:;\
+		write_block_aligned(rout, offs, l0, r0, host_to_host, rtmp0); \
+	2:;
+#endif
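The F() macro above is the standard Blowfish round function; RMASK is 0xff << 2, so each and/lsr pair extracts one byte of the input pre-scaled by 4, ready to use directly as a word offset into an S-box. As a C sketch (names are mine; s holds the four 256-entry S-boxes from the context):

    #include <stdint.h>

    static uint32_t blowfish_F(const uint32_t s[4][256], uint32_t x)
    {
      uint32_t a = (x >> 24) & 0xff;
      uint32_t b = (x >> 16) & 0xff;
      uint32_t c = (x >>  8) & 0xff;
      uint32_t d =  x        & 0xff;

      /* ((S0[a] + S1[b]) ^ S2[c]) + S3[d], all mod 2^32 */
      return ((s[0][a] + s[1][b]) ^ s[2][c]) + s[3][d];
    }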
+.align 3
+.type  __blowfish_enc_blk1,%function;
+
+__blowfish_enc_blk1:
+	/* input:
+	 *	preloaded: CTX
+	 *	[RL0, RR0]: src
+	 * output:
+	 *	[RR0, RL0]: dst
+	 */
+	push {%lr};
+
+	add CTXs1, CTXs0, #(s1 - s0);
+	add CTXs2, CTXs0, #(s2 - s0);
+	mov RMASK, #(0xff << 2); /* byte mask */
+	add CTXs3, CTXs1, #(s3 - s1);
+
+	load_roundkey_enc(0);
+	round_enc(2);
+	round_enc(4);
+	round_enc(6);
+	round_enc(8);
+	round_enc(10);
+	round_enc(12);
+	round_enc(14);
+	round_enc(16);
+	add_roundkey_enc();
+
+	pop {%pc};
+.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
+
+.align 8
+.globl _gcry_blowfish_arm_do_encrypt
+.type  _gcry_blowfish_arm_do_encrypt,%function;
+
+_gcry_blowfish_arm_do_encrypt:
+	/* input:
+	 *	%r0: ctx, CTX
+	 *	%r1: u32 *ret_xl
+	 *	%r2: u32 *ret_xr
+	 */
+	push {%r2, %r4-%r11, %ip, %lr};
+
+	ldr RL0, [%r1];
+	ldr RR0, [%r2];
+
+	bl __blowfish_enc_blk1;
+
+	pop {%r2};
+	str RR0, [%r1];
+	str RL0, [%r2];
+
+	pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt;
+
+.align 3
+.globl _gcry_blowfish_arm_encrypt_block
+.type  _gcry_blowfish_arm_encrypt_block,%function;
+
+_gcry_blowfish_arm_encrypt_block:
+	/* input:
+	 *	%r0: ctx, CTX
+	 *	%r1: dst
+	 *	%r2: src
+	 */
+	push {%r4-%r11, %ip, %lr};
+
+	read_block(%r2, 0, RL0, RR0, RT0);
+
+	bl __blowfish_enc_blk1;
+
+	write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+	pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block;
+
+.align 3
+.globl _gcry_blowfish_arm_decrypt_block
+.type  _gcry_blowfish_arm_decrypt_block,%function;
+
+_gcry_blowfish_arm_decrypt_block:
+	/* input:
+	 *	%r0: ctx, CTX
+	 *	%r1: dst
+	 *	%r2: src
+	 */
+	push {%r4-%r11, %ip, %lr};
+
+	add CTXs1, CTXs0, #(s1 - s0);
+	add CTXs2, CTXs0, #(s2 - s0);
+	mov RMASK, #(0xff << 2); /* byte mask */
+	add CTXs3, CTXs1, #(s3 - s1);
+
+	read_block(%r2, 0, RL0, RR0, RT0);
+
+	load_roundkey_dec(17);
+	round_dec(15);
+	round_dec(13);
+	round_dec(11);
+	round_dec(9);
+	round_dec(7);
+	round_dec(5);
+	round_dec(3);
+	round_dec(1);
+	add_roundkey_dec();
+
+	write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+	pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block;
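The round macros above unroll the usual 16-round Feistel schedule; the decrypt path is identical except that it walks the P-array backwards. What __blowfish_enc_blk1 computes, as a C sketch under the same assumptions as blowfish_F above (P is the 18-word P-array from the context; the assembly hoists the XOR of r with P[i+1] before F, which is equivalent because XOR commutes):

    static void blowfish_encrypt_1blk(const uint32_t P[18],
                                      const uint32_t s[4][256],
                                      uint32_t *xl, uint32_t *xr)
    {
      uint32_t l = *xl, r = *xr;
      int i;

      for (i = 0; i < 16; i += 2)
        {
          l ^= P[i];
          r ^= blowfish_F(s, l);
          r ^= P[i + 1];
          l ^= blowfish_F(s, r);
        }

      /* output whitening; the halves swap here, which is why the
       * functions above document their result as [RR0, RL0] */
      *xl = r ^ P[17];
      *xr = l ^ P[16];
    }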
+/***********************************************************************
+ * 2-way blowfish
+ ***********************************************************************/
+#define F2(n, l0, r0, l1, r1, set_nextk, dec) \
+	\
+	and RT0, RMASK, l0, lsr#(24 - 2); \
+	and RT1, RMASK, l0, lsr#(16 - 2); \
+	and RT2, RMASK, l0, lsr#(8 - 2); \
+	add RT1, #(s1 - s0); \
+	\
+	ldr RT0, [CTXs0, RT0]; \
+	and RT3, RMASK, l0, lsl#2; \
+	ldr RT1, [CTXs0, RT1]; \
+	add RT3, #(s3 - s2); \
+	ldr RT2, [CTXs2, RT2]; \
+	add RT0, RT1; \
+	ldr RT3, [CTXs2, RT3]; \
+	\
+	and RT1, RMASK, l1, lsr#(24 - 2); \
+	eor RT0, RT2; \
+	and RT2, RMASK, l1, lsr#(16 - 2); \
+	add RT0, RT3; \
+	add RT2, #(s1 - s0); \
+	and RT3, RMASK, l1, lsr#(8 - 2); \
+	eor r0, RT0; \
+	\
+	ldr RT1, [CTXs0, RT1]; \
+	and RT0, RMASK, l1, lsl#2; \
+	ldr RT2, [CTXs0, RT2]; \
+	add RT0, #(s3 - s2); \
+	ldr RT3, [CTXs2, RT3]; \
+	add RT1, RT2; \
+	ldr RT0, [CTXs2, RT0]; \
+	\
+	and RT2, RMASK, r0, lsr#(24 - 2); \
+	eor RT1, RT3; \
+	and RT3, RMASK, r0, lsr#(16 - 2); \
+	add RT1, RT0; \
+	add RT3, #(s1 - s0); \
+	and RT0, RMASK, r0, lsr#(8 - 2); \
+	eor r1, RT1; \
+	\
+	ldr RT2, [CTXs0, RT2]; \
+	and RT1, RMASK, r0, lsl#2; \
+	ldr RT3, [CTXs0, RT3]; \
+	add RT1, #(s3 - s2); \
+	ldr RT0, [CTXs2, RT0]; \
+	add RT2, RT3; \
+	ldr RT1, [CTXs2, RT1]; \
+	\
+	and RT3, RMASK, r1, lsr#(24 - 2); \
+	eor RT2, RT0; \
+	and RT0, RMASK, r1, lsr#(16 - 2); \
+	add RT2, RT1; \
+	add RT0, #(s1 - s0); \
+	and RT1, RMASK, r1, lsr#(8 - 2); \
+	eor l0, RT2; \
+	\
+	ldr RT3, [CTXs0, RT3]; \
+	and RT2, RMASK, r1, lsl#2; \
+	ldr RT0, [CTXs0, RT0]; \
+	add RT2, #(s3 - s2); \
+	ldr RT1, [CTXs2, RT1]; \
+	eor l1, RKEYL; \
+	ldr RT2, [CTXs2, RT2]; \
+	\
+	eor r0, RKEYR; \
+	add RT3, RT0; \
+	eor r1, RKEYR; \
+	eor RT3, RT1; \
+	eor l0, RKEYL; \
+	add RT3, RT2; \
+	set_nextk(RKEYL, (p - s2) + (4 * (n) + ((dec) * 4))); \
+	eor l1, RT3; \
+	set_nextk(RKEYR, (p - s2) + (4 * (n) + (!(dec) * 4)));
+
+#define load_n_add_roundkey_enc2(n) \
+	load_roundkey_enc(n); \
+	eor RL0, RKEYL; \
+	eor RR0, RKEYR; \
+	eor RL1, RKEYL; \
+	eor RR1, RKEYR; \
+	load_roundkey_enc((n) + 2);
+
+#define next_key(reg, offs) \
+	ldr reg, [CTXs2, #(offs)];
+
+#define dummy(x, y) /* do nothing */
+
+#define round_enc2(n, load_next_key) \
+	F2((n) + 2, RL0, RR0, RL1, RR1, load_next_key, 0);
+
+#define load_n_add_roundkey_dec2(n) \
+	load_roundkey_dec(n); \
+	eor RL0, RKEYL; \
+	eor RR0, RKEYR; \
+	eor RL1, RKEYL; \
+	eor RR1, RKEYR; \
+	load_roundkey_dec((n) - 2);
+
+#define round_dec2(n, load_next_key) \
+	F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1);
+
+#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \
+	ldr l0, [rin, #(0)]; \
+	ldr r0, [rin, #(4)]; \
+	convert(l0, rtmp); \
+	ldr l1, [rin, #(8)]; \
+	convert(r0, rtmp); \
+	ldr r1, [rin, #(12)]; \
+	convert(l1, rtmp); \
+	convert(r1, rtmp);
+
+#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \
+	convert(l0, rtmp); \
+	convert(r0, rtmp); \
+	convert(l1, rtmp); \
+	str l0, [rout, #(0)]; \
+	convert(r1, rtmp); \
+	str r0, [rout, #(4)]; \
+	str l1, [rout, #(8)]; \
+	str r1, [rout, #(12)];
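F2() performs the same four S-box lookups as F(), but for two blocks at once, interleaving the independent loads and ALU operations of both lanes so that S-box load latency is hidden; the round-key XORs and the next round-key loads (set_nextk) are folded into the same schedule. Functionally, the 2-way path is just two independent single-block operations, e.g. for encryption (a sketch reusing the hypothetical model above):

    static void blowfish_encrypt_2blk(const uint32_t P[18],
                                      const uint32_t s[4][256],
                                      uint32_t blk0[2], uint32_t blk1[2])
    {
      blowfish_encrypt_1blk(P, s, &blk0[0], &blk0[1]);
      blowfish_encrypt_1blk(P, s, &blk1[0], &blk1[1]);
    }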
+#ifdef __ARM_FEATURE_UNALIGNED
+	/* unaligned word reads allowed */
+	#define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+		read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0)
+
+	#define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+		write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0)
+
+	#define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+		read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0)
+
+	#define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+		write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0)
+#else
+	/* need to handle unaligned reads by byte reads */
+	#define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+		tst rin, #3; \
+		beq 1f; \
+		ldr_unaligned_be(l0, rin, 0, rtmp0); \
+		ldr_unaligned_be(r0, rin, 4, rtmp0); \
+		ldr_unaligned_be(l1, rin, 8, rtmp0); \
+		ldr_unaligned_be(r1, rin, 12, rtmp0); \
+		b 2f; \
+	1:;\
+		read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \
+	2:;
+
+	#define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+		tst rout, #3; \
+		beq 1f; \
+		str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \
+		str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \
+		str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \
+		str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \
+		b 2f; \
+	1:;\
+		write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \
+	2:;
+
+	#define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+		tst rin, #3; \
+		beq 1f; \
+		ldr_unaligned_host(l0, rin, 0, rtmp0); \
+		ldr_unaligned_host(r0, rin, 4, rtmp0); \
+		ldr_unaligned_host(l1, rin, 8, rtmp0); \
+		ldr_unaligned_host(r1, rin, 12, rtmp0); \
+		b 2f; \
+	1:;\
+		read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \
+	2:;
+
+	#define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+		tst rout, #3; \
+		beq 1f; \
+		str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \
+		str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \
+		str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \
+		str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \
+		b 2f; \
+	1:;\
+		write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \
+	2:;
+#endif
+
+.align 3
+.type  _gcry_blowfish_arm_enc_blk2,%function;
+
+_gcry_blowfish_arm_enc_blk2:
+	/* input:
+	 *	preloaded: CTX
+	 *	[RL0, RR0], [RL1, RR1]: src
+	 * output:
+	 *	[RR0, RL0], [RR1, RL1]: dst
+	 */
+	push {RT0,%lr};
+
+	add CTXs2, CTXs0, #(s2 - s0);
+	mov RMASK, #(0xff << 2); /* byte mask */
+
+	load_n_add_roundkey_enc2(0);
+	round_enc2(2, next_key);
+	round_enc2(4, next_key);
+	round_enc2(6, next_key);
+	round_enc2(8, next_key);
+	round_enc2(10, next_key);
+	round_enc2(12, next_key);
+	round_enc2(14, next_key);
+	round_enc2(16, dummy);
+
+	host_to_be(RR0, RT0);
+	host_to_be(RL0, RT0);
+	host_to_be(RR1, RT0);
+	host_to_be(RL1, RT0);
+
+	pop {RT0,%pc};
+.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2;
+
+.align 3
+.globl _gcry_blowfish_arm_cfb_dec;
+.type  _gcry_blowfish_arm_cfb_dec,%function;
+
+_gcry_blowfish_arm_cfb_dec:
+	/* input:
+	 *	%r0: CTX
+	 *	%r1: dst (2 blocks)
+	 *	%r2: src (2 blocks)
+	 *	%r3: iv (64bit)
+	 */
+	push {%r2, %r4-%r11, %ip, %lr};
+
+	mov %lr, %r3;
+
+	/* Load input (iv/%r3 is aligned, src/%r2 might not be) */
+	ldm %r3, {RL0, RR0};
+	host_to_be(RL0, RT0);
+	host_to_be(RR0, RT0);
+	read_block(%r2, 0, RL1, RR1, RT0);
+
+	/* Update IV, load src[1] and save to iv[0] */
+	read_block_host(%r2, 8, %r5, %r6, RT0);
+	stm %lr, {%r5, %r6};
+
+	bl _gcry_blowfish_arm_enc_blk2;
+	/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+	/* %r1: dst, %r0: src */
+	pop {%r0};
+
+	/* dst = src ^ result */
+	read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr);
+	eor %r5, %r4;
+	eor %r6, %r3;
+	eor %r7, %r10;
+	eor %r8, %r9;
+	write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10);
+
+	pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec;
+
+.align 3
+.globl _gcry_blowfish_arm_ctr_enc;
+.type  _gcry_blowfish_arm_ctr_enc,%function;
+
+_gcry_blowfish_arm_ctr_enc:
+	/* input:
+	 *	%r0: CTX
+	 *	%r1: dst (2 blocks)
+	 *	%r2: src (2 blocks)
+	 *	%r3: iv (64bit, big-endian)
+	 */
+	push {%r2, %r4-%r11, %ip, %lr};
+
+	mov %lr, %r3;
+
+	/* Load IV (big => host endian) */
+	read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0);
+
+	/* Construct IVs */
+	adds RR1, RR0, #1; /* +1 */
+	adc RL1, RL0, #0;
+	adds %r6, RR1, #1; /* +2 */
+	adc %r5, RL1, #0;
+
+	/* Store new IV (host => big-endian) */
+	write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0);
+
+	bl _gcry_blowfish_arm_enc_blk2;
+	/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+	/* %r1: dst, %r0: src */
+	pop {%r0};
+
+	/* XOR key-stream with plaintext */
+	read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr);
+	eor %r5, %r4;
+	eor %r6, %r3;
+	eor %r7, %r10;
+	eor %r8, %r9;
+	write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10);
+
+	pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc;
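_gcry_blowfish_arm_ctr_enc treats the IV as one 64-bit big-endian counter: the adds/adc pairs build counter and counter+1 as the two keystream inputs, and counter+2 is written back for the next call. The same bookkeeping in C, assuming the words are already in host order as after be_to_host (names are mine):

    #include <stdint.h>

    static void ctr_next_two(uint32_t iv[2],  /* in: counter; out: counter + 2 */
                             uint32_t b0[2],  /* out: counter     */
                             uint32_t b1[2])  /* out: counter + 1 */
    {
      uint64_t c = ((uint64_t)iv[0] << 32) | iv[1];

      b0[0] = (uint32_t)(c >> 32);        b0[1] = (uint32_t)c;
      b1[0] = (uint32_t)((c + 1) >> 32);  b1[1] = (uint32_t)(c + 1);  /* adds/adc */
      iv[0] = (uint32_t)((c + 2) >> 32);  iv[1] = (uint32_t)(c + 2);
    }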
+.align 3
+.type  _gcry_blowfish_arm_dec_blk2,%function;
+
+_gcry_blowfish_arm_dec_blk2:
+	/* input:
+	 *	preloaded: CTX
+	 *	[RL0, RR0], [RL1, RR1]: src
+	 * output:
+	 *	[RR0, RL0], [RR1, RL1]: dst
+	 */
+	add CTXs2, CTXs0, #(s2 - s0);
+	mov RMASK, #(0xff << 2); /* byte mask */
+
+	load_n_add_roundkey_dec2(17);
+	round_dec2(15, next_key);
+	round_dec2(13, next_key);
+	round_dec2(11, next_key);
+	round_dec2(9, next_key);
+	round_dec2(7, next_key);
+	round_dec2(5, next_key);
+	round_dec2(3, next_key);
+	round_dec2(1, dummy);
+
+	host_to_be(RR0, RT0);
+	host_to_be(RL0, RT0);
+	host_to_be(RR1, RT0);
+	host_to_be(RL1, RT0);
+
+	b .Ldec_cbc_tail;
+.ltorg
+.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2;
+
+.align 3
+.globl _gcry_blowfish_arm_cbc_dec;
+.type  _gcry_blowfish_arm_cbc_dec,%function;
+
+_gcry_blowfish_arm_cbc_dec:
+	/* input:
+	 *	%r0: CTX
+	 *	%r1: dst (2 blocks)
+	 *	%r2: src (2 blocks)
+	 *	%r3: iv (64bit)
+	 */
+	push {%r2-%r11, %ip, %lr};
+
+	read_block2(%r2, RL0, RR0, RL1, RR1, RT0);
+
+	/* dec_blk2 is only used by cbc_dec, jump directly in/out instead
+	 * of function call. */
+	b _gcry_blowfish_arm_dec_blk2;
+.Ldec_cbc_tail:
+	/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+	/* %r0: src, %r1: dst, %r2: iv */
+	pop {%r0, %r2};
+
+	/* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */
+	read_block_host(%r0, 0, %r7, %r8, %r5);
+	/* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */
+	ldm %r2, {%r5, %r6};
+
+	/* out[1] ^= IV+1 */
+	eor %r10, %r7;
+	eor %r9, %r8;
+	/* out[0] ^= IV */
+	eor %r4, %r5;
+	eor %r3, %r6;
+
+	/* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */
+	read_block_host(%r0, 8, %r7, %r8, %r5);
+	/* store IV+2 to iv[0] (aligned). */
+	stm %r2, {%r7, %r8};
+
+	/* store result to dst[0-3]. Might be unaligned. */
+	write_block2_host(%r1, %r4, %r3, %r10, %r9, %r5, %r6);
+
+	pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARMEL__*/
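For reference, the .Ldec_cbc_tail logic above is textbook 2-block CBC decryption: each output block is the decrypted ciphertext XORed with the preceding ciphertext block (called IV, IV+1 and IV+2 in the comments), and the last ciphertext block becomes the new IV. A C sketch under the same assumptions as the earlier models (blowfish_decrypt_1blk is a hypothetical analogue of blowfish_encrypt_1blk with the P-array applied in reverse):

    static void blowfish_decrypt_1blk(const uint32_t P[18],
                                      const uint32_t s[4][256],
                                      uint32_t *xl, uint32_t *xr)
    {
      uint32_t l = *xl, r = *xr;
      int i;

      for (i = 16; i > 0; i -= 2)
        {
          l ^= P[i + 1];
          r ^= blowfish_F(s, l);
          r ^= P[i];
          l ^= blowfish_F(s, r);
        }
      *xl = r ^ P[0];
      *xr = l ^ P[1];
    }

    static void cbc_dec_2blk(const uint32_t P[18], const uint32_t s[4][256],
                             uint32_t dst[4], const uint32_t src[4],
                             uint32_t iv[2])
    {
      uint32_t c0l = src[0], c0r = src[1];  /* "IV+1" in the comments above */
      uint32_t c1l = src[2], c1r = src[3];  /* "IV+2" */

      dst[0] = c0l; dst[1] = c0r;
      dst[2] = c1l; dst[3] = c1r;
      blowfish_decrypt_1blk(P, s, &dst[0], &dst[1]);
      blowfish_decrypt_1blk(P, s, &dst[2], &dst[3]);

      dst[0] ^= iv[0]; dst[1] ^= iv[1];     /* out[0] ^= IV   */
      dst[2] ^= c0l;   dst[3] ^= c0r;       /* out[1] ^= IV+1 */

      iv[0] = c1l; iv[1] = c1r;             /* new IV = IV+2  */
    }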