diff options
Diffstat (limited to 'libotr/libgcrypt-1.8.7/cipher/blowfish-amd64.S')
-rw-r--r-- | libotr/libgcrypt-1.8.7/cipher/blowfish-amd64.S | 541 |
1 files changed, 541 insertions, 0 deletions
diff --git a/libotr/libgcrypt-1.8.7/cipher/blowfish-amd64.S b/libotr/libgcrypt-1.8.7/cipher/blowfish-amd64.S new file mode 100644 index 0000000..21b63fc --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/blowfish-amd64.S @@ -0,0 +1,541 @@ +/* blowfish-amd64.S - AMD64 assembly implementation of Blowfish cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef __x86_64 +#include <config.h> +#if defined(USE_BLOWFISH) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +.text + +/* structure of BLOWFISH_context: */ +#define s0 0 +#define s1 ((s0) + 256 * 4) +#define s2 ((s1) + 256 * 4) +#define s3 ((s2) + 256 * 4) +#define p ((s3) + 256 * 4) + +/* register macros */ +#define CTX %rdi +#define RIO %rsi + +#define RX0 %rax +#define RX1 %rbx +#define RX2 %rcx +#define RX3 %rdx + +#define RX0d %eax +#define RX1d %ebx +#define RX2d %ecx +#define RX3d %edx + +#define RX0bl %al +#define RX1bl %bl +#define RX2bl %cl +#define RX3bl %dl + +#define RX0bh %ah +#define RX1bh %bh +#define RX2bh %ch +#define RX3bh %dh + +#define RT0 %rbp +#define RT1 %rsi +#define RT2 %r8 +#define RT3 %r9 + +#define RT0d %ebp +#define RT1d %esi +#define RT2d %r8d +#define RT3d %r9d + +#define RKEY %r10 + +/*********************************************************************** + * 1-way blowfish + ***********************************************************************/ +#define F() \ + movzbl RX0bh, RT1d; \ + movzbl RX0bl, RT3d; \ + rorq $16, RX0; \ + movzbl RX0bh, RT0d; \ + movzbl RX0bl, RT2d; \ + rorq $16, RX0; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT2,4), RT0d; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT3,4), RT0d; \ + xorq RT0, RX0; + +#define load_roundkey_enc(n) \ + movq p+4*(n)(CTX), RX3; + +#define add_roundkey_enc() \ + xorq RX3, RX0; + +#define round_enc(n) \ + add_roundkey_enc(); \ + load_roundkey_enc(n); \ + \ + F(); \ + F(); + +#define load_roundkey_dec(n) \ + movq p+4*(n-1)(CTX), RX3; \ + rorq $32, RX3; + +#define add_roundkey_dec() \ + xorq RX3, RX0; + +#define round_dec(n) \ + add_roundkey_dec(); \ + load_roundkey_dec(n); \ + \ + F(); \ + F(); + +#define read_block() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; + +#define write_block() \ + bswapq RX0; \ + movq RX0, (RIO); + +.align 8 +ELF(.type __blowfish_enc_blk1,@function;) + +__blowfish_enc_blk1: + /* input: + * %rdi: ctx, CTX + * RX0: input plaintext block + * output: + * RX0: output plaintext block + */ + movq %rbp, %r11; + + load_roundkey_enc(0); + round_enc(2); + round_enc(4); + round_enc(6); + round_enc(8); + round_enc(10); + round_enc(12); + round_enc(14); + round_enc(16); + add_roundkey_enc(); + + movq %r11, %rbp; + + ret; +ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;) + +.align 8 +.globl _gcry_blowfish_amd64_do_encrypt +ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;) + +_gcry_blowfish_amd64_do_encrypt: + /* input: + * %rdi: ctx, CTX + * %rsi: u32 *ret_xl + * %rdx: u32 *ret_xr + */ + movl (%rdx), RX0d; + shlq $32, RX0; + movl (%rsi), RT3d; + movq %rdx, %r10; + orq RT3, RX0; + movq %rsi, RX2; + + call __blowfish_enc_blk1; + + movl RX0d, (%r10); + shrq $32, RX0; + movl RX0d, (RX2); + + ret; +ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;) + +.align 8 +.globl _gcry_blowfish_amd64_encrypt_block +ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;) + +_gcry_blowfish_amd64_encrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + movq %rsi, %r10; + + movq %rdx, RIO; + read_block(); + + call __blowfish_enc_blk1; + + movq %r10, RIO; + write_block(); + + ret; +ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;) + +.align 8 +.globl _gcry_blowfish_amd64_decrypt_block +ELF(.type _gcry_blowfish_amd64_decrypt_block,@function;) + +_gcry_blowfish_amd64_decrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + movq %rbp, %r11; + + movq %rsi, %r10; + movq %rdx, RIO; + + read_block(); + + load_roundkey_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + round_dec(1); + add_roundkey_dec(); + + movq %r10, RIO; + write_block(); + + movq %r11, %rbp; + + ret; +ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;) + +/********************************************************************** + 4-way blowfish, four blocks parallel + **********************************************************************/ +#define F4(x) \ + movzbl x ## bh, RT1d; \ + movzbl x ## bl, RT3d; \ + rorq $16, x; \ + movzbl x ## bh, RT0d; \ + movzbl x ## bl, RT2d; \ + rorq $16, x; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT2,4), RT0d; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT3,4), RT0d; \ + xorq RT0, x; + +#define add_preloaded_roundkey4() \ + xorq RKEY, RX0; \ + xorq RKEY, RX1; \ + xorq RKEY, RX2; \ + xorq RKEY, RX3; + +#define preload_roundkey_enc(n) \ + movq p+4*(n)(CTX), RKEY; + +#define add_roundkey_enc4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_enc(n + 2); + +#define round_enc4(n) \ + add_roundkey_enc4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define preload_roundkey_dec(n) \ + movq p+4*((n)-1)(CTX), RKEY; \ + rorq $32, RKEY; + +#define add_roundkey_dec4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_dec(n - 2); + +#define round_dec4(n) \ + add_roundkey_dec4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define inbswap_block4() \ + rorq $32, RX0; \ + bswapq RX0; \ + rorq $32, RX1; \ + bswapq RX1; \ + rorq $32, RX2; \ + bswapq RX2; \ + rorq $32, RX3; \ + bswapq RX3; + +#define inctrswap_block4() \ + rorq $32, RX0; \ + rorq $32, RX1; \ + rorq $32, RX2; \ + rorq $32, RX3; + +#define outbswap_block4() \ + bswapq RX0; \ + bswapq RX1; \ + bswapq RX2; \ + bswapq RX3; + +.align 8 +ELF(.type __blowfish_enc_blk4,@function;) + +__blowfish_enc_blk4: + /* input: + * %rdi: ctx, CTX + * RX0,RX1,RX2,RX3: four input inbswapped plaintext blocks + * output: + * RX0,RX1,RX2,RX3: four output ciphertext blocks + */ + preload_roundkey_enc(0); + + round_enc4(0); + round_enc4(2); + round_enc4(4); + round_enc4(6); + round_enc4(8); + round_enc4(10); + round_enc4(12); + round_enc4(14); + add_preloaded_roundkey4(); + + outbswap_block4(); + + ret; +ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;) + +.align 8 +ELF(.type __blowfish_dec_blk4,@function;) + +__blowfish_dec_blk4: + /* input: + * %rdi: ctx, CTX + * RX0,RX1,RX2,RX3: four input ciphertext blocks + * output: + * RX0,RX1,RX2,RX3: four output plaintext blocks + */ + preload_roundkey_dec(17); + + inbswap_block4(); + + round_dec4(17); + round_dec4(15); + round_dec4(13); + round_dec4(11); + round_dec4(9); + round_dec4(7); + round_dec4(5); + round_dec4(3); + add_preloaded_roundkey4(); + + outbswap_block4(); + + ret; +ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;) + +.align 8 +.globl _gcry_blowfish_amd64_ctr_enc +ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;) +_gcry_blowfish_amd64_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (big endian, 64bit) + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + + /* %r11-%r13 are not used by __blowfish_enc_blk4 */ + movq %rcx, %r13; /*iv*/ + movq %rdx, %r12; /*src*/ + movq %rsi, %r11; /*dst*/ + + /* load IV and byteswap */ + movq (%r13), RT0; + bswapq RT0; + movq RT0, RX0; + + /* construct IVs */ + leaq 1(RT0), RX1; + leaq 2(RT0), RX2; + leaq 3(RT0), RX3; + leaq 4(RT0), RT0; + bswapq RT0; + + inctrswap_block4(); + + /* store new IV */ + movq RT0, (%r13); + + call __blowfish_enc_blk4; + + /* XOR key-stream with plaintext */ + xorq 0 * 8(%r12), RX0; + xorq 1 * 8(%r12), RX1; + xorq 2 * 8(%r12), RX2; + xorq 3 * 8(%r12), RX3; + movq RX0, 0 * 8(%r11); + movq RX1, 1 * 8(%r11); + movq RX2, 2 * 8(%r11); + movq RX3, 3 * 8(%r11); + + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + + ret; +ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;) + +.align 8 +.globl _gcry_blowfish_amd64_cbc_dec +ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;) +_gcry_blowfish_amd64_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (64bit) + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + + /* %r11-%r13 are not used by __blowfish_dec_blk4 */ + movq %rsi, %r11; /*dst*/ + movq %rdx, %r12; /*src*/ + movq %rcx, %r13; /*iv*/ + + /* load input */ + movq 0 * 8(%r12), RX0; + movq 1 * 8(%r12), RX1; + movq 2 * 8(%r12), RX2; + movq 3 * 8(%r12), RX3; + + call __blowfish_dec_blk4; + + movq 3 * 8(%r12), RT0; + xorq (%r13), RX0; + xorq 0 * 8(%r12), RX1; + xorq 1 * 8(%r12), RX2; + xorq 2 * 8(%r12), RX3; + movq RT0, (%r13); /* store new IV */ + + movq RX0, 0 * 8(%r11); + movq RX1, 1 * 8(%r11); + movq RX2, 2 * 8(%r11); + movq RX3, 3 * 8(%r11); + + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + + ret; +ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;) + +.align 8 +.globl _gcry_blowfish_amd64_cfb_dec +ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;) +_gcry_blowfish_amd64_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (64bit) + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + + /* %r11-%r13 are not used by __blowfish_enc_blk4 */ + movq %rcx, %r13; /*iv*/ + movq %rdx, %r12; /*src*/ + movq %rsi, %r11; /*dst*/ + + /* Load input */ + movq (%r13), RX0; + movq 0 * 8(%r12), RX1; + movq 1 * 8(%r12), RX2; + movq 2 * 8(%r12), RX3; + + inbswap_block4(); + + /* Update IV */ + movq 3 * 8(%r12), RT0; + movq RT0, (%r13); + + call __blowfish_enc_blk4; + + xorq 0 * 8(%r12), RX0; + xorq 1 * 8(%r12), RX1; + xorq 2 * 8(%r12), RX2; + xorq 3 * 8(%r12), RX3; + movq RX0, 0 * 8(%r11); + movq RX1, 1 * 8(%r11); + movq RX2, 2 * 8(%r11); + movq RX3, 3 * 8(%r11); + + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + ret; +ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;) + +#endif /*defined(USE_BLOWFISH)*/ +#endif /*__x86_64*/ |