Diffstat (limited to 'libotr/libgcrypt-1.8.7/cipher')
145 files changed, 103519 insertions, 0 deletions
diff --git a/libotr/libgcrypt-1.8.7/cipher/ChangeLog-2011 b/libotr/libgcrypt-1.8.7/cipher/ChangeLog-2011
new file mode 100644
index 0000000..1ce6bd1
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/ChangeLog-2011
@@ -0,0 +1,4279 @@
+2011-12-01  Werner Koch  <wk@g10code.com>
+
+	NB: ChangeLog files are no longer manually maintained.  Starting
+	on December 1st, 2011 we put change information only in the GIT
+	commit log, and generate a top-level ChangeLog file from logs at
+	"make dist".  See doc/HACKING for details.
+
+2011-09-16  Werner Koch  <wk@g10code.com>
+
+	* primegen.c (_gcry_primegen_init): New.
+
+2011-09-15  Werner Koch  <wk@g10code.com>
+
+	* cipher-cbc.c, cipher-cfb.c, cipher-ofb.c, cipher-ctr.c: New.
+	* cipher-aeswrap.c: New.
+	* cipher-internal.h: New.
+	* cipher.c (cipher_context_alignment_t, struct gcry_cipher_handle)
+	(CTX_MAGIC_NORMAL, CTX_MAGIC_SECURE, NEED_16BYTE_ALIGNED_CONTEXT)
+	(MAX_BLOCKSIZE): Move to cipher-internal.h.
+	(do_aeswrap_encrypt, do_aeswrap_decrypt)
+	(do_cbc_encrypt, do_cbc_decrypt, do_ctr_encrypt, do_ctr_decrypt)
+	(do_ofb_encrypt, do_ofb_decrypt, do_ctr_encrypt): Move to the
+	respective new cipher-foo.c files.
+	(do_ctr_decrypt): Remove.
+
+2011-09-15  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_list): Remove.
+	(gcry_pk_unregister): Remove.
+	* md.c (gcry_md_list): Remove.
+	(gcry_md_unregister): Remove.
+	* cipher.c (gcry_cipher_list): Remove.
+	(gcry_cipher_unregister): Remove.
+	* ac.c: Remove.
+
+2011-06-29  Werner Koch  <wk@g10code.com>
+
+	* cipher.c (cipher_get_keylen): Return zero for an invalid algorithm.
+	(cipher_get_blocksize): Ditto.
+
+2011-06-13  Werner Koch  <wk@g10code.com>
+
+	* dsa.c (selftest_sign_1024): Use the raw and not the pkcs1 flag.
+
+	* pubkey.c (gcry_pk_sign): Special case output generation for PKCS1.
+	(sexp_data_to_mpi): Parse "random-override" for pkcs1 encryption.
+	(pkcs1_encode_for_encryption): Add args RANDOM_OVERRIDE and
+	RANDOM_OVERRIDE_LEN.
+	(gcry_pk_encrypt): Special case output generation for PKCS1.
+	(sexp_data_to_mpi): Use GCRYMPI_FMT_USG for raw encoding.
+
+2011-06-10  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_sign): Use format specifier '%M' to avoid
+	leading zeroes.  Special case output generation for PSS.
+	(gcry_pk_encrypt): Special case output generation for OAEP.
+	(sexp_data_to_mpi): Use GCRYMPI_FMT_USG for PSS verify.
+
+2011-06-09  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (oaep_decode): Make use of octet_string_from_mpi.
+	(sexp_to_enc): Skip "random-override".
+
+	* pubkey.c (oaep_encode, pss_encode): Add args RANDOM_OVERRIDE and
+	RANDOM_OVERRIDE_LEN.
+	(sexp_data_to_mpi): Extract new random-override parameter.
+
+	* pubkey.c (pss_encode, pss_verify): Use VALUE verbatim for MHASH.
+	(octet_string_from_mpi): Add arg SPACE.
+
+2011-06-08  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (pss_encode, pss_verify): Restructure and comment code
+	to match RFC-3447.  Replace secure allocs by plain allocs and
+	wipememory.  Use gcry_md_hash_buffer.
+	(octet_string_from_mpi): New.
+
+2011-06-03  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (oaep_decode): Add more comments and restructure to
+	match the description in RFC-3447.
+	(oaep_encode): Check for mgf1 error.  s/dlen/hlen/.
+
+2011-05-31  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (mgf1): Optimize by using gcry_md_reset.  Re-implement
+	for easier readability.
+	(oaep_encode): Add more comments and restructure to match the
+	description in RFC-3447.
+
+	* pubkey.c (pkcs1_encode_for_signature, oaep_decode): Change
+	return value from one MPI to a buffer.
+	(gcry_pk_decrypt): Adjust for this change.
+
+2011-05-30  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (pkcs1_decode_for_encryption): Change handling of
+	leading zero byte.
+
+2011-05-27  Daiki Ueno  <ueno@unixuser.org>
+
+	* pubkey.c (gcry_pk_decrypt): Fix double-free when un-padding
+	invalid data.  Thanks to Tom Ritter.
+
+2011-05-24  Daiki Ueno  <ueno@unixuser.org>
+
+	* rsa.c (rsa_verify): Use CMP if given, to check the decrypted
+	sig.
+
+	* pubkey.c (sexp_to_enc, sexp_data_to_mpi): Factor out
+	CTX initialization to ...
+	(init_encoding_ctx): .. new.
+	(gcry_pk_verify): Pass verify func and the arg to pubkey_verify.
+	(pss_encode, pss_verify, pss_verify_cmp): New.
+
+2011-05-23  Daiki Ueno  <ueno@unixuser.org>
+
+	* pubkey.c (pkcs1_decode_for_encryption, oaep_decode): Fix memleak
+	when gcry_mpi_print fails.
+
+2011-05-18  Daiki Ueno  <ueno@unixuser.org>
+
+	* pubkey.c (sexp_data_to_mpi): Factor some code out to ...
+	(pkcs1_encode_for_encryption): .. new,
+	(pkcs1_encode_for_signature): .. new.
+	(pkcs1_decode_for_encryption): New.
+	(gcry_pk_decrypt): Do un-padding for PKCS#1 as well as OAEP.
+	(sexp_to_enc): Abolish "unpad" flag, which is not necessary since
+	we can do un-padding implicitly when "pkcs1" or "oaep" is given.
+
+2011-05-11  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (sexp_to_enc, sexp_data_to_mpi): Set LABEL to NULL
+	after free.
+	(sexp_to_enc, sexp_data_to_mpi): Do not allow multiple encoding
+	flags.
+	(oaep_encode, oaep_decode, sexp_to_key, sexp_to_sig)
+	(sexp_to_enc, sexp_data_to_mpi, gcry_pk_encrypt, gcry_pk_sign)
+	(gcry_pk_genkey, _gcry_pk_get_elements): Replace access to ERRNO
+	by gpg_err_code_from_syserror.
+
+2011-05-11  Daiki Ueno  <ueno@unixuser.org>
+
+	* pubkey.c (sexp_data_to_mpi): Factor some code out to ...
+	(get_hash_algo): .. new.
+	(mgf1, oaep_encode, oaep_decode): New.
+	(sexp_to_enc): Add arg CTX.  Remove arg RET_WANT_PKCS1.  Support
+	OAEP.
+	(sexp_data_to_mpi): Add arg CTX.  Support OAEP.
+	(gcry_pk_encrypt): Pass a CTX to sexp_data_to_mpi.
+	(gcry_pk_decrypt): Pass a CTX to sexp_to_enc and replace
+	WANT_PKCS1.  Implement unpadding for OAEP.
+	(gcry_pk_sign): Pass NULL for CTX arg of sexp_data_to_mpi.
+	(gcry_pk_verify): Ditto.
+
+2011-04-19  Werner Koch  <wk@g10code.com>
+
+	* cipher.c (gcry_cipher_open): Replace gpg_err_code_from_errno by
+	gpg_err_code_from_syserror.
+
+2011-04-11  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_get_keygrip): Avoid double free of L2.
+
+	* cipher.c (_gcry_cipher_setctr): Clear unused lastiv info.
+	(gcry_cipher_ctl) <GCRYCTL_SET_CTR>: Implement by calling
+	_gcry_cipher_setctr.
+	(do_ctr_encrypt): Save last counter and reuse it.
+
+	* cipher.c (do_ctr_encrypt): Allow arbitrary length inputs to
+	match the 1.4 behaviour.
+
+2011-04-04  Werner Koch  <wk@g10code.com>
+
+	* ecc.c (compute_keygrip): Release L1 while parsing "curve".
+
+	* pubkey.c (gcry_pk_get_keygrip): Always release NAME and L2.
+	Reported by Ben Kibbey.
+
+2011-03-28  Werner Koch  <wk@g10code.com>
+
+	* primegen.c (_gcry_generate_elg_prime): Make sure that PRIME is
+	NULL if the called func ever returns an error.
+
+	* pubkey.c (gcry_pk_decrypt): Remove unused var PUBKEY.
+
+2011-03-09  Werner Koch  <wk@g10code.com>
+
+	* kdf.c: New.
+
+2011-02-22  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (aesni_cleanup_2_4): New.
+	(aesenc_xmm1_xmm0, do_aesni_ctr_4): New.
+	(_gcry_aes_ctr_enc): New.
+	* cipher.c (struct gcry_cipher_handle): Add CTR_ENC.
+	Move field CTR into a u_ctr union and adjust all users.
+	(gcry_cipher_open): Use _gcry_aes_ctr_enc.
+	(do_ctr_encrypt): Use bulk mode.
+
+2011-02-18  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (u32_a_t): New.
+	(do_encrypt_aligned, do_decrypt_aligned): Use the new type to
+	avoid problems with strict aliasing rules.
+
+2011-02-16  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (do_aesni_cfb) [USE_AESNI]: New.
+	(_gcry_aes_cfb_enc, _gcry_aes_cfb_dec) [USE_AESNI]: Use new function.
+
+2011-02-15  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (do_aesni_enc_aligned, do_aesni_dec_aligned): Use
+	movdqa for the key but keep using movdqu for the data.
+	(do_aesni): Remove alignment detection.  Don't burn the stack.
+	(aesni_prepare, aesni_cleanup): New macros.
+	(rijndael_encrypt, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+	(rijndael_decrypt, _gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Use
+	these macros.  Don't burn the stack in the USE_AESNI case.
+	(do_setkey): Add disabled code to use aeskeygenassist.
+
+2011-02-14  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (ATTR_ALIGNED_16): New.
+	(do_aesni): Do not copy if already aligned.
+	(do_encrypt, do_decrypt): Ditto.
+	(rijndael_decrypt, rijndael_encrypt): Increase stack burning amount.
+
+	* rijndael.c (RIJNDAEL_context): Reorder fields.  Change fieldname
+	ROUNDS to rounds.  Move padlock_key into u1.
+	(keySched, keySched2): Rename macros to keyschenc and keyschdec
+	and change all users.
+	(padlockkey): New macro.  Change all users of padlock_key.
+	* cipher.c (NEED_16BYTE_ALIGNED_CONTEXT): Always define if using gcc.
+	(struct gcry_cipher_handle): Align U_IV to at least 16 byte.
+
+2011-02-13  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (USE_AESNI): New.  Define for ia32 and gcc >= 4.
+	(m128i_t) [USE_AESNI]: New.
+	(RIJNDAEL_context) [USE_AESNI]: Add field use_aesni.
+	(do_setkey): Set USE_AESNI for all key lengths.
+	(prepare_decryption) [USE_AESNI]: Use the aesimc insn if requested.
+	(do_aesni_enc_aligned, do_aesni_dec_aligned)
+	(do_aesni) [USE_AESNI]: New.
+	(rijndael_encrypt, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+	(rijndael_decrypt, _gcry_aes_cfb_dec)
+	(_gcry_aes_cbc_dec) [USE_AESNI]: Use do_aesni.
+
+2011-02-01  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_get_curve): New.
+	(sexp_to_key): Add arg OVERRIDE_ELEMS.
+	(sexp_elements_extract_ecc): Allow for params only.
+	(gcry_pk_get_param): New.
+	* ecc.c (ecc_get_curve): New.
+	(ecc_get_param_sexp): New.
+
+2011-01-28  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_genkey): Hack to insert the used curve name.
+
+2011-01-27  Werner Koch  <wk@g10code.com>
+
+	* ecc.c (fill_in_curve): Remove.
+	(generate_curve): Rename to ..
+	(fill_in_curve): .. this.  Remove setting of NAME_OID.
+	(ecc_encrypt_raw): Change name of arg DATA to K for better
+	readability.  Use ECC_public_key instead of ECC_secret_key.
+	Require a caller to pass a complete pkey array.
+	(ecc_decrypt_raw): Require a caller to pass a complete skey array.
+	(elliptic_curve_t): Add field NAME.
+	(fill_in_curve): Set field.
+	(generate_key): Add arg R_USED_CURVE.
+	(ecc_generate_ext): Return used curve name.
+
+2011-01-13  Andrey Jivsov  <openpgp@brainhub.org>  (wk)
+
+	* ecc.c (ec2os): Do not free passed parameters X and Y.  Adjust
+	callers.
+	(ecc_encrypt_raw, ecc_decrypt_raw): New.
+	(ecdh_names, _gcry_pubkey_spec_ecdh): New.
+	* pubkey.c (pubkey_table): Support ECDH.
+
+2010-08-19  Werner Koch  <wk@g10code.com>
+
+	* cipher.c (gcry_cipher_open): Remove double release of the module.
+	Fixes bug#1263.
+
+2010-06-10  Jeff Johnson  <n3npq@mac.com>  (wk)
+
+	* ecc.c (ecc_generate_ext): Parse transient-key flag.
+	(generate_key): Add arg TRANSIENT_KEY and use it to set the random
+	level.
+
+2010-04-12  Brad Hards  <bradh@frogmouth.net>  (wk)
+
+	Spelling fixes.
+
+2010-03-26  Werner Koch  <wk@g10code.com>
+
+	* tiger.c (asn): Unfetter the old TIGER from an OID.
+	(TIGER_CONTEXT): Add field VARIANT.
+	(tiger_init): Factor code out to ...
+	(do_init): New.
+	(tiger1_init, tiger2_init): New.
+	(_gcry_digest_spec_tiger1, _gcry_digest_spec_tiger2): New.
+	* md.c (digest_table): Add TIGER1 and TIGER2 variants.
+
+2009-12-11  Werner Koch  <wk@g10code.com>
+
+	* sha256.c (Cho, Maj, Sum0, Sum1): Turn macros into inline
+	functions.
+	(transform): Partly unroll to interweave the chain variables.
+
+	* sha512.c (ROTR, Ch, Maj, Sum0, Sum1): Turn macros into inline
+	functions.
+	(transform): Partly unroll to interweave the chain variables.
+	Suggested by Christian Grothoff.
+
+2009-12-10  Werner Koch  <wk@g10code.com>
+
+	* Makefile.am (o_flag_munging): New.
+	(tiger.o, tiger.lo): Use it.
+
+	* cipher.c (do_ctr_encrypt): Add arg OUTBUFLEN.  Check for
+	suitable value.  Add check for valid inputlen.  Wipe temporary
+	memory.
+	(do_ctr_decrypt): Likewise.
+	(do_cbc_encrypt, do_cbc_decrypt): Add arg OUTBUFLEN.  Check for
+	suitable value.  Move check for valid inputlen to here; change
+	returned error from INV_ARG to INV_LENGTH.
+	(do_ecb_encrypt, do_ecb_decrypt): Ditto.
+	(do_cfb_encrypt, do_cfb_decrypt): Ditto.
+	(do_ofb_encrypt, do_ofb_decrypt): Ditto.
+	(cipher_encrypt, cipher_decrypt): Adjust for above changes.
+	(gcry_cipher_encrypt, gcry_cipher_decrypt): Simplify.
+
+2009-12-09  Werner Koch  <wk@g10code.com>
+
+	* cipher.c (gcry_cipher_open): Allow for GCRY_CIPHER_MODE_AESWRAP.
+	(cipher_encrypt, cipher_decrypt): Ditto.
+	(do_aeswrap_encrypt, do_aeswrap_decrypt): New.
+	(struct gcry_cipher_handle): Add field marks.
+	(cipher_setkey, cipher_setiv): Update marks flags.
+	(cipher_reset): Reset marks.
+	(cipher_encrypt, cipher_decrypt): Add new arg OUTBUFLEN.
+	(gcry_cipher_encrypt, gcry_cipher_decrypt): Pass outbuflen to
+	cipher_encrypt.  Replace GPG_ERR_TOO_SHORT by
+	GPG_ERR_BUFFER_TOO_SHORT.
+
+2009-08-21  Werner Koch  <wk@g10code.com>
+
+	* dsa.c (dsa_generate_ext): Release retfactors array before
+	setting it to NULL.  Reported by Daiki Ueno.
+
+2009-07-02  Werner Koch  <wk@g10code.com>
+
+	* md.c (md_read): Fix incomplete check for NULL.
+	Reported by Fabian Kail.
+
+2009-03-31  Werner Koch  <wk@g10code.com>
+
+	* rsa.c (rsa_check_secret_key): Return GPG_ERR_BAD_SECKEY and not
+	GPG_ERR_PUBKEY_ALGO.
+
+2009-02-16  Werner Koch  <wk@g10code.com>
+
+	* rsa.c (generate_x931): Do not initialize TBL with automatic
+	variables.
+	* whirlpool.c, tiger.c, sha256.c, sha1.c, rmd160.c, md5.c
+	* md4.c, crc.c: Remove memory.h.  This is garbage from gnupg.
+	Reported by Dan Fandrich.
+
+2009-01-22  Werner Koch  <wk@g10code.com>
+
+	* ecc.c (compute_keygrip): Remove superfluous const.
+
+2009-01-06  Werner Koch  <wk@g10code.com>
+
+	* rmd160.c (oid_spec_rmd160): Add TeleTrust identifier.
+
+2008-12-10  Werner Koch  <wk@g10code.com>
+
+	* dsa.c (generate): Add arg DOMAIN and use it if specified.
+	(generate_fips186): Ditto.
+	(dsa_generate_ext): Parse and check the optional "domain"
+	parameter and pass it to the generate functions.
+
+	* rijndael.c (rijndael_names): Add "AES128" and "AES-128".
+	(rijndael192_names): Add "AES-192".
+	(rijndael256_names): Add "AES-256".
+
+2008-12-05  Werner Koch  <wk@g10code.com>
+
+	* dsa.c (generate): Add arg TRANSIENT_KEY and use it to determine
+	the RNG quality needed.
+	(dsa_generate_ext): Parse the transient-key flag and pass it to
+	generate.
+
+2008-11-28  Werner Koch  <wk@g10code.com>
+
+	* dsa.c (generate_fips186): Add arg DERIVEPARMS and use the seed
+	value if available.
+
+	* primegen.c (_gcry_generate_fips186_2_prime): Fix inner p loop.
+
+2008-11-26  Werner Koch  <wk@g10code.com>
+
+	* primegen.c (_gcry_generate_fips186_3_prime): New.
+	* dsa.c (generate_fips186): Add arg USE_FIPS186_2.
+	(dsa_generate_ext): Parse new flag use-fips186-2.
+
+2008-11-25  Werner Koch  <wk@g10code.com>
+
+	* dsa.c (generate_fips186): New.
+	(dsa_generate_ext): Use new function if derive-parms are given or
+	if in FIPS mode.
+	* primegen.c (_gcry_generate_fips186_2_prime): New.
+
+2008-11-24  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_genkey): Insert code to output extrainfo.
+	(pubkey_generate): Add arg R_EXTRAINFO and pass it to the extended
+	key generation function.
+	* rsa.c (gen_x931_parm_xp, gen_x931_parm_xi): New.
+	(generate_x931): Generate params if not given.
+	(rsa_generate_ext): Parse use-x931 flag.  Return p-q-swapped
+	indicator.
+	* dsa.c (dsa_generate_ext): Put RETFACTORS into R_EXTRAINFO if
+	possible.
+
+	* pubkey.c (gcry_pk_genkey): Remove parsing of almost all
+	parameters and pass the parameter S-expression to pubkey_generate.
+	(pubkey_generate): Simplify by requiring modules to parse the
+	parameters.  Remove the special cases for Elgamal and ECC.
+	(sexp_elements_extract_ecc): Add arg EXTRASPEC and use it.  Fix
+	small memory leak.
+	(sexp_to_key): Pass EXTRASPEC to sexp_elements_extract_ecc.
+	(pubkey_table) [USE_ELGAMAL]: Add real extraspec.
+	* rsa.c (rsa_generate_ext): Adjust for new calling convention.
+	* dsa.c (dsa_generate_ext): Ditto.
+	* elgamal.c (_gcry_elg_generate): Ditto.  Rename to elg_generate_ext.
+	(elg_generate): New.
+	(_gcry_elg_generate_using_x): Remove after merging code with
+	elg_generate_ext.
+	(_gcry_pubkey_extraspec_elg): New.
+	(_gcry_elg_check_secret_key, _gcry_elg_encrypt, _gcry_elg_sign)
+	(_gcry_elg_verify, _gcry_elg_get_nbits): Make static and remove
+	_gcry_ prefix.
+	* ecc.c (_gcry_ecc_generate): Rename to ecc_generate_ext and
+	adjust for new calling convention.
+	(_gcry_ecc_get_param): Rename to ecc_get_param and make static.
+	(_gcry_pubkey_extraspec_ecdsa): Add ecc_generate_ext and
+	ecc_get_param.
+
+2008-11-20  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (pubkey_generate): Add arg DERIVEPARMS.
+	(gcry_pk_genkey): Parse derive-parms and pass it to above.
+	* rsa.c (generate_x931): New.
+	(rsa_generate_ext): Add arg DERIVEPARMS and call new function in
+	fips mode or if DERIVEPARMS is given.
+	* primegen.c (_gcry_derive_x931_prime, find_x931_prime): New.
+
+2008-11-19  Werner Koch  <wk@g10code.com>
+
+	* rsa.c (rsa_decrypt): Use gcry_create_nonce for blinding.
+	(generate): Rename to generate_std.
+
+2008-11-05  Werner Koch  <wk@g10code.com>
+
+	* md.c (md_open): Use a switch to set the Bsize.
+	(prepare_macpads): Fix long key case for SHA384 and SHA512.
+
+	* cipher.c (gcry_cipher_handle): Add field EXTRASPEC.
+	(gcry_cipher_open): Set it.
+	(gcry_cipher_ctl): Add private control code to disable weak key
+	detection and to return the current input block.
+	* des.c (_tripledes_ctx): Add field FLAGS.
+	(do_tripledes_set_extra_info): New.
+	(_gcry_cipher_extraspec_tripledes): Add new function.
+	(do_tripledes_setkey): Disable weak key detection.
+
+2008-10-24  Werner Koch  <wk@g10code.com>
+
+	* md.c (digest_table): Allow MD5 in fips mode.
+	(md_register_default): Take special action for MD5.
+	(md_enable, gcry_md_hash_buffer): Ditto.
+
+2008-09-30  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (do_setkey): Properly align "t" and "tk".
+	(prepare_decryption): Properly align "w".  Fixes bug #936.
+
+2008-09-18  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_genkey): Parse domain parameter.
+	(pubkey_generate): Add new arg DOMAIN and remove special case for
+	DSA with qbits.
+	* rsa.c (rsa_generate): Add dummy args QBITS, NAME and DOMAIN and
+	rename to rsa_generate_ext.  Change caller.
+	(_gcry_rsa_generate, _gcry_rsa_check_secret_key)
+	(_gcry_rsa_encrypt, _gcry_rsa_decrypt, _gcry_rsa_sign)
+	(_gcry_rsa_verify, _gcry_rsa_get_nbits): Make static and remove
+	_gcry_ prefix.
+	(_gcry_pubkey_spec_rsa, _gcry_pubkey_extraspec_rsa): Adjust names.
+	* dsa.c (dsa_generate_ext): New.
+	(_gcry_dsa_generate): Replace code by a call to dsa_generate.
+	(_gcry_dsa_check_secret_key, _gcry_dsa_sign, _gcry_dsa_verify)
+	(_gcry_dsa_get_nbits): Make static and remove _gcry_ prefix.
+	(_gcry_dsa_generate2): Remove.
+	(_gcry_pubkey_spec_dsa): Adjust to name changes.
+	(_gcry_pubkey_extraspec_dsa): Add dsa_generate_ext.
+
+2008-09-16  Werner Koch  <wk@g10code.com>
+
+	* ecc.c (run_selftests): Add arg EXTENDED.
+
+2008-09-12  Werner Koch  <wk@g10code.com>
+
+	* rsa.c (test_keys): Do a bad case signature check.
+	* dsa.c (test_keys): Do a bad case check.
+
+	* cipher.c (_gcry_cipher_selftest): Add arg EXTENDED and pass it
+	to the called tests.
+	* md.c (_gcry_md_selftest): Ditto.
+	* pubkey.c (_gcry_pk_selftest): Ditto.
+	* rijndael.c (run_selftests): Add arg EXTENDED and pass it to the
+	called tests.
+	(selftest_fips_128): Add arg EXTENDED and run only one test in
+	non-extended mode.
+	(selftest_fips_192): Add dummy arg EXTENDED.
+	(selftest_fips_256): Ditto.
+	* hmac-tests.c (_gcry_hmac_selftest): Ditto.
+	(run_selftests): Ditto.
+	(selftests_sha1): Add arg EXTENDED and run only one test in
+	non-extended mode.
+	(selftests_sha224, selftests_sha256): Ditto.
+	(selftests_sha384, selftests_sha512): Ditto.
+	* sha1.c (run_selftests): Add arg EXTENDED and pass it to the
+	called test.
+	(selftests_sha1): Add arg EXTENDED and run only one test in
+	non-extended mode.
+	* sha256.c (run_selftests): Add arg EXTENDED and pass it to the
+	called tests.
+	(selftests_sha224): Add arg EXTENDED and run only one test in
+	non-extended mode.
+	(selftests_sha256): Ditto.
+	* sha512.c (run_selftests): Add arg EXTENDED and pass it to the
+	called tests.
+	(selftests_sha384): Add arg EXTENDED and run only one test in
+	non-extended mode.
+	(selftests_sha512): Ditto.
+	* des.c (run_selftests): Add arg EXTENDED and pass it to the
+	called test.
+	(selftest_fips): Add dummy arg EXTENDED.
+	* rsa.c (run_selftests): Add dummy arg EXTENDED.
+
+	* dsa.c (run_selftests): Add dummy arg EXTENDED.
+
+	* rsa.c (extract_a_from_sexp): New.
+	(selftest_encr_1024): Check that the ciphertext does not match the
+	plaintext.
+	(test_keys): Improve tests and return an error status.
+	(generate): Return an error if test_keys fails.
+	* dsa.c (test_keys): Add comments and return an error status.
+	(generate): Return an error if test_keys failed.
+
+2008-09-11  Werner Koch  <wk@g10code.com>
+
+	* rsa.c (_gcry_rsa_decrypt): Return an error instead of calling
+	BUG in case of a practically impossible condition.
+	(sample_secret_key, sample_public_key): New.
+	(selftest_sign_1024, selftest_encr_1024): New.
+	(selftests_rsa): Implement tests.
+	* dsa.c (sample_secret_key, sample_public_key): New.
+	(selftest_sign_1024): New.
+	(selftests_dsa): Implement tests.
+
+2008-09-09  Werner Koch  <wk@g10code.com>
+
+	* hmac-tests.c (selftests_sha1): Add tests.
+	(selftests_sha224, selftests_sha384, selftests_sha512): Make up tests.
+
+	* hash-common.c, hash-common.h: New.
+	* sha1.c (selftests_sha1): Add 3 tests.
+	* sha256.c (selftests_sha256, selftests_sha224): Ditto.
+	* sha512.c (selftests_sha512, selftests_sha384): Ditto.
+
+2008-08-29  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_get_keygrip): Remove the special case for RSA
+	and check whether a custom computation function has been setup.
+	* rsa.c (compute_keygrip): New.
+	(_gcry_pubkey_extraspec_rsa): Setup this function.
+	* ecc.c (compute_keygrip): New.
+	(_gcry_pubkey_extraspec_ecdsa): Setup this function.
+
+2008-08-28  Werner Koch  <wk@g10code.com>
+
+	* cipher.c (cipher_decrypt, cipher_encrypt): Return an error if
+	mode NONE is used.
+	(gcry_cipher_open): Allow mode NONE only with a debug flag set and
+	if not in FIPS mode.
+
+2008-08-26  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (pubkey_generate): Add arg KEYGEN_FLAGS.
+	(gcry_pk_genkey): Implement new parameter "transient-key" and
+	pass it as flags to pubkey_generate.
+	(pubkey_generate): Make use of an ext_generate function.
+	* rsa.c (generate): Add new arg transient_key and pass appropriate
+	args to the prime generator.
+	(_gcry_rsa_generate): Factor all code out to ...
+	(rsa_generate): .. new func with extra arg KEYGEN_FLAGS.
+	(_gcry_pubkey_extraspec_rsa): Setup rsa_generate.
+	* primegen.c (_gcry_generate_secret_prime)
+	(_gcry_generate_public_prime): Add new arg RANDOM_LEVEL.
+
+2008-08-21  Werner Koch  <wk@g10code.com>
+
+	* primegen.c (_gcry_generate_secret_prime)
+	(_gcry_generate_public_prime): Use a constant macro for the random
+	level.
+
+2008-08-19  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (sexp_elements_extract_ecc) [!USE_ECC]: Do not allow
+	the "curve" parameter.
+
+2008-08-15  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (_gcry_pk_selftest): New.
+	* dsa.c (selftests_dsa, run_selftests): New.
+	* rsa.c (selftests_rsa, run_selftests): New.
+	* ecc.c (selftests_ecdsa, run_selftests): New.
+
+	* md.c (_gcry_md_selftest): New.
+	* sha1.c (run_selftests, selftests_sha1): New.
+	* sha256.c (selftests_sha224, selftests_sha256, run_selftests): New.
+	* sha512.c (selftests_sha384, selftests_sha512, run_selftests): New.
+
+	* des.c (selftest): Remove static variable from selftest.
+	(des_setkey): No on-the-fly self test in fips mode.
+	(tripledes_set3keys): Ditto.
+
+	* cipher.c (_gcry_cipher_setkey, _gcry_cipher_setiv):
+
+	* dsa.c (generate): Bail out in fips mode if NBITS is less than 1024.
+	* rsa.c (generate): Return an error code if the requested size
+	is less than 1024 and we are in fips mode.
+	(_gcry_rsa_generate): Take care of that error code.
+
+	* ecc.c (generate_curve): In fips mode enable only NIST curves.
+
+	* cipher.c (_gcry_cipher_selftest): New.
+
+	* sha512.c (_gcry_digest_extraspec_sha384)
+	(_gcry_digest_extraspec_sha512): New.
+	* sha256.c (_gcry_digest_extraspec_sha224)
+	(_gcry_digest_extraspec_sha256): New.
+	* sha1.c (_gcry_digest_extraspec_sha1): New.
+	* ecc.c (_gcry_pubkey_extraspec_ecdsa): New.
+	* dsa.c (_gcry_pubkey_extraspec_dsa): New.
+	* rsa.c (_gcry_pubkey_extraspec_rsa): New.
+	* rijndael.c (_gcry_cipher_extraspec_aes)
+	(_gcry_cipher_extraspec_aes192, _gcry_cipher_extraspec_aes256): New.
+	* des.c (_gcry_cipher_extraspec_tripledes): New.
+
+	* cipher.c (gcry_cipher_register): Rename to _gcry_cipher_register.
+	Add arg EXTRASPEC.
+	(dummy_extra_spec): New.
+	(cipher_table_entry): Add extraspec field.
+	* md.c (gcry_md_register): Rename to _gcry_md_register.  Add
+	arg EXTRASPEC.
+	(dummy_extra_spec): New.
+	(digest_table_entry): Add extraspec field.
+	* pubkey.c (gcry_pk_register): Rename to _gcry_pk_register.  Add
+	arg EXTRASPEC.
+	(dummy_extra_spec): New.
+	(pubkey_table_entry): Add extraspec field.
+
+	* ac.c: Let most public functions return GPG_ERR_UNSUPPORTED in
+	fips mode.
+
+	* pubkey.c (pubkey_table_entry): Add field FIPS_ALLOWED and mark
+	appropriate algorithms.
+	(dummy_generate, dummy_check_secret_key, dummy_encrypt)
+	(dummy_decrypt, dummy_sign, dummy_verify, dummy_get_nbits): Signal
+	a fips error when used.
+	(gcry_pk_register): In fips mode do not allow registering new
+	algorithms.
+
+	* md.c (digest_table): Add field FIPS_ALLOWED and mark appropriate
+	algorithms.
+	(md_register_default): In fips mode register only fips algorithms.
+	(gcry_md_register): In fips mode do not allow registering new
+	algorithms.
+	(gcry_md_get): Signal a fips error if called.
+	(gcry_md_hash_buffer): Do not allow rmd160 when not in fips mode.
+	(md_start_debug): Disable in fips_mode.
+
+	* md.c (gcry_md_register_default): Rename to ..
+	(md_register_default): .. this.
+	(md_digest): Remove this commented function.
+	* pubkey.c (gcry_pk_register_default): Rename to ..
+	(pk_register_default): .. this.
+
+	* cipher.c (cipher_table_entry): Add field FIPS_ALLOWED.
+	(gcry_cipher_register_default): Register only fips approved
+	algorithms.
+	(gcry_cipher_register): Do not allow registering new ciphers.
+	(cipher_setiv): Signal fips error.
+
+	* cipher.c (gcry_cipher_register_default): Rename to ..
+	(cipher_register_default): .. this.
+	(REGISTER_DEFAULT_CIPHERS): Adjust for that change.
+
+2008-07-05  Werner Koch  <wk@g10code.com>
+
+	* random-daemon.c, random.h, rndhw.c, rndunix.c, rand-internal.h
+	* random.c, rndegd.c, rndlinux.c, rndw32.c: Move to ../random.
+	* Makefile.am: Remove random stuff.
+
+2008-06-24  Szakats Istvan  <szaki.ms@gmail.com>  (wk)
+
+	* ac.c (_gcry_ac_key_destroy, _gcry_ac_key_pair_generate): Release
+	some more memory.
+
+2008-04-22  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (_gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+	(_gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Use Padlock if possible.
+
+2008-04-18  Werner Koch  <wk@g10code.com>
+
+	* sha1.c (transform_aligned): Remove.  That will obviously not
+	work because we need a scratch working area and our internal API
+	does not allow modifying the buffers.
+
+	* rijndael.c: Factor tables out to ..
+	* rijndael-tables.h: .. new.
+
+	* ac.c (ac_data_extract): Make static.
+
+	* camellia.h [HAVE_CONFIG_H]: Include config.h.
+
+	* rndw32.c (registry_poll): Only print the performance data
+	problem warning once.  Suggested by Simon Josefsson.
+
+2008-03-19  Werner Koch  <wk@g10code.com>
+
+	* cipher.c (gcry_cipher_open) [USE_AES]: Init bulk encryption only
+	if requested.  Suggested by Dirk Stoecker.
+
+2008-03-18  Werner Koch  <wk@g10code.com>
+
+	* sha1.c: Include stdint.h.
+	(transform): Add arg NBLOCKS so that we can work on more than one
+	block and avoid updates of the chaining variables.  Changed all
+	callers to use 1.
+	(sha1_write): Replace loop around transform.
+	(transform_aligned) [WORDS_BIGENDIAN]: New.
+	(TRANSFORM): New macro to replace all direct calls of transform.
+
+2008-03-17  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (_gcry_aes_cfb_dec): New.
+	(do_encrypt): Factor code out to ..
+	(do_encrypt_aligned): .. New.
+	(_gcry_aes_cfb_enc, _gcry_aes_cfb_dec): Use new function.
+	(do_decrypt): Factor code out to ..
+	(do_decrypt_aligned): .. new.
+	(_gcry_aes_cbc_enc, _gcry_aes_cbc_dec): New.
+	* cipher.c (struct gcry_cipher_handle): Put field IV into new
+	union U_IV to enforce proper alignment.  Change all users.
+	(do_cfb_decrypt): Optimize.
+	(do_cbc_encrypt, do_cbc_decrypt): Optimize.
+
+2008-03-15  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (_gcry_aes_cfb_enc): New.
+	* cipher.c (struct gcry_cipher_handle): Add fields ALGO and BULK.
+	(gcry_cipher_open): Set ALGO and BULK.
+	(do_cfb_encrypt): Optimize.
+
+2008-02-18  Werner Koch  <wk@g10code.com>
+
+	* rsa.c (_gcry_rsa_verify) [IS_DEVELOPMENT_VERSION]: Print
+	intermediate results.
+
+2008-01-08  Werner Koch  <wk@g10code.com>
+
+	* random.c (add_randomness): Do not just increment
+	POOL_FILLED_COUNTER but update it by the actual amount of data.
+
+2007-12-13  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (sexp_data_to_mpi): Support SHA-224.
+
+2007-12-05  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (USE_PADLOCK): Depend on ENABLE_PADLOCK_SUPPORT.
+	* rndhw.c (USE_PADLOCK): Ditto.
+
+	* rsa.c (secret): Fixed condition test for using CRT.  Reported by
+	Dean Scarff.  Fixes bug#864.
+	(_gcry_rsa_check_secret_key): Return an error if the optional
+	parameters are missing.
+	* pubkey.c (sexp_elements_extract): Add arg ALGO_NAME.  Changed all
+	callers to pass NULL.  Add hack to allow for optional RSA
+	parameters.
+	(sexp_to_key): Pass algo name to sexp_elements_extract.
+
+2007-12-03  Werner Koch  <wk@g10code.com>
+
+	* random.c (gcry_random_add_bytes): Implement it.
+	* rand-internal.h (RANDOM_ORIGIN_EXTERNAL): New.
+
+2007-11-30  Werner Koch  <wk@g10code.com>
+
+	* rndhw.c: New.
+	* rndlinux.c (_gcry_rndlinux_gather_random): Try to read 50%
+	directly from the hwrng.
+	* random.c (do_fast_random_poll): Also run the hw rng fast poll.
+	(_gcry_random_dump_stats): Tell whether the hw rng failed.
+
+2007-11-29  Werner Koch  <wk@g10code.com>
+
+	* rijndael.c (USE_PADLOCK): Define new macro used for ia32.
+	(RIJNDAEL_context) [USE_PADLOCK]: Add fields USE_PADLOCK and
+	PADLOCK_KEY.
+	(do_setkey) [USE_PADLOCK]: Enable padlock if available for 128 bit
+	AES.
+	(do_padlock) [USE_PADLOCK]: New.
+	(rijndael_encrypt, rijndael_decrypt) [USE_PADLOCK]: Divert to
+	do_padlock.
+	* cipher.c (cipher_context_alignment_t): New.  Use it in this
+	module in place of PROPERLY_ALIGNED_TYPE.
+	(NEED_16BYTE_ALIGNED_CONTEXT): Define macro for ia32.
+	(struct gcry_cipher_handle): Add field HANDLE_OFFSET.
+	(gcry_cipher_open): Take care of increased alignment requirements.
+	(gcry_cipher_close): Ditto.
+
+2007-11-28  Werner Koch  <wk@g10code.com>
+
+	* sha256.c (asn224): Fixed wrong template.  It happened due to a
+	bug in RFC4880.  SHA-224 is not in the stable version of libgcrypt
+	so the consequences are limited to users of this devel version.
+
+2007-10-31  Werner Koch  <wk@g10code.com>
+
+	* ac.c (gcry_ac_data_new): Remove due to the visibility wrapper.
+	(gcry_ac_data_destroy, gcry_ac_data_copy, gcry_ac_data_length)
+	(gcry_ac_data_set, gcry_ac_data_get_name, gcry_ac_data_get_index)
+	(gcry_ac_data_to_sexp, gcry_ac_data_from_sexp)
+	(gcry_ac_data_clear, gcry_ac_io_init, gcry_ac_open)
+	(gcry_ac_close, gcry_ac_key_init, gcry_ac_key_pair_generate)
+	(gcry_ac_key_pair_extract, gcry_ac_key_destroy)
+	(gcry_ac_key_pair_destroy, gcry_ac_key_data_get)
+	(gcry_ac_key_test, gcry_ac_key_get_nbits, gcry_ac_key_get_grip)
+	(gcry_ac_data_encrypt, gcry_ac_data_decrypt, gcry_ac_data_sign)
+	(gcry_ac_data_verify, gcry_ac_data_encode, gcry_ac_data_decode)
+	(gcry_ac_mpi_to_os, gcry_ac_mpi_to_os_alloc, gcry_ac_os_to_mpi)
+	(gcry_ac_data_encrypt_scheme, gcry_ac_data_decrypt_scheme)
+	(gcry_ac_data_sign_scheme, gcry_ac_data_verify_scheme)
+	(gcry_ac_io_init_va): Ditto.
+	(gcry_ac_id_to_name, gcry_ac_name_to_id): Remove as these
+	deprecated functions are now implemented by visibility.c.
+
+2007-10-26  Werner Koch  <wk@g10code.com>
+
+	* rndw32.c: Disable debug flag.
+
+2007-10-25  Werner Koch  <wk@g10code.com>
+
+	* rndw32.c: Updated from current cryptlib snapshot and modified
+	for our use.  Removed support for pre-NT systems.
+	(slow_gatherer_windows95): Remove.
+	(_gcry_rndw32_gather_random): Require an NT platform.
+	(init_system_rng, read_system_rng, read_mbm_data): New.
+	(slow_gatherer_windowsNT): Rename to ...
+	(slow_gatherer): .. this.  Read system RNG and MBM.
+	(registry_poll): New with code factored out from slow_gatherer.
+
+2007-08-23  Werner Koch  <wk@g10code.com>
+
+	* random.c (pool_filled_counter): New.
+	(add_randomness): Use it.
+
+2007-08-22  Werner Koch  <wk@g10code.com>
+
+	* rndw32.c, rndunix.c: Switched to LGPL.
+
+2007-05-30  Werner Koch  <wk@g10code.com>
+
+	* camellia.h, camellia.c: Replace by new LGPL version and adjusted
+	camellia.h.
+
+2007-05-09  Marcus Brinkmann  <marcus@g10code.de>
+
+	* ac.c (_gcry_ac_io_init_va, _gcry_ac_io_write, _gcry_ac_io_read):
+	Adjust users of gcry_ac_io_t because the union is not anonymous
+	anymore.
+
+2007-05-02  Werner Koch  <wk@g10code.com>
+
+	* camellia-glue.c (camellia_setkey, camellia_encrypt)
+	(camellia_decrypt): Recalculated used stack size in called
+	functions.
+	* camellia.h: Redefine external symbols.
+
+2007-05-02  David Shaw  <dshaw@jabberwocky.com>
+
+	* Makefile.am, cipher.c: Add Camellia.
+
+	* camellia-glue.c: New.  The necessary glue to interface libgcrypt
+	to the stock NTT Camellia distribution.
+
+	* camellia.h, camellia.c: The stock NTT Camellia distribution
+	(GPL).
+
+2007-04-30  David Shaw  <dshaw@jabberwocky.com>
+
+	* cipher.c: Use #if instead of #ifdef, as configure sets the
+	USE_cipher defines to 0 for disabled ciphers.
+
+2007-04-30  Werner Koch  <wk@g10code.com>
+
+	* rndegd.c (_gcry_rndegd_set_socket_name): New.
+
+2007-04-30  Marcus Brinkmann  <marcus@g10code.de>
+
+	* ecc.c (ec2os): Fix relocation of short numbers.
+
+	* ecc.c (generate_key): Do not allocate D, which will be allocated
+	by GEN_K.  Remove G.  Fix test if g_x, g_y resp. q_x, q_y are
+	requested.
+	(_gcry_ecc_generate): Release unneeded members of SK.
+	* pubkey.c (sexp_to_key): Release NAME.
+
+2007-04-28  Marcus Brinkmann  <marcus@g10code.de>
+
+	* ac.c (gcry_ac_mpi): Remove member NAME_PROVIDED.
+	(ac_data_mpi_copy, _gcry_ac_data_set, _gcry_ac_data_get_name)
+	(_gcry_ac_data_get_index, ac_data_construct): Adjust handling of
+	NAME accordingly.
+
+2007-04-20  Werner Koch  <wk@g10code.com>
+
+	* ecc.c (domain_parms): Add standard brainpool curves.
+
+2007-04-18  Werner Koch  <wk@g10code.com>
+
+	* ecc.c (generate_curve): Implement alias mechanism.
+
+	* pubkey.c (sexp_elements_extract_ecc): New.
+	(sexp_to_key): Add special case for ecc.
+	(sexp_to_key, sexp_to_sig, sexp_to_enc, gcry_pk_genkey): Replace
+	name_terminated stuff by a call to _gcry_sexp_nth_string.
+	(gcry_pk_get_keygrip): Ditto.
+
+2007-04-16  Werner Koch  <wk@g10code.com>
+
+	* ecc.c (_gcry_ecc_generate): Renamed DUMMY to CURVE and use it.
+
+2007-04-13  Marcus Brinkmann  <marcus@g10code.de>
+
+	* ac.c (ac_data_construct): Cast const away to suppress compiler
+	warning.
+
+	* ecc.c (ecc_generate): Avoid compiler warning for unused argument
+	DUMMY.
+	(ecc_verify): Avoid compiler warning for unused arguments CMP and
+	OPAQUEV.
+
+2007-04-06  Werner Koch  <wk@g10code.com>
+
+	* sha1.c (oid_spec_sha1): Add another oid from X9.62.
+
+2007-03-28  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_genkey): Do not issue misc-key-info if it is
+	empty.
+	(gcry_pk_genkey): New parameter "curve".
+
+	* ecc.c: Entirely rewritten with only a few traces of the old
+	code left.
+	(_gcry_ecc_generate): New.
+	(generate_key): New arg NAME.
+	(generate_curve): Ditto.  Return actual number of NBITS.
+
+2007-03-26  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_genkey): Increase size of SKEY array and add a
+	runtime bounds check.
+
+2007-03-23  Werner Koch  <wk@g10code.com>
+
+	* ecc.c (ecc_ctx_init, ecc_ctx_free, ecc_mod, ecc_mulm): New.
+	(duplicate_point, sum_points, escalar_mult): Don't use a
+	copy of base->p.  Replaced all mpi_mulm by ecc_mulm so that we can
+	experiment with different algorithms.
+	(generate_key, check_secret_key, sign, verify): Initialize a
+	computation context for use by ecc_mulm.
+
+2007-03-22  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (pubkey_table): Initialize ECC.
+	* Makefile.am (EXTRA_libcipher_la_SOURCES): Add ecc.c.
+	* ecc.c: New.  Heavily reformatted and changed for use in libgcrypt.
+	(point_init): New.
+	(escalar_mult): Make arg R the first arg to be similar to the mpi
+	functions.
+	(duplicate_point): Ditto.
+	(sum_points): Ditto.
+	(sign, verify): Remove unneeded copy operations.
+	(sum_points): Removed memory leaks and optimized some compares.
+	(verify): Simplified input check.
+
+2007-03-14  Werner Koch  <wk@g10code.com>
+
+	* random.c (MASK_LEVEL): Removed macro as it was used only at one
+	place.  Open coded it there.
+	(gcry_randomize, _gcry_update_random_seed_file)
+	(_gcry_fast_random_poll): Factor lock code out to ..
+	(lock_pool, unlock_pool): .. new.
+	(initialize): Lock the pool while allocating.
+	(read_random_source, do_fast_random_poll): Moved initialization to ...
+	(initialize): .. here.
+	(_gcry_enable_quick_random_gen): No more need for initialization.
+	(is_initialized): Moved this global flag to ..
+	(initialize): .. here and changed all users to unconditionally call
+	initialize.
+	(add_randomness): Remove initialization here.  It simply can't
+	happen.
+
+	* random.c (enum random_origins): Moved to ..
+	* rand-internal.h: .. here.
+	* rndunix.c (_gcry_rndunix_gather_random): Use enum in prototype
+	for ORIGIN and renamed REQUESTOR to ORIGIN.
+	* rndegd.c (_gcry_rndegd_gather_random): Ditto.
+	* rndlinux.c (_gcry_rndlinux_gather_random): Ditto.
+	* rndw32.c (_gcry_rndw32_gather_random): Ditto.
+	(_gcry_rndw32_gather_random_fast): Ditto.
+
+2007-03-13  Werner Koch  <wk@g10code.com>
+
+	* random.c (enum random_origins): New.
+	(add_randomness): Renamed arg SOURCE to ORIGIN.
+	(read_random_source): Renamed arg REQUESTOR to ORIGIN.
+	(getfnc_gather_random): Removed static variable because this
+	function is only called once and thus we don't need this
+	optimization.
+	(_gcry_quick_random_gen): Removed and replaced by..
+	(_gcry_enable_quick_random_gen): .. this.  It is only used to
+	enable it and it does not make sense to disable it later.  Changed
+	the only caller too.
+	(get_random_bytes): Removed.
+	(gcry_random_bytes, gcry_random_bytes_secure): Implement in terms
+	of gcry_randomize.
+	* random-daemon.c (_gcry_daemon_get_random_bytes): Removed.
+
+2007-02-23  Werner Koch  <wk@g10code.com>
+
+	* elgamal.c (generate): Removed unused variable TEMP.
+	(test_keys): New arg NODIE.
+	(generate_using_x, _gcry_elg_generate_using_x): New.
+	* pubkey.c (pubkey_generate): New arg XVALUE and direct call to
+	the new elgamal generate function.
+	(gcry_pk_genkey): Parse the new "xvalue" tag.
+
+2007-02-22  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (sexp_data_to_mpi): Handle dynamically allocated
+	algorithms.  Suggested by Neil Dunbar.  Fixes bug#596.
+
+	* rndw32.c (_gcry_rndw32_gather_random_fast): Make it return void.
+
+	* cipher.c (gcry_cipher_algo_name): Simplified.
+
+	* random.c: Use the daemon only if compiled with USE_RANDOM_DAEMON.
+
+	* Makefile.am (libcipher_la_SOURCES): Build random-daemon support
+	only if requested.
+
+2007-02-21  Werner Koch  <wk@g10code.com>
+
+	* random.c (rndpool, keypool): Make unsigned.
+	(mix_pool): Change char* variables to unsigned char*.
+	(gcry_randomize): Make arg BUFFER a void*.
+	(gcry_create_nonce): Ditto.
+
+	* rmd160.c (gcry_rmd160_mixblock): Make BUFFER a void*.
+	(_gcry_rmd160_hash_buffer): Make OUTBUF and BUFFER void*.
+	* sha1.c (_gcry_sha1_hash_buffer): Ditto.
+
+	* cipher.c (gcry_cipher_encrypt, gcry_cipher_decrypt): Change
+	buffer args to void*.
+	(gcry_cipher_register): Make ALGORITHM_ID an int *.
+
+	* md.c (md_start_debug): Make SUFFIX a const char*.  Use snprintf.
+	(gcry_md_debug): New.
+	(gcry_md_ctl): Changed arg BUFFER from unsigned char*.
+
+	* md.c (md_write): Make INBUF a const void*.
+	(gcry_md_write): Remove needless cast.
+	* crc.c (crc32_write): Make INBUF a const void*.
+	(update_crc32, crc24rfc2440_write): Ditto.
+	* sha512.c (sha512_write, transform): Ditto.
+	* sha256.c (sha256_write, transform): Ditto.
+	* rmd160.c (rmd160_write, transform): Ditto.
+	* md5.c (md5_write, transform): Ditto.
+	* md4.c (md4_write, transform): Ditto.
+	* sha1.c (sha1_write, transform): Ditto.
+
+	* tiger.c (tiger_write, transform): Ditto.
+	* whirlpool.c (whirlpool_write, whirlpool_add, transform): Ditto.
+
+	* elgamal.c (elg_names): Change to a const*.
+	* dsa.c (dsa_names): Ditto.
+	* rsa.c (rsa_names): Ditto.
+	* pubkey.c (gcry_pk_lookup_func_name): Make ALIASES a const.
+
+2007-02-20  Werner Koch  <wk@g10code.com>
+
+	* rndlinux.c (open_device): Remove unused arg MINOR.
+
+2007-01-30  Werner Koch  <wk@g10code.com>
+
+	* sha256.c (oid_spec_sha256): Add alias from pkcs#1.
+	* sha512.c (oid_spec_sha512): Ditto.
+	(oid_spec_sha384): Ditto.
+
+2006-12-18  Werner Koch  <wk@g10code.com>
+
+	* rndlinux.c (set_cloexec_flag): New.
+	(open_device): Set close-on-exec flags.  Suggested by Max
+	Kellermann.  Fixes Debian#403613.
+
+	* Makefile.am (AM_CPPFLAGS, AM_CFLAGS): Split and merged
+	Moritz' changes.
+	(INCLUDES): Removed.
+
+2006-11-30  Werner Koch  <wk@g10code.com>
+
+	* serpent.c (byte_swap_32): Remove trailing semicolon.
+
+2006-11-15  Werner Koch  <wk@g10code.com>
+
+	* Makefile.am (INCLUDES): Include ../src/
+
+2006-11-03  Werner Koch  <wk@g10code.com>
+
+	* random.c [HAVE_GETTIMEOFDAY]: Included sys/time.h and not
+	sys/times.h.  Reported by Rafaël Carré.
+
+2006-11-05  Moritz Schulte  <moritz@g10code.com>
+
+	* Makefile.am (AM_CFLAGS): Added -I$(top_builddir)/src so that the
+	new gcrypt.h is used, not the one installed in the system.
+
+2006-10-25  Werner Koch  <wk@g10code.com>
+
+	* primegen.c (prime_generate_internal): Tweaked use of secure
+	memory and entropy use.  Save unused primes from the pool.
+	Allocate at least a pool of 30.
+	(save_pool_prime, get_pool_prime): New.
+
+2006-10-23  Werner Koch  <wk@g10code.com>
+
+	* ac.c (_gcry_ac_data_from_sexp): Reset sexp_tmp for failsafe
+	means.  Release sexp_cur if needed.  Reported by Dirk Stoecker.
+
+	* pubkey.c (pubkeys_registered_lock): Initialized it.  It is not
+	really needed because this is a mere initialization to 0 anyway.
+	Noted by Victor Stinner.
+
+2006-10-17  Werner Koch  <wk@g10code.com>
+
+	* dsa.c (_gcry_dsa_generate2): New.
+	(generate): New arg QBITS.  Add sanity checks for reasonable qbits
+	and nbits.
+	* pubkey.c (gcry_pk_genkey): Parse a qbits element.
+	(pubkey_generate): New arg QBITS.  Pass it to the DSA generation.
+
+2006-10-05  Werner Koch  <wk@g10code.com>
+
+	* md.c (gcry_md_algo_info) <get_asnoid>: Check that the algo is
+	available.
+
+2006-10-04  David Shaw  <dshaw@jabberwocky.com>  (wk)
+
+	* tiger.c (round): Rename to tiger_round as gcc 4 has a built-in
+	round function that conflicts with it.
+
+2006-09-11  Werner Koch  <wk@g10code.com>
+
+	* rndw32.c (slow_gatherer_windowsNT): While adding data use the
+	size of the diskPerformance and not its address.  Has been fixed in
+	GnuPG more than a year ago.  Noted by Lee Fisher.
+
+2006-08-30  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (sexp_data_to_mpi): Need to allow "ripemd160" here as
+	this is the canonical name.
+
+2006-08-29  Hye-Shik Chang  <perky@FreeBSD.org>  (wk)
+
+	* seed.c: New.
+
+2006-08-03  Werner Koch  <wk@g10code.com>
+
+	* random-daemon.c (_gcry_daemon_initialize_basics): Don't
+	initialize the socket.  Remove arg SOCKETNAME.
+	(connect_to_socket): Make sure that daemon is set to -1 on error.
+	(call_daemon): Initialize the socket on the first call.
+	(_gcry_daemon_randomize, _gcry_daemon_get_random_bytes)
+	(_gcry_daemon_create_nonce): New arg SOCKETNAME.
+	* random.c (initialize): Call the new daemon initializer.
+	(get_random_bytes, gcry_randomize, gcry_create_nonce): Pass socket
+	name to daemon call and reset allow_daemon on failure.
+
+2006-07-26  Werner Koch  <wk@g10code.com>
+
+	* rmd160.c (_gcry_rmd160_mixblock): Add cast to transform call.
+
+	* blowfish.c (selftest): Cast string to unsigned char*.
+
+	* primegen.c (prime_generate_internal): Cast unsigned/char*
+	mismatch in calling m_out_of_n.
+	(is_prime): Changed COUNT to unsigned int *.
+
+	* ac.c (_gcry_ac_data_copy): Initialize DATA_MPIS.
+
+	* random.c (gcry_create_nonce): Update the pid after a fork.
+	Reported by Uoti Urpala.
+
+2006-07-04  Marcus Brinkmann  <marcus@g10code.de>
+
+	* sha512.c: Fix typo in copyright notice.
+
+2006-06-21  Werner Koch  <wk@g10code.com>
+
+	* rsa.c (_gcry_rsa_generate): Replace xcalloc by calloc.
+	* pubkey.c (gcry_pk_encrypt, gcry_pk_sign): Ditto.
+	(sexp_to_key, sexp_to_sig, sexp_to_enc, gcry_pk_encrypt)
+	(gcry_pk_sign, gcry_pk_genkey, gcry_pk_get_keygrip): Ditto.
+	* md.c (md_copy): Ditto.
+
+2006-04-22  Moritz Schulte  <moritz@g10code.com>
+
+	* random-daemon.c (_gcry_daemon_initialize_basics): New argument:
+	SOCKETNAME.  Passing on to connect_to_socket() if non-NULL.
+	(connect_to_socket, writen, readn, call_daemon): New functions.
+	(_gcry_daemon_randomize, _gcry_daemon_get_random_bytes)
+	(_gcry_daemon_create_nonce): Call call_daemon().
+	(RANDOM_DAEMON_SOCKET): New symbol.
+	(daemon_socket): New static variable.
+
+	* random.h (_gcry_daemon_initialize_basics): New parameter:
+	SOCKETNAME.
+	(_gcry_set_random_daemon_socket): New declaration.
+
+	* random.c (initialize_basics): Pass DAEMON_SOCKET_NAME to
+	_gcry_daemon_initialize_basics.
+	(_gcry_set_random_daemon_socket): New function, setting
+	DAEMON_SOCKET_NAME.
+
+2006-04-01  Moritz Schulte  <moritz@g10code.com>
+
+	* ac.c (eme_pkcs_v1_5_encode): Use KEY_SIZE directly, no need to
+	call gcry_ac_key_get_nbits.
+	(eme_pkcs_v1_5_decode): Likewise.
+	(ac_es_dencode_prepare_pkcs_v1_5): Fill options_em structure with
+	key_size.
+	(_gcry_ac_data_dump, gcry_ac_data_dump): New functions.
+	(_gcry_ac_data_to_sexp, _gcry_ac_data_from_sexp): More or less
+	rewritten; changed S-Expression format so that it matches the one
+	used in pubkey.c.
+
+2006-03-15  Werner Koch  <wk@g10code.com>
+
+	* random-daemon.c: New.
+	* random.c (_gcry_use_random_daemon): New.
+	(get_random_bytes, gcry_randomize, gcry_create_nonce): Try
+	diverting to the daemon functions.
+
+2006-03-14  Werner Koch  <wk@g10code.com>
+
+	* random.c (lock_seed_file): New.
+	(read_seed_file, _gcry_update_random_seed_file): Use it.
+
+	* random.c (gcry_create_nonce): Detect a fork and re-seed.
+	(read_pool): Fixed the fork detection; it used to work only for
+	multi-threaded processes.
+
+2006-03-12  Brad Hards  <bradh@frogmouth.net>  (wk)
+
+	* md.c (md_open): Use new variable macpads_Bsize instead of
+	hardwiring the block size.  Changed at all places.
+
+2006-03-10  Brad Hards  <bradh@frogmouth.net>  (wk, patch 2005-04-22)
+
+	* md.c, sha256.c: Add support for SHA-224.
+	(sha224_init): New.
+
+2006-01-18  Brad Hards  <bradh@frogmouth.net>  (wk 2006-03-07)
+
+	* cipher.c (cipher_encrypt, cipher_decrypt, do_ofb_encrypt)
+	(do_ofb_decrypt, gcry_cipher_open): Implement Output Feedback Mode.
+
+2005-11-02  Moritz Schulte  <moritz@g10code.com>
+
+	* pubkey.c (gcry_pk_algo_name): Return "?" instead of NULL for
+	unknown algorithm IDs.
+	* cipher.c (cipher_algo_to_string): Likewise.
+
+2005-11-01  Moritz Schulte  <moritz@g10code.com>
+
+	* pubkey.c (gcry_pk_algo_info): Don't forget to break after switch
+	case.
+
+2005-09-19  Werner Koch  <wk@g10code.com>
+
+	* dsa.c (generate): Add preliminary support for 2 and 4 keys.
+	Return an error code if the key size is not supported.
+	(_gcry_dsa_generate): Return an error.
+
+2005-08-22  Werner Koch  <wk@g10code.com>
+
+	* primegen.c (check_prime): New arg RM_ROUNDS.
+	(prime_generate_internal): Call it here with 5 rounds as used
+	before.
+	(gcry_prime_check): But here with 64 rounds.
+	(is_prime): Make sure never to use less than 5 rounds.
+
+2005-04-16  Moritz Schulte  <moritz@g10code.com>
+
+	* ac.c (_gcry_ac_init): New function.
+
+2005-04-12  Moritz Schulte  <moritz@g10code.com>
+
+	* ac.c (_gcry_ac_io_write, _gcry_ac_io_read): Initialize err to
+	make the compiler happy.
+	Always use errno, now that gcry_malloc() is guaranteed to set
+	errno on failure.
+	(_gcry_ac_data_to_sexp): Don't forget to goto out after error in
+	loop.
+	(_gcry_ac_data_to_sexp): Remove unused variable mpi_list.
+	(_gcry_ac_data_to_sexp): Always deallocate sexp_buffer.
+	(_gcry_ac_data_from_sexp): Don't forget to initialize data_set_new.
+	(_gcry_ac_data_from_sexp): Handle special case, which is
+	necessary, since gcry_sexp_nth() does not distinguish between
+	"element does not exist" and "element is the empty list".
+	(_gcry_ac_io_init_va): Use assert to make sure that mode and type
+	are correct.
+	Use gcry_error_t types where gcry_err_code_t types have been used
+	before.
+
+2005-04-11  Moritz Schulte  <moritz@g10code.com>
+
+	* ac.c (_gcry_ac_data_sign_scheme): Don't forget to initialize
+	buffer.
+
+	* whirlpool.c: New file.
+	* md.c (digest_table): Add whirlpool.
+	* Makefile.am (EXTRA_libcipher_la_SOURCES): Added: whirlpool.c.
+
+2005-03-30  Moritz Schulte  <moritz@g10code.com>
+
+	* ac.c (_gcry_ac_data_from_sexp): Use length of SEXP_CUR, not
+	length of SEXP; do not forget to set SEXP_TMP to NULL after it has
+	been released.
+
+	(struct gcry_ac_mpi): New member: name_provided.
+	(_gcry_ac_data_set): Rename variable `name_final' to `name_cp';
+	remove const qualifier; change code to not cast away const
+	qualifiers; use name_provided member as well.
+	(_gcry_ac_data_set, _gcry_ac_data_get_name): Use name_provided
+	member of named mpi structure.
+
+	(gcry_ac_name_to_id): Do not forget to initialize err.
+	(_gcry_ac_data_get_index): Do not forget to initialize mpi_return;
+	use gcry_free() instead of free(); remove unnecessary cast; rename
+	mpi_return and name_return to mpi_cp and name_cp; adjust code.
+	(ac_data_mpi_copy): Do not cast away const qualifier.
+	(ac_data_values_destroy): Likewise.
+	(ac_data_construct): Likewise.
+
+	(ac_data_mpi_copy): Initialize flags to GCRY_AC_FLAG_DEALLOC.
+	(ac_data_extract): Use GCRY_AC_FLAG_DEALLOC instead of
+	GCRY_AC_FLAG_COPY.
+
+	(_gcry_ac_io_init_va, _gcry_ac_io_init, gcry_ac_io_init)
+	(gcry_ac_io_init_va, _gcry_ac_io_write, _gcry_ac_io_read)
+	(_gcry_ac_io_read_all, _gcry_ac_io_process): New functions.
+	(gcry_ac_em_dencode_t): Use gcry_ac_io_t in prototype instead of
+	memory strings directly; adjust encode/decode functions to use io
+	objects.
+	(emsa_pkcs_v1_5_encode_data_cb): New function ...
+	(emsa_pkcs_v1_5_encode): ... use it here.
+	(ac_data_dencode): Use io objects.
+	(_gcry_ac_data_encode, _gcry_ac_data_decode, gcry_ac_data_encode)
+	(gcry_ac_data_decode): Likewise.
+	(_gcry_ac_data_encrypt_scheme, gcry_ac_data_encrypt_scheme)
+	(_gcry_ac_data_decrypt_scheme, gcry_ac_data_decrypt_scheme)
+	(_gcry_ac_data_sign_scheme, gcry_ac_data_sign_scheme)
+	(_gcry_ac_data_verify_scheme, gcry_ac_data_verify_scheme):
+	Likewise.
+
+2005-03-23  Werner Koch  <wk@g10code.com>
+
+	* rndw32.c (_gcry_rndw32_gather_random_fast): While adding data
+	use the size of the object and not the one of its address.  Bug
+	reported by Sascha Kiefer.
+
+2005-03-19  Moritz Schulte  <moritz@g10code.com>
+
+	* cipher.c (do_cbc_encrypt): Be careful to not overwrite data,
+	which is to be used later on.  This happened in case CTS is
+	enabled and OUTBUF is equal to INBUF.
+
+2005-02-25  Werner Koch  <wk@g10code.com>
+
+	* pubkey.c (gcry_pk_get_keygrip): Allow for shadowed-private-key.
+
+2005-02-13  Moritz Schulte  <moritz@g10code.com>
+
+	* serpent.c: Updated from 1.2 branch:
+
+	s/u32_t/u32/ and s/byte_t/byte/.  To match what we have always
+	used and are using in all other files too.
+	(serpent_test): Moved prototype out of a function.
+
+2005-02-07  Moritz Schulte  <moritz@g10code.com>
+
+	* ac.c: Major parts rewritten.
+	* pubkey.c (_gcry_pk_get_elements): New function.
+
+2004-12-09  Werner Koch  <wk@g10code.com>
+
+	* serpent.c (serpent_setkey): Moved prototype of serpent_test to
+	outer scope.
+
+2004-09-11  Moritz Schulte  <moritz@g10code.com>
+
+	* pubkey.c (pubkey_table): Added an alias entry for GCRY_PK_ELG_E.
+
+2004-08-23  Moritz Schulte  <moritz@g10code.com>
+
+	* ac.c: Do not include <assert.h>.
+	* rndegd.c: Likewise.
+	* sha1.c: Likewise.
+	* rndunix.c: Likewise.
+	* rndlinux.c: Likewise.
+	* rmd160.c: Likewise.
+	* md5.c: Likewise.
+	* md4.c: Likewise.
+	* cipher.c: Likewise.
+	* crc.c: Likewise.
+	* blowfish.c: Likewise.
+
+	* pubkey.c (dummy_generate, dummy_check_secret_key)
+	(dummy_encrypt, dummy_decrypt, dummy_sign, dummy_verify): Return
+	err code GPG_ERR_NOT_IMPLEMENTED instead of aborting through
+	log_bug().
+	(dummy_get_nbits): Return 0 instead of aborting through log_bug().
+
+2004-08-19  Werner Koch  <wk@g10code.de>
+
+	* pubkey.c (sexp_data_to_mpi): Changed the zero random byte
+	substituting code to actually do clever things.  Thanks to
+	Matthias Urlichs for noting the implementation problem.
+
+2004-08-09  Moritz Schulte  <moritz@g10code.com>
+
+	* pubkey.c (gcry_pk_sign): Fixed memory leak; fix provided by
+	Modestas Vainius.
+
+2004-07-16  Werner Koch  <wk@gnupg.org>
+
+	* rijndael.c (do_encrypt): Fix alignment problem.  Bugs found by
+	Matthias Urlichs.
+	(do_decrypt): Ditto.
+	(keySched, keySched2): Use 2 macros along with unions in the key
+	schedule context.
+
+2004-07-14  Moritz Schulte  <moritz@g10code.com>
+
+	* rsa.c (_gcry_rsa_decrypt): Don't forget to free "a".  Thanks to
+	Nikos Mavroyanopoulos.
+
+2004-05-09  Werner Koch  <wk@gnupg.org>
+
+	* random.c (read_pool): Mix the PID in to better protect after a
+	fork.
+
+2004-07-04  Moritz Schulte  <moritz@g10code.com>
+
+	* serpent.c: Use "u32_t" instead of "unsigned long", do not
+	declare S-Box variables as "register".  Fixes failure on
+	OpenBSD/sparc64, reported by Nikolay Sturm.
+
+2004-05-07  Werner Koch  <wk@gnupg.org>
+
+	* random.c (initialize): Factored out some code to ..
+	(initialize_basics): .. new function.
+	(_gcry_random_initialize): Just call initialize_basics unless the
+	new arg FULL is set to TRUE.
+	(_gcry_fast_random_poll): Don't do anything unless the random
+	system has been really initialized.
+
+2004-05-07  Moritz Schulte  <moritz@g10code.de>
+
+	* ac.c (gcry_ac_open): Do not dereference NULL pointer.  Reported
+	by Umberto Salsi.
+
+2004-02-20  Werner Koch  <wk@gnupg.org>
+
+	* primegen.c (check_prime): New args CB_FUNC and CB_ARG; call them
+	at different stages.  Pass these arguments through all callers.
+
+2004-02-06  Werner Koch  <wk@gnupg.org>
+
+	* des.c: Add a new OID as used by pkcs#12.
+
+	* rfc2268.c: New.  Taken from libgcrypt.
+	* cipher.c: Setup the rfc2268 algorithm.
+
+2004-01-25  Moritz Schulte  <mo@g10code.com>
+
+	* primegen.c (prime_generate_internal): Do not forget to free
+	`q_factor'; fixed by Brieuc Jeunhomme.
+	(prime_generate_internal): Do not forget to free `prime'.
+
+2004-01-14  Moritz Schulte  <mo@g10code.com>
+
+	* ac.c (gcry_ac_data_set): New argument: flags; slightly
+	rewritten.
+	(gcry_ac_data_get_name, gcry_ac_data_get_index): Likewise.
+	(gcry_ac_key_pair_generate): New argument: misc_data; modified
+	order of arguments.
+	(gcry_ac_key_test): New argument: handle.
+	(gcry_ac_key_get_nbits, gcry_ac_key_get_grip): Likewise.
+	Use GCRY_AC_FLAG_NO_BLINDING instead of
+	GCRY_AC_DATA_FLAG_NO_BLINDING.
+	(gcry_ac_mpi): New member: flags.
+	(gcry_ac_data_search, gcry_ac_data_add): Removed functions.
+
+2003-12-22  Werner Koch  <wk@gnupg.org>
+
+	* primegen.c (is_prime): Release A2.
+
+2003-12-19  Werner Koch  <wk@gnupg.org>
+
+	* md.c: Moved a couple of functions down below the data structure
+	definitions.
+	(struct gcry_md_context): New field ACTUAL_HANDLE_SIZE.
+	(md_open): Set it here.
+	(struct gcry_md_list): New field ACTUAL_STRUCT_SIZE.
+	(md_enable): Set it here.
+	(md_close): Wipe the context memory.
+	secure memory.
+	* cipher.c (struct gcry_cipher_handle): New field ACTUAL_HANDLE_SIZE.
+	(gcry_cipher_open): Set it here.
+	(gcry_cipher_close): Use it to always wipe out the handle data.
+
+	* ac.c (gcry_ac_open): Make sure HANDLE gets initialized even when
+	the function is not successful.
+	(gcry_ac_close): Allow a NULL handle.
+	(gcry_ac_key_destroy, gcry_ac_key_pair_destroy): Ditto.
+	(gcry_ac_key_get_grip): Return INV_OBJ on error.
+
+	* primegen.c (prime_generate_internal): Fixed error code for
+	failed malloc.  Replaced the !err if chain by gotos.
+	(gcry_prime_group_generator): Remove the extra sanity check.
+
+	* md.c: Minor code and comment cleanups.
+
+2003-12-16  Werner Koch  <wk@gnupg.org>
+
+	* primegen.c (gen_prime): Doc fix.  Thanks to Newton Hammet.
+
+2003-12-11  Werner Koch  <wk@gnupg.org>
+
+	* rndunix.c (slow_poll): Don't use #warning but #error.
+
+	* rndegd.c: Changed indentation.
+	(my_make_filename): Removed the var_arg cruft because we
+	don't need it here.  Changed caller.
+
+	* rndlinux.c: Changed indentation.
+	(open_device): Remove the superfluous stat call and clarify
+	comment.
+
+	* rsa.c: Changed indentation.
+	(secret): Use the standard algorithm if p, q and u are not
+	available.
+	(rsa_blind, rsa_unblind): Renamed from _gcry_rsa_blind,
+	_gcry_rsa_unblind and moved more to the top.
+
+	* md4.c: Changed indentation.  Removed unnecessary casts.
+	* md5.c, rmd160.c, sha1.c, tiger.c: Ditto.
+	* rijndael.c, twofish.c: Ditto.
+	* serpent.c: Removed unnecessary casts.
+	* sha256.c, sha512.c: Ditto.
+
+2003-12-09  Werner Koch  <wk@gnupg.org>
+
+	* dsa.c: Unified indentation style.
+	* elgamal.c: Ditto.
+	* des.c (des_key_schedule): Code beautifications.
+	* blowfish.c: Changed indentation style.
+	* cast5.c (do_cast_setkey): Ditto.
+
+	* pubkey.c (gcry_pk_encrypt): Replaced the chain of if(!err) tests
+	by straightforward gotos.  Other cleanups.
+	(gcry_pk_decrypt): Ditto.
+	(gcry_pk_sign): Ditto.
+	(gcry_pk_verify): Ditto.
+	(gcry_pk_genkey): Ditto.  Use strtoul instead of strtol.
+	(gcry_pk_ctl): Use GPG_ERR_INV_ARG to indicate bad arguments.
+
+2003-12-07  Werner Koch  <wk@gnupg.org>
+
+	* pubkey.c (gcry_pk_register_default): Undef the helper macro.
+	(gcry_pk_map_name): Allow NULL for string.
+	(sexp_to_key): Use memcpy and not strncpy.  Use gcry_free and not
+	free.
+	(sexp_to_sig): Ditto.
+	(sexp_to_enc): Ditto.  Replaced the chain of if(!err) tests by
+	straightforward gotos.
+
+2003-12-05  Werner Koch  <wk@gnupg.org>
+
+	* cipher.c: Documentation cleanups.
+	(gcry_cipher_mode_from_oid): Allow NULL for STRING.
+
+2003-12-03  Werner Koch  <wk@gnupg.org>
+
+	* elgamal.c (sign, do_encrypt, gen_k): Make sure that a small K is
+	only used for encryption.
+
+2003-11-18  Werner Koch  <wk@gnupg.org>
+
+	* random.h (rndw32_set_dll_name): Removed unused prototype.
+
+	* Makefile.am (EXTRA_DIST): Added Manifest.
+
+2003-11-11  Werner Koch  <wk@gnupg.org>
+
+	* Manifest: New.
+
+2003-11-04  Werner Koch  <wk@gnupg.org>
+
+	* md.c (gcry_md_hash_buffer): Use shortcut for SHA1.
+	* sha1.c (_gcry_sha1_hash_buffer): New.
+
+	* random.c: Reformatted most functions.
+ (mix_pool): Moved the failsafe_digest from global
+ scope to here.
+ (do_fast_random_poll): Use the generic functions even if a fast
+ gathering function has been used.
+ (read_pool): Detect a fork and retry.
+ (gcry_randomize, get_random_bytes): Don't distinguish anymore
+ between weak and strong random.
+ (gcry_create_nonce): New.
+
+2003-10-31 Werner Koch <wk@gnupg.org>
+
+ * rndw32.c (slow_gatherer_windowsNT): Use a plain buffer for the
+ disk performance values and not the W32 API structure.
+
+ * dsa.c (verify): s/exp/ex/ due to shadowing of a builtin.
+ * elgamal.c (verify): Ditto.
+
+ * ac.c (gcry_ac_data_get_index): s/index/idx/
+ (gcry_ac_data_copy_internal): Remove the cast in _gcry_malloc.
+ (gcry_ac_data_add): Must use gcry_realloc instead of realloc.
+ * pubkey.c (sexp_elements_extract): s/index/idx/ as tribute to the
+ forehackers.
+ (gcry_pk_encrypt): Removed shadowed definition of I. Reordered
+ arguments to malloc for clarity.
+ (gcry_pk_sign, gcry_pk_genkey): Ditto.
+ * primegen.c (prime_generate_internal): s/random/randomlevel/.
+
+2003-10-27 Moritz Schulte <mo@g10code.com>
+
+ * pubkey.c (gcry_pk_encrypt): Don't forget to deallocate pkey.
+
+2003-10-27 Werner Koch <wk@gnupg.org>
+
+ * random.c (gcry_random_add_bytes): Return if buflen is zero to
+ avoid gcc warning about unused parameter.
+ (MASK_LEVEL): Simplified; does now work for signed and unsigned
+ w/o warnings.
+
+ * md.c (md_start_debug): Removed the const from SUFFIX, because
+ this function is called from the control function which does not
+ require const.
+
+ Prefixed all {pubkey,digest,cipher}_spec_* global variables with
+ _gcry_.
+
+ * ac.c (ac_key_identifiers): Made static.
+
+ * random.c (getfnc_gather_random,getfnc_fast_random_poll): Move
+ prototypes to ..
+ * rand-internal.h: .. here.
+ * random.c (getfnc_gather_random): Include rndw32 gatherer.
+ * rndunix.c, rndw32.c, rndegd.c: Include them here.
+ * rndlinux.c (_gcry_rndlinux_gather_random): Prepend the _gcry_
+ prefix. Changed all callers.
+ * rndegd.c (_gcry_rndegd_gather_random): Likewise.
+ (_gcry_rndegd_connect_socket): Likewise.
+ * rndunix.c (_gcry_rndunix_gather_random): Likewise.
+ (waitpid): Made static.
+ * rndw32.c: Removed the old and unused winseed.dll cruft.
+ (_gcry_rndw32_gather_random_fast): Renamed from
+ gather_random_fast.
+ (_gcry_rndw32_gather_random): Renamed from gather_random. Note
+ that the changes from 2003-04-08 somehow got lost.
+
+ * sha512.c (sha512_init, sha384_init): Made static.
+
+ * cipher.c (do_ctr_decrypt): Removed "return" from this void
+ function.
+
+2003-10-24 Moritz Schulte <mo@g10code.com>
+
+ * serpent.c: Fix an issue on big-endian systems.
+
+ * rndw32.c: Removed IS_MODULE cruft.
+ * rndlinux.c (rndlinux_gather_random): Likewise.
+
+2003-10-10 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (gen_prime): Bail out if NBITS is less than 16.
+ (prime_generate_internal): Initialize prime variable to suppress
+ compiler warning. Check pbits, initialize qbits when passed as
+ zero.
+
+ * primegen.c (prime_generate_internal): New arg
+ ALL_FACTORS. Changed all callers.
+ (gcry_prime_generate): Make the factors arg optional. Request
+ all_factors. Make sure PRIME is set to NULL even on error.
+ (gcry_prime_group_generator): New.
+ (gcry_prime_release_factors): New.
+
+2003-10-06 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (gen_prime): Assert that NBITS is never zero, it
+ would cause a segv.
+
+2003-09-28 Moritz Schulte <mo@g10code.com>
+
+ * ac.c: Include "cipher.h".
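+
+ The read_pool fork detection mentioned above (2003-11-04) can be
+ pictured as follows: remember the PID when the pool is set up and
+ re-stir the pool before answering a request from a different
+ process. A sketch with invented names, not the actual pool code:
+
+   #include <stddef.h>
+   #include <unistd.h>
+   #include <sys/types.h>
+
+   static pid_t pool_pid;   /* PID recorded at pool initialization.  */
+
+   static void
+   read_pool_checked (unsigned char *buf, size_t len)
+   {
+     if (getpid () != pool_pid)
+       {
+         /* We are in a child after fork(): remix the pool with the
+            new PID first, so parent and child streams diverge.  */
+         pool_pid = getpid ();
+         /* mix_pool (pool, ...);  -- hypothetical remix call */
+       }
+     /* ... then extract LEN bytes from the pool into BUF ... */
+     (void)buf; (void)len;
+   }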
+
+2003-09-27 Moritz Schulte <mo@g10code.com>
+
+ * rndegd.c (do_read): Return nread instead of nbytes; thanks to
+ Michael Caerwyn.
+
+2003-09-04 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (_gcry_pk_aliased_algo_name): New.
+ * ac.c (gcry_ac_open): Use it here.
+
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Add serpent.c.
+
+2003-09-02 Moritz Schulte <mo@g10code.com>
+
+ * primegen.c (gcry_prime_check, gcry_prime_generate): New
+ functions.
+ (prime_generate_internal): New function, based on
+ _gcry_generate_elg_prime.
+ (_gcry_generate_elg_prime): Rewritten as a wrapper for
+ prime_generate_internal.
+
+2003-08-28 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_encrypt): Don't include the flags list in the
+ return value. This does not make sense and breaks any programs
+ parsing the output strictly (e.g. current gpgsm).
+ (gcry_pk_encrypt): If aliases for the algorithm name exist, take
+ the first one instead of the regular name to adhere to SPKI
+ conventions.
+ (gcry_pk_genkey): Ditto.
+ (gcry_pk_sign): Ditto. Removed unused KEY_ALGO_NAME.
+
+2003-08-19 Moritz Schulte <mo@g10code.com>
+
+ * cipher.c: Add support for Serpent.
+ * serpent.c: New file.
+
+2003-08-10 Moritz Schulte <moritz@g10code.com>
+
+ * rsa.c (_gcry_rsa_blind, _gcry_rsa_unblind): Declare static.
+
+2003-08-09 Timo Schulz <twoaday@freakmail.de>
+
+ * random.c (getfnc_gather_random): Don't check NAME_OF_DEV_RANDOM
+ twice; also check the NAME_OF_DEV_URANDOM device.
+
+2003-08-08 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (sexp_to_enc): Fixed extraction of S-Expression: do not
+ fail if no `flags' sub S-Expression is found.
+
+2003-07-27 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_lookup_func_oid): Allow for empty OID lists.
+
+2003-07-23 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (gcry_ac_data_construct): New argument: include_flags, only
+ include `flags' S-expression, if include_flags is true. Adjust
+ callers. Thanks to Ralf Schneider for triggering a bug caused by a
+ `flags' sub-S-expression where it is not expected.
+
+2003-07-21 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_lookup_func_name): Use new member name
+ `aliases' instead of `sexp_names'.
+
+ * ac.c (gcry_ac_key_data_get): New function.
+
+ * cipher.c (gcry_cipher_lookup_func_name): Fix return value.
+
+2003-07-20 Moritz Schulte <moritz@g10code.com>
+
+ * blowfish.c: Adjusted for new gcry_cipher_spec_t structure.
+ * cast5.c: Likewise.
+ * twofish.c: Likewise.
+ * arcfour.c: Likewise.
+ * rijndael.c (rijndael_oids, rijndael192_oids, rijndael256_oids):
+ New variables, adjust for new gcry_cipher_spec_t structure.
+ * des.c (oids_tripledes): New variable, adjust for new
+ gcry_cipher_spec_t structure.
+
+ * md.c (oid_table): Removed.
+
+ * tiger.c (oid_spec_tiger): New variable.
+ (digest_spec_tiger): Adjusted for new gcry_md_spec_t structure.
+
+ * sha512.c (oid_spec_sha512): New variable.
+ (digest_spec_sha512): Adjusted for new gcry_md_spec_t structure.
+
+ * sha512.c (oid_spec_sha384): New variable.
+ (digest_spec_sha384): Adjusted for new gcry_md_spec_t structure.
+
+ * sha256.c (oid_spec_sha256): New variable.
+ (digest_spec_sha256): Adjusted for new gcry_md_spec_t structure.
+
+ * sha1.c (oid_spec_sha1): New variable.
+ (digest_spec_sha1): Adjusted for new gcry_md_spec_t structure.
+
+ * rmd160.c (oid_spec_rmd160): New variable.
+ (digest_spec_rmd160): Adjusted for new gcry_md_spec_t structure.
+
+ * md5.c (oid_spec_md5): New variable.
+ (digest_spec_md5): Adjusted for new gcry_md_spec_t structure.
+
+ * md4.c (oid_spec_md4): New variable.
+ (digest_spec_md4): Adjusted for new gcry_md_spec_t structure.
+
+ * crc.c (digest_spec_crc32, digest_spec_crc32_rfc1510,
+ digest_spec_crc32_rfc2440): Adjusted for new gcry_md_spec_t
+ structure.
+
+2003-07-19 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (gcry_md_lookup_func_oid): New function.
+ (search_oid): New function, copied from cipher.c.
+ (gcry_md_map_name): Adjust for new search_oid interface.
+
+ * cipher.c (oid_table): Removed table.
+ (gcry_cipher_lookup_func_oid): New function.
+ (search_oid): Rewritten to use the module functions.
+ (gcry_cipher_map_name): Adjust for new search_oid interface.
+ (gcry_cipher_mode_from_oid): Likewise.
+
+2003-07-18 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_hash_buffer): Convert ERR to gpg_error_t in
+ gpg_strerror.
+
+2003-07-14 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (gcry_cipher_lookup_func_name): Also check the cipher
+ name aliases, not just the primary name.
+ (gcry_cipher_map_name): Remove kludge for aliasing Rijndael to
+ AES.
+
+ * arcfour.c, blowfish.c, cast5.c, des.c, twofish.c: Adjust cipher
+ specification structures.
+
+ * rijndael.c (rijndael_names, rijndael192_names,
+ rijndael256_names): New variables, use them in the cipher
+ specifications.
+
+ * rmd160test.c: Removed file.
+
+ * ac.c, arcfour.c, blowfish.c, cast5.c, cipher.c, des.c, dsa.c,
+ elgamal.c, md.c, pubkey.c, random.c, rijndael.c, rsa.c, twofish.c:
+ Used gcry_err* wrappers for libgpg symbols.
+
+ * primegen.c (gen_prime): Correct the order of arguments to
+ extra_check.
+
+2003-07-12 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c: Replaced all public occurrences of gpg_error_t with
+ gcry_error_t.
+ * cipher.c: Likewise.
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+ * random.c: Likewise.
+
+ * cipher.c: Added support for TWOFISH128.
+
+2003-07-08 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (gcry_ac_data_copy_internal): New function, based on
+ gcry_ac_data_copy.
+ (gcry_ac_data_copy): Made public, use gcry_ac_data_copy_internal.
+ (gcry_ac_key_init): Use gcry_ac_data_copy_internal.
+
+2003-07-07 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (gcry_ac_data_set): Only release old MPI value if it is
+ different from the new value. Bug reported by Simon Josefsson
+ <jas@extundo.com>.
+
+ * pubkey.c (gcry_pk_list): New function.
+ * md.c (gcry_md_list): New function.
+
+ * ac.c (gcry_ac_key_pair_generate): Fix calculation of format
+ string size.
+
+2003-07-05 Moritz Schulte <moritz@g10code.com>
+
+ * md.c: Named struct of digest_table `digest_table_entry'.
+ (digest_table_entry): New member: algorithm; filled in.
+ (digest_table_entry): Removed unused member: flags.
+ (gcry_md_register): New argument: algorithm_id, filled in.
+ (gcry_md_register_default): Used algorithm ID from module
+ structure.
+ (gcry_md_map_name): Likewise.
+ (md_enable): Likewise.
+ (md_read): Likewise.
+ (gcry_md_info): Likewise.
+
+ * pubkey.c: Named struct for pubkey_table `pubkey_table_entry'.
+ (pubkey_table_entry): New member: algorithm; filled in.
+ (gcry_pk_register_default): Used algorithm ID from pubkey_table.
+ (gcry_pk_register): New argument: algorithm_id, filled in.
+ (gcry_pk_map_name): Used algorithm ID from module structure.
+ (gcry_pk_decrypt): Likewise.
+ (gcry_pk_encrypt): Likewise.
+ (gcry_pk_verify): Likewise.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_testkey): Likewise.
+ (gcry_pk_genkey): Likewise.
+ (gcry_pk_get_nbits): Likewise.
+ (sexp_to_key): Removed unused variable: algo.
+ (sexp_to_sig): Likewise.
+
+ * cipher.c: Named struct for cipher_table `cipher_table_entry'.
+ (cipher_table_entry): New member: algorithm; filled in.
+ (gcry_cipher_register_default): Used algorithm ID from
+ cipher_table.
+ (gcry_cipher_register): New argument: algorithm_id, filled in.
+ (gcry_cipher_map_name): Used algorithm ID from module structure.
+
+ * arcfour.c (cipher_spec_arcfour): Removed algorithm ID.
+ * blowfish.c (cipher_spec_blowfish): Likewise.
+ * cast5.c (cipher_spec_cast5): Likewise.
+ * crc.c (digest_spec_crc32): Likewise.
+ * crc.c (digest_spec_crc32_rfc1510): Likewise.
+ * crc.c (digest_spec_crc32_rfc2440): Likewise.
+ * des.c (cipher_spec_des): Likewise.
+ * des.c (cipher_spec_tripledes): Likewise.
+ * dsa.c (pubkey_spec_dsa): Likewise.
+ * elgamal.c (pubkey_spec_elg): Likewise.
+ * md4.c (digest_spec_md4): Likewise.
+ * md5.c (digest_spec_md5): Likewise.
+ * aes.c (cipher_spec_aes): Likewise.
+ * aes.c (cipher_spec_aes192): Likewise.
+ * aes.c (cipher_spec_aes256): Likewise.
+ * rsa.c (pubkey_spec_rsa): Likewise.
+ * sha1.c (digest_spec_sha1): Likewise.
+ * sha256.c (digest_spec_sha256): Likewise.
+ * sha512.c (digest_spec_sha512): Likewise.
+ * tiger.c (digest_spec_tiger): Likewise.
+ * twofish.c (cipher_spec_twofish): Likewise.
+ * twofish.c (cipher_spec_twofish128): Likewise.
+
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Fix list of source
+ files; reported by Simon Josefsson <jas@extundo.com>.
+
+ * pubkey.c: Replaced all occurrences of `id' with `algorithm',
+ since `id' is a keyword in obj-c.
+ * md.c: Likewise.
+ * cipher.c: Likewise.
+
+ * crc.c, md4.c, md5.c, rmd160.c, sha1.c, sha256.c, tiger.c:
+ Replaced all occurrences of gcry_digest_spec_t with gcry_md_spec_t.
+
+ * dsa.c, rsa.c, elgamal.c: Replaced all occurrences of
+ gcry_pubkey_spec_t with gcry_pk_spec_t.
+
+ * md.c: Replaced all occurrences of gcry_digest_spec_t with
+ gcry_md_spec_t.
+ (gcry_digest_register_default): Renamed to ...
+ (gcry_md_register_default): ... this; adjusted callers.
+ (gcry_digest_lookup_func_name): Renamed to ...
+ (gcry_md_lookup_func_name): ... this; adjusted callers.
+ (gcry_digest_lookup_name): Renamed to ...
+ (gcry_md_lookup_name): ... this; adjusted callers.
+ (gcry_digest_register): Renamed to ...
+ (gcry_md_register): ... this.
+ (gcry_digest_unregister): Renamed to ...
+ (gcry_md_unregister): ... this.
+
+ * pubkey.c (gcry_pubkey_register): Renamed to ...
+ (gcry_pk_register): ... this.
+ (gcry_pubkey_unregister): Renamed to ...
+ (gcry_pk_unregister): ... this.
+ Replaced all occurrences of gcry_pubkey_spec_t with gcry_pk_spec_t.
+ (gcry_pubkey_register_default): Renamed to ...
+ (gcry_pk_register_default): ... this; adjusted callers.
+ (gcry_pubkey_lookup_func_name): Renamed to ...
+ (gcry_pk_lookup_func_name): ... this; adjusted callers.
+ (gcry_pubkey_lookup_name): Renamed to ...
+ (gcry_pk_lookup_name): ... this; adjusted callers.
+
+ * md.c (gcry_md_hash_buffer): Fix error checking. Thanks to Simon
+ Josefsson <jas@extundo.com>.
+
+2003-07-04 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (gcry_cipher_list): New function.
+
+2003-07-01 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (sexp_to_sig): Accept a `flags' S-expression to be more
+ consistent with sexp_to_enc.
+
+2003-06-30 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am (libcipher_la_SOURCES): Added: ac.c.
+
+ * pubkey.c (_gcry_pk_module_lookup): New function.
+ (_gcry_pk_module_release): New function.
+
+2003-06-29 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c: New file.
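+
+ The 2003-07-05 table rework above boils down to pairing every
+ built-in spec with a fixed, public algorithm ID, so registration no
+ longer has to invent IDs at run time. Schematically (type and field
+ names here are illustrative, not the exact internals):
+
+   #include <stddef.h>
+
+   typedef struct
+   {
+     void *spec;               /* e.g. a gcry_cipher_spec_t        */
+     unsigned int algorithm;   /* fixed ID, e.g. GCRY_CIPHER_AES   */
+   } cipher_table_entry_t;
+
+   static cipher_table_entry_t cipher_table[] =
+     {
+       /* { &cipher_spec_aes, GCRY_CIPHER_AES },
+          { &cipher_spec_des, GCRY_CIPHER_DES }, ...            */
+       { NULL, 0 }              /* list terminator               */
+     };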
+
+2003-06-26 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_hash_buffer): Trigger BUG correctly with new API.
+
+2003-06-19 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_is_enabled): Fixed.
+
+2003-06-18 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (gcry_cipher_get_algo_keylen): New.
+ (gcry_cipher_get_algo_blklen): New.
+
+2003-06-18 Moritz Schulte <moritz@g10code.com>
+
+ * arcfour.c, cipher.c, blowfish.c, md.c, cast5.c, pubkey.c, crc.c,
+ des.c, dsa.c, elgamal.c, md4.c, md5.c, random.c, rijndael.c,
+ rmd160.c, rsa.c, sha1.c, sha256.c, sha512.c, tiger.c, twofish.c:
+ Replaced older types GcryDigestSpec, GcryCipherSpec and
+ GcryPubkeySpec with newer types: gcry_digest_spec_t,
+ gcry_cipher_spec_t and gcry_pubkey_spec_t.
+
+ * md.c (gcry_digest_id_new): Removed function.
+ (gcry_digest_register): Removed code for generating a new module
+ ID.
+
+ * pubkey.c (gcry_pubkey_id_new): Removed function.
+ (gcry_pubkey_register): Removed code for generating a new module
+ ID.
+
+ * cipher.c, md.c, pubkey.c: Replace old type GcryModule with newer
+ one: gcry_module_t.
+ (gcry_cipher_id_new): Removed function.
+ (gcry_cipher_register): Removed code for generating a new module
+ ID.
+
+ * cipher.c (gcry_cipher_register): Adjust call to
+ _gcry_module_add.
+ (gcry_cipher_register_default): Likewise.
+ * pubkey.c (gcry_pubkey_register_default): Likewise.
+ (gcry_pubkey_register): Likewise.
+ * md.c (gcry_digest_register_default): Likewise.
+ (gcry_digest_register): Likewise.
+
+ * md.c (gcry_digest_lookup_func_id): Removed function.
+ (gcry_digest_lookup_id): Likewise.
+ (gcry_digest_id_new): Use _gcry_module_lookup_id instead of
+ gcry_digest_lookup_id.
+ (digest_algo_to_string): Likewise.
+ (check_digest_algo): Likewise.
+ (md_enable): Likewise.
+ (md_digest_length): Likewise.
+ (md_asn_oid): Likewise.
+
+ * pubkey.c (gcry_pubkey_lookup_id): Removed function.
+ (gcry_pubkey_lookup_func_id): Likewise.
+ (gcry_pubkey_id_new): Use _gcry_module_lookup_id instead of
+ gcry_pubkey_lookup_id.
+ (gcry_pk_algo_name): Likewise.
+ (disable_pubkey_algo): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_get_npkey): Likewise.
+ (pubkey_get_nskey): Likewise.
+ (pubkey_get_nsig): Likewise.
+ (pubkey_get_nenc): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (gcry_pk_algo_info): Likewise.
+
+ * cipher.c (gcry_cipher_lookup_func_id): Removed function.
+ (gcry_cipher_lookup_id): Likewise.
+ (cipher_algo_to_string): Use _gcry_module_lookup_id instead of
+ gcry_cipher_lookup_id.
+ (disable_cipher_algo): Likewise.
+ (check_cipher_algo): Likewise.
+ (cipher_get_blocksize): Likewise.
+ (gcry_cipher_open): Likewise.
+ (gcry_cipher_id_new): Likewise.
+
+2003-06-17 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am (GCRYPT_MODULES): Set to @GCRYPT_CIPHERS@,
+ @GCRYPT_PUBKEY_CIPHERS@, @GCRYPT_DIGESTS@ and @GCRYPT_RANDOM@.
+ (libcipher_la_DEPENDENCIES): Set to $(GCRYPT_MODULES).
+ (libcipher_la_LIBADD): Likewise.
+ (AM_CFLAGS): Added: @GPG_ERROR_CFLAGS@.
+ (EXTRA_libcipher_la_SOURCES): Added all conditional sources.
+
+ * md.c (md_open): Use _gcry_fast_random_poll instead of
+ fast_random_poll.
+ * cipher.c (gcry_cipher_open): Likewise.
+
+ * random.h (fast_random_poll): Removed macro.
+
+ * blowfish.c, md4.c, md5.c, rmd160.c, sha1.c, sha256.c, sha512.c,
+ tiger.c: Use Autoconf's WORDS_BIGENDIAN instead of our own
+ BIG_ENDIAN_HOST.
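+
+ The 2003-06-18 entries above replace the per-subsystem lookup_id
+ functions with one generic module registry. The core of such a
+ registry is small; a self-contained sketch (not the real module
+ code, names invented for illustration):
+
+   #include <stddef.h>
+
+   typedef struct module_s
+   {
+     struct module_s *next;
+     unsigned int mod_id;     /* algorithm ID                         */
+     unsigned int refcount;   /* for _gcry_module_use/_release        */
+     void *spec;              /* gcry_cipher_spec_t, gcry_md_spec_t...*/
+   } module_t;
+
+   /* One linked list per subsystem, searched by numeric ID.  */
+   static module_t *
+   module_lookup_id (module_t *entries, unsigned int mod_id)
+   {
+     module_t *e;
+     for (e = entries; e; e = e->next)
+       if (e->mod_id == mod_id)
+         return e;
+     return NULL;
+   }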
+
+2003-06-16 Moritz Schulte <moritz@g10code.com>
+
+ * random.c (getfnc_gather_random): Do not special-case
+ USE_ALL_RANDOM_MODULES, make it the default.
+
+ * dsa.c: Replace last occurrences of old type names with newer
+ names (i.e. replace MPI with gcry_mpi_t).
+ * elgamal.c: Likewise.
+ * primegen.c: Likewise.
+ * pubkey.c: Likewise.
+ * rsa.c: Likewise.
+
+2003-06-14 Moritz Schulte <moritz@g10code.com>
+
+ * des.c (des_setkey): Add selftest check.
+ (tripledes_set3keys): Likewise.
+ (do_tripledes_setkey): Remove selftest check.
+ (do_des_setkey): Likewise.
+
+2003-06-11 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (_gcry_md_init): New function.
+ * cipher.c (_gcry_cipher_init): New function.
+ * pubkey.c (_gcry_pk_init): New function.
+
+2003-06-13 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_get_algo): Reverted to old API. This is a
+ convenience function anyway and error checking is not appropriate.
+ (gcry_md_is_secure): New.
+ (gcry_md_is_enabled): New.
+
+2003-06-12 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (gcry_cipher_open): Make sure HANDLE is set to NULL on
+ error.
+
+2003-06-11 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_open): Make sure H receives either NULL or a
+ valid handle.
+ (gcry_md_copy): Swapped arguments so that it is more in line with
+ md_open and most other API functions like memcpy (destination
+ comes first). Make sure HANDLE is set to NULL on error.
+
+ * rijndael.c (do_encrypt): Hack to force correct alignment. It
+ seems not to be sufficient, though. We should rework these
+ functions and remove all these ugly casts. Let the compiler
+ optimize or have an assembler implementation.
+
+2003-06-09 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am: Removed rules serpent, since that is not committed
+ yet.
+
+2003-06-08 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_encrypt): Improve calculation for size of the
+ format string.
+
+2003-06-07 Moritz Schulte <moritz@g10code.com>
+
+ * arcfour.c, bithelp.h, blowfish.c, cast5.c, cipher.c, crc.c,
+ des.c, dsa.c, elgamal.c, md4.c, md5.c, md.c, primegen.c, pubkey.c,
+ rand-internal.h, random.c, random.h, rijndael.c, rmd160.c,
+ rmd160test.c, rmd.h, rndegd.c, rndlinux.c, rndunix.c, rndw32.c,
+ rsa.c, sha1.c, sha256.c, sha512.c, tiger.c, twofish.c: Edited all
+ preprocessor instructions to remove whitespace before the '#'.
+ This is not required by C89, but there are some compilers out
+ there that don't like it. Replaced any occurrence of the now
+ deprecated type names with the new ones.
+
+2003-06-04 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_encrypt): Construct an arg_list and use
+ gcry_sexp_build_array instead of gcry_sexp_build.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_genkey): Likewise.
+
+2003-06-01 Moritz Schulte <moritz@g10code.com>
+
+ * dsa.c (_gcry_dsa_generate): Do not check whether the algorithm ID
+ does indeed belong to DSA.
+ (_gcry_dsa_sign): Likewise.
+ (_gcry_dsa_verify): Likewise.
+ (_gcry_dsa_get_nbits): Likewise.
+
+ * elgamal.c (_gcry_elg_check_secret_key): Do not check whether the
+ algorithm ID does indeed belong to ElGamal.
+ (_gcry_elg_encrypt): Likewise.
+ (_gcry_elg_decrypt): Likewise.
+ (_gcry_elg_sign): Likewise.
+ (_gcry_elg_verify): Likewise.
+ (_gcry_elg_get_nbits): Likewise.
+ (_gcry_elg_generate): Likewise.
+
+ * rsa.c (_gcry_rsa_generate): Do not check whether the algorithm ID
+ does indeed belong to RSA.
+ (_gcry_rsa_encrypt): Likewise.
+ (_gcry_rsa_decrypt): Likewise.
+ (_gcry_rsa_sign): Likewise.
+ (_gcry_rsa_verify): Likewise.
+ (_gcry_rsa_get_nbits): Likewise.
+
+2003-05-30 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (md_get_algo): Return zero in case no algorithm is enabled.
+
+ * md.c (gcry_md_info): Adjusted for new no-errno-API.
+ (md_final): Likewise.
+ (gcry_md_get_algo): Likewise.
+ * pubkey.c (gcry_pk_get_keygrip): Likewise.
+ (gcry_pk_ctl): Likewise.
+ (gcry_pk_algo_info): Likewise.
+ * des.c (selftest): Likewise.
+
+2003-05-29 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (md_enable): Do not forget to release module on error.
+ (gcry_md_open): Adjusted for new no-errno-API.
+ (md_open): Likewise.
+ (md_copy): Likewise.
+ (gcry_md_copy): Likewise.
+ (gcry_md_setkey): Likewise.
+ (gcry_md_algo_info): Likewise.
+
+ * cipher.c (gcry_cipher_open): Adjusted for new no-errno-API and
+ also fixed a locking bug.
+ (gcry_cipher_encrypt): Adjusted for new no-errno-API.
+ (gcry_cipher_decrypt): Likewise.
+ (gcry_cipher_ctl): Likewise.
+ (gcry_cipher_info): Likewise.
+ (gcry_cipher_algo_info): Likewise.
+
+2003-05-28 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (md_enable): Adjusted for libgpg-error.
+ (gcry_md_enable): Likewise.
+ (gcry_digest_register_default): Likewise.
+ (gcry_digest_register): Likewise.
+ (check_digest_algo): Likewise.
+ (prepare_macpads): Likewise.
+ (gcry_md_setkey): Likewise.
+ (gcry_md_ctl): Likewise.
+ (gcry_md_get): Likewise.
+ (gcry_md_algo_info): Likewise.
+ (gcry_md_info): Likewise.
+ * dsa.c (_gcry_dsa_generate): Likewise.
+ (_gcry_dsa_check_secret_key): Likewise.
+ (_gcry_dsa_sign): Likewise.
+ (_gcry_dsa_verify): Likewise.
+ * twofish.c (do_twofish_setkey): Likewise.
+ (twofish_setkey): Likewise.
+ * cipher.c (gcry_cipher_register): Likewise.
+
+2003-05-25 Moritz Schulte <moritz@g10code.com>
+
+ * rijndael.c (do_setkey): Adjusted for libgpg-error.
+ (rijndael_setkey): Likewise.
+ * random.c (gcry_random_add_bytes): Likewise.
+ * elgamal.c (_gcry_elg_generate): Likewise.
+ (_gcry_elg_check_secret_key): Likewise.
+ (_gcry_elg_encrypt): Likewise.
+ (_gcry_elg_decrypt): Likewise.
+ (_gcry_elg_sign): Likewise.
+ (_gcry_elg_verify): Likewise.
+ * rsa.c (_gcry_rsa_generate): Likewise.
+ (_gcry_rsa_check_secret_key): Likewise.
+ (_gcry_rsa_encrypt): Likewise.
+ (_gcry_rsa_decrypt): Likewise.
+ (_gcry_rsa_sign): Likewise.
+ (_gcry_rsa_verify): Likewise.
+ * pubkey.c (dummy_generate, dummy_check_secret_key, dummy_encrypt,
+ dummy_decrypt, dummy_sign, dummy_verify): Likewise.
+ (gcry_pubkey_register): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (sexp_elements_extract): Likewise.
+ (sexp_to_key): Likewise.
+ (sexp_to_sig): Likewise.
+ (sexp_to_enc): Likewise.
+ (sexp_data_to_mpi): Likewise.
+ (gcry_pk_encrypt): Likewise.
+ (gcry_pk_decrypt): Likewise.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_verify): Likewise.
+ (gcry_pk_testkey): Likewise.
+ (gcry_pk_genkey): Likewise.
+ (gcry_pk_ctl): Likewise.
+ * cipher.c (dummy_setkey): Likewise.
+ (check_cipher_algo): Likewise.
+ (gcry_cipher_open): Likewise.
+ (cipher_setkey): Likewise.
+ (gcry_cipher_ctl): Likewise.
+ (cipher_encrypt): Likewise.
+ (gcry_cipher_encrypt): Likewise.
+ (cipher_decrypt): Likewise.
+ (gcry_cipher_decrypt): Likewise.
+ (gcry_cipher_info): Likewise.
+ (gcry_cipher_algo_info): Likewise.
+ * cast5.c (cast_setkey): Likewise.
+ (do_cast_setkey): Likewise.
+ * arcfour.c (arcfour_setkey): Likewise.
+ (do_arcfour_setkey): Likewise.
+ * blowfish.c (do_bf_setkey): Likewise.
+ (bf_setkey): Likewise.
+ * des.c (do_des_setkey): Likewise.
+ (do_tripledes_setkey): Likewise.
+
+2003-05-22 Moritz Schulte <moritz@g10code.com>
+
+ * tiger.c: Merged code using the U64_C macro from GnuPG.
+
+ * sha512.c: Likewise.
+
+2003-05-17 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Fix typo: acquire a lock, instead of
+ releasing it.
+
+2003-05-11 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_testkey): Call REGISTER_DEFAULT_CIPHERS.
+ (gcry_pk_ctl): Likewise.
+
+2003-04-27 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Release sexp after extracted data has
+ been used.
+
+ * md.c (gcry_md_get_algo_dlen): Simplified, simply call
+ md_digest_length to do the job.
+
+ * des.c (do_des_setkey): Check for selftest failure not only
+ during initialization.
+ (do_tripledes_setkey): Include check for selftest failure.
+
+ * pubkey.c (gcry_pubkey_register_default): New macro
+ `pubkey_use_dummy', use it.
+
+ * elgamal.c (elg_names): New variable.
+ (pubkey_spec_elg): Include elg_names.
+
+ * dsa.c (dsa_names): New variable.
+ (pubkey_spec_dsa): Include dsa_names.
+
+ * rsa.c (rsa_names): New variable.
+ (pubkey_spec_rsa): Include rsa_names.
+
+ * pubkey.c (gcry_pubkey_lookup_func_name): Compare name also with
+ the names listed in `sexp_names'.
+
+2003-04-24 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (sexp_to_key): New variables: module, pubkey. Adjusted
+ to new module interface.
+ (sexp_to_key): Changed type of argument `retalgo' from `int *' to
+ `GcryModule **'. Adjusted all callers. Removed argument:
+ r_algotblidx.
+ (sexp_to_sig): Changed type of argument `retalgo' from `int *' to
+ `GcryModule **'. Adjusted all callers.
+ (sexp_to_enc): Likewise.
+
+ (pubkey_get_npkey, pubkey_get_nskey, pubkey_get_nsig,
+ pubkey_get_nenc): Use strlen to find out the number.
+
+ * rsa.c: Adjust pubkey_spec_rsa to new internal interface.
+ * dsa.c: Likewise.
+ * elgamal.c: Likewise.
+
+2003-04-17 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (sexp_elements_extract): New function.
+ * pubkey.c (sexp_to_key): Removed variable `idx', added `err', use
+ sexp_elements_extract.
+ (sexp_to_sig): Likewise.
+ (sexp_to_enc): Likewise.
+
+ * pubkey.c: Terminate list correctly.
+ * md.c: Include sha512/sha384 in digest_table.
+
+2003-04-16 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am: Include support for sha512.c.
+
+ * sha512.c: New file, merged from GnuPG, with few modifications
+ for libgcrypt.
+
+ * rand-internal.h: Removed declarations for constructor functions.
+
+ * md.c (md_copy): Call _gcry_module_use for incrementing the usage
+ counter of the digest modules.
+
+ * rsa.c: Do not include "rsa.h".
+ * dsa.c: Do not include "dsa.h".
+ * elgamal.c: Do not include "elgamal.h".
+ * des.c: Do not include "des.h".
+ * cast5.c: Do not include "cast5.h".
+ * blowfish.c: Do not include "blowfish.h".
+ * arcfour.c: Do not include "arcfour.h".
+
+ * Makefile.am (libcipher_la_DEPENDENCIES): Removed.
+ (libcipher_la_LIBADD): Removed.
+ Use Automake conditionals for conditional compilation.
+
+2003-04-13 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (gcry_cipher_open): Call REGISTER_DEFAULT_CIPHERS.
+
+ * md.c (gcry_md_list): New member: module.
+ (md_enable): New variable: module, changed use of module and
+ digest.
+ (md_enable): Initialize member: module.
+ (md_close): Call _gcry_module_release.
+
+ * cipher.c (gcry_cipher_open): New variable: module, changed use of
+ module and cipher.
+ (struct gcry_cipher_handle): New member: module.
+ (gcry_cipher_open): Initialize member: module.
+ (gcry_cipher_close): Call _gcry_module_release.
+
+2003-04-09 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c: Include "ath.h".
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+
+ * cipher.c (ciphers_registered_lock): New variable.
+ * md.c (digests_registered_lock): New variable.
+ * pubkey.c (pubkeys_registered_lock): New variable.
+
+ * rndlinux.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndlinux_constructor): Removed function.
+
+ * rndegd.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndegd_constructor): Removed function.
+
+ * rndunix.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndunix_constructor): Removed function.
+
+ * rndw32.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndw32_constructor): Removed function.
+
+ * rndegd.c (rndegd_connect_socket): Simplify code for creating the
+ egd socket address.
+ (rndegd_connect_socket): Call log_fatal instead of
+ g10_log_fatal.
+ (egd_gather_random): Renamed to ...
+ (rndegd_gather_random): ... here.
+
+2003-04-08 Moritz Schulte <moritz@g10code.com>
+
+ * rndlinux.c: Do not include "dynload.h".
+ * rndunix.c: Likewise.
+ * rndw32.c: Likewise.
+
+ * rndegd.c (rndegd_connect_socket): Factored out from ...
+ (egd_gather_random): ... here; call it.
+ (egd_socket): New variable.
+ (egd_gather_random): Initialize fd with egd_socket, do not declare
+ fd static.
+ (do_read): Merged few changes from GnuPG. FIXME - not finished?
+ Do not include "dynload.h".
+
+ * rndw32.c (gather_random): Renamed to rndw32_gather_random, do
+ not declare static.
+ (gather_random_fast): Renamed to rndw32_gather_random_fast, do not
+ declare static.
+
+ * rndunix.c (gather_random): Renamed to rndunix_gather_random, do
+ not declare static.
+ * rndegd.c (gather_random): Renamed to rndegd_gather_random, do
+ not declare static.
+ * rndlinux.c (gather_random): Renamed to rndlinux_gather_random,
+ do not declare static.
+
+2003-04-07 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am (libcipher_la_SOURCES): Removed construct.c.
+ (libcipher_la_SOURCES): Added sha1.c, sha256.c, rmd160.c, md4.c,
+ md5.c, tiger.c and crc.c.
+ (EXTRA_PROGRAMS): Removed sha1, sha256, rmd160, md4, md5, tiger
+ and crc. Removed definitions: EXTRA_md4_SOURCES,
+ EXTRA_md5_SOURCES, EXTRA_rmd160_SOURCES, EXTRA_sha1_SOURCES,
+ EXTRA_sha256_SOURCES, EXTRA_tiger_SOURCES and EXTRA_crc_SOURCES,
+ BUILT_SOURCES, DISTCLEANFILES.
+
+ * pubkey.c: Do not include "elgamal.h", "dsa.h" and "rsa.h".
+
+ * Makefile.am (libcipher_la_SOURCES): Removed rsa.h, elgamal.h,
+ dsa.h, des.h, cast5.h, arcfour.h and blowfish.h.
+
+ * rsa.h: Removed file.
+ * elgamal.h: Removed file.
+ * dsa.h: Removed file.
+ * des.h: Removed file.
+ * cast5.h: Removed file.
+ * arcfour.h: Removed file.
+ * blowfish.h: Removed file.
+
+ * Makefile.am (libcipher_la_SOURCES): Removed dynload.c and
+ dynload.h.
+
+ * rsa.c (pubkey_spec_rsa): New variable.
+ * dsa.c (pubkey_spec_dsa): New variable.
+ * elgamal.c (pubkey_spec_elg): New variable.
+
+ * rsa.c (_gcry_rsa_get_info): Removed function.
+ * elgamal.c (_gcry_elg_get_info): Removed function.
+ * dsa.c (_gcry_dsa_get_info): Removed function.
+
+ * tiger.c (tiger_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_tiger_constructor): Removed function.
+
+ * sha1.c (sha1_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_sha1_constructor): Removed function.
+
+ * sha256.c (sha256_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_sha256_constructor): Removed function.
+
+ * rmd160.c (rmd160_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rmd160_constructor): Removed function.
+
+ * md5.c (md5_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_md5_constructor): Removed function.
+
+ * md4.c (md4_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_md4_constructor): Removed function.
+
+ * crc.c (crc_get_info): Removed function.
+
+ * arcfour.c (do_arcfour_setkey): Changed type of context argument
+ to `void *', added local variable for cast, adjusted callers.
+ (arcfour_setkey): Likewise.
+ (encrypt_stream): Likewise.
+ * cast5.c (cast_setkey): Likewise.
+ (encrypt_block): Likewise.
+ * rijndael.c (rijndael_setkey): Likewise.
+ (rijndael_encrypt): Likewise.
+ (rijndael_decrypt): Likewise.
+ * twofish.c (twofish_setkey): Likewise.
+ (twofish_encrypt): Likewise.
+ (twofish_decrypt): Likewise.
+ * des.c (do_des_setkey): Likewise.
+ (do_des_encrypt): Likewise.
+ (do_des_decrypt): Likewise.
+ (do_tripledes_encrypt): Likewise.
+ (do_tripledes_decrypt): Likewise.
+ * blowfish.c (bf_setkey): Likewise.
+ (encrypt_block): Likewise.
+ (decrypt_block): Likewise.
+
+ * arcfour.c (encrypt_stream): Likewise.
+
+ * rijndael.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+
+ * twofish.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+
+ * cast5.c (CIPHER_ALGO_CAST5): Removed.
+
+ * blowfish.c (FNCCAST_SETKEY, FNCCAST_CRYPT): Removed macros.
+ (CIPHER_ALGO_BLOWFISH): Removed symbol.
+ * cast5.c (FNCCAST_SETKEY, FNCCAST_CRYPT): Likewise.
+ * des.c (selftest_failed): Removed.
+ (initialized): New variable.
+ (do_des_setkey): Run selftest, if not yet done.
+ (FNCCAST_SETKEY, FNCCAST_CRYPT): Removed macros.
+
+ * arcfour.c (_gcry_arcfour_get_info): Removed function.
+ * blowfish.c (_gcry_blowfish_get_info): Removed function.
+ * cast5.c (_gcry_cast5_get_info): Removed function.
+ * des.c (_gcry_des_get_info): Removed function.
+ * rijndael.c (_gcry_rijndael_get_info): Removed function.
+ * twofish.c (_gcry_twofish_get_info): Removed function.
+
+ * arcfour.c (cipher_spec_arcfour): New variable.
+ * twofish.c (cipher_spec_twofish, cipher_spec_twofish128): New
+ variables.
+ * rijndael.c (cipher_spec_aes, cipher_spec_aes192,
+ cipher_spec_aes256): New variables.
+ * des.c (cipher_spec_des, cipher_spec_tripledes): New variables.
+ * cast5.c (cipher_spec_cast5): New variable.
+ * blowfish.c (cipher_spec_blowfish): Likewise.
+
+ * twofish.c: Do not include "dynload.h".
+ * rijndael.c: Likewise.
+ * des.c: Likewise.
+ * cast5.c: Likewise.
+ * blowfish.c: Likewise.
+ * cipher.c: Likewise.
+ * crc.c: Likewise.
+ * md4.c: Likewise.
+ * md5.c: Likewise.
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+ * rijndael.c: Likewise.
+ * sha1.c: Likewise.
+ * sha256.c: Likewise.
+
+ * arcfour.c: Include "cipher.h".
+ * twofish.c: Likewise.
+ * rijndael.c: Likewise.
+ * des.c: Likewise.
+ * cast5.c: Likewise.
+ * blowfish.c: Likewise.
+
+ * twofish.c (twofish_setkey): Declared argument `key' const.
+ (twofish_encrypt): Declared argument `inbuf' const.
+ (twofish_decrypt): Likewise.
+
+ * rijndael.c (rijndael_setkey): Declared argument `key' const.
+ (rijndael_encrypt): Declared argument `inbuf' const.
+ (rijndael_decrypt): Likewise.
+
+ * des.c (do_des_setkey): Declared argument `key' const.
+ (do_tripledes_setkey): Likewise.
+ (do_des_encrypt): Declared argument `inbuf' const.
+ (do_des_decrypt): Likewise.
+ (do_tripledes_encrypt): Likewise.
+ (do_tripledes_decrypt): Likewise.
+
+ * cast5.c (encrypt_block): Declared argument `inbuf' const.
+ (decrypt_block): Likewise.
+ (cast_setkey): Declared argument `key' const.
+
+ * blowfish.c (do_bf_setkey): Declared argument `key' const.
+ (encrypt_block): Declared argument `inbuf' const.
+ (decrypt_block): Likewise.
+
+ * cipher.c: Remove CIPHER_ALGO_DUMMY related code.
+ Removed struct cipher_table_s.
+ Changed definition of cipher_table.
+ Removed definition of disabled_algos.
+ (ciphers_registered, default_ciphers_registered): New variables.
+ (REGISTER_DEFAULT_CIPHERS): New macro.
+ (dummy_setkey): Declared argument `key' const.
+ (dummy_encrypt_block): Declared argument `inbuf' const.
+ (dummy_decrypt_block): Likewise.
+ (dummy_encrypt_stream): Likewise.
+ (dummy_decrypt_stream): Likewise.
+ (dummy_setkey): Use `unsigned char' instead of `byte'.
+ (dummy_encrypt_block): Likewise.
+ (dummy_decrypt_block): Likewise.
+ (dummy_encrypt_stream): Likewise.
+ (dummy_decrypt_stream): Likewise.
+ (gcry_cipher_register_default): New function.
+ (gcry_cipher_lookup_func_id): New function.
+ (gcry_cipher_lookup_func_name): New function.
+ (gcry_cipher_lookup_id): New function.
+ (gcry_cipher_lookup_name): New function.
+ (gcry_cipher_id_new): New function.
+ (gcry_cipher_register): New function.
+ (gcry_cipher_unregister): New function.
+ (setup_cipher_table): Removed function.
+ (load_cipher_modules): Removed function.
+ (gcry_cipher_map_name): Adjusted to use new module management.
+ (cipher_algo_to_string): Likewise.
+ (disable_cipher_algo): Likewise.
+ (check_cipher_algo): Likewise.
+ (cipher_get_keylen): Likewise.
+ (cipher_get_blocksize): Likewise.
+ (gcry_cipher_open): Likewise.
+ (struct gcry_cipher_handle): Replaced members algo, algo_index,
+ blocksize, setkey, encrypt, decrypt, stencrypt, stdecrypt with one
+ member: cipher.
+ (gcry_cipher_open): Adjusted code for new handle structure.
+ (cipher_setkey): Likewise.
+ (cipher_setiv): Likewise.
+ (cipher_reset): Likewise.
+ (do_ecb_encrypt): Likewise.
+ (do_ecb_decrypt): Likewise.
+ (do_cbc_encrypt): Likewise.
+ (do_cbc_decrypt): Likewise.
+ (do_cfb_encrypt): Likewise.
+ (do_cfb_decrypt): Likewise.
+ (do_ctr_encrypt): Likewise.
+ (cipher_encrypt): Likewise.
+ (gcry_cipher_encrypt): Likewise.
+ (cipher_decrypt): Likewise.
+ (gcry_cipher_decrypt): Likewise.
+ (cipher_sync): Likewise.
+ (gcry_cipher_ctl): Likewise.
+
+ * pubkey.c: Removed struct pubkey_table_s.
+ Changed definition of pubkey_table.
+ Removed definition of disabled_algos.
+ (pubkeys_registered, default_pubkeys_registered): New variables.
+ (REGISTER_DEFAULT_PUBKEYS): New macro.
+ (setup_pubkey_table): Removed function.
+ (load_pubkey_modules): Removed function.
+ (gcry_pubkey_register_default): New function.
+ (gcry_pubkey_lookup_func_id): New function.
+ (gcry_pubkey_lookup_func_name): New function.
+ (gcry_pubkey_lookup_id): New function.
+ (gcry_pubkey_lookup_name): New function.
+ (gcry_pubkey_id_new): New function.
+ (gcry_pubkey_register): New function.
+ (gcry_pubkey_unregister): New function.
+ (gcry_pk_map_name): Adjusted to use new module management.
+ (gcry_pk_algo_name): Likewise.
+ (disable_pubkey_algo): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_get_npkey): Likewise.
+ (pubkey_get_nskey): Likewise.
+ (pubkey_get_nsig): Likewise.
+ (pubkey_get_nenc): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (gcry_pk_get_nbits): Likewise.
+ (gcry_pk_algo_info): Likewise.
+
+ * md.c: Removed struct md_digest_list_s.
+ (digest_list): Changed definition.
+ (digests_registered, default_digests_registered): New variables.
+ (REGISTER_DEFAULT_DIGESTS): New macro.
+ (new_list_item): Removed function.
+ (setup_md_table): Removed function.
+ (load_digest_module): Removed function.
+ (gcry_digest_register_default): New function.
+ (gcry_digest_lookup_func_id): New function.
+ (gcry_digest_lookup_func_name): New function.
+ (gcry_digest_lookup_id): New function.
+ (gcry_digest_lookup_name): New function.
+ (gcry_digest_id_new): New function.
+ (gcry_digest_register): New function.
+ (gcry_digest_unregister): New function.
+ (GcryDigestEntry): New type.
+ (struct gcry_md_context): Adjusted type of `list'.
+ (gcry_md_map_name): Adjusted to use new module management.
+ (digest_algo_to_string): Likewise.
+ (check_digest_algo): Likewise.
+ (md_enable): Likewise.
+ (md_digest_length): Likewise.
+ (md_asn_oid): Likewise.
+
+2003-04-07 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c: Replaced PUBKEY_ALGO_DSA with GCRY_PK_DSA,
+ PUBKEY_ALGO_RSA with GCRY_PK_RSA and PUBKEY_ALGO_ELGAMAL with
+ GCRY_PK_ELG.
+
+ * dsa.c: Replaced PUBKEY_ALGO_DSA with GCRY_PK_DSA.
+
+2003-04-01 Moritz Schulte <moritz@g10code.com>
+
+ * des.c: Removed checks for GCRY_CIPHER_3DES and GCRY_CIPHER_DES.
+
+2003-03-31 Moritz Schulte <moritz@g10code.com>
+
+ * tiger.c (tiger_get_info): Do not declare static.
+ * sha256.c (sha256_get_info): Likewise.
+ * sha1.c (sha1_get_info): Likewise.
+ * rmd160.c (rmd160_get_info): Likewise.
+ * md5.c (md5_get_info): Likewise.
+ * md4.c (md4_get_info): Likewise.
+ * crc.c (crc_get_info): Likewise.
+
+ * md.c (load_digest_module): Call setup_md_table during
+ initialization.
+ (new_list_item): Link new element into digest_list.
+
+ * cipher.c (do_ctr_decrypt): Made it act as a wrapper
+ for do_ctr_encrypt, since these functions are identical.
+
+2003-03-30 Simon Josefsson <jas@extundo.com>
+
+ * cipher.c (struct gcry_cipher_handle): Add counter field.
+ (gcry_cipher_open): Add CTR.
+ (cipher_reset): Clear counter field.
+ (do_ctr_encrypt, do_ctr_decrypt): New functions.
+ (cipher_encrypt, cipher_decrypt): Call CTR functions.
+ (gcry_cipher_ctl): Add SET_CTR to set counter.
+
+2003-03-30 Moritz Schulte <moritz@g10code.com>
+
+ * rsa.c (_gcry_rsa_blind): New function.
+ (_gcry_rsa_unblind): New function.
+ (_gcry_rsa_decrypt): Use _gcry_rsa_blind and _gcry_rsa_unblind.
+
+2003-03-26 Moritz Schulte <moritz@g10code.com>
+
+ * dynload.c (_gcry_enum_gnupgext_pubkeys): Adjust `encrypt' and
+ `decrypt' function arguments.
+ (_gcry_enum_gnupgext_pubkeys): Likewise.
+ * dynload.h: Likewise.
+
+ * pubkey.c (dummy_decrypt): Add argument: int flags.
+ (dummy_encrypt): Likewise.
+
+ * elgamal.c (_gcry_elg_encrypt): Add argument: int flags.
+ (_gcry_elg_decrypt): Likewise.
+
+ * rsa.c (_gcry_rsa_encrypt): Add argument: int flags.
+ (_gcry_rsa_decrypt): Likewise.
+
+ * pubkey.c: Add `flags' argument to members `encrypt' and
+ `decrypt' of struct `pubkey_table_s'.
+
+ * rsa.h: Add `flags' argument to function declarations.
+ * elgamal.h: Likewise.
+
+ * pubkey.c (sexp_data_to_mpi): New variable: int parsed_flags.
+ (sexp_data_to_mpi): Set `parsed_flags'.
+ (sexp_data_to_mpi): New argument: int *flags.
+ (gcry_pk_encrypt): New variable: int flags.
+ (gcry_pk_encrypt): Pass `flags' to pubkey_encrypt.
+ (pubkey_encrypt): New variable: int flags.
+ (pubkey_encrypt): Pass `flags' to pubkey encrypt function.
+ (pubkey_decrypt): Likewise.
+ (pubkey_decrypt): Pass `flags' to pubkey decrypt function.
+ (gcry_pk_encrypt): Include `flags' s-exp in return list.
+ (sexp_to_enc): New argument: int *flags.
+ (gcry_pk_decrypt): New variable: int flags.
+ (gcry_pk_decrypt): Pass `flags' to pubkey_decrypt.
+ (sexp_to_enc): New variable: int parsed_flags.
+ (sexp_to_enc): Set `parsed_flags'.
+
+2003-03-22 Simon Josefsson <jas@extundo.com>
+
+ * cipher.c (gcry_cipher_open, do_cbc_encrypt)
+ (gcry_cipher_encrypt): Support GCRY_CIPHER_CBC_MAC.
+ (gcry_cipher_ctl): Support GCRYCTL_SET_CBC_MAC.
+
+2003-03-19 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (gen_prime): New args EXTRA_CHECK and EXTRA_CHECK_ARG
+ to allow for a user callback. Changed all callers.
+ (_gcry_generate_secret_prime)
+ (_gcry_generate_public_prime): Ditto, pass them to gen_prime.
+ * rsa.c (check_exponent): New.
+ (generate): Use a callback to ensure that a given exponent is
+ actually generated.
+
+2003-03-12 Moritz Schulte <moritz@g10code.com>
+
+ * primegen.c: Initialize `no_of_small_prime_numbers' statically.
+ (gen_prime): Remove calculation of `no_of_small_prime_numbers'.
+
+2003-03-03 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (gcry_md_ctl): Rewritten to use the same style as the other
+ function dispatchers.
+
+2003-03-02 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (struct gcry_cipher_handle): New member: algo_index.
+ (gcry_cipher_open): Allocate memory for two cipher contexts.
+ Initialize algo_index.
+ (cipher_setkey): Duplicate context into reserved memory.
+ (cipher_reset): New function, which resets the context and clears
+ the IV.
+ (gcry_cipher_ctl): Call cipher_reset.
+
+2003-02-23 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c: Remove (bogus) `digitp' macro definition.
+ * md.c: Likewise.
+
+ * blowfish.c (burn_stack): Removed.
+ * arcfour.c (burn_stack): Likewise.
+ * cast5.c (burn_stack): Likewise.
+ * des.c (burn_stack): Likewise.
+ * md4.c (burn_stack): Likewise.
+ * md5.c (burn_stack): Likewise.
+ * random.c (burn_stack): Likewise.
+ * rijndael.c (burn_stack): Likewise.
+ * rmd160.c (burn_stack): Likewise.
+ * sha1.c (burn_stack): Likewise.
+ * sha256.c (burn_stack): Likewise.
+ * tiger.c (burn_stack): Likewise.
+ * twofish.c (burn_stack): Likewise.
+
+ * blowfish.c: Changed all occurrences of burn_stack to
+ _gcry_burn_stack.
+ * arcfour.c: Likewise.
+ * cast5.c: Likewise.
+ * des.c: Likewise.
+ * md4.c: Likewise.
+ * md5.c: Likewise.
+ * random.c: Likewise.
+ * rijndael.c: Likewise.
+ * rmd160.c: Likewise.
+ * sha1.c: Likewise.
+ * sha256.c: Likewise.
+ * tiger.c: Likewise.
+ * twofish.c: Likewise.
+
+ * arcfour.c (_gcry_arcfour_get_info): Use GCRY_CIPHER_ARCFOUR
+ instead of hard-coded value `301'.
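+
+ The CTR entries above (2003-03-30/31) note that do_ctr_encrypt and
+ do_ctr_decrypt are identical; that is inherent to CTR mode, which
+ XORs a keystream derived from an incrementing counter, so one
+ routine serves both directions. A self-contained sketch (generic
+ block-cipher callback, 16-byte blocks assumed, not the cipher.c
+ code):
+
+   #include <stddef.h>
+
+   #define BLOCKSIZE 16
+
+   typedef void (*blockfn_t) (void *ctx, unsigned char *out,
+                              const unsigned char *in);
+
+   static void
+   ctr_crypt (void *ctx, blockfn_t encrypt_block,
+              unsigned char ctr[BLOCKSIZE],
+              unsigned char *out, const unsigned char *in, size_t len)
+   {
+     unsigned char ks[BLOCKSIZE];
+     size_t i, n;
+
+     while (len)
+       {
+         /* keystream = E_k(counter); the same XOR en- and decrypts. */
+         encrypt_block (ctx, ks, ctr);
+         n = len < BLOCKSIZE ? len : BLOCKSIZE;
+         for (i = 0; i < n; i++)
+           out[i] = in[i] ^ ks[i];
+         out += n; in += n; len -= n;
+         /* Increment the counter as a big-endian integer.  */
+         for (i = BLOCKSIZE; i > 0; i--)
+           if (++ctr[i-1])
+             break;
+       }
+   }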
+
+2003-01-24 Werner Koch <wk@gnupg.org>
+
+ * random.c (_gcry_register_random_progress): New.
+ (_gcry_random_progress): New.
+
+ * rndlinux.c (gather_random): Call the random progress function.
+
+2003-01-23 Werner Koch <wk@gnupg.org>
+
+ * rsa.c (generate): New arg USE_E to request a specific public
+ exponent.
+ (_gcry_rsa_generate): Ditto.
+ * elgamal.c (_gcry_elg_generate): Must add a dummy argument
+ instead of USE_E.
+ * dsa.c (_gcry_dsa_generate): Ditto.
+ * pubkey.c (dummy_generate): Ditto.
+ (pubkey_generate): Add USE_E arg and pass it down.
+ (gcry_pk_genkey): Detect "rsa-use-e" parameter and pass it to generate.
+
+ * pubkey.c (sexp_to_enc): New arg RET_MODERN.
+ (gcry_pk_decrypt): Make use of it to return a real S-expression.
+ Return better error codes.
+ (gcry_pk_verify): Return better error codes.
+
+2003-01-21 Werner Koch <wk@gnupg.org>
+
+ * random.c (gcry_random_add_bytes): Add QUALITY argument, let the
+ function return an error code and disable its core for now.
+
+2003-01-21 Timo Schulz <twoaday@freakmail.de>
+
+ * random.c (gcry_random_add_bytes): New. Function to add external
+ random to the pool.
+
+2003-01-20 Simon Josefsson <jas@extundo.com>
+
+ * crc.c: New.
+ * Makefile.am (EXTRA_PROGRAMS, EXTRA_crc_SOURCES): Add crc.c.
+ * md.c (gcry_md_get_algo_dlen): Add values for CRC.
+
+2003-01-20 Werner Koch <wk@gnupg.org>
+
+ * sha256.c: New.
+ * bithelp.h (ror): New.
+ * Makefile.am: Add sha256.c.
+ * md.c (oid_table): Add values for SHA256 et al.
+ (gcry_md_get_algo_dlen): Likewise.
+
+2003-01-20 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_get_keygrip): Implemented keygrips for DSA
+ and ElGamal.
+
+2003-01-17 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (gcry_cipher_encrypt): Reworked so that the output will
+ never contain the plaintext even if the caller did not check the
+ return value.
+
+ * md.c (gcry_md_get_algo): Changed error code to GCRYERR_GENERAL
+ because we don't have an invalid md algo but no algorithm enabled.
+
+ * pubkey.c (gcry_pk_genkey): Changed error code for bounds check
+ of table parameters to GCRYERR_INTERNAL.
+
+ * md.c (gcry_md_open): Partly reverted Timo's change from
+ 2002-10-10 by removing the check for the algorithm. An algorithm
+ of 0 is allowed and anyway we should not double check it or check
+ it using a different function. Also fixed the flags check.
+
+ * pubkey.c (gcry_pk_encrypt): Make sure that R_CIPH points to NULL
+ on error.
+ (gcry_pk_decrypt): Ditto for R_PLAIN.
+ (gcry_pk_sign): Ditto for R_SIG.
+ (gcry_pk_genkey): Ditto for R_KEY.
+
+2003-01-16 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_write): Changed 2nd argument type to void*.
+ (gcry_md_hash_buffer): Changed type of both buffers to void*.
+ (gcry_md_setkey): Changed 2nd argument type to void*.
+
+2003-01-15 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (sexp_data_to_mpi): New. This handles pkcs1 padding.
+ (gcry_pk_sign, gcry_pk_verify): Use it here.
+ (gcry_pk_encrypt): And here.
+ (pubkey_verify): Add debug code.
+ (sexp_to_enc): Handle flags in the input and return the pkcs1 flag
+ in a new parameter.
+ (gcry_pk_decrypt): Prepare for future pkcs1 handling.
+
+2002-12-19 Werner Koch <wk@gnupg.org>
+
+ * random.c (_gcry_random_initialize): New.
+
+2002-12-16 Werner Koch <wk@gnupg.org>
+
+ * cipher.c: Added a Teletrust specific OID for 3DES.
+
+2002-12-12 Werner Koch <wk@gnupg.org>
+
+ * md.c: Added another oddball OIW OID (sha-1WithRSAEncryption).
+
+2002-11-23 Werner Koch <wk@gnupg.org>
+
+ * md.c (load_digest_module): Enlarged checked_algos bitmap.
+ * md4.c (func_table): Fixed entry for md4.
+ Both by Simon Josefsson.
+ (transform): Copy data to get the alignment straight. Tested only
+ on i386.
+
+2002-11-10 Simon Josefsson <jas@extundo.com>
+
+ * cipher.c (gcry_cipher_open): Don't reject CTS flag.
+ (do_cbc_encrypt, do_cbc_decrypt, cipher_encrypt)
+ (gcry_cipher_encrypt, cipher_decrypt)
+ (gcry_cipher_decrypt): Support CTS flag.
+ (gcry_cipher_ctl): Toggle CTS flag.
+
+2002-11-10 Werner Koch <wk@gnupg.org>
+
+ * md4.c: New. By Simon Josefsson.
+ * Makefile.am (EXTRA_PROGRAMS): Add md4.c.
+ * md.c (oid_table,gcry_md_get_algo_dlen): MD4 support.
+
+2002-10-14 Werner Koch <wk@gnupg.org>
+
+ * arcfour.c (do_encrypt_stream): Don't use increment op when
+ assigning to the same variable.
+
+2002-10-10 Timo Schulz <ts@winpt.org>
+
+ * pubkey.c (gcry_pk_genkey): Check boundaries.
+
+ * md.c (gcry_md_open): Check that algo is available and only
+ valid flag values are used.
+ (gcry_md_get_algo): Add error handling.
+
+2002-09-26 Werner Koch <wk@gnupg.org>
+
+ * md.c: Include an OID for TIGER.
+ * tiger.c (tiger_get_info): Use a regular OID.
+
+2002-09-17 Werner Koch <wk@gnupg.org>
+
+ * random.c: Replaced mutex.h by the new ath.h. Changed all calls.
+
+2002-09-16 Werner Koch <wk@gnupg.org>
+
+ * arcfour.c (do_encrypt_stream): Use register modifier and modulo.
+ According to Nikos Mavroyanopoulos this increases performance on
+ i386 systems noticeably. And I always thought gcc is clever enough.
+ * md5.c (transform): Use register modifier.
+ * rmd160.c (transform): Ditto.
+ * sha1.c (transform): Ditto. We hope that there are 6 free registers.
+ * random.c (gcry_randomize): Rewrote to avoid malloc calls.
+
+ * rndlinux.c (gather_random): Replaced remaining fprintfs by log_*.
+ * arcfour.c (do_arcfour_setkey): Ditto.
+ * twofish.c (do_twofish_setkey): Ditto.
+ * rndegd.c (gather_random): Ditto.
+ * rijndael.c (do_setkey): Ditto.
+ * random.c (_gcry_random_dump_stats): Ditto.
+ * primegen.c (_gcry_generate_elg_prime): Ditto.
+ * des.c (_gcry_des_get_info): Ditto.
+ * cast5.c (do_cast_setkey): Ditto.
+ * blowfish.c (do_bf_setkey): Ditto.
+
+2002-08-26 Werner Koch <wk@gnupg.org>
+
+ * des.c (weak_keys): Fixed one entry in the table and compared
+ all entries against the literature.
+ (selftest): Checksum the weak key table.
+
+2002-08-21 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c: Enable keygrip calculation for "openpgp-rsa".
+
+2002-08-17 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (setup_cipher_table): Don't overwrite the DES entry
+ with the entry for DUMMY.
+
+2002-08-14 Werner Koch <wk@gnupg.org>
+
+ * des.c (do_des_setkey,do_des_encrypt, do_des_decrypt): New.
+ (_gcry_des_get_info): Support plain old DES.
+ * cipher.c (setup_cipher_table): Put DES into the table.
+
+2002-07-25 Werner Koch <wk@gnupg.org>
+
+ * rndunix.c (_gcry_rndunix_constructor): Prefixed with _gcry_.
+ Noted by Stephan Austermuehle.
+
+2002-07-08 Timo Schulz <ts@winpt.org>
+
+ * rndw32.c: Replaced the m_ memory functions with the real
+ gcry_ functions. Renamed all g10_ prefixed functions to log_.
+
+2002-06-12 Werner Koch <wk@gnupg.org>
+
+ * rsa.c (generate): Use e = 65537 for now.
+
+2002-06-11 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_get_keygrip): Allow a "protected-private-key".
+
+2002-06-05 Timo Schulz <ts@winpt.org>
+
+ * cipher.c (gcry_cipher_encrypt, gcry_cipher_decrypt):
+ Check that the input size is a multiple of the blocksize.
+
+2002-05-23 Werner Koch <wk@gnupg.org>
+
+ * md.c (oid_table): Add an rsadsi OID for MD5.
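+
+ The md4.c alignment fix above ("copy data to get the alignment
+ straight") is the classic portable pattern: never cast an arbitrary
+ byte pointer to a word pointer; copy the 64-byte block into an
+ aligned buffer first. A sketch, not the actual md4.c transform:
+
+   #include <string.h>
+   #include <stdint.h>
+
+   static void
+   transform (uint32_t state[4], const unsigned char *data)
+   {
+     uint32_t x[16];
+
+     /* Safe on strict-alignment CPUs and free of aliasing issues;
+        this sketch still assumes a little-endian byte order.  */
+     memcpy (x, data, 64);
+     /* ... run the MD4 rounds over x[0..15] and update state ... */
+     (void)state;
+   }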
+
+2002-05-21 Werner Koch <wk@gnupg.org>
+
+ * primegen.c, elgamal.c, dsa.c (progress): Do not print anything
+ by default. Pass an extra identifying string to the callback and
+ reserved 2 arguments for current and total counters. Changed the
+ register function prototype.
+
+2002-05-17 Werner Koch <wk@gnupg.org>
+
+ * rndegd.c (rndegd_constructor): Fixed name of register function
+ and prefixed the function name with _gcry_.
+ * rndw32.c (rndw32_constructor): Ditto.
+ * tiger.c (tiger_constructor): Ditto.
+
+ * Makefile.am: Removed all dynamic loading stuff.
+ * dynload.c: Ditto. Now only used for the constructor system.
+
+2002-05-15 Werner Koch <wk@gnupg.org>
+
+ * random.c (gcry_random_bytes,gcry_random_bytes_secure)
+ (gcry_randomize): Make sure we are initialized.
+
+2002-05-14 Werner Koch <wk@gnupg.org>
+
+ Changed license of most files to the LGPL.
+
+2002-05-02 Werner Koch <wk@gnupg.org>
+
+ * random.c (_gcry_fast_random_poll): Initialize the module so the
+ mutex can be used.
+
+ * primegen.c (small_prime_numbers): Moved table from smallprime.c.
+ * smallprime.c: File removed.
+
+ * des.c (leftkey_swap, rightkey_swap, working_memcmp): Made static.
+
+ * cipher.c (gcry_cipher_map_name): Map "RIJNDAEL" to "AES".
+ * rijndael.c (rijndael_get_info): We only support a 128 bit
+ blocksize so it makes sense to change the algorithm strings to
+ AES.
+
+ * tiger.c (tiger_final): Removed superfluous token pasting operators.
+ * md5.c (md5_final): Ditto.
+
+2002-04-30 Werner Koch <wk@gnupg.org>
+
+ * cipher.c: Fixed list of copyright years.
+
+2002-03-18 Werner Koch <wk@gnupg.org>
+
+ * random.c (initialize): Initialize the new pool lock mutex.
+ (_gcry_fast_random_poll): Add locking and moved main
+ code out to...
+ (do_fast_random_poll): New function.
+ (read_pool): Use the new function here.
+ (get_random_bytes): Add locking.
+ (_gcry_update_random_seed_file): Ditto.
+
+2002-03-11 Werner Koch <wk@gnupg.org>
+
+ * md.c: Add rsaSignatureWithripemd160 to OID table.
+
+2002-02-20 Werner Koch <wk@gnupg.org>
+
+ * sha1.c: Removed a left over comment note. The code has been
+ rewritten from scratch in 1998. Thanks to Niels Möller for
+ reporting this misleading comment.
+
+2002-02-18 Werner Koch <wk@gnupg.org>
+
+ * rndunix.c (rndunix_constructor): Use the new prefixed
+ function name. Reported by Jordi Mallach.
+
+2002-02-10 Werner Koch <wk@gnupg.org>
+
+ * random.c (mix_pool): Carry an extra failsafe_digest buffer
+ around to make the function more robust.
+
+2002-02-08 Werner Koch <wk@gnupg.org>
+
+ * random.c (add_randomness): Xor new data into the pool and not
+ just copy it. This avoids any chosen input attacks which are not
+ serious in our setting because an outsider won't be able to mix
+ data in and even then we keep going with a PRNG. Thanks to Stefan
+ Keller for pointing this out.
+
+2002-01-04 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_genkey): Do not release skey - it is static.
+
+ * primegen.c (gen_prime): Of course we should use set_bit
+ and not set_highbit to set the second high bit.
+
+2001-12-18 Werner Koch <wk@gnupg.org>
+
+ * rsa.c (generate): Loop until we find the exact modulus size.
+ Changed the exponent to 41.
+ (rsa_get_info): s/usage/r_usage/ to avoid shadow warnings.
+ * primegen.c (gen_prime): Set 2 high order bits for secret primes.
+
+ * Makefile.am (DISTCLEANFILES): Include construct.c.
+
+2001-12-17 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_get_keygrip): New - experimental.
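+
+ The 2002-02-08 add_randomness rationale above is easy to see in
+ code: XORing folds new input into what is already in the pool, so
+ even attacker-chosen bytes cannot erase previously collected
+ entropy. A sketch with invented names, not the actual random.c:
+
+   #include <stddef.h>
+
+   static void
+   add_randomness (unsigned char *pool, size_t poolsize,
+                   const unsigned char *buf, size_t len, size_t *pos)
+   {
+     size_t i;
+
+     for (i = 0; i < len; i++)
+       {
+         pool[*pos] ^= buf[i];        /* was: pool[*pos] = buf[i];  */
+         *pos = (*pos + 1) % poolsize;
+       }
+   }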
+
+2001-12-11 Werner Koch <wk@gnupg.org>
+
+ * cipher.c: Added OIDs for AES.
+ (gcry_cipher_mode_from_oid): New.
+ (gcry_cipher_map_name): Moved OID search code to ..
+ (search_oid): .. new function.
+
+2001-12-10 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_encrypt): Find the signature algorithm by name
+ and not by number.
+
+ * pubkey.c (gcry_pk_encrypt,gcry_pk_decrypt,gcry_pk_sign)
+ (gcry_pk_verify,gcry_pk_testkey, gcry_pk_genkey)
+ (gcry_pk_get_nbits): Release the arrays. Noted by Nikos
+ Mavroyanopoulos.
+
+2001-12-06 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (gcry_cipher_map_name): Look also for OIDs prefixed
+ with "oid." or "OID.".
+
+2001-12-05 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (algo_info_table): Fixed entry for openpgp-rsa.
+
+2001-11-24 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c: Added the rsaEncryption OID to the tables.
+ (sexp_to_key): Add an arg to return the index of the algorithm,
+ changed all callers.
+ (gcry_pk_sign): Find the signature algorithm by name and not by
+ number.
+ (gcry_pk_get_nbits): Fixed so that we can now really pass a secret
+ key to get the result.
+
+ * md.c (gcry_md_map_name): Look also for OIDs prefixed with "oid."
+ or "OID." so that an OID string can be used as an S-Exp token.
+
+2001-11-20 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_map_name): Lookup by OID if the name begins
+ with a digit.
+ (oid_table): New.
+
+2001-11-16 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_info): New operator GCRYCTL_IS_ALGO_ENABLED.
+
+2001-11-07 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_hash_buffer): Close the handle which was left open
+ for algorithms other than rmd160.
+
+2001-08-08 Werner Koch <wk@gnupg.org>
+
+ * rndw32.c (gather_random): Use toolhelp in addition to the NT
+ gatherer for Windows2000. Suggested by Sami Tolvanen.
+
+ * random.c (read_pool): Fixed length check, this used to be one
+ byte too strict. Made an assert out of it because the caller has
+ already made sure that only poolsize bytes are requested.
+ Reported by Marcus Brinkmann.
+
+2001-08-03 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (cipher_encrypt, cipher_decrypt): Prepare to return
+ errors. We have to change the interface to all ciphers to make
+ this really work but we should do so to prepare for hardware
+ encryption modules.
+ (gcry_cipher_encrypt, gcry_cipher_decrypt): Return the error and
+ set lasterr.
+ (gcry_cipher_ctl): Make sure that errors from setkey are returned.
+
+2001-08-02 Werner Koch <wk@gnupg.org>
+
+ * rndlinux.c (gather_random): Cast a size_t arg to int so that
+ the format string is correct. Casting is okay here and avoids
+ translation changes.
+
+ * random.c (fast_random_poll): Do not check the return code of
+ getrusage.
+
+ * rndunix.c: Add a signal.h header to avoid warnings on Solaris 7
+ and 8.
+
+ * tiger.c (print_abc,print_data): Removed.
+
+ * rijndael.c, des.c, blowfish.c, twofish.c, cast5.c, arcfour.c
+ (burn_stack): New. Add wrappers for most functions to be able to
+ call burn_stack after the function invocation. This method seems
+ to be the most portable way to zeroise the stack used. It only
+ works on stack frame based machines but it is highly portable
+ and has no side effects. Just setting the automatic variables at
+ the end of a function to zero does not work well because the
+ compiler will optimize them away - marking them as volatile would
+ be bad for performance.
+ * md5.c, sha1.c, rmd160.c, tiger.c (burn_stack): Likewise.
+ * random.c (burn_stack): New.
+
+ * primegen.c (_gcry_generate_elg_prime): Freed q at 3 places.
+ Thanks to Tommi Komulainen.
+
+ * arcfour.c (arcfour_setkey): Check the minimum keylength against
+ bytes and not bits.
+ (selftest): Must reset the key before decryption.
+
+2001-05-31 Werner Koch <wk@gnupg.org>
+
+ * sha1.c (sha1_init): Made static.
+
+ Changed all g10_ prefixed function names as well as some mpi_
+ function names to cope with the introduced naming changes.
+
+ * md.c (prepare_macpads): Made key const.
+
+2001-05-28 Werner Koch <wk@gnupg.org>
+
+ * rndegd.c (gather_random): Removed the use of tty_printf.
+
+2001-03-29 Werner Koch <wk@gnupg.org>
+
+ * md5.c (md5_final): Fixed calculation of hashed length. Thanks
+ to disastry@saiknes.lv for pointing out that it was horribly wrong
+ for more than 512MB of input.
+ * sha1.c (sha1_final): Ditto.
+ * rmd160.c (rmd160_final): Ditto.
+ * tiger.c (tiger_final): Ditto.
+
+ * blowfish.c (encrypt,do_encrypt): Changed name to do_encrypt to
+ avoid name clashes with an encrypt function in stdlib.h of
+ Dynix/PIX. Thanks to Gene Carter.
+ * elgamal.c (encrypt,do_encrypt): Ditto.
+
+ * twofish.c (gnupgext_enum_func): Use only when compiled as a
+ module.
+ * rijndael.c (gnupgext_enum_func): Ditto.
+
+ * tiger.c (tiger_get_info): Return "TIGER192" and not just
+ "TIGER". By Edwin Woudt.
+
+ * random.c: Always include time.h - standard requirement. Thanks
+ to James Troup.
+
+ * rndw32.c: Fixes to the macros.
+
+2001-01-11 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (cipher_encrypt,gcry_cipher_encrypt): Use blocksize and
+ not 8.
+
+2000-12-19 Werner Koch <wk@gnupg.org>
+
+ Major change:
+ Removed all GnuPG stuff and renamed this piece of software
+ to gcrypt.
+
+2000-11-14 Werner Koch <wk@gnupg.org>
+
+ * dsa.c (test_keys): Replaced mpi_alloc by gcry_mpi_new and
+ mpi_free by gcry_mpi_release.
+ * elgamal.c (test_keys,generate): Ditto, also for mpi_alloc_secure.
+ * rsa.c (test_keys,generate,rsa_verify): Ditto.
+ * primegen.c (generate_elg_prime): Ditto.
+ (gen_prime): Ditto and removed nlimbs.
+
+ * rsa.c (generate): Allocate 2 more vars in secure memory.
+
+ * Makefile.am (OMIT_DEPENDENCIES): Hack to work around dependency
+ problems.
+
+2000-10-09 Werner Koch <wk@gnupg.org>
+
+ * arcfour.c, arcfour.h: New.
+ * cipher.c (cipher_encrypt, cipher_decrypt): Add stream mode.
+ (setup_cipher_table): Add Arcfour.
+ (gcry_cipher_open): Kludge to allow stream mode.
+
+Wed Oct 4 13:16:18 CEST 2000 Werner Koch <wk@openit.de>
+
+ * sha1.c (transform): Use rol() macro. Actually this is not needed
+ for a newer gcc, but there are still other compilers.
+
+ * rsa.c (test_keys): Use new random function.
+
+ * md.c (gcry_md_setkey): New function to overcome problems with
+ const conflicts.
+ (gcry_md_ctl): Pass set key to the new functions.
+
+ * rijndael.c: New.
+ * cipher.c: Add Rijndael support.
+
+Mon Sep 18 16:35:45 CEST 2000 Werner Koch <wk@openit.de>
+
+ * rndlinux.c (open_device): Loosened random device checking.
+ By Nils Ellmenreich.
+
+ * random.c (fast_random_poll): Check ENOSYS for getrusage.
+ * rndunix.c: Add 2 sources for QNX. By Sam Roberts.
+
+ * pubkey.c (gcry_pk_algo_info): Add GCRYCTL_GET_ALGO_USAGE.
+
+ * rsa.c: Changed the comment about the patent.
+ (secret): Speed up by using the CRT. For 2k keys this
+ is about 3 times faster (see the sketch below).
+ (stronger_key_check): New but unused code to check the secret key.
+ * Makefile.am: Included rsa.[ch].
+ * pubkey.c: Enabled RSA support.
+ (pubkey_get_npkey): Removed RSA workaround.
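+
+ The CRT speed-up mentioned above, as a toy sketch with machine
+ words (the real code works on MPIs; the names, the precomputed
+ u = p^-1 mod q, and the small-modulus arithmetic are illustrative
+ only):
+
+ #include <stdint.h>
+
+ static uint64_t
+ powm (uint64_t b, uint64_t e, uint64_t m)  /* b^e mod m, toy sizes */
+ {
+   uint64_t r = 1;
+
+   b %= m;
+   while (e)
+     {
+       if (e & 1)
+         r = r * b % m;   /* overflows for large m; toy only */
+       b = b * b % m;
+       e >>= 1;
+     }
+   return r;
+ }
+
+ /* Private operation on C with primes p and q and secret exponent d:
+    two half-size exponentiations plus a recombination instead of one
+    full-size exponentiation.  */
+ static uint64_t
+ rsa_secret_crt (uint64_t c, uint64_t d, uint64_t p, uint64_t q, uint64_t u)
+ {
+   uint64_t m1 = powm (c, d % (p - 1), p);      /* c^d mod p */
+   uint64_t m2 = powm (c, d % (q - 1), q);      /* c^d mod q */
+   uint64_t h = (m2 + q - m1 % q) % q * u % q;  /* u*(m2-m1) mod q */
+   return m1 + h * p;                           /* CRT recombination */
+ }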
+
+Mon Jul 31 10:04:47 CEST 2000 Werner Koch <wk@openit.de>
+
+ * pubkey.c: Replaced all gcry_sexp_{car,cdr}_{data,mpi} by the new
+ gcry_sexp_nth_{data,mpi} functions.
+
+Tue Jul 25 17:44:15 CEST 2000 Werner Koch <wk@openit.de>
+
+ * pubkey.c (sexp_to_key,sexp_to_sig,sexp_to_enc,gcry_pk_encrypt,
+ gcry_pk_decrypt,gcry_pk_sign,gcry_pk_genkey): Changed to work with
+ the new S-Exp interface.
+
+Mon Jul 17 16:35:47 CEST 2000 Werner Koch <wk@>
+
+ * random.c (gather_faked): Replaced make_timestamp by time(2) again.
+
+Fri Jul 14 19:38:23 CEST 2000 Werner Koch <wk@>
+
+ * md.c (gcry_md_ctl): Support GCRYCTL_{START,STOP}_DUMP.
+
+ * Makefile.am: Never compile mingw32 as module.
+
+ * Makefile.am: Tweaked module build and removed libtool.
+
+ * Makefile.am: Replaced -O1 by -O. Suggested by Alec Habig.
+
+ * elgamal.c (sign): Removed inactive code.
+
+ * rsa.c, rsa.h: New, based on the old module version (only in CVS for now).
+ * pubkey.c (setup_pubkey_table): Added commented support for RSA.
+
+ * rndunix.c (waitpid): New. For UTS 2.1. All by Dave Dykstra.
+ (my_popen): Do the FD_CLOEXEC only if it is available.
+ (start_gatherer): Cope with missing _SC_OPEN_MAX.
+
+ * rndunix.c: Add some more headers for QNX. By Sam Roberts.
+
+ * rndegd.c (gather_random): Shortcut level 0.
+ * rndunix.c (gather_random): Ditto.
+ * rndw32.c (gather_random): Ditto.
+
+ * rndw32.c: Replaced with code from Cryptlib and commented the old stuff.
+ * rndw32.c: Add some debugging code enabled by an environment variable.
+
+ * random.c (read_seed_file): Binary open for DOSish systems.
+ (update_random_seed_file): Ditto.
+ * random.c [MINGW32]: Include process.h for getpid.
+ * random.c (fast_random_poll): Add clock_gettime() as fallback for
+ systems which support this POSIX.4 function. By Sam Roberts.
+
+ * random.c (read_seed_file): Removed the S_ISLNK test because it
+ is already covered by !S_ISREG and is not defined in Unixware.
+ Reported by Dave Dykstra.
+ (update_random_seed_file): Silently ignore update request when pool
+ is not filled.
+
+ * random.c (read_seed_file): New.
+ (set_random_seed_file): New.
+ (read_pool): Try to read the seeding file.
+ (update_random_seed_file): New.
+
+ (read_pool): Do an initial extra seeding when level 2 quality random
+ is requested the first time. This requests at least POOLSIZE/2 bytes
+ of entropy. Combined with the seeding file this should make normal
+ random bytes cheaper and increase the quality of the random bytes
+ used for key generation.
+
+ * random.c (read_pool): Print a more friendly error message in
+ cases when too much random is requested in one call.
+
+ * random.c (fast_random_poll): Check whether RUSAGE_SELF is defined;
+ this is not the case for some ESIX and Unixware, although they have
+ getrusage().
+
+ * primegen.c (generate_elg_prime): All primes are now generated with
+ the lowest random quality level. Because they are public anyway we
+ don't need stronger random, and by this we do not drain the system's
+ entropy so much.
+
+ * primegen.c (register_primegen_progress): New.
+ * dsa.c (register_pk_dsa_progress): New.
+ * elgamal.c (register_pk_elg_progress): New.
+
+ * elgamal.c (wiener_map): New.
+ (gen_k): Use a much smaller k.
+ (generate): Calculate the qbits using the wiener map and
+ choose an x of a size comparable to the one chosen in gen_k.
+
+ * rmd160.c (rmd160_get_info): Moved casting to the left side due to a
+ problem with UTS4.3. Suggested by Dave Dykstra.
+ * sha1.c (sha1_get_info): Ditto.
+ * tiger.c (tiger_get_info): Ditto.
+ * md5.c (md5_get_info): Ditto.
+ * des.c (des_get_info): Ditto.
+ * blowfish.c (blowfish_get_info): Ditto.
+ * cast5.c (cast5_get_info): Ditto.
+ * twofish.c (twofish_get_info): Ditto.
+
+Fri Mar 24 11:25:45 CET 2000 Werner Koch <wk@openit.de>
+
+ * md.c (md_open): Add hmac arg and allocate space for the pads.
+ (md_finalize): Add HMAC support.
+ (md_copy): Ditto.
+ (md_close): Ditto.
+ (gcry_md_reset): Ditto.
+ (gcry_md_ctl): Ditto.
+ (prepare_macpads): New.
+
+Mon Mar 13 19:22:46 CET 2000 Werner Koch <wk@openit.de>
+
+ * md.c (gcry_md_hash_buffer): Add support for the other algorithms.
+
+Mon Jan 31 16:37:34 CET 2000 Werner Koch <wk@gnupg.de>
+
+ * primegen.c (generate_elg_prime): Fixed returned factors, which never
+ worked for non-DSA keys.
+
+Thu Jan 27 18:00:44 CET 2000 Werner Koch <wk@gnupg.de>
+
+ * pubkey.c (sexp_to_key): Fixed mem leaks in case of errors.
+
+Mon Jan 24 22:24:38 CET 2000 Werner Koch <wk@gnupg.de>
+
+ * pubkey.c (gcry_pk_decrypt): Implemented.
+ (gcry_pk_encrypt): Implemented.
+ (gcry_pk_testkey): New.
+ (gcry_pk_genkey): New.
+ (pubkey_decrypt): Made static.
+ (pubkey_encrypt): Ditto.
+ (pubkey_check_secret_key): Ditto.
+ (pubkey_generate): Ditto.
+
+Mon Jan 24 13:04:28 CET 2000 Werner Koch <wk@gnupg.de>
+
+ * pubkey.c (pubkey_nbits): Removed and replaced by ...
+ (gcry_pk_get_nbits): this new one.
+
+Wed Dec 8 21:58:32 CET 1999 Werner Koch <wk@gnupg.de>
+
+ * dsa.c: s/mpi_powm/gcry_mpi_powm/g
+ * elgamal.c: Ditto.
+ * primegen.c: Ditto.
+
+ * : Replaced g10_opt_verbose by g10_log_verbosity().
+
+ * Makefile.am (INCLUDES): Removed intl, added ../gcrypt.
+
+Fri Nov 19 17:15:20 CET 1999 Werner Koch <wk@gnupg.de>
+
+ * dynload.c (cmp_filenames): New to replace compare_filename() in
+ module.
+ (register_cipher_extension): Removed the tilde expansion stuff.
+ * rndegd.c (my_make_filename): New.
+
+ * : Replaced header util.h by g10lib.h.
+
+ * random.c (gather_faked): Replaced make_timestamp by time(2).
+ Disabled warning printed with tty_printf.
+ * rndlinux.c (gather_random): Always use fprintf instead of tty_xxx;
+ this should be replaced by a callback function.
+
+ * primegen.c (gen_prime): Use gcry_mpi_randomize.
+ (is_prime): Ditto.
+ * elgamal.c (test_keys): Ditto.
+ * dsa.c (test_keys): Ditto.
+
+ * cipher.c (gcry_cipher_close): Die on invalid handle.
+
+Mon Nov 15 21:36:02 CET 1999 Werner Koch <wk@gnupg.de>
+
+ * elgamal.c (gen_k): Use the new random API.
+ (generate): Ditto.
+ * dsa.c (gen_k): Ditto.
+ (generate): Ditto.
+
+Sat Nov 13 17:44:23 CET 1999 Werner Koch <wk@gnupg.de>
+
+ * pubkey.c (disable_pubkey_algo): Made static.
+ (gcry_pk_ctl): New.
+
+ * random.c (get_random_bits): Renamed to ...
+ (get_random_bytes): ... this and made static.
+ (gcry_random_bytes): New.
+ (gcry_random_bytes_secure): New.
+ (randomize_buffer): Renamed to ...
+ (gcry_randomize): ... this.
+
+ * md.c (gcry_md_hash_buffer): New.
+
+ * pubkey.c (gcry_pk_algo_info): 4 new commands.
+ (pubkey_get_npkey): Made static.
+ (pubkey_get_nskey): Made static.
+ (pubkey_get_nsig): Made static.
+ (pubkey_get_nenc): Made static.
+
+ * pubkey.c: Removed all G10ERR_xxx.
+ * cipher.c: Changed all GCRYERR_INV_ALGO to GCRYERR_INV_CIPHER_ALGO.
+ * md.c: Changed all GCRYERR_INV_ALGO to GCRYERR_INV_MD_ALGO.
+ * cast5.c (cast_setkey): Changed error codes to GCRYERR_xxx.
+ * blowfish.c: Ditto.
+ * des.c: Ditto.
+ * twofish.c: Ditto.
+ * dsa.c: Ditto.
+ * elgamal.c: Ditto.
+
+ * g10c.c: Removed.
+
+ * cipher.c (gcry_cipher_open): Replaced alloc functions and return NULL
+ if we are out of core.
+ * dynload.c: Replaced all memory allocation functions.
+ * md.c: Ditto.
+ * primegen.c: Ditto.
+ * pubkey.c: Ditto.
+ * random.c: Ditto.
+ * rndw32.c: Ditto.
+ * elgamal.c: Ditto.
+ * dsa.c: Ditto.
+
+Tue Oct 26 14:10:21 CEST 1999 Werner Koch <wk@gnupg.de>
+
+ * elgamal.c (sign): Hugh found strange code here. Replaced by BUG().
+
+ * cipher.c: Merged with gcrypt/symapi.c.
+
+ * pubkey.c (string_to_pubkey_algo): Renamed function to ...
+ (gcry_pk_map_name): ... this.
+ (pubkey_algo_to_string): Renamed function to ...
+ (gcry_pk_algo_name): ... this.
+ (gcry_pk_algo_info): New.
+ * pubkey.c: Merged with gcrypt/pkapi.c.
+
+ * md.c (md_reset): Clear finalized; thanks to Ulf Moeller for
+ fixing this bug.
+
+ * md.c: Merged with gcrypt/mdapi.c.
+
+Wed Sep 15 14:39:59 CEST 1999 Michael Roth <mroth@nessie.de>
+
+ * des.c: Various speed improvements: One bit pre-rotation
+ trick after initial permutation (Richard Outerbridge).
+ Finished test of SSLeay Triple-DES patterns.
+
+Wed Sep 15 16:22:17 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndw32.c: New.
+
+Mon Sep 13 10:51:29 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * bithelp.h: New.
+ * rmd160.h, sha1.h, md5.h: Use the rol macro from bithelp.h.
+
+Tue Sep 7 16:23:36 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * Makefile.am: Fixed seds for latest egcc. By Ollivier Robert.
+
+Mon Sep 6 19:59:08 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * des.c (selftest): Add some test patterns.
+
+Mon Aug 30 20:38:33 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (do_cbc_encrypt): Fixed serious bug occurring when not
+ using in-place encryption. Pointed out by Frank Stajano.
+
+Mon Jul 26 09:34:46 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * md5.c (md5_final): Fix for a SCO cpp bug.
+
+Thu Jul 15 10:15:35 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * elgamal.c (elg_check_secret_key,elg_encrypt,
+ elg_decrypt,elg_sign,elg_verify): Sanity check on the args.
+ * dsa.c (dsa_check_secret_key,dsa_sign,dsa_verify): Ditto.
+
+ * pubkey.c (disable_pubkey_algo): New.
+ (check_pubkey_algo2): Look at disabled algo table.
+ * cipher.c (disable_cipher_algo): New.
+ (check_cipher_algo): Look at disabled algo table (see the sketch
+ below).
+
+Wed Jul 7 13:08:40 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * Makefile.am: Support for libtool.
+
+Fri Jul 2 11:45:54 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dsa.c (gen_k): Changed algorithm to consume fewer random bytes.
+ * elgamal.c (gen_k): Ditto.
+
+ * random.c (random_dump_stats): New.
+
+Thu Jul 1 12:47:31 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * primegen.c, elgamal.c, dsa.c (progress): New; replaced all
+ fputc with a call to this function.
+
+Sat Jun 26 12:15:59 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndegd.c (do_write): s/ssize_t/int/ due to SunOS 4.1 probs.
+
+ * cipher.c (do_cbc_encrypt, do_cbc_decrypt): New.
+
+ * dynload.c (HAVE_DL_SHL_LOAD): Map hpux API to dlopen (Dave Dykstra).
+ * Makefile.am (install-exec-hook): Removed.
+
+Sun May 23 14:20:22 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (setup_cipher_table): Enable Twofish.
+
+ * random.c (fast_random_poll): Disable use of times() for mingw32.
+
+Mon May 17 21:54:43 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dynload.c (register_internal_cipher_extension): Minor init fix.
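+
+ A minimal sketch of the disabled-algo table mentioned in the
+ Thu Jul 15 1999 entry above (table size and names are
+ illustrative, not the actual cipher.c code):
+
+ #define MAX_DISABLED 4
+ static int disabled_algos[MAX_DISABLED];
+
+ static void
+ disable_cipher_algo (int algo)
+ {
+   int i;
+
+   for (i = 0; i < MAX_DISABLED; i++)
+     if (!disabled_algos[i] || disabled_algos[i] == algo)
+       {
+         disabled_algos[i] = algo;  /* claim a free slot (0 = unused) */
+         return;
+       }
+   /* Table full: silently ignore in this sketch.  */
+ }
+
+ static int
+ check_cipher_algo (int algo)
+ {
+   int i;
+
+   for (i = 0; i < MAX_DISABLED; i++)
+     if (disabled_algos[i] == algo)
+       return -1;                   /* algorithm has been disabled */
+   return 0;                        /* usable */
+ }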
+
+Tue May 4 15:47:53 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * primegen.c (gen_prime): Re-added the Fermat test. Fixed the bug
+ that we didn't correct for step when passing the prime to the
+ Rabin-Miller test, which led to bad performance (Stefan Keller).
+ (check_prime): Add a first Fermat test.
+
+Sun Apr 18 10:11:28 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (cipher_setiv): Add ivlen arg, changed all callers.
+
+ * random.c (randomize_buffer): Always use secure memory because
+ we can't use m_is_secure() on a statically allocated buffer.
+
+ * twofish.c: Replaced some macros by a loop to reduce text size.
+ * Makefile.am (twofish): No more need for sed editing.
+
+Fri Apr 9 12:26:25 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (cipher_open): Reversed the changes for AUTO_CFB.
+
+ * blowfish.c: Dropped the Blowfish 160 mode.
+ * cipher.c (cipher_open): Ditto.
+ (setup_cipher_table): Ditto. And removed support for twofish128.
+
+Wed Apr 7 20:51:39 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c (get_random_bits): Can now handle requests > POOLSIZE.
+
+ * cipher.c (cipher_open): Now uses standard CFB for automode if
+ the blocksize is gt 8 (according to rfc2440).
+
+ * twofish.c: Applied Matthew Skala's patches for 256 bit key.
+
+Sat Mar 20 11:44:21 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndlinux.c (tty_printf) [IS_MODULE]: Removed.
+
+ * rndegd.c (gather_random): Some fixes.
+
+Wed Mar 17 13:09:03 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndegd.c (do_read): New.
+ (gather_random): Changed the implementation.
+
+Mon Mar 8 20:47:17 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dynload.c (DLSYM_NEEDS_UNDERSCORE): Renamed.
+
+Fri Feb 26 17:55:41 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * md.c: Nearly a total rewrite.
+
+Wed Feb 24 11:07:27 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (context): Fixed alignment.
+ * md.c: Ditto.
+
+ * rndegd.c: New.
+
+Mon Feb 22 20:04:00 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndegd.c: New.
+
+Wed Feb 10 17:15:39 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * Makefile.am: Modules are now figured out by configure.
+ * construct.c: New. Generated by configure. Changed all modules
+ to work with that.
+ * sha1.h: Removed.
+ * md5.h: Removed.
+
+ * twofish.c: Changed interface to allow Twofish/256.
+
+ * rndunix.c (start_gatherer): Die on SIGPIPE.
+
+Wed Jan 20 18:59:49 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndunix.c (gather_random): Fix to avoid infinite loop.
+
+Sun Jan 17 11:04:33 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * des.c (is_weak_key): Replace system memcmp due to bugs
+ in SunOS's memcmp.
+ (des_get_info): Return error on failed selftest.
+ * twofish.c (twofish_setkey): Return error on failed selftest or
+ invalid keylength.
+ * cast5.c (cast_setkey): Ditto.
+ * blowfish.c (bf_setkey): Return error on failed selftest.
+
+Tue Jan 12 11:17:18 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c (random_is_faked): New.
+
+ * tiger.c: Only compile if we have the u64 type.
+
+Sat Jan 9 16:02:23 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndunix.c (gather_random): Check for setuid.
+
+ * Makefile.am: Add a way to statically link random modules.
+
+Thu Jan 7 18:00:58 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * md.c (md_stop_debug): Do a flush first.
+ (md_open): Size of buffer now depends on the secure parameter.
+
+Sun Jan 3 15:28:44 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndunix.c (start_gatherer): Fixed stupid ==/= bug.
+
+1998-12-31 Geoff Keating <geoffk@ozemail.com.au>
+
+ * des.c (is_weak_key): Rewrite loop end condition.
+
+Tue Dec 29 14:41:47 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c: Add unistd.h for getpid().
+ (RAND_MAX): Fallback value for Sun.
+
+Wed Dec 23 17:12:24 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * md.c (md_copy): Reset debug.
+
+Mon Dec 14 21:18:49 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c (read_random_source): Changed the interface to the
+ random gathering function.
+ (gather_faked): Use new interface.
+ * dynload.c (dynload_getfnc_fast_random_poll): Ditto.
+ (dynload_getfnc_gather_random): Ditto.
+ * rndlinux.c (gather_random): Ditto.
+ * rndunix.c (gather_random): Ditto.
+
+Sat Dec 12 18:40:32 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dynload.c (SYMBOL_VERSION): New to cope with systems which need
+ underscores.
+
+ * rndunix.c: Rewrote large parts.
+
+Thu Dec 10 20:15:36 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dynload.c (load_extension): Increased needed verbosity level.
+
+ * random.c (fast_random_poll): Fall back to a default fast random
+ poll function.
+ (read_random_source): Always use the faked entropy gatherer if no
+ gather module is available.
+ * rndlinux.c (fast_poll): Removed.
+ * rndunix.c (fast_poll): Removed.
+
+Wed Nov 25 12:33:41 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-*.c: Removed.
+ * rndlinux.c: New.
+ * rndunix.c: New.
+ * random.c: Restructured the interface to the gather modules.
+ (initialize): Call constructor functions.
+ (read_random_source): Moved to here.
+ * dynload.c (dynload_getfnc_gather_random): New.
+ (dynload_getfnc_fast_random_poll): New.
+ (register_internal_cipher_extension): New.
+ (register_cipher_extension): Support of internal modules.
+
+Sun Nov 8 17:44:36 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c (read_random_source): Removed the assert.
+
+Mon Oct 19 18:34:30 1998 me,,, (wk@tobold)
+
+ * pubkey.c: Hack to allow us to give some info about RSA keys back.
+
+Thu Oct 15 11:47:57 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dynload.c: Support for DLD.
+
+Wed Oct 14 12:13:07 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c: Now uses names from configure for /dev/random.
+
+1998-10-10 SL Baur <steve@altair.xemacs.org>
+
+ * Makefile.am: Fix sed -O substitutions to catch -O6, etc.
+
+Tue Oct 6 10:06:32 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c (HAVE_GETTIMEOFDAY): Fixed (was ..GETTIMEOFTIME :-)
+ * rand-dummy.c (HAVE_GETTIMEOFDAY): Ditto.
+
+Mon Sep 28 13:23:09 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_digest): New.
+ (md_reset): New.
+
+Wed Sep 23 12:27:02 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c (TIGER_CONTEXT): Moved "buf" so that it is 64 bit
+ aligned (see the layout sketch below).
+
+Mon Sep 21 06:22:53 1998 Werner Koch (wk@(none))
+
+ * des.c: Some patches from Michael.
+
+Thu Sep 17 19:00:06 1998 Werner Koch (wk@(none))
+
+ * des.c: New file from Michael Roth <mroth@nessie.de>.
+
+Mon Sep 14 11:10:55 1998 Werner Koch (wk@(none))
+
+ * blowfish.c (bf_setkey): Niklas Hernaeus patch to detect weak keys.
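+
+ The alignment fix from the Sep 23 1998 entry, as an illustrative
+ layout (field names follow the entry; the exact struct in tiger.c
+ may differ):
+
+ #include <stdint.h>
+
+ typedef struct {
+   uint64_t a, b, c;        /* 64-bit state words come first ...      */
+   unsigned char buf[64];   /* ... so buf starts on an 8-byte
+                               boundary and block access stays aligned */
+   int count;
+   uint32_t nblocks;
+ } TIGER_CONTEXT;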
+
+Mon Sep 14 09:19:25 1998 Werner Koch (wk@(none))
+
+ * dynload.c (RTLD_NOW): Now defined to 1 if it is undefined.
+
+Mon Sep 7 17:04:33 1998 Werner Koch (wk@(none))
+
+ * Makefile.am: Fixes to allow a different build directory.
+
+Thu Aug 6 17:25:38 1998 Werner Koch,mobil,,, (wk@tobold)
+
+ * random.c (get_random_byte): Removed and changed all callers
+ to use get_random_bits().
+
+Mon Jul 27 10:30:22 1998 Werner Koch (wk@(none))
+
+ * cipher.c: Support for other blocksizes.
+ (cipher_get_blocksize): New.
+ * twofish.c: New.
+ * Makefile.am: Add twofish module.
+
+Mon Jul 13 21:30:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (read_pool): Simple alloc if secure_alloc is not set.
+ (get_random_bits): Ditto.
+
+Thu Jul 9 13:01:14 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dynload.c (load_extension): Function now bails out if
+ the program is run setuid (see the sketch below).
+
+Wed Jul 8 18:58:23 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (rmd160_hash_buffer): New.
+
+Thu Jul 2 10:50:30 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c (cipher_open): Algos >= 100 use standard CFB.
+
+Thu Jun 25 11:18:25 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * Makefile.am: Support for extensions.
+
+Thu Jun 18 12:09:38 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (mix_pool): Simpler handling for level 0.
+
+Mon Jun 15 14:40:48 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c: Removed from dist; will reappear as dynload module.
+
+Sat Jun 13 14:16:57 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * pubkey.c: Major changes to allow extensions. Changed the interface
+ of all public key ciphers and added the ability to load extensions
+ on demand.
+
+ * misc.c: Removed.
+
+Wed Jun 10 07:52:08 1998 Werner Koch,mobil,,, (wk@tobold)
+
+ * dynload.c: New.
+ * cipher.c: Major changes to allow extensions.
+
+Mon Jun 8 22:43:00 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: Major internal changes to support extensions.
+ * blowfish.c (blowfish_get_info): New; made all internal
+ functions static and changed the header.
+ * cast5.c (cast5_get_info): Likewise.
+
+Mon Jun 8 12:27:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c (transform): Fix for big endian.
+
+ * cipher.c (do_cfb_decrypt): Big endian fix.
+
+Fri May 22 07:30:39 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_get_oid): Add a new one for TIGER.
+
+Thu May 21 13:24:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: Add support for a dummy cipher.
+
+Thu May 14 15:40:36 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (transform): Fixed sigbus - I should rather
+ add Christian von Roques's new implementation of rmd160_write.
+
+Fri May 8 18:07:44 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-internal.h, rand-unix.c, rand-w32.c, rand-dummy.c: New.
+ * random.c: Moved system-specific functions to rand-****.c.
+
+Fri May 8 14:01:17 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (fast_random_poll): Add call to gethrtime.
+
+Tue May 5 21:28:55 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * elgamal.c (elg_generate): Choosing x was not correct; it could
+ yield 6 bytes which are not from the random pool, tsss, tsss...
+
+Tue May 5 14:09:06 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * primegen.c (generate_elg_prime): Add arg mode, changed all
+ callers and implemented mode 1.
+
+Mon Apr 27 14:41:58 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c (cipher_get_keylen): New.
+
+Sun Apr 26 14:44:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c, tiger.h: New.
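+
+ The setuid guard from the Jul 9 1998 entry, as a minimal sketch
+ (the real load_extension does much more; this shows only the
+ check, with an illustrative helper name):
+
+ #include <unistd.h>
+
+ static int
+ extension_loading_allowed (void)
+ {
+   /* Refuse to load user-supplied modules when running setuid:
+      dlopen'ing attacker-controlled code would run it with the
+      elevated privileges.  */
+   if (getuid () != geteuid ())
+     return 0;
+   return 1;
+ }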
+
+Wed Apr 8 14:57:11 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * misc.c (check_pubkey_algo2): New.
+
+Tue Apr 7 18:46:49 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: New.
+ * misc.c (check_cipher_algo): Moved to cipher.c.
+ * cast5.c: Moved many functions to cipher.c.
+ * blowfish.c: Likewise.
+
+Sat Apr 4 19:52:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cast5.c: Implemented and tested.
+
+Wed Apr 1 16:38:27 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * elgamal.c (elg_generate): Faster generation of x in some cases.
+
+Thu Mar 19 13:54:48 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * blowfish.c (blowfish_decode_cfb): Changed XOR operation.
+ (blowfish_encode_cfb): Ditto.
+
+Thu Mar 12 14:04:05 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * sha1.c (transform): Rewrote.
+
+ * blowfish.c (encrypt): Unrolled for rounds == 16.
+ (decrypt): Ditto.
+
+Tue Mar 10 16:32:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (transform): Unrolled the loop.
+
+Tue Mar 10 13:05:14 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (read_pool): Add pool_balance stuff.
+ (get_random_bits): New.
+
+ * elgamal.c (elg_generate): Now uses get_random_bits to generate x.
+
+Tue Mar 10 11:33:51 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_digest_length): New.
+
+Tue Mar 10 11:27:41 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dsa.c (dsa_verify): Works.
+
+Mon Mar 9 12:59:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dsa.c, dsa.h: Removed some unused code.
+
+Wed Mar 4 10:39:22 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_open): Add call to fast_random_poll.
+ * blowfish.c (blowfish_setkey): Ditto.
+
+Tue Mar 3 13:32:54 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (rmd160_mixblock): New.
+ * random.c: Restructured to start with a new RNG implementation.
+ * random.h: New.
+
+Mon Mar 2 19:21:46 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * gost.c, gost.h: Removed because they only contained trash.
+
+Sun Mar 1 16:42:29 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (fill_buffer): Removed error message if n == -1.
+
+Fri Feb 27 16:39:34 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_enable): No init if called twice.
+
+Thu Feb 26 07:57:02 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * primegen.c (generate_elg_prime): Changed the progress printing.
+ (gen_prime): Ditto.
+
+Tue Feb 24 12:28:42 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md5.c, md5.h: Replaced by a modified version of md5.c from
+ GNU textutils 1.22.
+
+Wed Feb 18 14:08:30 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c, md.h: New debugging support.
+
+Mon Feb 16 10:08:47 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * misc.c (cipher_algo_to_string): New.
+ (pubkey_algo_to_string): New.
+ (digest_algo_to_string): New.
+
+
+ Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This file is free software; as a special exception the author gives
+ unlimited permission to copy and/or distribute it, with or without
+ modifications, as long as this notice is preserved.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
+ implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ +Local Variables: +buffer-read-only: t +End: diff --git a/libotr/libgcrypt-1.8.7/cipher/Makefile.am b/libotr/libgcrypt-1.8.7/cipher/Makefile.am new file mode 100644 index 0000000..95c4510 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/Makefile.am @@ -0,0 +1,130 @@ +# Makefile for cipher modules +# Copyright (C) 1998, 1999, 2000, 2001, 2002, +# 2003, 2009 Free Software Foundation, Inc. +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, see <http://www.gnu.org/licenses/>. + +# Process this file with automake to produce Makefile.in + +# Need to include ../src in addition to top_srcdir because gcrypt.h is +# a built header. +AM_CPPFLAGS = -I../src -I$(top_srcdir)/src +AM_CFLAGS = $(GPG_ERROR_CFLAGS) + +AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) + +EXTRA_DIST = gost-s-box.c + +CLEANFILES = gost-s-box +DISTCLEANFILES = gost-sb.h + +noinst_LTLIBRARIES = libcipher.la + +GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ + @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ + +libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) +libcipher_la_LIBADD = $(GCRYPT_MODULES) + +libcipher_la_SOURCES = \ +cipher.c cipher-internal.h \ +cipher-cbc.c cipher-cfb.c cipher-ofb.c cipher-ctr.c cipher-aeswrap.c \ +cipher-ccm.c cipher-cmac.c cipher-gcm.c cipher-gcm-intel-pclmul.c \ + cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ +cipher-poly1305.c cipher-ocb.c cipher-xts.c \ +cipher-selftest.c cipher-selftest.h \ +pubkey.c pubkey-internal.h pubkey-util.c \ +md.c \ +mac.c mac-internal.h \ +mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ +poly1305.c poly1305-internal.h \ +kdf.c kdf-internal.h \ +hmac-tests.c \ +bithelp.h \ +bufhelp.h \ +primegen.c \ +hash-common.c hash-common.h \ +dsa-common.c rsa-common.c \ +sha1.h + +EXTRA_libcipher_la_SOURCES = \ +arcfour.c arcfour-amd64.S \ +blowfish.c blowfish-amd64.S blowfish-arm.S \ +cast5.c cast5-amd64.S cast5-arm.S \ +chacha20.c chacha20-sse2-amd64.S chacha20-ssse3-amd64.S chacha20-avx2-amd64.S \ + chacha20-armv7-neon.S \ +crc.c \ + crc-intel-pclmul.c \ +des.c des-amd64.S \ +dsa.c \ +elgamal.c \ +ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ +ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \ +idea.c \ +gost28147.c gost.h \ +gostr3411-94.c \ +md4.c \ +md5.c \ +poly1305-sse2-amd64.S poly1305-avx2-amd64.S poly1305-armv7-neon.S \ +rijndael.c rijndael-internal.h rijndael-tables.h rijndael-aesni.c \ + rijndael-padlock.c rijndael-amd64.S rijndael-arm.S \ + rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ + rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S rijndael-armv8-aarch64-ce.S \ + rijndael-aarch64.S \ +rmd160.c \ +rsa.c \ +salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ +scrypt.c \ +seed.c \ +serpent.c serpent-sse2-amd64.S serpent-avx2-amd64.S serpent-armv7-neon.S \ +sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ + sha1-armv7-neon.S sha1-armv8-aarch32-ce.S sha1-armv8-aarch64-ce.S \ +sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S sha256-avx2-bmi2-amd64.S \ + 
sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ +sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S sha512-avx2-bmi2-amd64.S \ + sha512-armv7-neon.S sha512-arm.S \ +keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ +stribog.c \ +tiger.c \ +whirlpool.c whirlpool-sse2-amd64.S \ +twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ + twofish-avx2-amd64.S \ +rfc2268.c \ +camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ + camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \ +blake2.c + +gost28147.lo: gost-sb.h +gost-sb.h: gost-s-box + ./gost-s-box $@ + +gost-s-box: gost-s-box.c + $(CC_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c + + +if ENABLE_O_FLAG_MUNGING +o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g' +else +o_flag_munging = cat +endif + + +# We need to lower the optimization for this module. +tiger.o: $(srcdir)/tiger.c + `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` + +tiger.lo: $(srcdir)/tiger.c + `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` diff --git a/libotr/libgcrypt-1.8.7/cipher/Makefile.in b/libotr/libgcrypt-1.8.7/cipher/Makefile.in new file mode 100644 index 0000000..51dac21 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/Makefile.in @@ -0,0 +1,1247 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for cipher modules +# Copyright (C) 1998, 1999, 2000, 2001, 2002, +# 2003, 2009 Free Software Foundation, Inc. +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, see <http://www.gnu.org/licenses/>. + +# Process this file with automake to produce Makefile.in + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = cipher +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/gpg-error.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/noexecstack.m4 $(top_srcdir)/m4/onceonly.m4 \ + $(top_srcdir)/m4/socklen.m4 $(top_srcdir)/m4/sys_socket_h.m4 \ + $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +am__DEPENDENCIES_1 = +am_libcipher_la_OBJECTS = cipher.lo cipher-cbc.lo cipher-cfb.lo \ + cipher-ofb.lo cipher-ctr.lo cipher-aeswrap.lo cipher-ccm.lo \ + cipher-cmac.lo cipher-gcm.lo cipher-gcm-intel-pclmul.lo \ + cipher-gcm-armv8-aarch32-ce.lo cipher-gcm-armv8-aarch64-ce.lo \ + cipher-poly1305.lo cipher-ocb.lo cipher-xts.lo \ + cipher-selftest.lo pubkey.lo pubkey-util.lo md.lo mac.lo \ + mac-hmac.lo mac-cmac.lo mac-gmac.lo mac-poly1305.lo \ + poly1305.lo kdf.lo hmac-tests.lo primegen.lo hash-common.lo \ + dsa-common.lo rsa-common.lo +libcipher_la_OBJECTS = $(am_libcipher_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = 
@ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/arcfour-amd64.Plo \ + ./$(DEPDIR)/arcfour.Plo ./$(DEPDIR)/blake2.Plo \ + ./$(DEPDIR)/blowfish-amd64.Plo ./$(DEPDIR)/blowfish-arm.Plo \ + ./$(DEPDIR)/blowfish.Plo ./$(DEPDIR)/camellia-aarch64.Plo \ + ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo \ + ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo \ + ./$(DEPDIR)/camellia-arm.Plo ./$(DEPDIR)/camellia-glue.Plo \ + ./$(DEPDIR)/camellia.Plo ./$(DEPDIR)/cast5-amd64.Plo \ + ./$(DEPDIR)/cast5-arm.Plo ./$(DEPDIR)/cast5.Plo \ + ./$(DEPDIR)/chacha20-armv7-neon.Plo \ + ./$(DEPDIR)/chacha20-avx2-amd64.Plo \ + ./$(DEPDIR)/chacha20-sse2-amd64.Plo \ + ./$(DEPDIR)/chacha20-ssse3-amd64.Plo ./$(DEPDIR)/chacha20.Plo \ + ./$(DEPDIR)/cipher-aeswrap.Plo ./$(DEPDIR)/cipher-cbc.Plo \ + ./$(DEPDIR)/cipher-ccm.Plo ./$(DEPDIR)/cipher-cfb.Plo \ + ./$(DEPDIR)/cipher-cmac.Plo ./$(DEPDIR)/cipher-ctr.Plo \ + ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo \ + ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo \ + ./$(DEPDIR)/cipher-gcm.Plo ./$(DEPDIR)/cipher-ocb.Plo \ + ./$(DEPDIR)/cipher-ofb.Plo ./$(DEPDIR)/cipher-poly1305.Plo \ + ./$(DEPDIR)/cipher-selftest.Plo ./$(DEPDIR)/cipher-xts.Plo \ + ./$(DEPDIR)/cipher.Plo ./$(DEPDIR)/crc-intel-pclmul.Plo \ + ./$(DEPDIR)/crc.Plo ./$(DEPDIR)/des-amd64.Plo \ + ./$(DEPDIR)/des.Plo ./$(DEPDIR)/dsa-common.Plo \ + ./$(DEPDIR)/dsa.Plo ./$(DEPDIR)/ecc-curves.Plo \ + ./$(DEPDIR)/ecc-ecdsa.Plo ./$(DEPDIR)/ecc-eddsa.Plo \ + ./$(DEPDIR)/ecc-gost.Plo ./$(DEPDIR)/ecc-misc.Plo \ + ./$(DEPDIR)/ecc.Plo ./$(DEPDIR)/elgamal.Plo \ + ./$(DEPDIR)/gost28147.Plo ./$(DEPDIR)/gostr3411-94.Plo \ + ./$(DEPDIR)/hash-common.Plo ./$(DEPDIR)/hmac-tests.Plo \ + ./$(DEPDIR)/idea.Plo ./$(DEPDIR)/kdf.Plo \ + ./$(DEPDIR)/keccak-armv7-neon.Plo ./$(DEPDIR)/keccak.Plo \ + ./$(DEPDIR)/mac-cmac.Plo ./$(DEPDIR)/mac-gmac.Plo \ + ./$(DEPDIR)/mac-hmac.Plo ./$(DEPDIR)/mac-poly1305.Plo \ + ./$(DEPDIR)/mac.Plo ./$(DEPDIR)/md.Plo ./$(DEPDIR)/md4.Plo \ + ./$(DEPDIR)/md5.Plo ./$(DEPDIR)/poly1305-armv7-neon.Plo \ + ./$(DEPDIR)/poly1305-avx2-amd64.Plo \ + ./$(DEPDIR)/poly1305-sse2-amd64.Plo ./$(DEPDIR)/poly1305.Plo \ + ./$(DEPDIR)/primegen.Plo ./$(DEPDIR)/pubkey-util.Plo \ + ./$(DEPDIR)/pubkey.Plo ./$(DEPDIR)/rfc2268.Plo \ + ./$(DEPDIR)/rijndael-aarch64.Plo \ + ./$(DEPDIR)/rijndael-aesni.Plo ./$(DEPDIR)/rijndael-amd64.Plo \ + ./$(DEPDIR)/rijndael-arm.Plo \ + ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo \ + ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/rijndael-armv8-ce.Plo \ + ./$(DEPDIR)/rijndael-padlock.Plo \ + ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo \ + ./$(DEPDIR)/rijndael-ssse3-amd64.Plo ./$(DEPDIR)/rijndael.Plo \ + ./$(DEPDIR)/rmd160.Plo ./$(DEPDIR)/rsa-common.Plo \ + ./$(DEPDIR)/rsa.Plo ./$(DEPDIR)/salsa20-amd64.Plo \ + ./$(DEPDIR)/salsa20-armv7-neon.Plo ./$(DEPDIR)/salsa20.Plo \ + ./$(DEPDIR)/scrypt.Plo ./$(DEPDIR)/seed.Plo \ + ./$(DEPDIR)/serpent-armv7-neon.Plo \ + ./$(DEPDIR)/serpent-avx2-amd64.Plo \ + ./$(DEPDIR)/serpent-sse2-amd64.Plo ./$(DEPDIR)/serpent.Plo \ + ./$(DEPDIR)/sha1-armv7-neon.Plo \ + ./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo \ + ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/sha1-avx-amd64.Plo \ + ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo \ + ./$(DEPDIR)/sha1-ssse3-amd64.Plo ./$(DEPDIR)/sha1.Plo \ + ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo \ + ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/sha256-avx-amd64.Plo \ + 
./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo \ + ./$(DEPDIR)/sha256-ssse3-amd64.Plo ./$(DEPDIR)/sha256.Plo \ + ./$(DEPDIR)/sha512-arm.Plo ./$(DEPDIR)/sha512-armv7-neon.Plo \ + ./$(DEPDIR)/sha512-avx-amd64.Plo \ + ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo \ + ./$(DEPDIR)/sha512-ssse3-amd64.Plo ./$(DEPDIR)/sha512.Plo \ + ./$(DEPDIR)/stribog.Plo ./$(DEPDIR)/tiger.Plo \ + ./$(DEPDIR)/twofish-aarch64.Plo ./$(DEPDIR)/twofish-amd64.Plo \ + ./$(DEPDIR)/twofish-arm.Plo ./$(DEPDIR)/twofish-avx2-amd64.Plo \ + ./$(DEPDIR)/twofish.Plo ./$(DEPDIR)/whirlpool-sse2-amd64.Plo \ + ./$(DEPDIR)/whirlpool.Plo +am__mv = mv -f +CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) +LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CCASFLAGS) $(CCASFLAGS) +AM_V_CPPAS = $(am__v_CPPAS_@AM_V@) +am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@) +am__v_CPPAS_0 = @echo " CPPAS " $@; +am__v_CPPAS_1 = +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libcipher_la_SOURCES) $(EXTRA_libcipher_la_SOURCES) +DIST_SOURCES = $(libcipher_la_SOURCES) $(EXTRA_libcipher_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BUILD_FILEVERSION = @BUILD_FILEVERSION@ +BUILD_REVISION = @BUILD_REVISION@ +BUILD_TIMESTAMP = @BUILD_TIMESTAMP@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CC_FOR_BUILD = @CC_FOR_BUILD@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FALLBACK_SOCKLEN_T = @FALLBACK_SOCKLEN_T@ +FGREP = @FGREP@ +GCRYPT_CIPHERS = @GCRYPT_CIPHERS@ +GCRYPT_DIGESTS = @GCRYPT_DIGESTS@ +GCRYPT_HWF_MODULES = @GCRYPT_HWF_MODULES@ +GCRYPT_KDFS = @GCRYPT_KDFS@ +GCRYPT_PUBKEY_CIPHERS = @GCRYPT_PUBKEY_CIPHERS@ +GCRYPT_RANDOM = @GCRYPT_RANDOM@ +GPG_ERROR_CFLAGS = @GPG_ERROR_CFLAGS@ +GPG_ERROR_CONFIG = @GPG_ERROR_CONFIG@ +GPG_ERROR_LIBS = @GPG_ERROR_LIBS@ +GPG_ERROR_MT_CFLAGS = @GPG_ERROR_MT_CFLAGS@ +GPG_ERROR_MT_LIBS = @GPG_ERROR_MT_LIBS@ +GREP = @GREP@ +INSERT_SYS_SELECT_H = @INSERT_SYS_SELECT_H@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBGCRYPT_CIPHERS = @LIBGCRYPT_CIPHERS@ +LIBGCRYPT_CONFIG_API_VERSION = @LIBGCRYPT_CONFIG_API_VERSION@ +LIBGCRYPT_CONFIG_CFLAGS = @LIBGCRYPT_CONFIG_CFLAGS@ +LIBGCRYPT_CONFIG_HOST = @LIBGCRYPT_CONFIG_HOST@ +LIBGCRYPT_CONFIG_LIBS = @LIBGCRYPT_CONFIG_LIBS@ +LIBGCRYPT_DIGESTS = @LIBGCRYPT_DIGESTS@ +LIBGCRYPT_LT_AGE = @LIBGCRYPT_LT_AGE@ +LIBGCRYPT_LT_CURRENT = @LIBGCRYPT_LT_CURRENT@ +LIBGCRYPT_LT_REVISION = @LIBGCRYPT_LT_REVISION@ +LIBGCRYPT_PUBKEY_CIPHERS = @LIBGCRYPT_PUBKEY_CIPHERS@ +LIBGCRYPT_THREAD_MODULES = @LIBGCRYPT_THREAD_MODULES@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPI_SFLAGS = @MPI_SFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOEXECSTACK_FLAGS = @NOEXECSTACK_FLAGS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PTH_CFLAGS = @PTH_CFLAGS@ +PTH_CONFIG = @PTH_CONFIG@ +PTH_LIBS = @PTH_LIBS@ +RANLIB = @RANLIB@ +RC = @RC@ +RUN_LARGE_DATA_TESTS = @RUN_LARGE_DATA_TESTS@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +SYSROOT = @SYSROOT@ +SYS_SOCKET_H = @SYS_SOCKET_H@ +VERSION = @VERSION@ +VERSION_NUMBER = @VERSION_NUMBER@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ 
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +emacs_local_vars_begin = @emacs_local_vars_begin@ +emacs_local_vars_end = @emacs_local_vars_end@ +emacs_local_vars_read_only = @emacs_local_vars_read_only@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +# Need to include ../src in addition to top_srcdir because gcrypt.h is +# a built header. +AM_CPPFLAGS = -I../src -I$(top_srcdir)/src +AM_CFLAGS = $(GPG_ERROR_CFLAGS) +AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) +EXTRA_DIST = gost-s-box.c +CLEANFILES = gost-s-box +DISTCLEANFILES = gost-sb.h +noinst_LTLIBRARIES = libcipher.la +GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ + @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ + +libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) +libcipher_la_LIBADD = $(GCRYPT_MODULES) +libcipher_la_SOURCES = \ +cipher.c cipher-internal.h \ +cipher-cbc.c cipher-cfb.c cipher-ofb.c cipher-ctr.c cipher-aeswrap.c \ +cipher-ccm.c cipher-cmac.c cipher-gcm.c cipher-gcm-intel-pclmul.c \ + cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ +cipher-poly1305.c cipher-ocb.c cipher-xts.c \ +cipher-selftest.c cipher-selftest.h \ +pubkey.c pubkey-internal.h pubkey-util.c \ +md.c \ +mac.c mac-internal.h \ +mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ +poly1305.c poly1305-internal.h \ +kdf.c kdf-internal.h \ +hmac-tests.c \ +bithelp.h \ +bufhelp.h \ +primegen.c \ +hash-common.c hash-common.h \ +dsa-common.c rsa-common.c \ +sha1.h + +EXTRA_libcipher_la_SOURCES = \ +arcfour.c arcfour-amd64.S \ +blowfish.c blowfish-amd64.S blowfish-arm.S \ +cast5.c cast5-amd64.S cast5-arm.S \ +chacha20.c chacha20-sse2-amd64.S chacha20-ssse3-amd64.S chacha20-avx2-amd64.S \ + chacha20-armv7-neon.S \ +crc.c \ + crc-intel-pclmul.c \ +des.c des-amd64.S \ +dsa.c \ +elgamal.c \ +ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ +ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \ +idea.c \ +gost28147.c gost.h \ +gostr3411-94.c \ +md4.c \ +md5.c \ +poly1305-sse2-amd64.S poly1305-avx2-amd64.S poly1305-armv7-neon.S \ +rijndael.c rijndael-internal.h rijndael-tables.h rijndael-aesni.c \ + rijndael-padlock.c rijndael-amd64.S rijndael-arm.S \ + rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ + rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S rijndael-armv8-aarch64-ce.S \ + rijndael-aarch64.S \ +rmd160.c \ +rsa.c \ +salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ +scrypt.c \ +seed.c \ +serpent.c serpent-sse2-amd64.S serpent-avx2-amd64.S serpent-armv7-neon.S \ +sha1.c sha1-ssse3-amd64.S 
sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ + sha1-armv7-neon.S sha1-armv8-aarch32-ce.S sha1-armv8-aarch64-ce.S \ +sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S sha256-avx2-bmi2-amd64.S \ + sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ +sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S sha512-avx2-bmi2-amd64.S \ + sha512-armv7-neon.S sha512-arm.S \ +keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ +stribog.c \ +tiger.c \ +whirlpool.c whirlpool-sse2-amd64.S \ +twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ + twofish-avx2-amd64.S \ +rfc2268.c \ +camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ + camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \ +blake2.c + +@ENABLE_O_FLAG_MUNGING_FALSE@o_flag_munging = cat +@ENABLE_O_FLAG_MUNGING_TRUE@o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g' +all: all-am + +.SUFFIXES: +.SUFFIXES: .S .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cipher/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu cipher/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libcipher.la: $(libcipher_la_OBJECTS) $(libcipher_la_DEPENDENCIES) $(EXTRA_libcipher_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libcipher_la_OBJECTS) $(libcipher_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arcfour-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arcfour.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blake2.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-aarch64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/camellia-aesni-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-glue.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-sse2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-ssse3-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-aeswrap.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cbc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ccm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cfb.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cmac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ctr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ocb.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ofb.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-poly1305.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-selftest.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-xts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-intel-pclmul.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/des-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/des.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dsa-common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dsa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-curves.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-ecdsa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-eddsa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-gost.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-misc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elgamal.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gost28147.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gostr3411-94.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash-common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hmac-tests.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/idea.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kdf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keccak-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keccak.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-cmac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-gmac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-hmac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-poly1305.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md4.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poly1305-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poly1305-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poly1305-sse2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poly1305.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/primegen.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pubkey-util.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pubkey.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rfc2268.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-aarch64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-aesni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/rijndael-armv8-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-padlock.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ssse3-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rmd160.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rsa-common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rsa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scrypt.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/seed.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-sse2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-ssse3-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-ssse3-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-ssse3-amd64.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stribog.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tiger.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-aarch64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/whirlpool-sse2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/whirlpool.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.S.o: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $< + +.S.obj: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.S.lo: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $< + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/arcfour-amd64.Plo + -rm -f ./$(DEPDIR)/arcfour.Plo + -rm -f ./$(DEPDIR)/blake2.Plo + -rm -f ./$(DEPDIR)/blowfish-amd64.Plo + -rm -f ./$(DEPDIR)/blowfish-arm.Plo + -rm -f ./$(DEPDIR)/blowfish.Plo + -rm -f ./$(DEPDIR)/camellia-aarch64.Plo + -rm -f ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo + -rm -f ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/camellia-arm.Plo + -rm -f ./$(DEPDIR)/camellia-glue.Plo + -rm -f ./$(DEPDIR)/camellia.Plo + -rm -f ./$(DEPDIR)/cast5-amd64.Plo + -rm -f ./$(DEPDIR)/cast5-arm.Plo + -rm -f ./$(DEPDIR)/cast5.Plo + -rm -f ./$(DEPDIR)/chacha20-armv7-neon.Plo + -rm -f ./$(DEPDIR)/chacha20-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/chacha20-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/chacha20-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/chacha20.Plo + -rm -f ./$(DEPDIR)/cipher-aeswrap.Plo + -rm -f ./$(DEPDIR)/cipher-cbc.Plo + -rm -f ./$(DEPDIR)/cipher-ccm.Plo + -rm -f ./$(DEPDIR)/cipher-cfb.Plo + -rm -f ./$(DEPDIR)/cipher-cmac.Plo + -rm -f ./$(DEPDIR)/cipher-ctr.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo + -rm -f ./$(DEPDIR)/cipher-gcm.Plo + -rm -f ./$(DEPDIR)/cipher-ocb.Plo + -rm -f ./$(DEPDIR)/cipher-ofb.Plo + -rm -f ./$(DEPDIR)/cipher-poly1305.Plo + -rm -f ./$(DEPDIR)/cipher-selftest.Plo + -rm -f ./$(DEPDIR)/cipher-xts.Plo + -rm -f ./$(DEPDIR)/cipher.Plo + -rm -f ./$(DEPDIR)/crc-intel-pclmul.Plo + -rm -f ./$(DEPDIR)/crc.Plo + -rm -f ./$(DEPDIR)/des-amd64.Plo + -rm -f ./$(DEPDIR)/des.Plo + -rm -f ./$(DEPDIR)/dsa-common.Plo + -rm -f ./$(DEPDIR)/dsa.Plo + -rm -f ./$(DEPDIR)/ecc-curves.Plo + -rm -f ./$(DEPDIR)/ecc-ecdsa.Plo + -rm -f ./$(DEPDIR)/ecc-eddsa.Plo + -rm -f ./$(DEPDIR)/ecc-gost.Plo + -rm -f ./$(DEPDIR)/ecc-misc.Plo + -rm -f ./$(DEPDIR)/ecc.Plo + -rm -f ./$(DEPDIR)/elgamal.Plo + -rm -f ./$(DEPDIR)/gost28147.Plo + -rm -f ./$(DEPDIR)/gostr3411-94.Plo + -rm -f ./$(DEPDIR)/hash-common.Plo + -rm -f 
./$(DEPDIR)/hmac-tests.Plo + -rm -f ./$(DEPDIR)/idea.Plo + -rm -f ./$(DEPDIR)/kdf.Plo + -rm -f ./$(DEPDIR)/keccak-armv7-neon.Plo + -rm -f ./$(DEPDIR)/keccak.Plo + -rm -f ./$(DEPDIR)/mac-cmac.Plo + -rm -f ./$(DEPDIR)/mac-gmac.Plo + -rm -f ./$(DEPDIR)/mac-hmac.Plo + -rm -f ./$(DEPDIR)/mac-poly1305.Plo + -rm -f ./$(DEPDIR)/mac.Plo + -rm -f ./$(DEPDIR)/md.Plo + -rm -f ./$(DEPDIR)/md4.Plo + -rm -f ./$(DEPDIR)/md5.Plo + -rm -f ./$(DEPDIR)/poly1305-armv7-neon.Plo + -rm -f ./$(DEPDIR)/poly1305-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/poly1305-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/poly1305.Plo + -rm -f ./$(DEPDIR)/primegen.Plo + -rm -f ./$(DEPDIR)/pubkey-util.Plo + -rm -f ./$(DEPDIR)/pubkey.Plo + -rm -f ./$(DEPDIR)/rfc2268.Plo + -rm -f ./$(DEPDIR)/rijndael-aarch64.Plo + -rm -f ./$(DEPDIR)/rijndael-aesni.Plo + -rm -f ./$(DEPDIR)/rijndael-amd64.Plo + -rm -f ./$(DEPDIR)/rijndael-arm.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-padlock.Plo + -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo + -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/rijndael.Plo + -rm -f ./$(DEPDIR)/rmd160.Plo + -rm -f ./$(DEPDIR)/rsa-common.Plo + -rm -f ./$(DEPDIR)/rsa.Plo + -rm -f ./$(DEPDIR)/salsa20-amd64.Plo + -rm -f ./$(DEPDIR)/salsa20-armv7-neon.Plo + -rm -f ./$(DEPDIR)/salsa20.Plo + -rm -f ./$(DEPDIR)/scrypt.Plo + -rm -f ./$(DEPDIR)/seed.Plo + -rm -f ./$(DEPDIR)/serpent-armv7-neon.Plo + -rm -f ./$(DEPDIR)/serpent-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/serpent-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/serpent.Plo + -rm -f ./$(DEPDIR)/sha1-armv7-neon.Plo + -rm -f ./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/sha1-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha1.Plo + -rm -f ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/sha256-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha256-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha256.Plo + -rm -f ./$(DEPDIR)/sha512-arm.Plo + -rm -f ./$(DEPDIR)/sha512-armv7-neon.Plo + -rm -f ./$(DEPDIR)/sha512-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha512.Plo + -rm -f ./$(DEPDIR)/stribog.Plo + -rm -f ./$(DEPDIR)/tiger.Plo + -rm -f ./$(DEPDIR)/twofish-aarch64.Plo + -rm -f ./$(DEPDIR)/twofish-amd64.Plo + -rm -f ./$(DEPDIR)/twofish-arm.Plo + -rm -f ./$(DEPDIR)/twofish-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/twofish.Plo + -rm -f ./$(DEPDIR)/whirlpool-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/whirlpool.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/arcfour-amd64.Plo + -rm -f ./$(DEPDIR)/arcfour.Plo + -rm -f ./$(DEPDIR)/blake2.Plo + -rm -f ./$(DEPDIR)/blowfish-amd64.Plo + -rm -f ./$(DEPDIR)/blowfish-arm.Plo + -rm -f ./$(DEPDIR)/blowfish.Plo + -rm -f 
./$(DEPDIR)/camellia-aarch64.Plo + -rm -f ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo + -rm -f ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/camellia-arm.Plo + -rm -f ./$(DEPDIR)/camellia-glue.Plo + -rm -f ./$(DEPDIR)/camellia.Plo + -rm -f ./$(DEPDIR)/cast5-amd64.Plo + -rm -f ./$(DEPDIR)/cast5-arm.Plo + -rm -f ./$(DEPDIR)/cast5.Plo + -rm -f ./$(DEPDIR)/chacha20-armv7-neon.Plo + -rm -f ./$(DEPDIR)/chacha20-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/chacha20-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/chacha20-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/chacha20.Plo + -rm -f ./$(DEPDIR)/cipher-aeswrap.Plo + -rm -f ./$(DEPDIR)/cipher-cbc.Plo + -rm -f ./$(DEPDIR)/cipher-ccm.Plo + -rm -f ./$(DEPDIR)/cipher-cfb.Plo + -rm -f ./$(DEPDIR)/cipher-cmac.Plo + -rm -f ./$(DEPDIR)/cipher-ctr.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo + -rm -f ./$(DEPDIR)/cipher-gcm.Plo + -rm -f ./$(DEPDIR)/cipher-ocb.Plo + -rm -f ./$(DEPDIR)/cipher-ofb.Plo + -rm -f ./$(DEPDIR)/cipher-poly1305.Plo + -rm -f ./$(DEPDIR)/cipher-selftest.Plo + -rm -f ./$(DEPDIR)/cipher-xts.Plo + -rm -f ./$(DEPDIR)/cipher.Plo + -rm -f ./$(DEPDIR)/crc-intel-pclmul.Plo + -rm -f ./$(DEPDIR)/crc.Plo + -rm -f ./$(DEPDIR)/des-amd64.Plo + -rm -f ./$(DEPDIR)/des.Plo + -rm -f ./$(DEPDIR)/dsa-common.Plo + -rm -f ./$(DEPDIR)/dsa.Plo + -rm -f ./$(DEPDIR)/ecc-curves.Plo + -rm -f ./$(DEPDIR)/ecc-ecdsa.Plo + -rm -f ./$(DEPDIR)/ecc-eddsa.Plo + -rm -f ./$(DEPDIR)/ecc-gost.Plo + -rm -f ./$(DEPDIR)/ecc-misc.Plo + -rm -f ./$(DEPDIR)/ecc.Plo + -rm -f ./$(DEPDIR)/elgamal.Plo + -rm -f ./$(DEPDIR)/gost28147.Plo + -rm -f ./$(DEPDIR)/gostr3411-94.Plo + -rm -f ./$(DEPDIR)/hash-common.Plo + -rm -f ./$(DEPDIR)/hmac-tests.Plo + -rm -f ./$(DEPDIR)/idea.Plo + -rm -f ./$(DEPDIR)/kdf.Plo + -rm -f ./$(DEPDIR)/keccak-armv7-neon.Plo + -rm -f ./$(DEPDIR)/keccak.Plo + -rm -f ./$(DEPDIR)/mac-cmac.Plo + -rm -f ./$(DEPDIR)/mac-gmac.Plo + -rm -f ./$(DEPDIR)/mac-hmac.Plo + -rm -f ./$(DEPDIR)/mac-poly1305.Plo + -rm -f ./$(DEPDIR)/mac.Plo + -rm -f ./$(DEPDIR)/md.Plo + -rm -f ./$(DEPDIR)/md4.Plo + -rm -f ./$(DEPDIR)/md5.Plo + -rm -f ./$(DEPDIR)/poly1305-armv7-neon.Plo + -rm -f ./$(DEPDIR)/poly1305-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/poly1305-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/poly1305.Plo + -rm -f ./$(DEPDIR)/primegen.Plo + -rm -f ./$(DEPDIR)/pubkey-util.Plo + -rm -f ./$(DEPDIR)/pubkey.Plo + -rm -f ./$(DEPDIR)/rfc2268.Plo + -rm -f ./$(DEPDIR)/rijndael-aarch64.Plo + -rm -f ./$(DEPDIR)/rijndael-aesni.Plo + -rm -f ./$(DEPDIR)/rijndael-amd64.Plo + -rm -f ./$(DEPDIR)/rijndael-arm.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-padlock.Plo + -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo + -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/rijndael.Plo + -rm -f ./$(DEPDIR)/rmd160.Plo + -rm -f ./$(DEPDIR)/rsa-common.Plo + -rm -f ./$(DEPDIR)/rsa.Plo + -rm -f ./$(DEPDIR)/salsa20-amd64.Plo + -rm -f ./$(DEPDIR)/salsa20-armv7-neon.Plo + -rm -f ./$(DEPDIR)/salsa20.Plo + -rm -f ./$(DEPDIR)/scrypt.Plo + -rm -f ./$(DEPDIR)/seed.Plo + -rm -f ./$(DEPDIR)/serpent-armv7-neon.Plo + -rm -f ./$(DEPDIR)/serpent-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/serpent-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/serpent.Plo + -rm -f ./$(DEPDIR)/sha1-armv7-neon.Plo + -rm -f ./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo + -rm -f 
./$(DEPDIR)/sha1-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha1.Plo + -rm -f ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/sha256-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha256-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha256.Plo + -rm -f ./$(DEPDIR)/sha512-arm.Plo + -rm -f ./$(DEPDIR)/sha512-armv7-neon.Plo + -rm -f ./$(DEPDIR)/sha512-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha512.Plo + -rm -f ./$(DEPDIR)/stribog.Plo + -rm -f ./$(DEPDIR)/tiger.Plo + -rm -f ./$(DEPDIR)/twofish-aarch64.Plo + -rm -f ./$(DEPDIR)/twofish-amd64.Plo + -rm -f ./$(DEPDIR)/twofish-arm.Plo + -rm -f ./$(DEPDIR)/twofish-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/twofish.Plo + -rm -f ./$(DEPDIR)/whirlpool-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/whirlpool.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +gost28147.lo: gost-sb.h +gost-sb.h: gost-s-box + ./gost-s-box $@ + +gost-s-box: gost-s-box.c + $(CC_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c + +# We need to lower the optimization for this module. +tiger.o: $(srcdir)/tiger.c + `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` + +tiger.lo: $(srcdir)/tiger.c + `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/libotr/libgcrypt-1.8.7/cipher/arcfour-amd64.S b/libotr/libgcrypt-1.8.7/cipher/arcfour-amd64.S new file mode 100644 index 0000000..2e52ea0 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/arcfour-amd64.S @@ -0,0 +1,104 @@ +/* +** RC4 implementation optimized for AMD64. +** +** Author: Marc Bevand <bevand_m (at) epita.fr> +** Licence: I hereby disclaim the copyright on this code and place it +** in the public domain. +** +** The throughput achieved by this code is about 320 MBytes/sec, on +** a 1.8 GHz AMD Opteron (rev C0) processor. +** +** 2013/12/20 <jussi.kivilinna@iki.fi>: +** - Integrated to libgcrypt +** - 4.18 cycles/byte on Intel i5-4570 +*/ + +#ifdef __x86_64__ +#include <config.h> +#if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) 
__VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +.text +.align 16 +.globl _gcry_arcfour_amd64 +ELF(.type _gcry_arcfour_amd64,@function) +_gcry_arcfour_amd64: + push %rbp + push %rbx + mov %rdi, %rbp # key = ARG(key) + mov %rsi, %rbx # rbx = ARG(len) + mov %rdx, %rsi # in = ARG(in) + mov %rcx, %rdi # out = ARG(out) + mov (4*256)(%rbp), %ecx # x = key->x + mov (4*256+4)(%rbp),%edx # y = key->y + inc %rcx # x++ + and $255, %rcx # x &= 0xff + lea -8(%rbx,%rsi), %rbx # rbx = in+len-8 + mov %rbx, %r9 # tmp = in+len-8 + mov (%rbp,%rcx,4), %eax # tx = d[x] + cmp %rsi, %rbx # cmp in with in+len-8 + jl .Lend # jump if (in+len-8 < in) + +.Lstart: + add $8, %rsi # increment in + add $8, %rdi # increment out + + # generate the next 8 bytes of the rc4 stream into %r8 + mov $8, %r11 # byte counter +1: add %al, %dl # y += tx + mov (%rbp,%rdx,4), %ebx # ty = d[y] + mov %ebx, (%rbp,%rcx,4) # d[x] = ty + add %al, %bl # val = ty + tx + mov %eax, (%rbp,%rdx,4) # d[y] = tx + inc %cl # x++ (NEXT ROUND) + mov (%rbp,%rcx,4), %eax # tx = d[x] (NEXT ROUND) + shl $8, %r8 + movb (%rbp,%rbx,4), %r8b # val = d[val] + dec %r11b + jnz 1b + + # xor 8 bytes + bswap %r8 + xor -8(%rsi), %r8 + cmp %r9, %rsi # cmp in+len-8 with in + mov %r8, -8(%rdi) + jle .Lstart # jump if (in <= in+len-8) + +.Lend: + add $8, %r9 # tmp = in+len + + # handle the last bytes, one by one +1: cmp %rsi, %r9 # cmp in with in+len + jle .Lfinished # jump if (in+len <= in) + add %al, %dl # y += tx + mov (%rbp,%rdx,4), %ebx # ty = d[y] + mov %ebx, (%rbp,%rcx,4) # d[x] = ty + add %al, %bl # val = ty + tx + mov %eax, (%rbp,%rdx,4) # d[y] = tx + inc %cl # x++ (NEXT ROUND) + mov (%rbp,%rcx,4), %eax # tx = d[x] (NEXT ROUND) + movb (%rbp,%rbx,4), %r8b # val = d[val] + xor (%rsi), %r8b # xor 1 byte + movb %r8b, (%rdi) + inc %rsi # in++ + inc %rdi # out++ + jmp 1b + +.Lfinished: + dec %rcx # x-- + movb %cl, (4*256)(%rbp) # key->x = x + movb %dl, (4*256+4)(%rbp) # key->y = y + pop %rbx + pop %rbp + ret +.L__gcry_arcfour_amd64_end: +ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64) + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/arcfour.c b/libotr/libgcrypt-1.8.7/cipher/arcfour.c new file mode 100644 index 0000000..44e8ef4 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/arcfour.c @@ -0,0 +1,227 @@ +/* arcfour.c - The arcfour stream cipher + * Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * For a description of the algorithm, see: + * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996. + * ISBN 0-471-11709-9. Pages 397 ff. 
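+ *
+ * In brief: the cipher keeps a 256-byte permutation S and two indices
+ * i and j; each step swaps S[i] and S[j] and emits the keystream byte
+ * S[(S[i] + S[j]) & 255], which is XORed onto the data stream.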
+ */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "types.h" +#include "g10lib.h" +#include "cipher.h" + +/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ +#undef USE_AMD64_ASM +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AMD64_ASM 1 +#endif + +static const char *selftest(void); + +#ifdef USE_AMD64_ASM + +typedef struct { + u32 sbox[256]; + u32 idx_i, idx_j; +} ARCFOUR_context; + +void _gcry_arcfour_amd64(void *key, size_t len, const byte *indata, + byte *outdata); + +static void +encrypt_stream (void *context, + byte *outbuf, const byte *inbuf, size_t length) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + const void *fn = _gcry_arcfour_amd64; + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. */ + asm volatile ("callq *%0\n\t" + : "+a" (fn), + "+D" (context), + "+S" (length), + "+d" (inbuf), + "+c" (outbuf) + : + : "cc", "memory", "r8", "r9", "r10", "r11"); +#else + _gcry_arcfour_amd64 (context, length, inbuf, outbuf ); +#endif +} + +#else /*!USE_AMD64_ASM*/ + +typedef struct { + byte sbox[256]; + int idx_i, idx_j; +} ARCFOUR_context; + +static void +do_encrypt_stream( ARCFOUR_context *ctx, + byte *outbuf, const byte *inbuf, size_t length ) +{ +#ifndef __i386__ + register unsigned int i = ctx->idx_i; + register byte j = ctx->idx_j; + register byte *sbox = ctx->sbox; + register byte t, u; + + while ( length-- ) + { + i++; + t = sbox[(byte)i]; + j += t; + u = sbox[j]; + sbox[(byte)i] = u; + u += t; + sbox[j] = t; + *outbuf++ = sbox[u] ^ *inbuf++; + } + + ctx->idx_i = (byte)i; + ctx->idx_j = (byte)j; +#else /*__i386__*/ + /* Old implementation of arcfour is faster on i386 than the version above. + * This is because version above increases register pressure which on i386 + * would push some of the variables to memory/stack. Therefore keep this + * version for i386 to avoid regressing performance. */ + register int i = ctx->idx_i; + register int j = ctx->idx_j; + register byte *sbox = ctx->sbox; + register int t; + + while ( length-- ) + { + i++; + i = i & 255; /* The and-op seems to be faster than the mod-op. 
*/ + j += sbox[i]; + j &= 255; + t = sbox[i]; sbox[i] = sbox[j]; sbox[j] = t; + *outbuf++ = *inbuf++ ^ sbox[(sbox[i] + sbox[j]) & 255]; + } + + ctx->idx_i = i; + ctx->idx_j = j; +#endif +} + +static void +encrypt_stream (void *context, + byte *outbuf, const byte *inbuf, size_t length) +{ + ARCFOUR_context *ctx = (ARCFOUR_context *) context; + do_encrypt_stream (ctx, outbuf, inbuf, length ); + _gcry_burn_stack (64); +} + +#endif /*!USE_AMD64_ASM*/ + + +static gcry_err_code_t +do_arcfour_setkey (void *context, const byte *key, unsigned int keylen) +{ + static int initialized; + static const char* selftest_failed; + int i, j; + byte karr[256]; + ARCFOUR_context *ctx = (ARCFOUR_context *) context; + + if (!initialized ) + { + initialized = 1; + selftest_failed = selftest(); + if( selftest_failed ) + log_error ("ARCFOUR selftest failed (%s)\n", selftest_failed ); + } + if( selftest_failed ) + return GPG_ERR_SELFTEST_FAILED; + + if( keylen < 40/8 ) /* we want at least 40 bits */ + return GPG_ERR_INV_KEYLEN; + + ctx->idx_i = ctx->idx_j = 0; + for (i=0; i < 256; i++ ) + ctx->sbox[i] = i; + for (i=j=0; i < 256; i++,j++ ) + { + if (j >= keylen) + j = 0; + karr[i] = key[j]; + } + for (i=j=0; i < 256; i++ ) + { + int t; + j = (j + ctx->sbox[i] + karr[i]) & 255; + t = ctx->sbox[i]; + ctx->sbox[i] = ctx->sbox[j]; + ctx->sbox[j] = t; + } + wipememory( karr, sizeof(karr) ); + + return GPG_ERR_NO_ERROR; +} + +static gcry_err_code_t +arcfour_setkey ( void *context, const byte *key, unsigned int keylen ) +{ + ARCFOUR_context *ctx = (ARCFOUR_context *) context; + gcry_err_code_t rc = do_arcfour_setkey (ctx, key, keylen ); + return rc; +} + + +static const char* +selftest(void) +{ + ARCFOUR_context ctx; + byte scratch[16]; + + /* Test vector from Cryptlib labeled there: "from the + State/Commerce Department". */ + static const byte key_1[] = + { 0x61, 0x8A, 0x63, 0xD2, 0xFB }; + static const byte plaintext_1[] = + { 0xDC, 0xEE, 0x4C, 0xF9, 0x2C }; + static const byte ciphertext_1[] = + { 0xF1, 0x38, 0x29, 0xC9, 0xDE }; + + arcfour_setkey( &ctx, key_1, sizeof(key_1)); + encrypt_stream( &ctx, scratch, plaintext_1, sizeof(plaintext_1)); + if ( memcmp (scratch, ciphertext_1, sizeof (ciphertext_1))) + return "Arcfour encryption test 1 failed."; + arcfour_setkey( &ctx, key_1, sizeof(key_1)); + encrypt_stream(&ctx, scratch, scratch, sizeof(plaintext_1)); /* decrypt */ + if ( memcmp (scratch, plaintext_1, sizeof (plaintext_1))) + return "Arcfour decryption test 1 failed."; + return NULL; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_arcfour = + { + GCRY_CIPHER_ARCFOUR, {0, 0}, + "ARCFOUR", NULL, NULL, 1, 128, sizeof (ARCFOUR_context), + arcfour_setkey, NULL, NULL, encrypt_stream, encrypt_stream, + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/bithelp.h b/libotr/libgcrypt-1.8.7/cipher/bithelp.h new file mode 100644 index 0000000..26ef7c3 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/bithelp.h @@ -0,0 +1,121 @@ +/* bithelp.h - Some bit manipulation helpers + * Copyright (C) 1999, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef GCRYPT_BITHELP_H +#define GCRYPT_BITHELP_H + +#include "types.h" + + +/**************** + * Rotate the 32 bit unsigned integer X by N bits left/right + */ +static inline u32 rol(u32 x, int n) +{ + return ( (x << (n&(32-1))) | (x >> ((32-n)&(32-1))) ); +} + +static inline u32 ror(u32 x, int n) +{ + return ( (x >> (n&(32-1))) | (x << ((32-n)&(32-1))) ); +} + +static inline u64 rol64(u64 x, int n) +{ + return ( (x << (n&(64-1))) | (x >> ((64-n)&(64-1))) ); +} + +/* Byte swap for 32-bit and 64-bit integers. If available, use compiler + provided helpers. */ +#ifdef HAVE_BUILTIN_BSWAP32 +# define _gcry_bswap32 __builtin_bswap32 +#else +static inline u32 +_gcry_bswap32(u32 x) +{ + return ((rol(x, 8) & 0x00ff00ffL) | (ror(x, 8) & 0xff00ff00L)); +} +#endif + +#ifdef HAVE_BUILTIN_BSWAP64 +# define _gcry_bswap64 __builtin_bswap64 +#else +static inline u64 +_gcry_bswap64(u64 x) +{ + return ((u64)_gcry_bswap32(x) << 32) | (_gcry_bswap32(x >> 32)); +} +#endif + +/* Endian dependent byte swap operations. */ +#ifdef WORDS_BIGENDIAN +# define le_bswap32(x) _gcry_bswap32(x) +# define be_bswap32(x) ((u32)(x)) +# define le_bswap64(x) _gcry_bswap64(x) +# define be_bswap64(x) ((u64)(x)) +#else +# define le_bswap32(x) ((u32)(x)) +# define be_bswap32(x) _gcry_bswap32(x) +# define le_bswap64(x) ((u64)(x)) +# define be_bswap64(x) _gcry_bswap64(x) +#endif + + +/* Count trailing zero bits in an unsigned int. We return an int + because that is what gcc's builtin does. Returns the number of + bits in X if X is 0. */ +static inline int +_gcry_ctz (unsigned int x) +{ +#if defined (HAVE_BUILTIN_CTZ) + return x? __builtin_ctz (x) : 8 * sizeof (x); +#else + /* See + * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightModLookup + */ + static const unsigned char mod37[] = + { + sizeof (unsigned int)*8, + 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, + 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, + 5, 20, 8, 19, 18 + }; + return (int)mod37[(-x & x) % 37]; +#endif +} + + +/* Count trailing zero bits in a u64. We return an int because that + is what gcc's builtin does. Returns the number of bits in X if X + is 0. */ +static inline int +_gcry_ctz64(u64 x) +{ +#if defined (HAVE_BUILTIN_CTZ) && SIZEOF_UNSIGNED_INT >= 8 + return x? __builtin_ctz (x) : 8 * sizeof (x); +#else + if ((x & 0xffffffff)) + return _gcry_ctz (x); + else + return 32 + _gcry_ctz (x >> 32); +#endif +} + + +#endif /*GCRYPT_BITHELP_H*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/blake2.c b/libotr/libgcrypt-1.8.7/cipher/blake2.c new file mode 100644 index 0000000..0e4cf9b --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/blake2.c @@ -0,0 +1,872 @@ +/* blake2.c - BLAKE2b and BLAKE2s hash functions (RFC 7693) + * Copyright (C) 2017 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* The code is based on public-domain/CC0 BLAKE2 reference implementation + * by Samuel Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/ref + * Copyright 2012, Samuel Neves <sneves@dei.uc.pt> + */ + +#include <config.h> +#include <string.h> +#include "g10lib.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher.h" +#include "hash-common.h" + +#define BLAKE2B_BLOCKBYTES 128 +#define BLAKE2B_OUTBYTES 64 +#define BLAKE2B_KEYBYTES 64 + +#define BLAKE2S_BLOCKBYTES 64 +#define BLAKE2S_OUTBYTES 32 +#define BLAKE2S_KEYBYTES 32 + +typedef struct +{ + u64 h[8]; + u64 t[2]; + u64 f[2]; +} BLAKE2B_STATE; + +struct blake2b_param_s +{ + byte digest_length; + byte key_length; + byte fanout; + byte depth; + byte leaf_length[4]; + byte node_offset[4]; + byte xof_length[4]; + byte node_depth; + byte inner_length; + byte reserved[14]; + byte salt[16]; + byte personal[16]; +}; + +typedef struct BLAKE2B_CONTEXT_S +{ + BLAKE2B_STATE state; + byte buf[BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; +} BLAKE2B_CONTEXT; + +typedef struct +{ + u32 h[8]; + u32 t[2]; + u32 f[2]; +} BLAKE2S_STATE; + +struct blake2s_param_s +{ + byte digest_length; + byte key_length; + byte fanout; + byte depth; + byte leaf_length[4]; + byte node_offset[4]; + byte xof_length[2]; + byte node_depth; + byte inner_length; + /* byte reserved[0]; */ + byte salt[8]; + byte personal[8]; +}; + +typedef struct BLAKE2S_CONTEXT_S +{ + BLAKE2S_STATE state; + byte buf[BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; +} BLAKE2S_CONTEXT; + +typedef unsigned int (*blake2_transform_t)(void *S, const void *inblk, + size_t nblks); + + +static const u64 blake2b_IV[8] = +{ + U64_C(0x6a09e667f3bcc908), U64_C(0xbb67ae8584caa73b), + U64_C(0x3c6ef372fe94f82b), U64_C(0xa54ff53a5f1d36f1), + U64_C(0x510e527fade682d1), U64_C(0x9b05688c2b3e6c1f), + U64_C(0x1f83d9abfb41bd6b), U64_C(0x5be0cd19137e2179) +}; + +static const u32 blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static byte zero_block[BLAKE2B_BLOCKBYTES] = { 0, }; + + +static void blake2_write(void *S, const void *inbuf, size_t inlen, + byte *tmpbuf, size_t *tmpbuflen, size_t blkbytes, + blake2_transform_t transform_fn) +{ + const byte* in = inbuf; + unsigned int burn = 0; + + if (inlen > 0) + { + size_t left = *tmpbuflen; + size_t fill = blkbytes - left; + size_t nblks; + + if (inlen > fill) + { + if (fill > 0) + buf_cpy (tmpbuf + left, in, fill); /* Fill buffer */ + left = 0; + + burn = transform_fn (S, tmpbuf, 1); /* Increment counter + Compress */ + + in += fill; + inlen -= fill; + + nblks = inlen / blkbytes - !(inlen % blkbytes); + if (nblks) + { + burn = transform_fn(S, in, nblks); + in += blkbytes * nblks; + inlen -= blkbytes * nblks; + } + } + + gcry_assert (inlen > 0); + + buf_cpy (tmpbuf + left, in, inlen); + *tmpbuflen = left + inlen; + } + + if (burn) + _gcry_burn_stack (burn); + + return; +} + + +static inline void blake2b_set_lastblock(BLAKE2B_STATE *S) +{ + S->f[0] = U64_C(0xffffffffffffffff); +} + +static inline int blake2b_is_lastblock(const BLAKE2B_STATE *S) +{ + return S->f[0] != 0; +} + +static inline void blake2b_increment_counter(BLAKE2B_STATE *S, const int inc) +{ + S->t[0] += (u64)inc; + S->t[1] += (S->t[0] < (u64)inc) - (inc < 0); +} + 
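+/* Note: t[1]:t[0] above form a single 128-bit message counter.  For a
+ * positive increment, (S->t[0] < (u64)inc) supplies the carry into t[1].
+ * blake2b_final pre-adjusts the counter by the negative value
+ * (buflen - BLAKE2B_BLOCKBYTES) so that the full-block increment done
+ * inside blake2b_transform nets out to the number of message bytes
+ * actually hashed; the (inc < 0) term turns the missing carry into the
+ * required borrow.  E.g. for a 5-byte message the pre-adjustment by
+ * 5 - 128 leaves t = {2^64-123, 2^64-1} and the +128 inside the
+ * transform wraps this to exactly t = {5, 0}. */
+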
+static inline u64 rotr64(u64 x, u64 n) +{ + return ((x >> (n & 63)) | (x << ((64 - n) & 63))); +} + +static unsigned int blake2b_transform(void *vS, const void *inblks, + size_t nblks) +{ + static const byte blake2b_sigma[12][16] = + { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } + }; + BLAKE2B_STATE *S = vS; + const byte* in = inblks; + u64 m[16]; + u64 v[16]; + + while (nblks--) + { + /* Increment counter */ + blake2b_increment_counter (S, BLAKE2B_BLOCKBYTES); + + /* Compress */ + m[0] = buf_get_le64 (in + 0 * sizeof(m[0])); + m[1] = buf_get_le64 (in + 1 * sizeof(m[0])); + m[2] = buf_get_le64 (in + 2 * sizeof(m[0])); + m[3] = buf_get_le64 (in + 3 * sizeof(m[0])); + m[4] = buf_get_le64 (in + 4 * sizeof(m[0])); + m[5] = buf_get_le64 (in + 5 * sizeof(m[0])); + m[6] = buf_get_le64 (in + 6 * sizeof(m[0])); + m[7] = buf_get_le64 (in + 7 * sizeof(m[0])); + m[8] = buf_get_le64 (in + 8 * sizeof(m[0])); + m[9] = buf_get_le64 (in + 9 * sizeof(m[0])); + m[10] = buf_get_le64 (in + 10 * sizeof(m[0])); + m[11] = buf_get_le64 (in + 11 * sizeof(m[0])); + m[12] = buf_get_le64 (in + 12 * sizeof(m[0])); + m[13] = buf_get_le64 (in + 13 * sizeof(m[0])); + m[14] = buf_get_le64 (in + 14 * sizeof(m[0])); + m[15] = buf_get_le64 (in + 15 * sizeof(m[0])); + + v[ 0] = S->h[0]; + v[ 1] = S->h[1]; + v[ 2] = S->h[2]; + v[ 3] = S->h[3]; + v[ 4] = S->h[4]; + v[ 5] = S->h[5]; + v[ 6] = S->h[6]; + v[ 7] = S->h[7]; + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2*i+0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2*i+1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + ROUND(10); + ROUND(11); + +#undef G +#undef ROUND + + S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8]; + S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8]; + S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8]; + S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8]; + S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8]; + S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8]; + S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8]; + S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8]; + + in += BLAKE2B_BLOCKBYTES; + } + + return sizeof(void *) * 4 + sizeof(u64) * 16 * 2; +} + +static void blake2b_final(void *ctx) +{ + 
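/* Zero-pad the last partial block, mark it as the final block, compress once more, and store the eight chaining words little-endian into c->buf. */ +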
BLAKE2B_CONTEXT *c = ctx; + BLAKE2B_STATE *S = &c->state; + unsigned int burn; + size_t i; + + gcry_assert (sizeof(c->buf) >= c->outlen); + if (blake2b_is_lastblock(S)) + return; + + if (c->buflen < BLAKE2B_BLOCKBYTES) + memset (c->buf + c->buflen, 0, BLAKE2B_BLOCKBYTES - c->buflen); /* Padding */ + blake2b_set_lastblock (S); + blake2b_increment_counter (S, (int)c->buflen - BLAKE2B_BLOCKBYTES); + burn = blake2b_transform (S, c->buf, 1); + + /* Output full hash to buffer */ + for (i = 0; i < 8; ++i) + buf_put_le64 (c->buf + sizeof(S->h[i]) * i, S->h[i]); + + /* Zero out extra buffer bytes. */ + if (c->outlen < sizeof(c->buf)) + memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen); + + if (burn) + _gcry_burn_stack (burn); +} + +static byte *blake2b_read(void *ctx) +{ + BLAKE2B_CONTEXT *c = ctx; + return c->buf; +} + +static void blake2b_write(void *ctx, const void *inbuf, size_t inlen) +{ + BLAKE2B_CONTEXT *c = ctx; + BLAKE2B_STATE *S = &c->state; + blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2B_BLOCKBYTES, + blake2b_transform); +} + +static inline void blake2b_init_param(BLAKE2B_STATE *S, + const struct blake2b_param_s *P) +{ + const byte *p = (const byte *)P; + size_t i; + + /* init xors IV with input parameter block */ + + /* IV XOR ParamBlock */ + for (i = 0; i < 8; ++i) + S->h[i] = blake2b_IV[i] ^ buf_get_le64(p + sizeof(S->h[i]) * i); +} + +static inline gcry_err_code_t blake2b_init(BLAKE2B_CONTEXT *ctx, + const byte *key, size_t keylen) +{ + struct blake2b_param_s P[1] = { { 0, } }; + BLAKE2B_STATE *S = &ctx->state; + + if (!ctx->outlen || ctx->outlen > BLAKE2B_OUTBYTES) + return GPG_ERR_INV_ARG; + if (sizeof(P[0]) != sizeof(u64) * 8) + return GPG_ERR_INTERNAL; + if (keylen && (!key || keylen > BLAKE2B_KEYBYTES)) + return GPG_ERR_INV_KEYLEN; + + P->digest_length = ctx->outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + + blake2b_init_param (S, P); + wipememory (P, sizeof(P)); + + if (key) + { + blake2b_write (ctx, key, keylen); + blake2b_write (ctx, zero_block, BLAKE2B_BLOCKBYTES - keylen); + } + + return 0; +} + +static gcry_err_code_t blake2b_init_ctx(void *ctx, unsigned int flags, + const byte *key, size_t keylen, + unsigned int dbits) +{ + BLAKE2B_CONTEXT *c = ctx; + + (void)flags; + + memset (c, 0, sizeof (*c)); + + c->outlen = dbits / 8; + c->buflen = 0; + return blake2b_init(c, key, keylen); +} + +static inline void blake2s_set_lastblock(BLAKE2S_STATE *S) +{ + S->f[0] = 0xFFFFFFFFUL; +} + +static inline int blake2s_is_lastblock(BLAKE2S_STATE *S) +{ + return S->f[0] != 0; +} + +static inline void blake2s_increment_counter(BLAKE2S_STATE *S, const int inc) +{ + S->t[0] += (u32)inc; + S->t[1] += (S->t[0] < (u32)inc) - (inc < 0); +} + +static unsigned int blake2s_transform(void *vS, const void *inblks, + size_t nblks) +{ + static const byte blake2s_sigma[10][16] = + { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, + }; + BLAKE2S_STATE *S = vS; + unsigned int burn = 0; + const byte* in = inblks; + u32 
m[16]; + u32 v[16]; + + while (nblks--) + { + /* Increment counter */ + blake2s_increment_counter (S, BLAKE2S_BLOCKBYTES); + + /* Compress */ + m[0] = buf_get_le32 (in + 0 * sizeof(m[0])); + m[1] = buf_get_le32 (in + 1 * sizeof(m[0])); + m[2] = buf_get_le32 (in + 2 * sizeof(m[0])); + m[3] = buf_get_le32 (in + 3 * sizeof(m[0])); + m[4] = buf_get_le32 (in + 4 * sizeof(m[0])); + m[5] = buf_get_le32 (in + 5 * sizeof(m[0])); + m[6] = buf_get_le32 (in + 6 * sizeof(m[0])); + m[7] = buf_get_le32 (in + 7 * sizeof(m[0])); + m[8] = buf_get_le32 (in + 8 * sizeof(m[0])); + m[9] = buf_get_le32 (in + 9 * sizeof(m[0])); + m[10] = buf_get_le32 (in + 10 * sizeof(m[0])); + m[11] = buf_get_le32 (in + 11 * sizeof(m[0])); + m[12] = buf_get_le32 (in + 12 * sizeof(m[0])); + m[13] = buf_get_le32 (in + 13 * sizeof(m[0])); + m[14] = buf_get_le32 (in + 14 * sizeof(m[0])); + m[15] = buf_get_le32 (in + 15 * sizeof(m[0])); + + v[ 0] = S->h[0]; + v[ 1] = S->h[1]; + v[ 2] = S->h[2]; + v[ 3] = S->h[3]; + v[ 4] = S->h[4]; + v[ 5] = S->h[5]; + v[ 6] = S->h[6]; + v[ 7] = S->h[7]; + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; + +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = ror(d ^ a, 16); \ + c = c + d; \ + b = ror(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = ror(d ^ a, 8); \ + c = c + d; \ + b = ror(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + +#undef G +#undef ROUND + + S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8]; + S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8]; + S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8]; + S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8]; + S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8]; + S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8]; + S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8]; + S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8]; + + in += BLAKE2S_BLOCKBYTES; + } + + return burn; +} + +static void blake2s_final(void *ctx) +{ + BLAKE2S_CONTEXT *c = ctx; + BLAKE2S_STATE *S = &c->state; + unsigned int burn; + size_t i; + + gcry_assert (sizeof(c->buf) >= c->outlen); + if (blake2s_is_lastblock(S)) + return; + + if (c->buflen < BLAKE2S_BLOCKBYTES) + memset (c->buf + c->buflen, 0, BLAKE2S_BLOCKBYTES - c->buflen); /* Padding */ + blake2s_set_lastblock (S); + blake2s_increment_counter (S, (int)c->buflen - BLAKE2S_BLOCKBYTES); + burn = blake2s_transform (S, c->buf, 1); + + /* Output full hash to buffer */ + for (i = 0; i < 8; ++i) + buf_put_le32 (c->buf + sizeof(S->h[i]) * i, S->h[i]); + + /* Zero out extra buffer bytes. 
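Only the first c->outlen bytes are the digest proper, but blake2s_read hands back the whole buffer.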
*/ + if (c->outlen < sizeof(c->buf)) + memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen); + + if (burn) + _gcry_burn_stack (burn); +} + +static byte *blake2s_read(void *ctx) +{ + BLAKE2S_CONTEXT *c = ctx; + return c->buf; +} + +static void blake2s_write(void *ctx, const void *inbuf, size_t inlen) +{ + BLAKE2S_CONTEXT *c = ctx; + BLAKE2S_STATE *S = &c->state; + blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2S_BLOCKBYTES, + blake2s_transform); +} + +static inline void blake2s_init_param(BLAKE2S_STATE *S, + const struct blake2s_param_s *P) +{ + const byte *p = (const byte *)P; + size_t i; + + /* init2 xors IV with input parameter block */ + + /* IV XOR ParamBlock */ + for (i = 0; i < 8; ++i) + S->h[i] ^= blake2s_IV[i] ^ buf_get_le32(&p[i * 4]); +} + +static inline gcry_err_code_t blake2s_init(BLAKE2S_CONTEXT *ctx, + const byte *key, size_t keylen) +{ + struct blake2s_param_s P[1] = { { 0, } }; + BLAKE2S_STATE *S = &ctx->state; + + if (!ctx->outlen || ctx->outlen > BLAKE2S_OUTBYTES) + return GPG_ERR_INV_ARG; + if (sizeof(P[0]) != sizeof(u32) * 8) + return GPG_ERR_INTERNAL; + if (keylen && (!key || keylen > BLAKE2S_KEYBYTES)) + return GPG_ERR_INV_KEYLEN; + + P->digest_length = ctx->outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + + blake2s_init_param (S, P); + wipememory (P, sizeof(P)); + + if (key) + { + blake2s_write (ctx, key, keylen); + blake2s_write (ctx, zero_block, BLAKE2S_BLOCKBYTES - keylen); + } + + return 0; +} + +static gcry_err_code_t blake2s_init_ctx(void *ctx, unsigned int flags, + const byte *key, size_t keylen, + unsigned int dbits) +{ + BLAKE2S_CONTEXT *c = ctx; + + (void)flags; + + memset (c, 0, sizeof (*c)); + + c->outlen = dbits / 8; + c->buflen = 0; + return blake2s_init(c, key, keylen); +} + +/* Selftests from "RFC 7693, Appendix E. BLAKE2b and BLAKE2s Self-Test + * Module C Source". 
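Each selftest hashes
+ * deterministically generated inputs, keyed and unkeyed, across several
+ * digest and input lengths, folds every intermediate digest into one
+ * running 256-bit hash, and compares that single result against a
+ * stored reference value.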
*/ +static void selftest_seq(byte *out, size_t len, u32 seed) +{ + size_t i; + u32 t, a, b; + + a = 0xDEAD4BAD * seed; + b = 1; + + for (i = 0; i < len; i++) + { + t = a + b; + a = b; + b = t; + out[i] = (t >> 24) & 0xFF; + } +} + +static gpg_err_code_t +selftests_blake2b (int algo, int extended, selftest_report_func_t report) +{ + static const byte blake2b_res[32] = + { + 0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD, + 0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56, + 0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73, + 0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75 + }; + static const size_t b2b_md_len[4] = { 20, 32, 48, 64 }; + static const size_t b2b_in_len[6] = { 0, 3, 128, 129, 255, 1024 }; + size_t i, j, outlen, inlen; + byte in[1024], key[64]; + BLAKE2B_CONTEXT ctx; + BLAKE2B_CONTEXT ctx2; + const char *what; + const char *errtxt; + + (void)extended; + + what = "rfc7693 BLAKE2b selftest"; + + /* 256-bit hash for testing */ + if (blake2b_init_ctx(&ctx, 0, NULL, 0, 32 * 8)) + { + errtxt = "init failed"; + goto failed; + } + + for (i = 0; i < 4; i++) + { + outlen = b2b_md_len[i]; + for (j = 0; j < 6; j++) + { + inlen = b2b_in_len[j]; + + selftest_seq(in, inlen, inlen); /* unkeyed hash */ + blake2b_init_ctx(&ctx2, 0, NULL, 0, outlen * 8); + blake2b_write(&ctx2, in, inlen); + blake2b_final(&ctx2); + blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */ + + selftest_seq(key, outlen, outlen); /* keyed hash */ + blake2b_init_ctx(&ctx2, 0, key, outlen, outlen * 8); + blake2b_write(&ctx2, in, inlen); + blake2b_final(&ctx2); + blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */ + } + } + + /* compute and compare the hash of hashes */ + blake2b_final(&ctx); + for (i = 0; i < 32; i++) + { + if (ctx.buf[i] != blake2b_res[i]) + { + errtxt = "digest mismatch"; + goto failed; + } + } + + return 0; + +failed: + if (report) + report ("digest", algo, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + +static gpg_err_code_t +selftests_blake2s (int algo, int extended, selftest_report_func_t report) +{ + static const byte blake2s_res[32] = + { + 0x6A, 0x41, 0x1F, 0x08, 0xCE, 0x25, 0xAD, 0xCD, + 0xFB, 0x02, 0xAB, 0xA6, 0x41, 0x45, 0x1C, 0xEC, + 0x53, 0xC5, 0x98, 0xB2, 0x4F, 0x4F, 0xC7, 0x87, + 0xFB, 0xDC, 0x88, 0x79, 0x7F, 0x4C, 0x1D, 0xFE + }; + static const size_t b2s_md_len[4] = { 16, 20, 28, 32 }; + static const size_t b2s_in_len[6] = { 0, 3, 64, 65, 255, 1024 }; + size_t i, j, outlen, inlen; + byte in[1024], key[32]; + BLAKE2S_CONTEXT ctx; + BLAKE2S_CONTEXT ctx2; + const char *what; + const char *errtxt; + + (void)extended; + + what = "rfc7693 BLAKE2s selftest"; + + /* 256-bit hash for testing */ + if (blake2s_init_ctx(&ctx, 0, NULL, 0, 32 * 8)) + { + errtxt = "init failed"; + goto failed; + } + + for (i = 0; i < 4; i++) + { + outlen = b2s_md_len[i]; + for (j = 0; j < 6; j++) + { + inlen = b2s_in_len[j]; + + selftest_seq(in, inlen, inlen); /* unkeyed hash */ + blake2s_init_ctx(&ctx2, 0, NULL, 0, outlen * 8); + blake2s_write(&ctx2, in, inlen); + blake2s_final(&ctx2); + blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */ + + selftest_seq(key, outlen, outlen); /* keyed hash */ + blake2s_init_ctx(&ctx2, 0, key, outlen, outlen * 8); + blake2s_write(&ctx2, in, inlen); + blake2s_final(&ctx2); + blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */ + } + } + + /* compute and compare the hash of hashes */ + blake2s_final(&ctx); + for (i = 0; i < 32; i++) + { + if (ctx.buf[i] != blake2s_res[i]) + { + errtxt = "digest mismatch"; + goto failed; + } + } + + return 0; + +failed: + if 
(report) + report ("digest", algo, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +gcry_err_code_t _gcry_blake2_init_with_key(void *ctx, unsigned int flags, + const unsigned char *key, + size_t keylen, int algo) +{ + gcry_err_code_t rc; + switch (algo) + { + case GCRY_MD_BLAKE2B_512: + rc = blake2b_init_ctx (ctx, flags, key, keylen, 512); + break; + case GCRY_MD_BLAKE2B_384: + rc = blake2b_init_ctx (ctx, flags, key, keylen, 384); + break; + case GCRY_MD_BLAKE2B_256: + rc = blake2b_init_ctx (ctx, flags, key, keylen, 256); + break; + case GCRY_MD_BLAKE2B_160: + rc = blake2b_init_ctx (ctx, flags, key, keylen, 160); + break; + case GCRY_MD_BLAKE2S_256: + rc = blake2s_init_ctx (ctx, flags, key, keylen, 256); + break; + case GCRY_MD_BLAKE2S_224: + rc = blake2s_init_ctx (ctx, flags, key, keylen, 224); + break; + case GCRY_MD_BLAKE2S_160: + rc = blake2s_init_ctx (ctx, flags, key, keylen, 160); + break; + case GCRY_MD_BLAKE2S_128: + rc = blake2s_init_ctx (ctx, flags, key, keylen, 128); + break; + default: + rc = GPG_ERR_DIGEST_ALGO; + break; + } + + return rc; +} + + +#define DEFINE_BLAKE2_VARIANT(bs, BS, dbits, oid_branch) \ + static void blake2##bs##_##dbits##_init(void *ctx, unsigned int flags) \ + { \ + int err = blake2##bs##_init_ctx (ctx, flags, NULL, 0, dbits); \ + gcry_assert (err == 0); \ + } \ + static byte blake2##bs##_##dbits##_asn[] = { 0x30 }; \ + static gcry_md_oid_spec_t oid_spec_blake2##bs##_##dbits[] = \ + { \ + { " 1.3.6.1.4.1.1722.12.2." oid_branch }, \ + { NULL } \ + }; \ + gcry_md_spec_t _gcry_digest_spec_blake2##bs##_##dbits = \ + { \ + GCRY_MD_BLAKE2##BS##_##dbits, {0, 0}, \ + "BLAKE2" #BS "_" #dbits, blake2##bs##_##dbits##_asn, \ + DIM (blake2##bs##_##dbits##_asn), oid_spec_blake2##bs##_##dbits, \ + dbits / 8, blake2##bs##_##dbits##_init, blake2##bs##_write, \ + blake2##bs##_final, blake2##bs##_read, NULL, \ + sizeof (BLAKE2##BS##_CONTEXT), selftests_blake2##bs \ + }; + +DEFINE_BLAKE2_VARIANT(b, B, 512, "1.16") +DEFINE_BLAKE2_VARIANT(b, B, 384, "1.12") +DEFINE_BLAKE2_VARIANT(b, B, 256, "1.8") +DEFINE_BLAKE2_VARIANT(b, B, 160, "1.5") + +DEFINE_BLAKE2_VARIANT(s, S, 256, "2.8") +DEFINE_BLAKE2_VARIANT(s, S, 224, "2.7") +DEFINE_BLAKE2_VARIANT(s, S, 160, "2.5") +DEFINE_BLAKE2_VARIANT(s, S, 128, "2.4") diff --git a/libotr/libgcrypt-1.8.7/cipher/blowfish-amd64.S b/libotr/libgcrypt-1.8.7/cipher/blowfish-amd64.S new file mode 100644 index 0000000..21b63fc --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/blowfish-amd64.S @@ -0,0 +1,541 @@ +/* blowfish-amd64.S - AMD64 assembly implementation of Blowfish cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifdef __x86_64 +#include <config.h> +#if defined(USE_BLOWFISH) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +.text + +/* structure of BLOWFISH_context: */ +#define s0 0 +#define s1 ((s0) + 256 * 4) +#define s2 ((s1) + 256 * 4) +#define s3 ((s2) + 256 * 4) +#define p ((s3) + 256 * 4) + +/* register macros */ +#define CTX %rdi +#define RIO %rsi + +#define RX0 %rax +#define RX1 %rbx +#define RX2 %rcx +#define RX3 %rdx + +#define RX0d %eax +#define RX1d %ebx +#define RX2d %ecx +#define RX3d %edx + +#define RX0bl %al +#define RX1bl %bl +#define RX2bl %cl +#define RX3bl %dl + +#define RX0bh %ah +#define RX1bh %bh +#define RX2bh %ch +#define RX3bh %dh + +#define RT0 %rbp +#define RT1 %rsi +#define RT2 %r8 +#define RT3 %r9 + +#define RT0d %ebp +#define RT1d %esi +#define RT2d %r8d +#define RT3d %r9d + +#define RKEY %r10 + +/*********************************************************************** + * 1-way blowfish + ***********************************************************************/ +#define F() \ + movzbl RX0bh, RT1d; \ + movzbl RX0bl, RT3d; \ + rorq $16, RX0; \ + movzbl RX0bh, RT0d; \ + movzbl RX0bl, RT2d; \ + rorq $16, RX0; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT2,4), RT0d; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT3,4), RT0d; \ + xorq RT0, RX0; + +#define load_roundkey_enc(n) \ + movq p+4*(n)(CTX), RX3; + +#define add_roundkey_enc() \ + xorq RX3, RX0; + +#define round_enc(n) \ + add_roundkey_enc(); \ + load_roundkey_enc(n); \ + \ + F(); \ + F(); + +#define load_roundkey_dec(n) \ + movq p+4*(n-1)(CTX), RX3; \ + rorq $32, RX3; + +#define add_roundkey_dec() \ + xorq RX3, RX0; + +#define round_dec(n) \ + add_roundkey_dec(); \ + load_roundkey_dec(n); \ + \ + F(); \ + F(); + +#define read_block() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; + +#define write_block() \ + bswapq RX0; \ + movq RX0, (RIO); + +.align 8 +ELF(.type __blowfish_enc_blk1,@function;) + +__blowfish_enc_blk1: + /* input: + * %rdi: ctx, CTX + * RX0: input plaintext block + * output: + * RX0: output plaintext block + */ + movq %rbp, %r11; + + load_roundkey_enc(0); + round_enc(2); + round_enc(4); + round_enc(6); + round_enc(8); + round_enc(10); + round_enc(12); + round_enc(14); + round_enc(16); + add_roundkey_enc(); + + movq %r11, %rbp; + + ret; +ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;) + +.align 8 +.globl _gcry_blowfish_amd64_do_encrypt +ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;) + +_gcry_blowfish_amd64_do_encrypt: + /* input: + * %rdi: ctx, CTX + * %rsi: u32 *ret_xl + * %rdx: u32 *ret_xr + */ + movl (%rdx), RX0d; + shlq $32, RX0; + movl (%rsi), RT3d; + movq %rdx, %r10; + orq RT3, RX0; + movq %rsi, RX2; + + call __blowfish_enc_blk1; + + movl RX0d, (%r10); + shrq $32, RX0; + movl RX0d, (RX2); + + ret; +ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;) + +.align 8 +.globl _gcry_blowfish_amd64_encrypt_block +ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;) + +_gcry_blowfish_amd64_encrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + movq %rsi, %r10; + + movq %rdx, RIO; + read_block(); + + call __blowfish_enc_blk1; + + movq %r10, RIO; + write_block(); + + ret; +ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;) + +.align 8 +.globl _gcry_blowfish_amd64_decrypt_block +ELF(.type 
_gcry_blowfish_amd64_decrypt_block,@function;) + +_gcry_blowfish_amd64_decrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + movq %rbp, %r11; + + movq %rsi, %r10; + movq %rdx, RIO; + + read_block(); + + load_roundkey_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + round_dec(1); + add_roundkey_dec(); + + movq %r10, RIO; + write_block(); + + movq %r11, %rbp; + + ret; +ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;) + +/********************************************************************** + 4-way blowfish, four blocks parallel + **********************************************************************/ +#define F4(x) \ + movzbl x ## bh, RT1d; \ + movzbl x ## bl, RT3d; \ + rorq $16, x; \ + movzbl x ## bh, RT0d; \ + movzbl x ## bl, RT2d; \ + rorq $16, x; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT2,4), RT0d; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT3,4), RT0d; \ + xorq RT0, x; + +#define add_preloaded_roundkey4() \ + xorq RKEY, RX0; \ + xorq RKEY, RX1; \ + xorq RKEY, RX2; \ + xorq RKEY, RX3; + +#define preload_roundkey_enc(n) \ + movq p+4*(n)(CTX), RKEY; + +#define add_roundkey_enc4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_enc(n + 2); + +#define round_enc4(n) \ + add_roundkey_enc4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define preload_roundkey_dec(n) \ + movq p+4*((n)-1)(CTX), RKEY; \ + rorq $32, RKEY; + +#define add_roundkey_dec4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_dec(n - 2); + +#define round_dec4(n) \ + add_roundkey_dec4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define inbswap_block4() \ + rorq $32, RX0; \ + bswapq RX0; \ + rorq $32, RX1; \ + bswapq RX1; \ + rorq $32, RX2; \ + bswapq RX2; \ + rorq $32, RX3; \ + bswapq RX3; + +#define inctrswap_block4() \ + rorq $32, RX0; \ + rorq $32, RX1; \ + rorq $32, RX2; \ + rorq $32, RX3; + +#define outbswap_block4() \ + bswapq RX0; \ + bswapq RX1; \ + bswapq RX2; \ + bswapq RX3; + +.align 8 +ELF(.type __blowfish_enc_blk4,@function;) + +__blowfish_enc_blk4: + /* input: + * %rdi: ctx, CTX + * RX0,RX1,RX2,RX3: four input inbswapped plaintext blocks + * output: + * RX0,RX1,RX2,RX3: four output ciphertext blocks + */ + preload_roundkey_enc(0); + + round_enc4(0); + round_enc4(2); + round_enc4(4); + round_enc4(6); + round_enc4(8); + round_enc4(10); + round_enc4(12); + round_enc4(14); + add_preloaded_roundkey4(); + + outbswap_block4(); + + ret; +ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;) + +.align 8 +ELF(.type __blowfish_dec_blk4,@function;) + +__blowfish_dec_blk4: + /* input: + * %rdi: ctx, CTX + * RX0,RX1,RX2,RX3: four input ciphertext blocks + * output: + * RX0,RX1,RX2,RX3: four output plaintext blocks + */ + preload_roundkey_dec(17); + + inbswap_block4(); + + round_dec4(17); + round_dec4(15); + round_dec4(13); + round_dec4(11); + round_dec4(9); + round_dec4(7); + round_dec4(5); + round_dec4(3); + add_preloaded_roundkey4(); + + outbswap_block4(); + + ret; +ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;) + +.align 8 +.globl _gcry_blowfish_amd64_ctr_enc +ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;) +_gcry_blowfish_amd64_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (big endian, 64bit) + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; 
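+
+	/* CTR mode note: the IV is one 64-bit big-endian counter.  The four
+	 * keystream blocks are E(ctr), E(ctr+1), E(ctr+2) and E(ctr+3); the
+	 * code below constructs the counters, encrypts them with
+	 * __blowfish_enc_blk4 and XORs the result onto the source blocks,
+	 * then stores ctr+4 back as the new IV.
+	 */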
+ + /* %r11-%r13 are not used by __blowfish_enc_blk4 */ + movq %rcx, %r13; /*iv*/ + movq %rdx, %r12; /*src*/ + movq %rsi, %r11; /*dst*/ + + /* load IV and byteswap */ + movq (%r13), RT0; + bswapq RT0; + movq RT0, RX0; + + /* construct IVs */ + leaq 1(RT0), RX1; + leaq 2(RT0), RX2; + leaq 3(RT0), RX3; + leaq 4(RT0), RT0; + bswapq RT0; + + inctrswap_block4(); + + /* store new IV */ + movq RT0, (%r13); + + call __blowfish_enc_blk4; + + /* XOR key-stream with plaintext */ + xorq 0 * 8(%r12), RX0; + xorq 1 * 8(%r12), RX1; + xorq 2 * 8(%r12), RX2; + xorq 3 * 8(%r12), RX3; + movq RX0, 0 * 8(%r11); + movq RX1, 1 * 8(%r11); + movq RX2, 2 * 8(%r11); + movq RX3, 3 * 8(%r11); + + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + + ret; +ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;) + +.align 8 +.globl _gcry_blowfish_amd64_cbc_dec +ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;) +_gcry_blowfish_amd64_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (64bit) + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + + /* %r11-%r13 are not used by __blowfish_dec_blk4 */ + movq %rsi, %r11; /*dst*/ + movq %rdx, %r12; /*src*/ + movq %rcx, %r13; /*iv*/ + + /* load input */ + movq 0 * 8(%r12), RX0; + movq 1 * 8(%r12), RX1; + movq 2 * 8(%r12), RX2; + movq 3 * 8(%r12), RX3; + + call __blowfish_dec_blk4; + + movq 3 * 8(%r12), RT0; + xorq (%r13), RX0; + xorq 0 * 8(%r12), RX1; + xorq 1 * 8(%r12), RX2; + xorq 2 * 8(%r12), RX3; + movq RT0, (%r13); /* store new IV */ + + movq RX0, 0 * 8(%r11); + movq RX1, 1 * 8(%r11); + movq RX2, 2 * 8(%r11); + movq RX3, 3 * 8(%r11); + + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + + ret; +ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;) + +.align 8 +.globl _gcry_blowfish_amd64_cfb_dec +ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;) +_gcry_blowfish_amd64_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (64bit) + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + + /* %r11-%r13 are not used by __blowfish_enc_blk4 */ + movq %rcx, %r13; /*iv*/ + movq %rdx, %r12; /*src*/ + movq %rsi, %r11; /*dst*/ + + /* Load input */ + movq (%r13), RX0; + movq 0 * 8(%r12), RX1; + movq 1 * 8(%r12), RX2; + movq 2 * 8(%r12), RX3; + + inbswap_block4(); + + /* Update IV */ + movq 3 * 8(%r12), RT0; + movq RT0, (%r13); + + call __blowfish_enc_blk4; + + xorq 0 * 8(%r12), RX0; + xorq 1 * 8(%r12), RX1; + xorq 2 * 8(%r12), RX2; + xorq 3 * 8(%r12), RX3; + movq RX0, 0 * 8(%r11); + movq RX1, 1 * 8(%r11); + movq RX2, 2 * 8(%r11); + movq RX3, 3 * 8(%r11); + + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + ret; +ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;) + +#endif /*defined(USE_BLOWFISH)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/blowfish-arm.S b/libotr/libgcrypt-1.8.7/cipher/blowfish-arm.S new file mode 100644 index 0000000..b30aa31 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/blowfish-arm.S @@ -0,0 +1,743 @@ +/* blowfish-arm.S - ARM assembly implementation of Blowfish cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__ARMEL__) +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +.text + +.syntax unified +.arm + +/* structure of crypto context */ +#define s0 0 +#define s1 (s0 + (1 * 256) * 4) +#define s2 (s0 + (2 * 256) * 4) +#define s3 (s0 + (3 * 256) * 4) +#define p (s3 + (1 * 256) * 4) + +/* register macros */ +#define CTXs0 %r0 +#define CTXs1 %r9 +#define CTXs2 %r8 +#define CTXs3 %r10 +#define RMASK %lr +#define RKEYL %r2 +#define RKEYR %ip + +#define RL0 %r3 +#define RR0 %r4 + +#define RL1 %r9 +#define RR1 %r10 + +#define RT0 %r11 +#define RT1 %r7 +#define RT2 %r5 +#define RT3 %r6 + +/* helper macros */ +#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 0)]; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 3)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 0)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 1)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 2)]; \ + strb rtmp0, [rdst, #((offs) + 3)]; + +#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 3)]; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 0)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 3)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 2)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 1)]; \ + strb rtmp0, [rdst, #((offs) + 0)]; + +#ifdef __ARMEL__ + #define ldr_unaligned_host ldr_unaligned_le + #define str_unaligned_host str_unaligned_le + + /* bswap on little-endian */ +#ifdef HAVE_ARM_ARCH_V6 + #define host_to_be(reg, rtmp) \ + rev reg, reg; + #define be_to_host(reg, rtmp) \ + rev reg, reg; +#else + #define host_to_be(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; + #define be_to_host(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; +#endif +#else + #define ldr_unaligned_host ldr_unaligned_be + #define str_unaligned_host str_unaligned_be + + /* nop on big-endian */ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ +#endif + +#define host_to_host(x, y) /*_*/ + +/*********************************************************************** + * 1-way blowfish + ***********************************************************************/ +#define F(l, r) \ + and RT0, RMASK, l, lsr#(24 - 2); \ + and RT1, RMASK, l, lsr#(16 - 2); \ + ldr RT0, [CTXs0, RT0]; \ + and RT2, RMASK, l, lsr#(8 - 2); \ + ldr RT1, [CTXs1, RT1]; \ + and RT3, RMASK, l, lsl#2; \ + ldr RT2, [CTXs2, RT2]; \ + add RT0, RT1; \ + ldr RT3, [CTXs3, RT3]; 
\ + eor RT0, RT2; \ + add RT0, RT3; \ + eor r, RT0; + +#define load_roundkey_enc(n) \ + ldr RKEYL, [CTXs2, #((p - s2) + (4 * (n) + 0))]; \ + ldr RKEYR, [CTXs2, #((p - s2) + (4 * (n) + 4))]; + +#define add_roundkey_enc() \ + eor RL0, RKEYL; \ + eor RR0, RKEYR; + +#define round_enc(n) \ + add_roundkey_enc(); \ + load_roundkey_enc(n); \ + \ + F(RL0, RR0); \ + F(RR0, RL0); + +#define load_roundkey_dec(n) \ + ldr RKEYL, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 4))]; \ + ldr RKEYR, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 0))]; + +#define add_roundkey_dec() \ + eor RL0, RKEYL; \ + eor RR0, RKEYR; + +#define round_dec(n) \ + add_roundkey_dec(); \ + load_roundkey_dec(n); \ + \ + F(RL0, RR0); \ + F(RR0, RL0); + +#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \ + ldr l0, [rin, #((offs) + 0)]; \ + ldr r0, [rin, #((offs) + 4)]; \ + convert(l0, rtmp); \ + convert(r0, rtmp); + +#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + str l0, [rout, #((offs) + 0)]; \ + str r0, [rout, #((offs) + 4)]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads allowed */ + #define read_block(rin, offs, l0, r0, rtmp0) \ + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0) + + #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \ + write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0) + + #define read_block_host(rin, offs, l0, r0, rtmp0) \ + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0) + + #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \ + write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0) +#else + /* need to handle unaligned reads by byte reads */ + #define read_block(rin, offs, l0, r0, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \ + ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \ + b 2f; \ + 1:;\ + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \ + 2:; + + #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \ + str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \ + 2:; + + #define read_block_host(rin, offs, l0, r0, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \ + ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \ + b 2f; \ + 1:;\ + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \ + 2:; + + #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); \ + str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block_aligned(rout, offs, l0, r0, host_to_host); \ + 2:; +#endif + +.align 3 +.type __blowfish_enc_blk1,%function; + +__blowfish_enc_blk1: + /* input: + * preloaded: CTX + * [RL0, RR0]: src + * output: + * [RR0, RL0]: dst + */ + push {%lr}; + + add CTXs1, CTXs0, #(s1 - s0); + add CTXs2, CTXs0, #(s2 - s0); + mov RMASK, #(0xff << 2); /* byte mask */ + add CTXs3, CTXs1, #(s3 - s1); + + load_roundkey_enc(0); + round_enc(2); + round_enc(4); + round_enc(6); + round_enc(8); + round_enc(10); + round_enc(12); + round_enc(14); + round_enc(16); + add_roundkey_enc(); + + pop {%pc}; +.size __blowfish_enc_blk1,.-__blowfish_enc_blk1; + +.align 8 +.globl _gcry_blowfish_arm_do_encrypt +.type _gcry_blowfish_arm_do_encrypt,%function; + +_gcry_blowfish_arm_do_encrypt: + /* input: + * %r0: ctx, CTX + * %r1: u32 *ret_xl + * 
%r2: u32 *ret_xr + */ + push {%r2, %r4-%r11, %ip, %lr}; + + ldr RL0, [%r1]; + ldr RR0, [%r2]; + + bl __blowfish_enc_blk1; + + pop {%r2}; + str RR0, [%r1]; + str RL0, [%r2]; + + pop {%r4-%r11, %ip, %pc}; +.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt; + +.align 3 +.globl _gcry_blowfish_arm_encrypt_block +.type _gcry_blowfish_arm_encrypt_block,%function; + +_gcry_blowfish_arm_encrypt_block: + /* input: + * %r0: ctx, CTX + * %r1: dst + * %r2: src + */ + push {%r4-%r11, %ip, %lr}; + + read_block(%r2, 0, RL0, RR0, RT0); + + bl __blowfish_enc_blk1; + + write_block(%r1, 0, RR0, RL0, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; +.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block; + +.align 3 +.globl _gcry_blowfish_arm_decrypt_block +.type _gcry_blowfish_arm_decrypt_block,%function; + +_gcry_blowfish_arm_decrypt_block: + /* input: + * %r0: ctx, CTX + * %r1: dst + * %r2: src + */ + push {%r4-%r11, %ip, %lr}; + + add CTXs1, CTXs0, #(s1 - s0); + add CTXs2, CTXs0, #(s2 - s0); + mov RMASK, #(0xff << 2); /* byte mask */ + add CTXs3, CTXs1, #(s3 - s1); + + read_block(%r2, 0, RL0, RR0, RT0); + + load_roundkey_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + round_dec(1); + add_roundkey_dec(); + + write_block(%r1, 0, RR0, RL0, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; +.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block; + +/*********************************************************************** + * 2-way blowfish + ***********************************************************************/ +#define F2(n, l0, r0, l1, r1, set_nextk, dec) \ + \ + and RT0, RMASK, l0, lsr#(24 - 2); \ + and RT1, RMASK, l0, lsr#(16 - 2); \ + and RT2, RMASK, l0, lsr#(8 - 2); \ + add RT1, #(s1 - s0); \ + \ + ldr RT0, [CTXs0, RT0]; \ + and RT3, RMASK, l0, lsl#2; \ + ldr RT1, [CTXs0, RT1]; \ + add RT3, #(s3 - s2); \ + ldr RT2, [CTXs2, RT2]; \ + add RT0, RT1; \ + ldr RT3, [CTXs2, RT3]; \ + \ + and RT1, RMASK, l1, lsr#(24 - 2); \ + eor RT0, RT2; \ + and RT2, RMASK, l1, lsr#(16 - 2); \ + add RT0, RT3; \ + add RT2, #(s1 - s0); \ + and RT3, RMASK, l1, lsr#(8 - 2); \ + eor r0, RT0; \ + \ + ldr RT1, [CTXs0, RT1]; \ + and RT0, RMASK, l1, lsl#2; \ + ldr RT2, [CTXs0, RT2]; \ + add RT0, #(s3 - s2); \ + ldr RT3, [CTXs2, RT3]; \ + add RT1, RT2; \ + ldr RT0, [CTXs2, RT0]; \ + \ + and RT2, RMASK, r0, lsr#(24 - 2); \ + eor RT1, RT3; \ + and RT3, RMASK, r0, lsr#(16 - 2); \ + add RT1, RT0; \ + add RT3, #(s1 - s0); \ + and RT0, RMASK, r0, lsr#(8 - 2); \ + eor r1, RT1; \ + \ + ldr RT2, [CTXs0, RT2]; \ + and RT1, RMASK, r0, lsl#2; \ + ldr RT3, [CTXs0, RT3]; \ + add RT1, #(s3 - s2); \ + ldr RT0, [CTXs2, RT0]; \ + add RT2, RT3; \ + ldr RT1, [CTXs2, RT1]; \ + \ + and RT3, RMASK, r1, lsr#(24 - 2); \ + eor RT2, RT0; \ + and RT0, RMASK, r1, lsr#(16 - 2); \ + add RT2, RT1; \ + add RT0, #(s1 - s0); \ + and RT1, RMASK, r1, lsr#(8 - 2); \ + eor l0, RT2; \ + \ + ldr RT3, [CTXs0, RT3]; \ + and RT2, RMASK, r1, lsl#2; \ + ldr RT0, [CTXs0, RT0]; \ + add RT2, #(s3 - s2); \ + ldr RT1, [CTXs2, RT1]; \ + eor l1, RKEYL; \ + ldr RT2, [CTXs2, RT2]; \ + \ + eor r0, RKEYR; \ + add RT3, RT0; \ + eor r1, RKEYR; \ + eor RT3, RT1; \ + eor l0, RKEYL; \ + add RT3, RT2; \ + set_nextk(RKEYL, (p - s2) + (4 * (n) + ((dec) * 4))); \ + eor l1, RT3; \ + set_nextk(RKEYR, (p - s2) + (4 * (n) + (!(dec) * 4))); + +#define load_n_add_roundkey_enc2(n) \ + load_roundkey_enc(n); \ + eor RL0, RKEYL; \ + eor RR0, RKEYR; \ + eor RL1, RKEYL; \ + eor RR1, RKEYR; \ + 
load_roundkey_enc((n) + 2); + +#define next_key(reg, offs) \ + ldr reg, [CTXs2, #(offs)]; + +#define dummy(x, y) /* do nothing */ + +#define round_enc2(n, load_next_key) \ + F2((n) + 2, RL0, RR0, RL1, RR1, load_next_key, 0); + +#define load_n_add_roundkey_dec2(n) \ + load_roundkey_dec(n); \ + eor RL0, RKEYL; \ + eor RR0, RKEYR; \ + eor RL1, RKEYL; \ + eor RR1, RKEYR; \ + load_roundkey_dec((n) - 2); + +#define round_dec2(n, load_next_key) \ + F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1); + +#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \ + ldr l0, [rin, #(0)]; \ + ldr r0, [rin, #(4)]; \ + convert(l0, rtmp); \ + ldr l1, [rin, #(8)]; \ + convert(r0, rtmp); \ + ldr r1, [rin, #(12)]; \ + convert(l1, rtmp); \ + convert(r1, rtmp); + +#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + convert(l1, rtmp); \ + str l0, [rout, #(0)]; \ + convert(r1, rtmp); \ + str r0, [rout, #(4)]; \ + str l1, [rout, #(8)]; \ + str r1, [rout, #(12)]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads allowed */ + #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0) + + #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0) + + #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0) + + #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0) +#else + /* need to handle unaligned reads by byte reads */ + #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_be(l0, rin, 0, rtmp0); \ + ldr_unaligned_be(r0, rin, 4, rtmp0); \ + ldr_unaligned_be(l1, rin, 8, rtmp0); \ + ldr_unaligned_be(r1, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \ + 2:; + + #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \ + str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \ + str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \ + str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \ + 2:; + + #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_host(l0, rin, 0, rtmp0); \ + ldr_unaligned_host(r0, rin, 4, rtmp0); \ + ldr_unaligned_host(l1, rin, 8, rtmp0); \ + ldr_unaligned_host(r1, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \ + 2:; + + #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \ + str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \ + str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \ + str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \ + 2:; +#endif + +.align 3 +.type _gcry_blowfish_arm_enc_blk2,%function; + +_gcry_blowfish_arm_enc_blk2: + /* input: + * preloaded: CTX + * [RL0, RR0], [RL1, RR1]: src + * output: + * [RR0, RL0], [RR1, RL1]: dst + */ + push {RT0,%lr}; + + add CTXs2, CTXs0, #(s2 - s0); + mov RMASK, #(0xff << 2); /* byte mask */ + + load_n_add_roundkey_enc2(0); + round_enc2(2, next_key); + round_enc2(4, next_key); + round_enc2(6, next_key); + round_enc2(8, 
next_key); + round_enc2(10, next_key); + round_enc2(12, next_key); + round_enc2(14, next_key); + round_enc2(16, dummy); + + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); + + pop {RT0,%pc}; +.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2; + +.align 3 +.globl _gcry_blowfish_arm_cfb_dec; +.type _gcry_blowfish_arm_cfb_dec,%function; + +_gcry_blowfish_arm_cfb_dec: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit) + */ + push {%r2, %r4-%r11, %ip, %lr}; + + mov %lr, %r3; + + /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ + ldm %r3, {RL0, RR0}; + host_to_be(RL0, RT0); + host_to_be(RR0, RT0); + read_block(%r2, 0, RL1, RR1, RT0); + + /* Update IV, load src[1] and save to iv[0] */ + read_block_host(%r2, 8, %r5, %r6, RT0); + stm %lr, {%r5, %r6}; + + bl _gcry_blowfish_arm_enc_blk2; + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r1: dst, %r0: %src */ + pop {%r0}; + + /* dst = src ^ result */ + read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr); + eor %r5, %r4; + eor %r6, %r3; + eor %r7, %r10; + eor %r8, %r9; + write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec; + +.align 3 +.globl _gcry_blowfish_arm_ctr_enc; +.type _gcry_blowfish_arm_ctr_enc,%function; + +_gcry_blowfish_arm_ctr_enc: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit, big-endian) + */ + push {%r2, %r4-%r11, %ip, %lr}; + + mov %lr, %r3; + + /* Load IV (big => host endian) */ + read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0); + + /* Construct IVs */ + adds RR1, RR0, #1; /* +1 */ + adc RL1, RL0, #0; + adds %r6, RR1, #1; /* +2 */ + adc %r5, RL1, #0; + + /* Store new IV (host => big-endian) */ + write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0); + + bl _gcry_blowfish_arm_enc_blk2; + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r1: dst, %r0: %src */ + pop {%r0}; + + /* XOR key-stream with plaintext */ + read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr); + eor %r5, %r4; + eor %r6, %r3; + eor %r7, %r10; + eor %r8, %r9; + write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc; + +.align 3 +.type _gcry_blowfish_arm_dec_blk2,%function; + +_gcry_blowfish_arm_dec_blk2: + /* input: + * preloaded: CTX + * [RL0, RR0], [RL1, RR1]: src + * output: + * [RR0, RL0], [RR1, RL1]: dst + */ + add CTXs2, CTXs0, #(s2 - s0); + mov RMASK, #(0xff << 2); /* byte mask */ + + load_n_add_roundkey_dec2(17); + round_dec2(15, next_key); + round_dec2(13, next_key); + round_dec2(11, next_key); + round_dec2(9, next_key); + round_dec2(7, next_key); + round_dec2(5, next_key); + round_dec2(3, next_key); + round_dec2(1, dummy); + + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); + + b .Ldec_cbc_tail; +.ltorg +.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2; + +.align 3 +.globl _gcry_blowfish_arm_cbc_dec; +.type _gcry_blowfish_arm_cbc_dec,%function; + +_gcry_blowfish_arm_cbc_dec: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit) + */ + push {%r2-%r11, %ip, %lr}; + + read_block2(%r2, RL0, RR0, RL1, RR1, RT0); + + /* dec_blk2 is only used by cbc_dec, jump directly in/out instead + * of function call. 
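+	 * The branch below enters _gcry_blowfish_arm_dec_blk2, which ends
+	 * with "b .Ldec_cbc_tail" back into this function; one call/return
+	 * pair is saved on the only path that uses the 2-way decryptor.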
*/
+	b _gcry_blowfish_arm_dec_blk2;
+.Ldec_cbc_tail:
+	/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+	/* %r0: %src, %r1: dst, %r2: iv */
+	pop {%r0, %r2};
+
+	/* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */
+	read_block_host(%r0, 0, %r7, %r8, %r5);
+	/* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */
+	ldm %r2, {%r5, %r6};
+
+	/* out[1] ^= IV+1 */
+	eor %r10, %r7;
+	eor %r9, %r8;
+	/* out[0] ^= IV */
+	eor %r4, %r5;
+	eor %r3, %r6;
+
+	/* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */
+	read_block_host(%r0, 8, %r7, %r8, %r5);
+	/* store IV+2 to iv[0] (aligned). */
+	stm %r2, {%r7, %r8};
+
+	/* store result to dst[0-3]. Might be unaligned. */
+	write_block2_host(%r1, %r4, %r3, %r10, %r9, %r5, %r6);
+
+	pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARMEL__*/
diff --git a/libotr/libgcrypt-1.8.7/cipher/blowfish.c b/libotr/libgcrypt-1.8.7/cipher/blowfish.c
new file mode 100644
index 0000000..a3fc26c
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/blowfish.c
@@ -0,0 +1,1112 @@
+/* blowfish.c - Blowfish encryption
+ * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * For a description of the algorithm, see:
+ *   Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
+ *   ISBN 0-471-11709-9. Pages 336 ff.
+ */
+
+/* Test values:
+ * key    "abcdefghijklmnopqrstuvwxyz";
+ * plain  "BLOWFISH"
+ * cipher 32 4E D0 FE F4 13 A2 03
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+
+#define BLOWFISH_BLOCKSIZE 8
+#define BLOWFISH_ROUNDS 16
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    (BLOWFISH_ROUNDS == 16)
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code.
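+ * (As with the AMD64 case above, the ARM path is only taken for the
+ * standard 16-round variant on little-endian ARM with GCC-compatible
+ * platform assembler support.)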
*/ +#undef USE_ARM_ASM +#if defined(__ARMEL__) +# if (BLOWFISH_ROUNDS == 16) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) +# define USE_ARM_ASM 1 +# endif +#endif + +typedef struct { + u32 s0[256]; + u32 s1[256]; + u32 s2[256]; + u32 s3[256]; + u32 p[BLOWFISH_ROUNDS+2]; +} BLOWFISH_context; + +static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen); +static unsigned int encrypt_block (void *bc, byte *outbuf, const byte *inbuf); +static unsigned int decrypt_block (void *bc, byte *outbuf, const byte *inbuf); + + +/* precomputed S boxes */ +static const u32 ks0[256] = { + 0xD1310BA6,0x98DFB5AC,0x2FFD72DB,0xD01ADFB7,0xB8E1AFED,0x6A267E96, + 0xBA7C9045,0xF12C7F99,0x24A19947,0xB3916CF7,0x0801F2E2,0x858EFC16, + 0x636920D8,0x71574E69,0xA458FEA3,0xF4933D7E,0x0D95748F,0x728EB658, + 0x718BCD58,0x82154AEE,0x7B54A41D,0xC25A59B5,0x9C30D539,0x2AF26013, + 0xC5D1B023,0x286085F0,0xCA417918,0xB8DB38EF,0x8E79DCB0,0x603A180E, + 0x6C9E0E8B,0xB01E8A3E,0xD71577C1,0xBD314B27,0x78AF2FDA,0x55605C60, + 0xE65525F3,0xAA55AB94,0x57489862,0x63E81440,0x55CA396A,0x2AAB10B6, + 0xB4CC5C34,0x1141E8CE,0xA15486AF,0x7C72E993,0xB3EE1411,0x636FBC2A, + 0x2BA9C55D,0x741831F6,0xCE5C3E16,0x9B87931E,0xAFD6BA33,0x6C24CF5C, + 0x7A325381,0x28958677,0x3B8F4898,0x6B4BB9AF,0xC4BFE81B,0x66282193, + 0x61D809CC,0xFB21A991,0x487CAC60,0x5DEC8032,0xEF845D5D,0xE98575B1, + 0xDC262302,0xEB651B88,0x23893E81,0xD396ACC5,0x0F6D6FF3,0x83F44239, + 0x2E0B4482,0xA4842004,0x69C8F04A,0x9E1F9B5E,0x21C66842,0xF6E96C9A, + 0x670C9C61,0xABD388F0,0x6A51A0D2,0xD8542F68,0x960FA728,0xAB5133A3, + 0x6EEF0B6C,0x137A3BE4,0xBA3BF050,0x7EFB2A98,0xA1F1651D,0x39AF0176, + 0x66CA593E,0x82430E88,0x8CEE8619,0x456F9FB4,0x7D84A5C3,0x3B8B5EBE, + 0xE06F75D8,0x85C12073,0x401A449F,0x56C16AA6,0x4ED3AA62,0x363F7706, + 0x1BFEDF72,0x429B023D,0x37D0D724,0xD00A1248,0xDB0FEAD3,0x49F1C09B, + 0x075372C9,0x80991B7B,0x25D479D8,0xF6E8DEF7,0xE3FE501A,0xB6794C3B, + 0x976CE0BD,0x04C006BA,0xC1A94FB6,0x409F60C4,0x5E5C9EC2,0x196A2463, + 0x68FB6FAF,0x3E6C53B5,0x1339B2EB,0x3B52EC6F,0x6DFC511F,0x9B30952C, + 0xCC814544,0xAF5EBD09,0xBEE3D004,0xDE334AFD,0x660F2807,0x192E4BB3, + 0xC0CBA857,0x45C8740F,0xD20B5F39,0xB9D3FBDB,0x5579C0BD,0x1A60320A, + 0xD6A100C6,0x402C7279,0x679F25FE,0xFB1FA3CC,0x8EA5E9F8,0xDB3222F8, + 0x3C7516DF,0xFD616B15,0x2F501EC8,0xAD0552AB,0x323DB5FA,0xFD238760, + 0x53317B48,0x3E00DF82,0x9E5C57BB,0xCA6F8CA0,0x1A87562E,0xDF1769DB, + 0xD542A8F6,0x287EFFC3,0xAC6732C6,0x8C4F5573,0x695B27B0,0xBBCA58C8, + 0xE1FFA35D,0xB8F011A0,0x10FA3D98,0xFD2183B8,0x4AFCB56C,0x2DD1D35B, + 0x9A53E479,0xB6F84565,0xD28E49BC,0x4BFB9790,0xE1DDF2DA,0xA4CB7E33, + 0x62FB1341,0xCEE4C6E8,0xEF20CADA,0x36774C01,0xD07E9EFE,0x2BF11FB4, + 0x95DBDA4D,0xAE909198,0xEAAD8E71,0x6B93D5A0,0xD08ED1D0,0xAFC725E0, + 0x8E3C5B2F,0x8E7594B7,0x8FF6E2FB,0xF2122B64,0x8888B812,0x900DF01C, + 0x4FAD5EA0,0x688FC31C,0xD1CFF191,0xB3A8C1AD,0x2F2F2218,0xBE0E1777, + 0xEA752DFE,0x8B021FA1,0xE5A0CC0F,0xB56F74E8,0x18ACF3D6,0xCE89E299, + 0xB4A84FE0,0xFD13E0B7,0x7CC43B81,0xD2ADA8D9,0x165FA266,0x80957705, + 0x93CC7314,0x211A1477,0xE6AD2065,0x77B5FA86,0xC75442F5,0xFB9D35CF, + 0xEBCDAF0C,0x7B3E89A0,0xD6411BD3,0xAE1E7E49,0x00250E2D,0x2071B35E, + 0x226800BB,0x57B8E0AF,0x2464369B,0xF009B91E,0x5563911D,0x59DFA6AA, + 0x78C14389,0xD95A537F,0x207D5BA2,0x02E5B9C5,0x83260376,0x6295CFA9, + 0x11C81968,0x4E734A41,0xB3472DCA,0x7B14A94A,0x1B510052,0x9A532915, + 0xD60F573F,0xBC9BC6E4,0x2B60A476,0x81E67400,0x08BA6FB5,0x571BE91F, + 0xF296EC6B,0x2A0DD915,0xB6636521,0xE7B9F9B6,0xFF34052E,0xC5855664, + 0x53B02D5D,0xA99F8FA1,0x08BA4799,0x6E85076A }; + +static 
const u32 ks1[256] = { + 0x4B7A70E9,0xB5B32944,0xDB75092E,0xC4192623,0xAD6EA6B0,0x49A7DF7D, + 0x9CEE60B8,0x8FEDB266,0xECAA8C71,0x699A17FF,0x5664526C,0xC2B19EE1, + 0x193602A5,0x75094C29,0xA0591340,0xE4183A3E,0x3F54989A,0x5B429D65, + 0x6B8FE4D6,0x99F73FD6,0xA1D29C07,0xEFE830F5,0x4D2D38E6,0xF0255DC1, + 0x4CDD2086,0x8470EB26,0x6382E9C6,0x021ECC5E,0x09686B3F,0x3EBAEFC9, + 0x3C971814,0x6B6A70A1,0x687F3584,0x52A0E286,0xB79C5305,0xAA500737, + 0x3E07841C,0x7FDEAE5C,0x8E7D44EC,0x5716F2B8,0xB03ADA37,0xF0500C0D, + 0xF01C1F04,0x0200B3FF,0xAE0CF51A,0x3CB574B2,0x25837A58,0xDC0921BD, + 0xD19113F9,0x7CA92FF6,0x94324773,0x22F54701,0x3AE5E581,0x37C2DADC, + 0xC8B57634,0x9AF3DDA7,0xA9446146,0x0FD0030E,0xECC8C73E,0xA4751E41, + 0xE238CD99,0x3BEA0E2F,0x3280BBA1,0x183EB331,0x4E548B38,0x4F6DB908, + 0x6F420D03,0xF60A04BF,0x2CB81290,0x24977C79,0x5679B072,0xBCAF89AF, + 0xDE9A771F,0xD9930810,0xB38BAE12,0xDCCF3F2E,0x5512721F,0x2E6B7124, + 0x501ADDE6,0x9F84CD87,0x7A584718,0x7408DA17,0xBC9F9ABC,0xE94B7D8C, + 0xEC7AEC3A,0xDB851DFA,0x63094366,0xC464C3D2,0xEF1C1847,0x3215D908, + 0xDD433B37,0x24C2BA16,0x12A14D43,0x2A65C451,0x50940002,0x133AE4DD, + 0x71DFF89E,0x10314E55,0x81AC77D6,0x5F11199B,0x043556F1,0xD7A3C76B, + 0x3C11183B,0x5924A509,0xF28FE6ED,0x97F1FBFA,0x9EBABF2C,0x1E153C6E, + 0x86E34570,0xEAE96FB1,0x860E5E0A,0x5A3E2AB3,0x771FE71C,0x4E3D06FA, + 0x2965DCB9,0x99E71D0F,0x803E89D6,0x5266C825,0x2E4CC978,0x9C10B36A, + 0xC6150EBA,0x94E2EA78,0xA5FC3C53,0x1E0A2DF4,0xF2F74EA7,0x361D2B3D, + 0x1939260F,0x19C27960,0x5223A708,0xF71312B6,0xEBADFE6E,0xEAC31F66, + 0xE3BC4595,0xA67BC883,0xB17F37D1,0x018CFF28,0xC332DDEF,0xBE6C5AA5, + 0x65582185,0x68AB9802,0xEECEA50F,0xDB2F953B,0x2AEF7DAD,0x5B6E2F84, + 0x1521B628,0x29076170,0xECDD4775,0x619F1510,0x13CCA830,0xEB61BD96, + 0x0334FE1E,0xAA0363CF,0xB5735C90,0x4C70A239,0xD59E9E0B,0xCBAADE14, + 0xEECC86BC,0x60622CA7,0x9CAB5CAB,0xB2F3846E,0x648B1EAF,0x19BDF0CA, + 0xA02369B9,0x655ABB50,0x40685A32,0x3C2AB4B3,0x319EE9D5,0xC021B8F7, + 0x9B540B19,0x875FA099,0x95F7997E,0x623D7DA8,0xF837889A,0x97E32D77, + 0x11ED935F,0x16681281,0x0E358829,0xC7E61FD6,0x96DEDFA1,0x7858BA99, + 0x57F584A5,0x1B227263,0x9B83C3FF,0x1AC24696,0xCDB30AEB,0x532E3054, + 0x8FD948E4,0x6DBC3128,0x58EBF2EF,0x34C6FFEA,0xFE28ED61,0xEE7C3C73, + 0x5D4A14D9,0xE864B7E3,0x42105D14,0x203E13E0,0x45EEE2B6,0xA3AAABEA, + 0xDB6C4F15,0xFACB4FD0,0xC742F442,0xEF6ABBB5,0x654F3B1D,0x41CD2105, + 0xD81E799E,0x86854DC7,0xE44B476A,0x3D816250,0xCF62A1F2,0x5B8D2646, + 0xFC8883A0,0xC1C7B6A3,0x7F1524C3,0x69CB7492,0x47848A0B,0x5692B285, + 0x095BBF00,0xAD19489D,0x1462B174,0x23820E00,0x58428D2A,0x0C55F5EA, + 0x1DADF43E,0x233F7061,0x3372F092,0x8D937E41,0xD65FECF1,0x6C223BDB, + 0x7CDE3759,0xCBEE7460,0x4085F2A7,0xCE77326E,0xA6078084,0x19F8509E, + 0xE8EFD855,0x61D99735,0xA969A7AA,0xC50C06C2,0x5A04ABFC,0x800BCADC, + 0x9E447A2E,0xC3453484,0xFDD56705,0x0E1E9EC9,0xDB73DBD3,0x105588CD, + 0x675FDA79,0xE3674340,0xC5C43465,0x713E38D8,0x3D28F89E,0xF16DFF20, + 0x153E21E7,0x8FB03D4A,0xE6E39F2B,0xDB83ADF7 }; + +static const u32 ks2[256] = { + 0xE93D5A68,0x948140F7,0xF64C261C,0x94692934,0x411520F7,0x7602D4F7, + 0xBCF46B2E,0xD4A20068,0xD4082471,0x3320F46A,0x43B7D4B7,0x500061AF, + 0x1E39F62E,0x97244546,0x14214F74,0xBF8B8840,0x4D95FC1D,0x96B591AF, + 0x70F4DDD3,0x66A02F45,0xBFBC09EC,0x03BD9785,0x7FAC6DD0,0x31CB8504, + 0x96EB27B3,0x55FD3941,0xDA2547E6,0xABCA0A9A,0x28507825,0x530429F4, + 0x0A2C86DA,0xE9B66DFB,0x68DC1462,0xD7486900,0x680EC0A4,0x27A18DEE, + 0x4F3FFEA2,0xE887AD8C,0xB58CE006,0x7AF4D6B6,0xAACE1E7C,0xD3375FEC, + 0xCE78A399,0x406B2A42,0x20FE9E35,0xD9F385B9,0xEE39D7AB,0x3B124E8B, 
+ 0x1DC9FAF7,0x4B6D1856,0x26A36631,0xEAE397B2,0x3A6EFA74,0xDD5B4332, + 0x6841E7F7,0xCA7820FB,0xFB0AF54E,0xD8FEB397,0x454056AC,0xBA489527, + 0x55533A3A,0x20838D87,0xFE6BA9B7,0xD096954B,0x55A867BC,0xA1159A58, + 0xCCA92963,0x99E1DB33,0xA62A4A56,0x3F3125F9,0x5EF47E1C,0x9029317C, + 0xFDF8E802,0x04272F70,0x80BB155C,0x05282CE3,0x95C11548,0xE4C66D22, + 0x48C1133F,0xC70F86DC,0x07F9C9EE,0x41041F0F,0x404779A4,0x5D886E17, + 0x325F51EB,0xD59BC0D1,0xF2BCC18F,0x41113564,0x257B7834,0x602A9C60, + 0xDFF8E8A3,0x1F636C1B,0x0E12B4C2,0x02E1329E,0xAF664FD1,0xCAD18115, + 0x6B2395E0,0x333E92E1,0x3B240B62,0xEEBEB922,0x85B2A20E,0xE6BA0D99, + 0xDE720C8C,0x2DA2F728,0xD0127845,0x95B794FD,0x647D0862,0xE7CCF5F0, + 0x5449A36F,0x877D48FA,0xC39DFD27,0xF33E8D1E,0x0A476341,0x992EFF74, + 0x3A6F6EAB,0xF4F8FD37,0xA812DC60,0xA1EBDDF8,0x991BE14C,0xDB6E6B0D, + 0xC67B5510,0x6D672C37,0x2765D43B,0xDCD0E804,0xF1290DC7,0xCC00FFA3, + 0xB5390F92,0x690FED0B,0x667B9FFB,0xCEDB7D9C,0xA091CF0B,0xD9155EA3, + 0xBB132F88,0x515BAD24,0x7B9479BF,0x763BD6EB,0x37392EB3,0xCC115979, + 0x8026E297,0xF42E312D,0x6842ADA7,0xC66A2B3B,0x12754CCC,0x782EF11C, + 0x6A124237,0xB79251E7,0x06A1BBE6,0x4BFB6350,0x1A6B1018,0x11CAEDFA, + 0x3D25BDD8,0xE2E1C3C9,0x44421659,0x0A121386,0xD90CEC6E,0xD5ABEA2A, + 0x64AF674E,0xDA86A85F,0xBEBFE988,0x64E4C3FE,0x9DBC8057,0xF0F7C086, + 0x60787BF8,0x6003604D,0xD1FD8346,0xF6381FB0,0x7745AE04,0xD736FCCC, + 0x83426B33,0xF01EAB71,0xB0804187,0x3C005E5F,0x77A057BE,0xBDE8AE24, + 0x55464299,0xBF582E61,0x4E58F48F,0xF2DDFDA2,0xF474EF38,0x8789BDC2, + 0x5366F9C3,0xC8B38E74,0xB475F255,0x46FCD9B9,0x7AEB2661,0x8B1DDF84, + 0x846A0E79,0x915F95E2,0x466E598E,0x20B45770,0x8CD55591,0xC902DE4C, + 0xB90BACE1,0xBB8205D0,0x11A86248,0x7574A99E,0xB77F19B6,0xE0A9DC09, + 0x662D09A1,0xC4324633,0xE85A1F02,0x09F0BE8C,0x4A99A025,0x1D6EFE10, + 0x1AB93D1D,0x0BA5A4DF,0xA186F20F,0x2868F169,0xDCB7DA83,0x573906FE, + 0xA1E2CE9B,0x4FCD7F52,0x50115E01,0xA70683FA,0xA002B5C4,0x0DE6D027, + 0x9AF88C27,0x773F8641,0xC3604C06,0x61A806B5,0xF0177A28,0xC0F586E0, + 0x006058AA,0x30DC7D62,0x11E69ED7,0x2338EA63,0x53C2DD94,0xC2C21634, + 0xBBCBEE56,0x90BCB6DE,0xEBFC7DA1,0xCE591D76,0x6F05E409,0x4B7C0188, + 0x39720A3D,0x7C927C24,0x86E3725F,0x724D9DB9,0x1AC15BB4,0xD39EB8FC, + 0xED545578,0x08FCA5B5,0xD83D7CD3,0x4DAD0FC4,0x1E50EF5E,0xB161E6F8, + 0xA28514D9,0x6C51133C,0x6FD5C7E7,0x56E14EC4,0x362ABFCE,0xDDC6C837, + 0xD79A3234,0x92638212,0x670EFA8E,0x406000E0 }; + +static const u32 ks3[256] = { + 0x3A39CE37,0xD3FAF5CF,0xABC27737,0x5AC52D1B,0x5CB0679E,0x4FA33742, + 0xD3822740,0x99BC9BBE,0xD5118E9D,0xBF0F7315,0xD62D1C7E,0xC700C47B, + 0xB78C1B6B,0x21A19045,0xB26EB1BE,0x6A366EB4,0x5748AB2F,0xBC946E79, + 0xC6A376D2,0x6549C2C8,0x530FF8EE,0x468DDE7D,0xD5730A1D,0x4CD04DC6, + 0x2939BBDB,0xA9BA4650,0xAC9526E8,0xBE5EE304,0xA1FAD5F0,0x6A2D519A, + 0x63EF8CE2,0x9A86EE22,0xC089C2B8,0x43242EF6,0xA51E03AA,0x9CF2D0A4, + 0x83C061BA,0x9BE96A4D,0x8FE51550,0xBA645BD6,0x2826A2F9,0xA73A3AE1, + 0x4BA99586,0xEF5562E9,0xC72FEFD3,0xF752F7DA,0x3F046F69,0x77FA0A59, + 0x80E4A915,0x87B08601,0x9B09E6AD,0x3B3EE593,0xE990FD5A,0x9E34D797, + 0x2CF0B7D9,0x022B8B51,0x96D5AC3A,0x017DA67D,0xD1CF3ED6,0x7C7D2D28, + 0x1F9F25CF,0xADF2B89B,0x5AD6B472,0x5A88F54C,0xE029AC71,0xE019A5E6, + 0x47B0ACFD,0xED93FA9B,0xE8D3C48D,0x283B57CC,0xF8D56629,0x79132E28, + 0x785F0191,0xED756055,0xF7960E44,0xE3D35E8C,0x15056DD4,0x88F46DBA, + 0x03A16125,0x0564F0BD,0xC3EB9E15,0x3C9057A2,0x97271AEC,0xA93A072A, + 0x1B3F6D9B,0x1E6321F5,0xF59C66FB,0x26DCF319,0x7533D928,0xB155FDF5, + 0x03563482,0x8ABA3CBB,0x28517711,0xC20AD9F8,0xABCC5167,0xCCAD925F, + 
0x4DE81751,0x3830DC8E,0x379D5862,0x9320F991,0xEA7A90C2,0xFB3E7BCE, + 0x5121CE64,0x774FBE32,0xA8B6E37E,0xC3293D46,0x48DE5369,0x6413E680, + 0xA2AE0810,0xDD6DB224,0x69852DFD,0x09072166,0xB39A460A,0x6445C0DD, + 0x586CDECF,0x1C20C8AE,0x5BBEF7DD,0x1B588D40,0xCCD2017F,0x6BB4E3BB, + 0xDDA26A7E,0x3A59FF45,0x3E350A44,0xBCB4CDD5,0x72EACEA8,0xFA6484BB, + 0x8D6612AE,0xBF3C6F47,0xD29BE463,0x542F5D9E,0xAEC2771B,0xF64E6370, + 0x740E0D8D,0xE75B1357,0xF8721671,0xAF537D5D,0x4040CB08,0x4EB4E2CC, + 0x34D2466A,0x0115AF84,0xE1B00428,0x95983A1D,0x06B89FB4,0xCE6EA048, + 0x6F3F3B82,0x3520AB82,0x011A1D4B,0x277227F8,0x611560B1,0xE7933FDC, + 0xBB3A792B,0x344525BD,0xA08839E1,0x51CE794B,0x2F32C9B7,0xA01FBAC9, + 0xE01CC87E,0xBCC7D1F6,0xCF0111C3,0xA1E8AAC7,0x1A908749,0xD44FBD9A, + 0xD0DADECB,0xD50ADA38,0x0339C32A,0xC6913667,0x8DF9317C,0xE0B12B4F, + 0xF79E59B7,0x43F5BB3A,0xF2D519FF,0x27D9459C,0xBF97222C,0x15E6FC2A, + 0x0F91FC71,0x9B941525,0xFAE59361,0xCEB69CEB,0xC2A86459,0x12BAA8D1, + 0xB6C1075E,0xE3056A0C,0x10D25065,0xCB03A442,0xE0EC6E0E,0x1698DB3B, + 0x4C98A0BE,0x3278E964,0x9F1F9532,0xE0D392DF,0xD3A0342B,0x8971F21E, + 0x1B0A7441,0x4BA3348C,0xC5BE7120,0xC37632D8,0xDF359F8D,0x9B992F2E, + 0xE60B6F47,0x0FE3F11D,0xE54CDA54,0x1EDAD891,0xCE6279CF,0xCD3E7E6F, + 0x1618B166,0xFD2C1D05,0x848FD2C5,0xF6FB2299,0xF523F357,0xA6327623, + 0x93A83531,0x56CCCD02,0xACF08162,0x5A75EBB5,0x6E163697,0x88D273CC, + 0xDE966292,0x81B949D0,0x4C50901B,0x71C65614,0xE6C6C7BD,0x327A140A, + 0x45E1D006,0xC3F27B9A,0xC9AA53FD,0x62A80F00,0xBB25BFE2,0x35BDD2F6, + 0x71126905,0xB2040222,0xB6CBCF7C,0xCD769C2B,0x53113EC0,0x1640E3D3, + 0x38ABBD60,0x2547ADF0,0xBA38209C,0xF746CE76,0x77AFA1C5,0x20756060, + 0x85CBFE4E,0x8AE88DD8,0x7AAAF9B0,0x4CF9AA7E,0x1948C25C,0x02FB8A8C, + 0x01C36AE4,0xD6EBE1F9,0x90D4F869,0xA65CDEA0,0x3F09252D,0xC208E69F, + 0xB74E6132,0xCE77E25B,0x578FDFE3,0x3AC372E6 }; + +static const u32 ps[BLOWFISH_ROUNDS+2] = { + 0x243F6A88,0x85A308D3,0x13198A2E,0x03707344,0xA4093822,0x299F31D0, + 0x082EFA98,0xEC4E6C89,0x452821E6,0x38D01377,0xBE5466CF,0x34E90C6C, + 0xC0AC29B7,0xC97C50DD,0x3F84D5B5,0xB5470917,0x9216D5D9,0x8979FB1B }; + + +#ifdef USE_AMD64_ASM + +/* Assembly implementations of Blowfish. */ +extern void _gcry_blowfish_amd64_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, + u32 *ret_xr); + +extern void _gcry_blowfish_amd64_encrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +extern void _gcry_blowfish_amd64_decrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +/* These assembly implementations process four blocks in parallel. */ +extern void _gcry_blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +static inline void +call_sysv_fn (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. 
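+   *
+   * Background, briefly: the Win64 ABI passes arguments in rcx/rdx/r8/r9
+   * and treats xmm6-xmm15 as callee-saved, while the assembly above is
+   * written for the SysV ABI (rdi/rsi/rdx/rcx).  The inline-asm
+   * constraints below ("D", "S", "d", "c") pin the arguments into the
+   * SysV argument registers, "a" holds the target address for the
+   * indirect call, and the clobber list covers the remaining SysV
+   * call-clobbered registers the callee may touch.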
*/ + asm volatile ("callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : + : "cc", "memory", "r8", "r9", "r10", "r11"); +} +#endif + +static void +do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_do_encrypt, bc, ret_xl, ret_xr, NULL); +#else + _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr); +#endif +} + +static void +do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_encrypt_block, context, outbuf, inbuf, + NULL); +#else + _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf); +#endif +} + +static void +do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_decrypt_block, context, outbuf, inbuf, + NULL); +#else + _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf); +#endif +} + +static inline void +blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *ctr) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_ctr_enc, ctx, out, in, ctr); +#else + _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr); +#endif +} + +static inline void +blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_cbc_dec, ctx, out, in, iv); +#else + _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv); +#endif +} + +static inline void +blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_cfb_dec, ctx, out, in, iv); +#else + _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv); +#endif +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (2*8); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (2*8); +} + +#elif defined(USE_ARM_ASM) + +/* Assembly implementations of Blowfish. */ +extern void _gcry_blowfish_arm_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, + u32 *ret_xr); + +extern void _gcry_blowfish_arm_encrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +extern void _gcry_blowfish_arm_decrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +/* These assembly implementations process two blocks in parallel. 
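+ * Any tail of fewer than two blocks is handled by the generic C loop in
+ * the bulk functions further below.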
*/ +extern void _gcry_blowfish_arm_ctr_enc(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_blowfish_arm_cbc_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_blowfish_arm_cfb_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +static void +do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ + _gcry_blowfish_arm_do_encrypt (bc, ret_xl, ret_xr); +} + +static void +do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_blowfish_arm_encrypt_block (context, outbuf, inbuf); +} + +static void +do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_blowfish_arm_decrypt_block (context, outbuf, inbuf); +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (10*4); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (10*4); +} + +#else /*USE_ARM_ASM*/ + +#if BLOWFISH_ROUNDS != 16 +static inline u32 +function_F( BLOWFISH_context *bc, u32 x ) +{ + u16 a, b, c, d; + +#ifdef WORDS_BIGENDIAN + a = ((byte*)&x)[0]; + b = ((byte*)&x)[1]; + c = ((byte*)&x)[2]; + d = ((byte*)&x)[3]; +#else + a = ((byte*)&x)[3]; + b = ((byte*)&x)[2]; + c = ((byte*)&x)[1]; + d = ((byte*)&x)[0]; +#endif + + return ((bc->s0[a] + bc->s1[b]) ^ bc->s2[c] ) + bc->s3[d]; +} +#endif + +#ifdef WORDS_BIGENDIAN +#define F(x) ((( s0[((byte*)&x)[0]] + s1[((byte*)&x)[1]]) \ + ^ s2[((byte*)&x)[2]]) + s3[((byte*)&x)[3]] ) +#else +#define F(x) ((( s0[((byte*)&x)[3]] + s1[((byte*)&x)[2]]) \ + ^ s2[((byte*)&x)[1]]) + s3[((byte*)&x)[0]] ) +#endif +#define R(l,r,i) do { l ^= p[i]; r ^= F(l); } while(0) + + +static void +do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ +#if BLOWFISH_ROUNDS == 16 + u32 xl, xr, *s0, *s1, *s2, *s3, *p; + + xl = *ret_xl; + xr = *ret_xr; + p = bc->p; + s0 = bc->s0; + s1 = bc->s1; + s2 = bc->s2; + s3 = bc->s3; + + R( xl, xr, 0); + R( xr, xl, 1); + R( xl, xr, 2); + R( xr, xl, 3); + R( xl, xr, 4); + R( xr, xl, 5); + R( xl, xr, 6); + R( xr, xl, 7); + R( xl, xr, 8); + R( xr, xl, 9); + R( xl, xr, 10); + R( xr, xl, 11); + R( xl, xr, 12); + R( xr, xl, 13); + R( xl, xr, 14); + R( xr, xl, 15); + + xl ^= p[BLOWFISH_ROUNDS]; + xr ^= p[BLOWFISH_ROUNDS+1]; + + *ret_xl = xr; + *ret_xr = xl; + +#else + u32 xl, xr, temp, *p; + int i; + + xl = *ret_xl; + xr = *ret_xr; + p = bc->p; + + for(i=0; i < BLOWFISH_ROUNDS; i++ ) + { + xl ^= p[i]; + xr ^= function_F(bc, xl); + temp = xl; + xl = xr; + xr = temp; + } + temp = xl; + xl = xr; + xr = temp; + + xr ^= p[BLOWFISH_ROUNDS]; + xl ^= p[BLOWFISH_ROUNDS+1]; + + *ret_xl = xl; + *ret_xr = xr; +#endif +} + + +static void +decrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ +#if BLOWFISH_ROUNDS == 16 + u32 xl, xr, *s0, *s1, *s2, *s3, *p; + + xl = *ret_xl; + xr = *ret_xr; + p = bc->p; + s0 = bc->s0; + s1 = bc->s1; + s2 = bc->s2; + s3 = bc->s3; + + R( xl, xr, 17); + R( xr, xl, 16); + R( xl, xr, 15); + R( xr, xl, 14); + R( xl, xr, 13); + R( xr, xl, 12); + R( xl, xr, 11); + R( xr, xl, 10); + R( xl, xr, 9); + R( xr, xl, 8); + R( xl, xr, 7); + R( xr, xl, 6); + R( xl, xr, 5); + R( xr, xl, 4); + R( xl, xr, 3); + R( xr, xl, 2); + + xl ^= p[1]; + xr ^= p[0]; + + *ret_xl = xr; + *ret_xr = xl; + +#else + u32 
xl, xr, temp, *p; + int i; + + xl = *ret_xl; + xr = *ret_xr; + p = bc->p; + + for (i=BLOWFISH_ROUNDS+1; i > 1; i-- ) + { + xl ^= p[i]; + xr ^= function_F(bc, xl); + temp = xl; + xl = xr; + xr = temp; + } + + temp = xl; + xl = xr; + xr = temp; + + xr ^= p[1]; + xl ^= p[0]; + + *ret_xl = xl; + *ret_xr = xr; +#endif +} + +#undef F +#undef R + +static void +do_encrypt_block ( BLOWFISH_context *bc, byte *outbuf, const byte *inbuf ) +{ + u32 d1, d2; + + d1 = buf_get_be32(inbuf); + d2 = buf_get_be32(inbuf + 4); + do_encrypt( bc, &d1, &d2 ); + buf_put_be32(outbuf, d1); + buf_put_be32(outbuf + 4, d2); +} + +static unsigned int +encrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *bc = (BLOWFISH_context *) context; + do_encrypt_block (bc, outbuf, inbuf); + return /*burn_stack*/ (64); +} + + +static void +do_decrypt_block (BLOWFISH_context *bc, byte *outbuf, const byte *inbuf) +{ + u32 d1, d2; + + d1 = buf_get_be32(inbuf); + d2 = buf_get_be32(inbuf + 4); + decrypt( bc, &d1, &d2 ); + buf_put_be32(outbuf, d1); + buf_put_be32(outbuf + 4, d2); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *bc = (BLOWFISH_context *) context; + do_decrypt_block (bc, outbuf, inbuf); + return /*burn_stack*/ (64); +} + +#endif /*!USE_AMD64_ASM&&!USE_ARM_ASM*/ + + +/* Bulk encryption of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size BLOWFISH_BLOCKSIZE. */ +void +_gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + BLOWFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[BLOWFISH_BLOCKSIZE]; + int burn_stack_depth = (64) + 2 * BLOWFISH_BLOCKSIZE; + int i; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 5 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 4; + outbuf += 4 * BLOWFISH_BLOCKSIZE; + inbuf += 4 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_blowfish_arm_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 2; + outbuf += 2 * BLOWFISH_BLOCKSIZE; + inbuf += 2 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + do_encrypt_block(ctx, tmpbuf, ctr); + /* XOR the input with the encrypted counter and store in output. */ + buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE); + outbuf += BLOWFISH_BLOCKSIZE; + inbuf += BLOWFISH_BLOCKSIZE; + /* Increment the counter. */ + for (i = BLOWFISH_BLOCKSIZE; i > 0; i--) + { + ctr[i-1]++; + if (ctr[i-1]) + break; + } + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CBC mode. This function is only + intended for the bulk encryption feature of cipher.c. 
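+   As a sketch, CBC decryption of n blocks is
+
+     P[i] = D(C[i]) ^ C[i-1]        (with C[-1] = IV)
+
+   so each output depends only on already-available ciphertext; that is
+   what allows the 4-way (AMD64) and 2-way (ARM) assembly paths below to
+   decrypt blocks in parallel.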
*/ +void +_gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + BLOWFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char savebuf[BLOWFISH_BLOCKSIZE]; + int burn_stack_depth = (64) + 2 * BLOWFISH_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 5 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 4; + outbuf += 4 * BLOWFISH_BLOCKSIZE; + inbuf += 4 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_blowfish_arm_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * BLOWFISH_BLOCKSIZE; + inbuf += 2 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + do_decrypt_block (ctx, savebuf, inbuf); + + buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE); + inbuf += BLOWFISH_BLOCKSIZE; + outbuf += BLOWFISH_BLOCKSIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + BLOWFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = (64) + 2 * BLOWFISH_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 5 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 4; + outbuf += 4 * BLOWFISH_BLOCKSIZE; + inbuf += 4 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_blowfish_arm_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * BLOWFISH_BLOCKSIZE; + inbuf += 2 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + do_encrypt_block(ctx, iv, iv); + buf_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE); + outbuf += BLOWFISH_BLOCKSIZE; + inbuf += BLOWFISH_BLOCKSIZE; + } + + _gcry_burn_stack(burn_stack_depth); +} + + +/* Run the self-tests for BLOWFISH-CTR, tests IV increment of bulk CTR + encryption. Returns NULL on success. */ +static const char * +selftest_ctr (void) +{ + const int nblocks = 4+1; + const int blocksize = BLOWFISH_BLOCKSIZE; + const int context_size = sizeof(BLOWFISH_context); + + return _gcry_selftest_helper_ctr("BLOWFISH", &bf_setkey, + &encrypt_block, &_gcry_blowfish_ctr_enc, nblocks, blocksize, + context_size); +} + + +/* Run the self-tests for BLOWFISH-CBC, tests bulk CBC decryption. + Returns NULL on success. 
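+   The block count of 4+2 used below is chosen so that both the
+   multi-block assembly paths (4 blocks on AMD64, 2 on ARM) and the
+   generic tail loop of the bulk function are exercised.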
*/
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = BLOWFISH_BLOCKSIZE;
+  const int context_size = sizeof(BLOWFISH_context);
+
+  return _gcry_selftest_helper_cbc("BLOWFISH", &bf_setkey,
+           &encrypt_block, &_gcry_blowfish_cbc_dec, nblocks, blocksize,
+           context_size);
+}
+
+
+/* Run the self-tests for BLOWFISH-CFB, tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = BLOWFISH_BLOCKSIZE;
+  const int context_size = sizeof(BLOWFISH_context);
+
+  return _gcry_selftest_helper_cfb("BLOWFISH", &bf_setkey,
+           &encrypt_block, &_gcry_blowfish_cfb_dec, nblocks, blocksize,
+           context_size);
+}
+
+
+static const char*
+selftest(void)
+{
+  BLOWFISH_context c;
+  byte plain[] = "BLOWFISH";
+  byte buffer[8];
+  static const byte plain3[] =
+    { 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10 };
+  static const byte key3[] =
+    { 0x41, 0x79, 0x6E, 0xA0, 0x52, 0x61, 0x6E, 0xE4 };
+  static const byte cipher3[] =
+    { 0xE1, 0x13, 0xF4, 0x10, 0x2C, 0xFC, 0xCE, 0x43 };
+  const char *r;
+
+  bf_setkey( (void *) &c,
+             (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26 );
+  encrypt_block( (void *) &c, buffer, plain );
+  if( memcmp( buffer, "\x32\x4E\xD0\xFE\xF4\x13\xA2\x03", 8 ) )
+    return "Blowfish selftest failed (1).";
+  decrypt_block( (void *) &c, buffer, buffer );
+  if( memcmp( buffer, plain, 8 ) )
+    return "Blowfish selftest failed (2).";
+
+  bf_setkey( (void *) &c, key3, 8 );
+  encrypt_block( (void *) &c, buffer, plain3 );
+  if( memcmp( buffer, cipher3, 8 ) )
+    return "Blowfish selftest failed (3).";
+  decrypt_block( (void *) &c, buffer, buffer );
+  if( memcmp( buffer, plain3, 8 ) )
+    return "Blowfish selftest failed (4).";
+
+  if ( (r = selftest_cbc ()) )
+    return r;
+
+  if ( (r = selftest_cfb ()) )
+    return r;
+
+  if ( (r = selftest_ctr ()) )
+    return r;
+
+  return NULL;
+}
+
+
+struct hashset_elem {
+  u32 val;
+  short nidx;
+  char used;
+};
+
+static inline byte
+val_to_hidx(u32 val)
+{
+  /* bf sboxes are quite random already. */
+  return (val >> 24) ^ (val >> 16) ^ (val >> 8) ^ val;
+}
+
+static inline int
+add_val(struct hashset_elem hset[256], u32 val, int *midx,
+        struct hashset_elem *mpool)
+{
+  struct hashset_elem *elem;
+  byte hidx;
+
+  hidx = val_to_hidx(val);
+  elem = &hset[hidx];
+
+  /* Check if first is in use. */
+  if (elem->used == 0)
+    {
+      elem->val = val;
+      elem->nidx = -1;
+      elem->used = 1;
+      return 0;
+    }
+
+  /* Check if first matches. */
+  if (elem->val == val)
+    return 1;
+
+  for (; elem->nidx >= 0; elem = &mpool[elem->nidx])
+    {
+      /* Check if elem matches. */
+      if (elem->val == val)
+        return 1;
+    }
+
+  elem->nidx = (*midx)++;
+  elem = &mpool[elem->nidx];
+
+  elem->val = val;
+  elem->nidx = -1;
+  elem->used = 1;
+
+  return 0;
+}
+
+static gcry_err_code_t
+do_bf_setkey (BLOWFISH_context *c, const byte *key, unsigned keylen)
+{
+  struct hashset_elem mempool[4 * 255]; /* Enough entries for the worst case.
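+     Each of the four hash sets receives 256 32-bit values into 256
+     first-level buckets, so at most 255 values per set can spill into
+     chained pool entries; 4 * 255 therefore covers the worst case.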
*/
+  struct hashset_elem hset[4][256];
+  int memidx = 0;
+  int weak = 0;
+  int i, j, ret;
+  u32 data, datal, datar;
+  static int initialized;
+  static const char *selftest_failed;
+
+  if( !initialized )
+    {
+      initialized = 1;
+      selftest_failed = selftest();
+      if( selftest_failed )
+        log_error ("%s\n", selftest_failed );
+    }
+  if( selftest_failed )
+    return GPG_ERR_SELFTEST_FAILED;
+
+  memset(hset, 0, sizeof(hset));
+
+  for(i=0; i < BLOWFISH_ROUNDS+2; i++ )
+    c->p[i] = ps[i];
+  for(i=0; i < 256; i++ )
+    {
+      c->s0[i] = ks0[i];
+      c->s1[i] = ks1[i];
+      c->s2[i] = ks2[i];
+      c->s3[i] = ks3[i];
+    }
+
+  for(i=j=0; i < BLOWFISH_ROUNDS+2; i++ )
+    {
+      data = ((u32)key[j] << 24) |
+             ((u32)key[(j+1)%keylen] << 16) |
+             ((u32)key[(j+2)%keylen] << 8) |
+             ((u32)key[(j+3)%keylen]);
+      c->p[i] ^= data;
+      j = (j+4) % keylen;
+    }
+
+  datal = datar = 0;
+  for(i=0; i < BLOWFISH_ROUNDS+2; i += 2 )
+    {
+      do_encrypt( c, &datal, &datar );
+      c->p[i] = datal;
+      c->p[i+1] = datar;
+    }
+  for(i=0; i < 256; i += 2 )
+    {
+      do_encrypt( c, &datal, &datar );
+      c->s0[i] = datal;
+      c->s0[i+1] = datar;
+
+      /* Add values to hashset, detect duplicates (weak keys). */
+      ret = add_val (hset[0], datal, &memidx, mempool);
+      weak = ret ? 1 : weak;
+      ret = add_val (hset[0], datar, &memidx, mempool);
+      weak = ret ? 1 : weak;
+    }
+  for(i=0; i < 256; i += 2 )
+    {
+      do_encrypt( c, &datal, &datar );
+      c->s1[i] = datal;
+      c->s1[i+1] = datar;
+
+      /* Add values to hashset, detect duplicates (weak keys). */
+      ret = add_val (hset[1], datal, &memidx, mempool);
+      weak = ret ? 1 : weak;
+      ret = add_val (hset[1], datar, &memidx, mempool);
+      weak = ret ? 1 : weak;
+    }
+  for(i=0; i < 256; i += 2 )
+    {
+      do_encrypt( c, &datal, &datar );
+      c->s2[i] = datal;
+      c->s2[i+1] = datar;
+
+      /* Add values to hashset, detect duplicates (weak keys). */
+      ret = add_val (hset[2], datal, &memidx, mempool);
+      weak = ret ? 1 : weak;
+      ret = add_val (hset[2], datar, &memidx, mempool);
+      weak = ret ? 1 : weak;
+    }
+  for(i=0; i < 256; i += 2 )
+    {
+      do_encrypt( c, &datal, &datar );
+      c->s3[i] = datal;
+      c->s3[i+1] = datar;
+
+      /* Add values to hashset, detect duplicates (weak keys). */
+      ret = add_val (hset[3], datal, &memidx, mempool);
+      weak = ret ? 1 : weak;
+      ret = add_val (hset[3], datar, &memidx, mempool);
+      weak = ret ? 1 : weak;
+    }
+
+  /* Clear stack. */
+  wipememory(hset, sizeof(hset));
+  wipememory(mempool, sizeof(mempool[0]) * memidx);
+
+  _gcry_burn_stack (64);
+
+  /* Check for weak key.  A key is weak when the key schedule generated a
+     duplicate entry within one of the four S-box tables; the hash sets
+     above detect exactly this. */
+  if (weak)
+    return GPG_ERR_WEAK_KEY;
+
+  return GPG_ERR_NO_ERROR;
+}
+
+
+static gcry_err_code_t
+bf_setkey (void *context, const byte *key, unsigned keylen)
+{
+  BLOWFISH_context *c = (BLOWFISH_context *) context;
+  gcry_err_code_t rc = do_bf_setkey (c, key, keylen);
+  return rc;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_blowfish =
+  {
+    GCRY_CIPHER_BLOWFISH, {0, 0},
+    "BLOWFISH", NULL, NULL, BLOWFISH_BLOCKSIZE, 128,
+    sizeof (BLOWFISH_context),
+    bf_setkey, encrypt_block, decrypt_block
+  };
diff --git a/libotr/libgcrypt-1.8.7/cipher/bufhelp.h b/libotr/libgcrypt-1.8.7/cipher/bufhelp.h
new file mode 100644
index 0000000..83d3f53
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/bufhelp.h
@@ -0,0 +1,456 @@
+/* bufhelp.h - Some buffer manipulation helpers
+ * Copyright (C) 2012-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GCRYPT_BUFHELP_H
+#define GCRYPT_BUFHELP_H
+
+
+#include "g10lib.h"
+#include "bithelp.h"
+
+
+#undef BUFHELP_UNALIGNED_ACCESS
+#if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
+    defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \
+    defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS)
+/* The compiler supports the attributes needed for automatically issuing
+   unaligned memory access instructions.
+ */
+# define BUFHELP_UNALIGNED_ACCESS 1
+#endif
+
+
+#undef BUFHELP_FAST_UNALIGNED_ACCESS
+#if defined(BUFHELP_UNALIGNED_ACCESS) && \
+    (defined(__i386__) || defined(__x86_64__) || \
+     (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
+     defined(__aarch64__))
+/* These architectures support unaligned memory accesses and handle
+   them fast.
+ */
+# define BUFHELP_FAST_UNALIGNED_ACCESS 1
+#endif
+
+
+#ifdef BUFHELP_FAST_UNALIGNED_ACCESS
+/* Define type with one-byte alignment on architectures with fast unaligned
+   memory accesses.
+ */
+typedef struct bufhelp_int_s
+{
+  uintptr_t a;
+} __attribute__((packed, aligned(1), may_alias)) bufhelp_int_t;
+#else
+/* Define type with default alignment for other architectures (unaligned
+   accesses are handled in per-byte loops).
+ */
+#ifdef HAVE_GCC_ATTRIBUTE_MAY_ALIAS
+typedef struct bufhelp_int_s
+{
+  uintptr_t a;
+} __attribute__((may_alias)) bufhelp_int_t;
+#else
+typedef struct bufhelp_int_s
+{
+  uintptr_t a;
+} bufhelp_int_t;
+#endif
+#endif
+
+
+/* Optimized function for small buffer copying */
+static inline void
+buf_cpy(void *_dst, const void *_src, size_t len)
+{
+#if __GNUC__ >= 4 && (defined(__x86_64__) || defined(__i386__))
+  /* For AMD64 and i386, memcpy is faster. */
+  memcpy(_dst, _src, len);
+#else
+  byte *dst = _dst;
+  const byte *src = _src;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc;
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
+
+  /* Skip fast processing if buffers are unaligned. */
+  if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src) & longmask))
+    goto do_bytes;
+#endif
+
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
+
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a = (lsrc++)->a;
+
+  dst = (byte *)ldst;
+  src = (const byte *)lsrc;
+
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+do_bytes:
+#endif
+  /* Handle tail. */
+  for (; len; len--)
+    *dst++ = *src++;
+#endif /*__GNUC__ >= 4 && (__x86_64__ || __i386__)*/
+}
+
+
+/* Optimized function for buffer xoring */
+static inline void
+buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
+{
+  byte *dst = _dst;
+  const byte *src1 = _src1;
+  const byte *src2 = _src2;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc1, *lsrc2;
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
+
+  /* Skip fast processing if buffers are unaligned.
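+     On strict-alignment targets a word-sized access through bufhelp_int_t
+     would be undefined, so a single misaligned pointer routes the whole
+     call through the per-byte loop instead.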
*/
+  if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask))
+    goto do_bytes;
+#endif
+
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc1 = (const bufhelp_int_t *)(const void *)src1;
+  lsrc2 = (const bufhelp_int_t *)(const void *)src2;
+
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a;
+
+  dst = (byte *)ldst;
+  src1 = (const byte *)lsrc1;
+  src2 = (const byte *)lsrc2;
+
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+do_bytes:
+#endif
+  /* Handle tail. */
+  for (; len; len--)
+    *dst++ = *src1++ ^ *src2++;
+}
+
+
+/* Optimized function for in-place buffer xoring. */
+static inline void
+buf_xor_1(void *_dst, const void *_src, size_t len)
+{
+  byte *dst = _dst;
+  const byte *src = _src;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc;
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
+
+  /* Skip fast processing if buffers are unaligned. */
+  if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src) & longmask))
+    goto do_bytes;
+#endif
+
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
+
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a ^= (lsrc++)->a;
+
+  dst = (byte *)ldst;
+  src = (const byte *)lsrc;
+
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+do_bytes:
+#endif
+  /* Handle tail. */
+  for (; len; len--)
+    *dst++ ^= *src++;
+}
+
+
+/* Optimized function for buffer xoring with two destination buffers.  Used
+   mainly by CFB mode encryption. */
+static inline void
+buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
+{
+  byte *dst1 = _dst1;
+  byte *dst2 = _dst2;
+  const byte *src = _src;
+  bufhelp_int_t *ldst1, *ldst2;
+  const bufhelp_int_t *lsrc;
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
+
+  /* Skip fast processing if buffers are unaligned. */
+  if (UNLIKELY(((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask))
+    goto do_bytes;
+#endif
+
+  ldst1 = (bufhelp_int_t *)(void *)dst1;
+  ldst2 = (bufhelp_int_t *)(void *)dst2;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
+
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a);
+
+  dst1 = (byte *)ldst1;
+  dst2 = (byte *)ldst2;
+  src = (const byte *)lsrc;
+
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+do_bytes:
+#endif
+  /* Handle tail. */
+  for (; len; len--)
+    *dst1++ = (*dst2++ ^= *src++);
+}
+
+
+/* Optimized function for combined buffer xoring and copying.  Used mainly
+   by CBC mode decryption. */
+static inline void
+buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
+                 const void *_src_cpy, size_t len)
+{
+  byte *dst_xor = _dst_xor;
+  byte *srcdst_cpy = _srcdst_cpy;
+  const byte *src_xor = _src_xor;
+  const byte *src_cpy = _src_cpy;
+  byte temp;
+  bufhelp_int_t *ldst_xor, *lsrcdst_cpy;
+  const bufhelp_int_t *lsrc_cpy, *lsrc_xor;
+  uintptr_t ltemp;
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
+
+  /* Skip fast processing if buffers are unaligned.
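+     All four buffers feed the word-wise loop below, so every one of the
+     four pointers is OR-ed into this single alignment test.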
*/
+  if (UNLIKELY(((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor |
+                (uintptr_t)srcdst_cpy) & longmask))
+    goto do_bytes;
+#endif
+
+  ldst_xor = (bufhelp_int_t *)(void *)dst_xor;
+  lsrc_xor = (const bufhelp_int_t *)(void *)src_xor;
+  lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy;
+  lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy;
+
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    {
+      ltemp = (lsrc_cpy++)->a;
+      (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a;
+      (lsrcdst_cpy++)->a = ltemp;
+    }
+
+  dst_xor = (byte *)ldst_xor;
+  src_xor = (const byte *)lsrc_xor;
+  srcdst_cpy = (byte *)lsrcdst_cpy;
+  src_cpy = (const byte *)lsrc_cpy;
+
+#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+do_bytes:
+#endif
+  /* Handle tail. */
+  for (; len; len--)
+    {
+      temp = *src_cpy++;
+      *dst_xor++ = *srcdst_cpy ^ *src_xor++;
+      *srcdst_cpy++ = temp;
+    }
+}
+
+
+/* Optimized function for combined buffer xoring and copying.  Used mainly
+   by CFB mode decryption. */
+static inline void
+buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len)
+{
+  buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len);
+}
+
+
+/* Constant-time compare of two buffers.  Returns 1 if buffers are equal,
+   and 0 if buffers differ. */
+static inline int
+buf_eq_const(const void *_a, const void *_b, size_t len)
+{
+  const byte *a = _a;
+  const byte *b = _b;
+  int ab, ba;
+  size_t i;
+
+  /* Constant-time compare. */
+  for (i = 0, ab = 0, ba = 0; i < len; i++)
+    {
+      /* If a[i] != b[i], either ab or ba will be negative. */
+      ab |= a[i] - b[i];
+      ba |= b[i] - a[i];
+    }
+
+  /* 'ab | ba' is negative when buffers are not equal. */
+  return (ab | ba) >= 0;
+}
+
+
+#ifndef BUFHELP_UNALIGNED_ACCESS
+
+/* Functions for loading and storing unaligned u32 values of different
+   endianness. */
+static inline u32 buf_get_be32(const void *_buf)
+{
+  const byte *in = _buf;
+  return ((u32)in[0] << 24) | ((u32)in[1] << 16) | \
+         ((u32)in[2] << 8) | (u32)in[3];
+}
+
+static inline u32 buf_get_le32(const void *_buf)
+{
+  const byte *in = _buf;
+  return ((u32)in[3] << 24) | ((u32)in[2] << 16) | \
+         ((u32)in[1] << 8) | (u32)in[0];
+}
+
+static inline void buf_put_be32(void *_buf, u32 val)
+{
+  byte *out = _buf;
+  out[0] = val >> 24;
+  out[1] = val >> 16;
+  out[2] = val >> 8;
+  out[3] = val;
+}
+
+static inline void buf_put_le32(void *_buf, u32 val)
+{
+  byte *out = _buf;
+  out[3] = val >> 24;
+  out[2] = val >> 16;
+  out[1] = val >> 8;
+  out[0] = val;
+}
+
+
+/* Functions for loading and storing unaligned u64 values of different
+   endianness.
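+   These fall back to per-byte shifts, since this branch is compiled only
+   when BUFHELP_UNALIGNED_ACCESS is not available.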
*/ +static inline u64 buf_get_be64(const void *_buf) +{ + const byte *in = _buf; + return ((u64)in[0] << 56) | ((u64)in[1] << 48) | \ + ((u64)in[2] << 40) | ((u64)in[3] << 32) | \ + ((u64)in[4] << 24) | ((u64)in[5] << 16) | \ + ((u64)in[6] << 8) | (u64)in[7]; +} + +static inline u64 buf_get_le64(const void *_buf) +{ + const byte *in = _buf; + return ((u64)in[7] << 56) | ((u64)in[6] << 48) | \ + ((u64)in[5] << 40) | ((u64)in[4] << 32) | \ + ((u64)in[3] << 24) | ((u64)in[2] << 16) | \ + ((u64)in[1] << 8) | (u64)in[0]; +} + +static inline void buf_put_be64(void *_buf, u64 val) +{ + byte *out = _buf; + out[0] = val >> 56; + out[1] = val >> 48; + out[2] = val >> 40; + out[3] = val >> 32; + out[4] = val >> 24; + out[5] = val >> 16; + out[6] = val >> 8; + out[7] = val; +} + +static inline void buf_put_le64(void *_buf, u64 val) +{ + byte *out = _buf; + out[7] = val >> 56; + out[6] = val >> 48; + out[5] = val >> 40; + out[4] = val >> 32; + out[3] = val >> 24; + out[2] = val >> 16; + out[1] = val >> 8; + out[0] = val; +} + +#else /*BUFHELP_UNALIGNED_ACCESS*/ + +typedef struct bufhelp_u32_s +{ + u32 a; +} __attribute__((packed, aligned(1), may_alias)) bufhelp_u32_t; + +/* Functions for loading and storing unaligned u32 values of different + endianness. */ +static inline u32 buf_get_be32(const void *_buf) +{ + return be_bswap32(((const bufhelp_u32_t *)_buf)->a); +} + +static inline u32 buf_get_le32(const void *_buf) +{ + return le_bswap32(((const bufhelp_u32_t *)_buf)->a); +} + +static inline void buf_put_be32(void *_buf, u32 val) +{ + bufhelp_u32_t *out = _buf; + out->a = be_bswap32(val); +} + +static inline void buf_put_le32(void *_buf, u32 val) +{ + bufhelp_u32_t *out = _buf; + out->a = le_bswap32(val); +} + + +typedef struct bufhelp_u64_s +{ + u64 a; +} __attribute__((packed, aligned(1), may_alias)) bufhelp_u64_t; + +/* Functions for loading and storing unaligned u64 values of different + endianness. */ +static inline u64 buf_get_be64(const void *_buf) +{ + return be_bswap64(((const bufhelp_u64_t *)_buf)->a); +} + +static inline u64 buf_get_le64(const void *_buf) +{ + return le_bswap64(((const bufhelp_u64_t *)_buf)->a); +} + +static inline void buf_put_be64(void *_buf, u64 val) +{ + bufhelp_u64_t *out = _buf; + out->a = be_bswap64(val); +} + +static inline void buf_put_le64(void *_buf, u64 val) +{ + bufhelp_u64_t *out = _buf; + out->a = le_bswap64(val); +} + + +#endif /*BUFHELP_UNALIGNED_ACCESS*/ + +#endif /*GCRYPT_BUFHELP_H*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/camellia-aarch64.S b/libotr/libgcrypt-1.8.7/cipher/camellia-aarch64.S new file mode 100644 index 0000000..440f69f --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/camellia-aarch64.S @@ -0,0 +1,557 @@ +/* camellia-aarch64.S - ARMv8/AArch64 assembly implementation of Camellia + * cipher + * + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__AARCH64EL__) +#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS + +.text + +/* struct camellia_ctx: */ +#define key_table 0 + +/* register macros */ +#define CTX x0 +#define RDST x1 +#define RSRC x2 +#define RKEYBITS x3 + +#define RTAB1 x4 +#define RTAB2 x5 +#define RTAB3 x6 +#define RTAB4 x7 +#define RMASK w8 + +#define IL w9 +#define IR w10 + +#define xIL x9 +#define xIR x10 + +#define XL w11 +#define XR w12 +#define YL w13 +#define YR w14 + +#define RT0 w15 +#define RT1 w16 +#define RT2 w17 +#define RT3 w18 + +#define xRT0 x15 +#define xRT1 x16 +#define xRT2 x17 +#define xRT3 x18 + +#ifdef __AARCH64EL__ + #define host_to_be(reg, rtmp) \ + rev reg, reg; + #define be_to_host(reg, rtmp) \ + rev reg, reg; +#else + /* nop on big-endian */ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ +#endif + +#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \ + ldr a, [rin, #0]; \ + ldr b, [rin, #4]; \ + be_to_host(a, rtmp); \ + ldr c, [rin, #8]; \ + be_to_host(b, rtmp); \ + ldr d, [rin, #12]; \ + be_to_host(c, rtmp); \ + be_to_host(d, rtmp); + +#define str_output_aligned_be(rout, a, b, c, d, rtmp) \ + be_to_host(a, rtmp); \ + be_to_host(b, rtmp); \ + str a, [rout, #0]; \ + be_to_host(c, rtmp); \ + str b, [rout, #4]; \ + be_to_host(d, rtmp); \ + str c, [rout, #8]; \ + str d, [rout, #12]; + +/* unaligned word reads/writes allowed */ +#define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \ + ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp) + +#define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ + str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0) + +/********************************************************************** + 1-way camellia + **********************************************************************/ +#define roundsm(xl, xr, kl, kr, yl, yr) \ + ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \ + and IR, RMASK, xr, lsl#(4); /*sp1110*/ \ + ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \ + and IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \ + and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \ + ldr IR, [RTAB1, xIR]; \ + and RT1, RMASK, xl, lsr#(8 - 4); /*sp3033*/ \ + eor yl, yl, RT2; \ + ldr IL, [RTAB1, xIL]; \ + eor yr, yr, RT3; \ + \ + ldr RT0, [RTAB3, xRT0]; \ + ldr RT1, [RTAB3, xRT1]; \ + \ + and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \ + and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \ + \ + eor IR, IR, RT0; \ + eor IL, IL, RT1; \ + \ + ldr RT2, [RTAB2, xRT2]; \ + and RT0, RMASK, xr, lsr#(8 - 4); /*sp4404*/ \ + ldr RT3, [RTAB2, xRT3]; \ + and RT1, RMASK, xl, lsl#(4); /*sp4404*/ \ + \ + ldr RT0, [RTAB4, xRT0]; \ + ldr RT1, [RTAB4, xRT1]; \ + \ + eor IR, IR, RT2; \ + eor IL, IL, RT3; \ + eor IR, IR, RT0; \ + eor IL, IL, RT1; \ + \ + eor IR, IR, IL; \ + eor yr, yr, IL, ror#8; \ + eor yl, yl, IR; \ + eor yr, yr, IR; + +#define enc_rounds(n) \ + roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR); + +#define dec_rounds(n) \ + roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \ + 
roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR); + +/* perform FL and FL⁻¹ */ +#define fls(ll, lr, rl, rr, kll, klr, krl, krr) \ + ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \ + ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \ + and RT0, RT0, ll; \ + ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \ + orr RT2, RT2, rr; \ + ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \ + eor rl, rl, RT2; \ + eor lr, lr, RT0, ror#31; \ + and RT3, RT3, rl; \ + orr RT1, RT1, lr; \ + eor ll, ll, RT1; \ + eor rr, rr, RT3, ror#31; + +#define enc_fls(n) \ + fls(XL, XR, YL, YR, \ + (n) * 2 + 0, (n) * 2 + 1, \ + (n) * 2 + 2, (n) * 2 + 3); + +#define dec_fls(n) \ + fls(XL, XR, YL, YR, \ + (n) * 2 + 2, (n) * 2 + 3, \ + (n) * 2 + 0, (n) * 2 + 1); + +#define inpack(n) \ + ldr_input_be(RSRC, XL, XR, YL, YR, RT0); \ + ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ + ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ + eor XL, XL, RT0; \ + eor XR, XR, RT1; + +#define outunpack(n) \ + ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ + ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ + eor YL, YL, RT0; \ + eor YR, YR, RT1; \ + str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); + +.globl _gcry_camellia_arm_encrypt_block +.type _gcry_camellia_arm_encrypt_block,@function; + +_gcry_camellia_arm_encrypt_block: + /* input: + * x0: keytable + * x1: dst + * x2: src + * x3: keybitlen + */ + + adr RTAB1, _gcry_camellia_arm_tables; + mov RMASK, #(0xff<<4); /* byte mask */ + add RTAB2, RTAB1, #(1 * 4); + add RTAB3, RTAB1, #(2 * 4); + add RTAB4, RTAB1, #(3 * 4); + + inpack(0); + + enc_rounds(0); + enc_fls(8); + enc_rounds(8); + enc_fls(16); + enc_rounds(16); + + cmp RKEYBITS, #(16 * 8); + bne .Lenc_256; + + outunpack(24); + + ret; +.ltorg + +.Lenc_256: + enc_fls(24); + enc_rounds(24); + + outunpack(32); + + ret; +.ltorg +.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; + +.globl _gcry_camellia_arm_decrypt_block +.type _gcry_camellia_arm_decrypt_block,@function; + +_gcry_camellia_arm_decrypt_block: + /* input: + * x0: keytable + * x1: dst + * x2: src + * x3: keybitlen + */ + + adr RTAB1, _gcry_camellia_arm_tables; + mov RMASK, #(0xff<<4); /* byte mask */ + add RTAB2, RTAB1, #(1 * 4); + add RTAB3, RTAB1, #(2 * 4); + add RTAB4, RTAB1, #(3 * 4); + + cmp RKEYBITS, #(16 * 8); + bne .Ldec_256; + + inpack(24); + +.Ldec_128: + dec_rounds(16); + dec_fls(16); + dec_rounds(8); + dec_fls(8); + dec_rounds(0); + + outunpack(0); + + ret; +.ltorg + +.Ldec_256: + inpack(32); + dec_rounds(24); + dec_fls(24); + + b .Ldec_128; +.ltorg +.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; + +/* Encryption/Decryption tables */ +.globl _gcry_camellia_arm_tables +.type _gcry_camellia_arm_tables,@object; +.balign 32 +_gcry_camellia_arm_tables: +.Lcamellia_sp1110: +.long 0x70707000 +.Lcamellia_sp0222: + .long 0x00e0e0e0 +.Lcamellia_sp3033: + .long 0x38003838 +.Lcamellia_sp4404: + .long 0x70700070 +.long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c +.long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3 +.long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0 +.long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4 +.long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057 +.long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea +.long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae +.long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023 
+.long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b +.long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045 +.long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5 +.long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed +.long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f +.long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d +.long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092 +.long 0x23232300, 0x00464646, 0x91009191, 0x86860086 +.long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af +.long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c +.long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f +.long 0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e +.long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc +.long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e +.long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b +.long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6 +.long 0x0e0e0e00, 0x001c1c1c, 0x07000707, 0x39390039 +.long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5 +.long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d +.long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9 +.long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a +.long 0x92929200, 0x00252525, 0x49004949, 0x51510051 +.long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c +.long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b +.long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a +.long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb +.long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0 +.long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074 +.long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b +.long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0 +.long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084 +.long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df +.long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb +.long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034 +.long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076 +.long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d +.long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9 +.long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1 +.long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004 +.long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014 +.long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a +.long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de +.long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011 +.long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032 +.long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c +.long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053 +.long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2 +.long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe +.long 0x01010100, 0x00020202, 0x80008080, 0xcfcf00cf +.long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3 +.long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a +.long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024 +.long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8 +.long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060 +.long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069 +.long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa +.long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0 +.long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1 +.long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062 +.long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054 +.long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e +.long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0 +.long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064 +.long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010 +.long 0x12121200, 0x00242424, 0x09000909, 0x00000000 +.long 0x2b2b2b00, 0x00565656, 0x95009595, 
0xa3a300a3 +.long 0x20202000, 0x00404040, 0x10001010, 0x75750075 +.long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a +.long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6 +.long 0x84848400, 0x00090909, 0x42004242, 0x09090009 +.long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd +.long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087 +.long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083 +.long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd +.long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090 +.long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073 +.long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6 +.long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d +.long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf +.long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052 +.long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8 +.long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8 +.long 0x31313100, 0x00626262, 0x98009898, 0xc6c600c6 +.long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081 +.long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f +.long 0x04040400, 0x00080808, 0x02000202, 0x13130013 +.long 0xd7d7d700, 0x00afafaf, 0xeb00ebeb, 0x63630063 +.long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9 +.long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7 +.long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f +.long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc +.long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029 +.long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9 +.long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f +.long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4 +.long 0x32323200, 0x00646464, 0x19001919, 0x78780078 +.long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006 +.long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 0xe7e700e7 +.long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071 +.long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4 +.long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab +.long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088 +.long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d +.long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072 +.long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9 +.long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8 +.long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac +.long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036 +.long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a +.long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c +.long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1 +.long 0x24242400, 0x00484848, 0x12001212, 0x40400040 +.long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3 +.long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb +.long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043 +.long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015 +.long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad +.long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077 +.long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080 +.long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082 +.long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec +.long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027 +.long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5 +.long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085 +.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035 +.long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c +.long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041 +.long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef +.long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093 +.long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019 +.long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021 +.long 0xe0e0e000, 0x00c1c1c1, 
0x70007070, 0x0e0e000e +.long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e +.long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065 +.long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd +.long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8 +.long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f +.long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb +.long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce +.long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030 +.long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f +.long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5 +.long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a +.long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1 +.long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca +.long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047 +.long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d +.long 0x09090900, 0x00121212, 0x84008484, 0x01010001 +.long 0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6 +.long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056 +.long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d +.long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d +.long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066 +.long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc +.long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d +.long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012 +.long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020 +.long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1 +.long 0x33333300, 0x00666666, 0x99009999, 0x99990099 +.long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c +.long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2 +.long 0xf6f6f600, 0x00ededed, 0x7b007b7b, 0x7e7e007e +.long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005 +.long 0x9d9d9d00, 0x003b3b3b, 0xce00cece, 0xb7b700b7 +.long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031 +.long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017 +.long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7 +.long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058 +.long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061 +.long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b +.long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c +.long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f +.long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016 +.long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018 +.long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022 +.long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044 +.long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2 +.long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5 +.long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091 +.long 0x13131300, 0x00262626, 0x89008989, 0x08080008 +.long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8 +.long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc +.long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050 +.long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0 +.long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d +.long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089 +.long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097 +.long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b +.long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095 +.long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff +.long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2 +.long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4 +.long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048 +.long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7 +.long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db +.long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003 +.long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da +.long 0xb4b4b400, 
0x00696969, 0x5a005a5a, 0x3f3f003f +.long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094 +.long 0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c +.long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002 +.long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a +.long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033 +.long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067 +.long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3 +.long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f +.long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2 +.long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b +.long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026 +.long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037 +.long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b +.long 0x88888800, 0x00111111, 0x44004444, 0x96960096 +.long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b +.long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 0xbebe00be +.long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e +.long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079 +.long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c +.long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e +.long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e +.long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5 +.long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6 +.long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd +.long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059 +.long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098 +.long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a +.long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046 +.long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba +.long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025 +.long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042 +.long 0xf1f1f100, 0x00e3e3e3, 0xf800f8f8, 0xa2a200a2 +.long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa +.long 0x40404000, 0x00808080, 0x20002020, 0x07070007 +.long 0x28282800, 0x00505050, 0x14001414, 0x55550055 +.long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee +.long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a +.long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049 +.long 0xc9c9c900, 0x00939393, 0xe400e4e4, 0x68680068 +.long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038 +.long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4 +.long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028 +.long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b +.long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9 +.long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1 +.long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3 +.long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 +.long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 +.long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e +.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables; + +#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ +#endif /*__AARCH64EL__*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/camellia-aesni-avx-amd64.S b/libotr/libgcrypt-1.8.7/cipher/camellia-aesni-avx-amd64.S new file mode 100644 index 0000000..8022934 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/camellia-aesni-avx-amd64.S @@ -0,0 +1,2591 @@ +/* camellia-avx-aesni-amd64.S - AES-NI/AVX implementation of Camellia cipher + * + * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#define CAMELLIA_TABLE_BYTE_LEN 272 + +/* struct CAMELLIA_context: */ +#define key_table 0 +#define key_bitlength CAMELLIA_TABLE_BYTE_LEN + +/* register macros */ +#define CTX %rdi +#define RIO %r8 + +/********************************************************************** + helper macros + **********************************************************************/ +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/********************************************************************** + 16-way camellia + **********************************************************************/ + +/* + * IN: + * x0..x7: byte-sliced AB state + * mem_cd: register pointer storing CD state + * key: index for key material + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \ + t7, mem_cd, key) \ + /* \ + * S-function with AES subbytes \ + */ \ + vmovdqa .Linv_shift_row RIP, t4; \ + vbroadcastss .L0f0f0f0f RIP, t7; \ + vmovdqa .Lpre_tf_lo_s1 RIP, t0; \ + vmovdqa .Lpre_tf_hi_s1 RIP, t1; \ + \ + /* AES inverse shift rows */ \ + vpshufb t4, x0, x0; \ + vpshufb t4, x7, x7; \ + vpshufb t4, x1, x1; \ + vpshufb t4, x4, x4; \ + vpshufb t4, x2, x2; \ + vpshufb t4, x5, x5; \ + vpshufb t4, x3, x3; \ + vpshufb t4, x6, x6; \ + \ + /* prefilter sboxes 1, 2 and 3 */ \ + vmovdqa .Lpre_tf_lo_s4 RIP, t2; \ + vmovdqa .Lpre_tf_hi_s4 RIP, t3; \ + filter_8bit(x0, t0, t1, t7, t6); \ + filter_8bit(x7, t0, t1, t7, t6); \ + filter_8bit(x1, t0, t1, t7, t6); \ + filter_8bit(x4, t0, t1, t7, t6); \ + filter_8bit(x2, t0, t1, t7, t6); \ + filter_8bit(x5, t0, t1, t7, t6); \ + \ + /* prefilter sbox 4 */ \ + vpxor t4, t4, t4; \ + filter_8bit(x3, t2, t3, t7, t6); \ + filter_8bit(x6, t2, t3, t7, t6); \ + \ + /* AES subbytes + AES shift rows */ \ + vmovdqa .Lpost_tf_lo_s1 RIP, t0; \ + vmovdqa .Lpost_tf_hi_s1 RIP, t1; \ + vaesenclast t4, x0, x0; \ + vaesenclast t4, x7, x7; \ + vaesenclast t4, x1, x1; \ + vaesenclast t4, x4, x4; \ + vaesenclast t4, x2, x2; \ + vaesenclast t4, x5, x5; \ + vaesenclast t4, x3, x3; \ + vaesenclast t4, x6, x6; \ + \ + /* postfilter sboxes 1 and 4 */ \ + vmovdqa .Lpost_tf_lo_s3 RIP, t2; \ + vmovdqa .Lpost_tf_hi_s3 RIP, t3; \ + filter_8bit(x0, t0, t1, t7, t6); \ + filter_8bit(x7, t0, t1, t7, t6); \ + filter_8bit(x3, t0, t1, t7, t6); \ + filter_8bit(x6, t0, t1, t7, t6); \ + \ + /* postfilter sbox 3 */ \ + vmovdqa .Lpost_tf_lo_s2 RIP, t4; \ + vmovdqa .Lpost_tf_hi_s2 RIP, t5; \ + filter_8bit(x2, t2, t3, t7, t6); \ + filter_8bit(x5, t2, t3, t7, t6); \ + \ + vpxor t6, t6, t6; \ + vmovq key, t0; \ 
+ \ + /* postfilter sbox 2 */ \ + filter_8bit(x1, t4, t5, t7, t2); \ + filter_8bit(x4, t4, t5, t7, t2); \ + \ + vpsrldq $5, t0, t5; \ + vpsrldq $1, t0, t1; \ + vpsrldq $2, t0, t2; \ + vpsrldq $3, t0, t3; \ + vpsrldq $4, t0, t4; \ + vpshufb t6, t0, t0; \ + vpshufb t6, t1, t1; \ + vpshufb t6, t2, t2; \ + vpshufb t6, t3, t3; \ + vpshufb t6, t4, t4; \ + vpsrldq $2, t5, t7; \ + vpshufb t6, t7, t7; \ + \ + /* P-function */ \ + vpxor x5, x0, x0; \ + vpxor x6, x1, x1; \ + vpxor x7, x2, x2; \ + vpxor x4, x3, x3; \ + \ + vpxor x2, x4, x4; \ + vpxor x3, x5, x5; \ + vpxor x0, x6, x6; \ + vpxor x1, x7, x7; \ + \ + vpxor x7, x0, x0; \ + vpxor x4, x1, x1; \ + vpxor x5, x2, x2; \ + vpxor x6, x3, x3; \ + \ + vpxor x3, x4, x4; \ + vpxor x0, x5, x5; \ + vpxor x1, x6, x6; \ + vpxor x2, x7, x7; /* note: high and low parts swapped */ \ + \ + /* Add key material and result to CD (x becomes new CD) */ \ + \ + vpxor t3, x4, x4; \ + vpxor 0 * 16(mem_cd), x4, x4; \ + \ + vpxor t2, x5, x5; \ + vpxor 1 * 16(mem_cd), x5, x5; \ + \ + vpsrldq $1, t5, t3; \ + vpshufb t6, t5, t5; \ + vpshufb t6, t3, t6; \ + \ + vpxor t1, x6, x6; \ + vpxor 2 * 16(mem_cd), x6, x6; \ + \ + vpxor t0, x7, x7; \ + vpxor 3 * 16(mem_cd), x7, x7; \ + \ + vpxor t7, x0, x0; \ + vpxor 4 * 16(mem_cd), x0, x0; \ + \ + vpxor t6, x1, x1; \ + vpxor 5 * 16(mem_cd), x1, x1; \ + \ + vpxor t5, x2, x2; \ + vpxor 6 * 16(mem_cd), x2, x2; \ + \ + vpxor t4, x3, x3; \ + vpxor 7 * 16(mem_cd), x3, x3; + +/* + * IN/OUT: + * x0..x7: byte-sliced AB state preloaded + * mem_ab: byte-sliced AB state in memory + * mem_cb: byte-sliced CD state in memory + */ +#define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i, dir, store_ab) \ + roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_cd, (key_table + (i) * 8)(CTX)); \ + \ + vmovdqu x4, 0 * 16(mem_cd); \ + vmovdqu x5, 1 * 16(mem_cd); \ + vmovdqu x6, 2 * 16(mem_cd); \ + vmovdqu x7, 3 * 16(mem_cd); \ + vmovdqu x0, 4 * 16(mem_cd); \ + vmovdqu x1, 5 * 16(mem_cd); \ + vmovdqu x2, 6 * 16(mem_cd); \ + vmovdqu x3, 7 * 16(mem_cd); \ + \ + roundsm16(x4, x5, x6, x7, x0, x1, x2, x3, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, (key_table + ((i) + (dir)) * 8)(CTX)); \ + \ + store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab); + +#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */ + +#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \ + /* Store new AB state */ \ + vmovdqu x0, 0 * 16(mem_ab); \ + vmovdqu x1, 1 * 16(mem_ab); \ + vmovdqu x2, 2 * 16(mem_ab); \ + vmovdqu x3, 3 * 16(mem_ab); \ + vmovdqu x4, 4 * 16(mem_ab); \ + vmovdqu x5, 5 * 16(mem_ab); \ + vmovdqu x6, 6 * 16(mem_ab); \ + vmovdqu x7, 7 * 16(mem_ab); + +#define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store); + +#define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, 
mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store); + +/* + * IN: + * v0..3: byte-sliced 32-bit integers + * OUT: + * v0..3: (IN <<< 1) + */ +#define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \ + vpcmpgtb v0, zero, t0; \ + vpaddb v0, v0, v0; \ + vpabsb t0, t0; \ + \ + vpcmpgtb v1, zero, t1; \ + vpaddb v1, v1, v1; \ + vpabsb t1, t1; \ + \ + vpcmpgtb v2, zero, t2; \ + vpaddb v2, v2, v2; \ + vpabsb t2, t2; \ + \ + vpor t0, v1, v1; \ + \ + vpcmpgtb v3, zero, t0; \ + vpaddb v3, v3, v3; \ + vpabsb t0, t0; \ + \ + vpor t1, v2, v2; \ + vpor t2, v3, v3; \ + vpor t0, v0, v0; + +/* + * IN: + * r: byte-sliced AB state in memory + * l: byte-sliced CD state in memory + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \ + tt1, tt2, tt3, kll, klr, krl, krr) \ + /* \ + * t0 = kll; \ + * t0 &= ll; \ + * lr ^= rol32(t0, 1); \ + */ \ + vpxor tt0, tt0, tt0; \ + vmovd kll, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand l0, t0, t0; \ + vpand l1, t1, t1; \ + vpand l2, t2, t2; \ + vpand l3, t3, t3; \ + \ + rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor l4, t0, l4; \ + vmovdqu l4, 4 * 16(l); \ + vpxor l5, t1, l5; \ + vmovdqu l5, 5 * 16(l); \ + vpxor l6, t2, l6; \ + vmovdqu l6, 6 * 16(l); \ + vpxor l7, t3, l7; \ + vmovdqu l7, 7 * 16(l); \ + \ + /* \ + * t2 = krr; \ + * t2 |= rr; \ + * rl ^= t2; \ + */ \ + \ + vmovd krr, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor 4 * 16(r), t0, t0; \ + vpor 5 * 16(r), t1, t1; \ + vpor 6 * 16(r), t2, t2; \ + vpor 7 * 16(r), t3, t3; \ + \ + vpxor 0 * 16(r), t0, t0; \ + vpxor 1 * 16(r), t1, t1; \ + vpxor 2 * 16(r), t2, t2; \ + vpxor 3 * 16(r), t3, t3; \ + vmovdqu t0, 0 * 16(r); \ + vmovdqu t1, 1 * 16(r); \ + vmovdqu t2, 2 * 16(r); \ + vmovdqu t3, 3 * 16(r); \ + \ + /* \ + * t2 = krl; \ + * t2 &= rl; \ + * rr ^= rol32(t2, 1); \ + */ \ + vmovd krl, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand 0 * 16(r), t0, t0; \ + vpand 1 * 16(r), t1, t1; \ + vpand 2 * 16(r), t2, t2; \ + vpand 3 * 16(r), t3, t3; \ + \ + rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor 4 * 16(r), t0, t0; \ + vpxor 5 * 16(r), t1, t1; \ + vpxor 6 * 16(r), t2, t2; \ + vpxor 7 * 16(r), t3, t3; \ + vmovdqu t0, 4 * 16(r); \ + vmovdqu t1, 5 * 16(r); \ + vmovdqu t2, 6 * 16(r); \ + vmovdqu t3, 7 * 16(r); \ + \ + /* \ + * t0 = klr; \ + * t0 |= lr; \ + * ll ^= t0; \ + */ \ + \ + vmovd klr, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor l4, t0, t0; \ + vpor l5, t1, t1; \ + vpor l6, t2, t2; \ + vpor l7, t3, t3; \ + \ + vpxor l0, t0, l0; \ + vmovdqu l0, 0 * 16(l); \ + vpxor l1, t1, l1; \ + vmovdqu l1, 1 * 16(l); \ + vpxor l2, t2, l2; \ + vmovdqu l2, 2 * 16(l); \ + vpxor l3, t3, l3; \ + vmovdqu l3, 3 * 16(l); + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + 
vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \ + a3, b3, c3, d3, st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vmovdqu .Lshufb_16x16b RIP, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +#define transpose_8x8b(a, b, c, d, e, f, g, h, t0, t1, t2, t3, t4) \ + vpunpcklbw a, b, t0; \ + vpunpckhbw a, b, b; \ + \ + vpunpcklbw c, d, t1; \ + vpunpckhbw c, d, d; \ + \ + vpunpcklbw e, f, t2; \ + vpunpckhbw e, f, f; \ + \ + vpunpcklbw g, h, t3; \ + vpunpckhbw g, h, h; \ + \ + vpunpcklwd t0, t1, g; \ + vpunpckhwd t0, t1, t0; \ + \ + vpunpcklwd b, d, t1; \ + vpunpckhwd b, d, e; \ + \ + vpunpcklwd t2, t3, c; \ + vpunpckhwd t2, t3, t2; \ + \ + vpunpcklwd f, h, t3; \ + vpunpckhwd f, h, b; \ + \ + vpunpcklwd e, b, t4; \ + vpunpckhwd e, b, b; \ + \ + vpunpcklwd t1, t3, e; \ + vpunpckhwd t1, t3, f; \ + \ + vmovdqa .Ltranspose_8x8_shuf RIP, t3; \ + \ + vpunpcklwd g, c, d; \ + vpunpckhwd g, c, c; \ + \ + vpunpcklwd t0, t2, t1; \ + vpunpckhwd t0, t2, h; \ + \ + vpunpckhqdq b, h, a; \ + vpshufb t3, a, a; \ + vpunpcklqdq b, h, b; \ + vpshufb t3, b, b; \ + \ + vpunpckhqdq e, d, g; \ + vpshufb t3, g, g; \ + vpunpcklqdq e, d, h; \ + vpshufb t3, h, h; \ + \ + vpunpckhqdq f, c, e; \ + vpshufb t3, e, e; \ + vpunpcklqdq f, c, f; \ + vpshufb t3, f, f; \ + \ + vpunpckhqdq t4, t1, c; \ + vpshufb t3, c, c; \ + vpunpcklqdq t4, t1, d; \ + vpshufb t3, d, d; + +/* load blocks to registers and apply pre-whitening */ +#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio, key) \ + vmovq key, x0; \ + vpshufb .Lpack_bswap RIP, x0, x0; \ + \ + vpxor 0 * 16(rio), x0, y7; \ + vpxor 1 * 16(rio), x0, y6; \ + vpxor 2 * 16(rio), x0, y5; \ + vpxor 3 * 16(rio), x0, y4; \ + vpxor 4 * 16(rio), x0, y3; \ + vpxor 5 * 16(rio), x0, y2; \ + vpxor 6 * 16(rio), x0, y1; \ + vpxor 7 * 16(rio), x0, y0; \ + vpxor 8 * 16(rio), x0, x7; \ + vpxor 9 * 16(rio), x0, x6; \ + vpxor 10 * 16(rio), x0, x5; \ + vpxor 11 * 16(rio), x0, x4; \ + vpxor 12 * 16(rio), x0, x3; \ + vpxor 13 * 16(rio), x0, x2; \ + vpxor 14 * 16(rio), x0, x1; \ + vpxor 15 * 16(rio), x0, x0; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd) \ + byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \ 
+ y4, y5, y6, y7, (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 16(mem_ab); \ + vmovdqu x1, 1 * 16(mem_ab); \ + vmovdqu x2, 2 * 16(mem_ab); \ + vmovdqu x3, 3 * 16(mem_ab); \ + vmovdqu x4, 4 * 16(mem_ab); \ + vmovdqu x5, 5 * 16(mem_ab); \ + vmovdqu x6, 6 * 16(mem_ab); \ + vmovdqu x7, 7 * 16(mem_ab); \ + vmovdqu y0, 0 * 16(mem_cd); \ + vmovdqu y1, 1 * 16(mem_cd); \ + vmovdqu y2, 2 * 16(mem_cd); \ + vmovdqu y3, 3 * 16(mem_cd); \ + vmovdqu y4, 4 * 16(mem_cd); \ + vmovdqu y5, 5 * 16(mem_cd); \ + vmovdqu y6, 6 * 16(mem_cd); \ + vmovdqu y7, 7 * 16(mem_cd); + +/* de-byteslice, apply post-whitening and store blocks */ +#define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ + y5, y6, y7, key, stack_tmp0, stack_tmp1) \ + byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \ + y3, y7, x3, x7, stack_tmp0, stack_tmp1); \ + \ + vmovdqu x0, stack_tmp0; \ + \ + vmovq key, x0; \ + vpshufb .Lpack_bswap RIP, x0, x0; \ + \ + vpxor x0, y7, y7; \ + vpxor x0, y6, y6; \ + vpxor x0, y5, y5; \ + vpxor x0, y4, y4; \ + vpxor x0, y3, y3; \ + vpxor x0, y2, y2; \ + vpxor x0, y1, y1; \ + vpxor x0, y0, y0; \ + vpxor x0, x7, x7; \ + vpxor x0, x6, x6; \ + vpxor x0, x5, x5; \ + vpxor x0, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x0, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor stack_tmp0, x0, x0; + +#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio) \ + vmovdqu x0, 0 * 16(rio); \ + vmovdqu x1, 1 * 16(rio); \ + vmovdqu x2, 2 * 16(rio); \ + vmovdqu x3, 3 * 16(rio); \ + vmovdqu x4, 4 * 16(rio); \ + vmovdqu x5, 5 * 16(rio); \ + vmovdqu x6, 6 * 16(rio); \ + vmovdqu x7, 7 * 16(rio); \ + vmovdqu y0, 8 * 16(rio); \ + vmovdqu y1, 9 * 16(rio); \ + vmovdqu y2, 10 * 16(rio); \ + vmovdqu y3, 11 * 16(rio); \ + vmovdqu y4, 12 * 16(rio); \ + vmovdqu y5, 13 * 16(rio); \ + vmovdqu y6, 14 * 16(rio); \ + vmovdqu y7, 15 * 16(rio); + +.text +.align 16 + +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) + +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3); + +.Lpack_bswap: + .long 0x00010203 + .long 0x04050607 + .long 0x80808080 + .long 0x80808080 + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox1, sbox2, sbox3: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s1: + .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86 + .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88 +.Lpre_tf_hi_s1: + .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a + .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox4: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in <<< 1) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s4: + .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25 + .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74 +.Lpre_tf_hi_s4: + .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72 + .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf + +/* + * post-SubByte transform + * + * post-lookup for sbox1, sbox4: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s1: + .byte 
0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31 + .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1 +.Lpost_tf_hi_s1: + .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8 + .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c + +/* + * post-SubByte transform + * + * post-lookup for sbox2: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) <<< 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s2: + .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62 + .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3 +.Lpost_tf_hi_s2: + .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51 + .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18 + +/* + * post-SubByte transform + * + * post-lookup for sbox3: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) >>> 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s3: + .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98 + .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8 +.Lpost_tf_hi_s3: + .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54 + .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06 + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* shuffle mask for 8x8 byte transpose */ +.Ltranspose_8x8_shuf: + .byte 0, 1, 4, 5, 2, 3, 6, 7, 8+0, 8+1, 8+4, 8+5, 8+2, 8+3, 8+6, 8+7 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + + +.align 8 +ELF(.type __camellia_enc_blk16,@function;) + +__camellia_enc_blk16: + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 256 bytes + * %xmm0..%xmm15: 16 plaintext blocks + * output: + * %xmm0..%xmm15: 16 encrypted blocks, order swapped: + * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + + leaq 8 * 16(%rax), %rcx; + + inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx); + + enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 0); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (8) * 8) + 0)(CTX), + ((key_table + (8) * 8) + 4)(CTX), + ((key_table + (8) * 8) + 8)(CTX), + ((key_table + (8) * 8) + 12)(CTX)); + + enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 8); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (16) * 8) + 0)(CTX), + ((key_table + (16) * 8) + 4)(CTX), + ((key_table + (16) * 8) + 8)(CTX), + ((key_table + (16) * 8) + 12)(CTX)); + + enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 16); + + movl $24, %r8d; + cmpl $128, key_bitlength(CTX); + jne .Lenc_max32; + +.Lenc_done: + /* load CD for output */ + vmovdqu 0 * 16(%rcx), %xmm8; + vmovdqu 1 * 16(%rcx), %xmm9; + vmovdqu 2 * 16(%rcx), %xmm10; + vmovdqu 3 * 16(%rcx), %xmm11; + vmovdqu 4 * 16(%rcx), %xmm12; + vmovdqu 5 * 16(%rcx), %xmm13; + 
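+/*
+ * The CD half of the byte-sliced state was parked in the stack
+ * scratch area (%rcx = %rax + 8 * 16) while the rounds ran with AB in
+ * %xmm0..%xmm7; it is reloaded into %xmm8..%xmm15 before
+ * de-byteslicing.  A rough C sketch of the 256-byte scratch layout
+ * set up by the callers, reusing the inpack16_post() parameter names:
+ *
+ *   uint8_t scratch[16][16];              // %rax
+ *   uint8_t (*mem_ab)[16] = &scratch[0];  // AB staging
+ *   uint8_t (*mem_cd)[16] = &scratch[8];  // %rcx, reloaded here
+ *
+ * %r8d, set just above, indexes the final key_table pair: 24 for
+ * 128-bit keys, 32 for the longer key sizes.
+ */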
vmovdqu 6 * 16(%rcx), %xmm14; + vmovdqu 7 * 16(%rcx), %xmm15; + + outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax)); + + ret; + +.align 8 +.Lenc_max32: + movl $32, %r8d; + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (24) * 8) + 0)(CTX), + ((key_table + (24) * 8) + 4)(CTX), + ((key_table + (24) * 8) + 8)(CTX), + ((key_table + (24) * 8) + 12)(CTX)); + + enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 24); + + jmp .Lenc_done; +ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;) + +.align 8 +ELF(.type __camellia_dec_blk16,@function;) + +__camellia_dec_blk16: + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 256 bytes + * %r8d: 24 for 16 byte key, 32 for larger + * %xmm0..%xmm15: 16 encrypted blocks + * output: + * %xmm0..%xmm15: 16 plaintext blocks, order swapped: + * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + + leaq 8 * 16(%rax), %rcx; + + inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx); + + cmpl $32, %r8d; + je .Ldec_max32; + +.Ldec_max24: + dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 16); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (16) * 8) + 8)(CTX), + ((key_table + (16) * 8) + 12)(CTX), + ((key_table + (16) * 8) + 0)(CTX), + ((key_table + (16) * 8) + 4)(CTX)); + + dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 8); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (8) * 8) + 8)(CTX), + ((key_table + (8) * 8) + 12)(CTX), + ((key_table + (8) * 8) + 0)(CTX), + ((key_table + (8) * 8) + 4)(CTX)); + + dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 0); + + /* load CD for output */ + vmovdqu 0 * 16(%rcx), %xmm8; + vmovdqu 1 * 16(%rcx), %xmm9; + vmovdqu 2 * 16(%rcx), %xmm10; + vmovdqu 3 * 16(%rcx), %xmm11; + vmovdqu 4 * 16(%rcx), %xmm12; + vmovdqu 5 * 16(%rcx), %xmm13; + vmovdqu 6 * 16(%rcx), %xmm14; + vmovdqu 7 * 16(%rcx), %xmm15; + + outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); + + ret; + +.align 8 +.Ldec_max32: + dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 24); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (24) * 8) + 8)(CTX), + ((key_table + (24) * 8) + 12)(CTX), + ((key_table + (24) * 8) + 0)(CTX), + ((key_table + (24) * 8) + 4)(CTX)); + + jmp .Ldec_max24; +ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq 
minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +.align 8 +.globl _gcry_camellia_aesni_avx_ctr_enc +ELF(.type _gcry_camellia_aesni_avx_ctr_enc,@function;) + +_gcry_camellia_aesni_avx_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 16), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + vmovdqa .Lbswap128_mask RIP, %xmm14; + + /* load IV and byteswap */ + vmovdqu (%rcx), %xmm15; + vmovdqu %xmm15, 15 * 16(%rax); + vpshufb %xmm14, %xmm15, %xmm0; /* be => le */ + + vpcmpeqd %xmm15, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */ + + /* construct IVs */ + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm13; + vmovdqu %xmm13, 14 * 16(%rax); + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm13; + vmovdqu %xmm13, 13 * 16(%rax); + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm12; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm11; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm10; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm9; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm8; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm7; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm6; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm5; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm4; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm3; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm2; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm1; + inc_le128(%xmm0, %xmm15, %xmm13); + vmovdqa %xmm0, %xmm13; + vpshufb %xmm14, %xmm0, %xmm0; + inc_le128(%xmm13, %xmm15, %xmm14); + vpshufb .Lbswap128_mask RIP, %xmm13, %xmm13; /* le => be */ + vmovdqu %xmm13, (%rcx); + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm15; + vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor 13 * 16(%rax), %xmm15, %xmm13; + vpxor 14 * 16(%rax), %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_enc_blk16; + + vpxor 0 * 16(%rdx), %xmm7, %xmm7; + vpxor 1 * 16(%rdx), %xmm6, %xmm6; + vpxor 2 * 16(%rdx), %xmm5, %xmm5; + vpxor 3 * 16(%rdx), %xmm4, %xmm4; + vpxor 4 * 16(%rdx), %xmm3, %xmm3; + vpxor 5 * 16(%rdx), %xmm2, %xmm2; + vpxor 6 * 16(%rdx), %xmm1, %xmm1; + vpxor 7 * 16(%rdx), %xmm0, %xmm0; + vpxor 8 * 16(%rdx), %xmm15, %xmm15; + vpxor 9 * 16(%rdx), %xmm14, %xmm14; + vpxor 10 * 16(%rdx), %xmm13, %xmm13; + vpxor 11 * 16(%rdx), %xmm12, %xmm12; + vpxor 12 * 16(%rdx), %xmm11, %xmm11; + vpxor 13 * 16(%rdx), %xmm10, %xmm10; + vpxor 14 * 16(%rdx), %xmm9, %xmm9; + vpxor 15 * 16(%rdx), %xmm8, %xmm8; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;) + +.align 8 +.globl 
_gcry_camellia_aesni_avx_cbc_dec +ELF(.type _gcry_camellia_aesni_avx_cbc_dec,@function;) + +_gcry_camellia_aesni_avx_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + movq %rcx, %r9; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx, (key_table)(CTX, %r8, 8)); + + subq $(16 * 16), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + call __camellia_dec_blk16; + + /* XOR output with IV */ + vpxor (%r9), %xmm7, %xmm7; + vpxor (0 * 16)(%rdx), %xmm6, %xmm6; + vpxor (1 * 16)(%rdx), %xmm5, %xmm5; + vpxor (2 * 16)(%rdx), %xmm4, %xmm4; + vpxor (3 * 16)(%rdx), %xmm3, %xmm3; + vpxor (4 * 16)(%rdx), %xmm2, %xmm2; + vpxor (5 * 16)(%rdx), %xmm1, %xmm1; + vpxor (6 * 16)(%rdx), %xmm0, %xmm0; + vpxor (7 * 16)(%rdx), %xmm15, %xmm15; + vpxor (8 * 16)(%rdx), %xmm14, %xmm14; + vpxor (9 * 16)(%rdx), %xmm13, %xmm13; + vpxor (10 * 16)(%rdx), %xmm12, %xmm12; + vpxor (11 * 16)(%rdx), %xmm11, %xmm11; + vpxor (12 * 16)(%rdx), %xmm10, %xmm10; + vpxor (13 * 16)(%rdx), %xmm9, %xmm9; + vpxor (14 * 16)(%rdx), %xmm8, %xmm8; + movq (15 * 16 + 0)(%rdx), %r10; + movq (15 * 16 + 8)(%rdx), %r11; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + /* store new IV */ + movq %r10, (0)(%r9); + movq %r11, (8)(%r9); + + vzeroall; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx_cfb_dec +ELF(.type _gcry_camellia_aesni_avx_cfb_dec,@function;) + +_gcry_camellia_aesni_avx_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 16), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm0; + vpshufb .Lpack_bswap RIP, %xmm0, %xmm0; + vpxor (%rcx), %xmm0, %xmm15; + vmovdqu 15 * 16(%rdx), %xmm1; + vmovdqu %xmm1, (%rcx); /* store new IV */ + vpxor 0 * 16(%rdx), %xmm0, %xmm14; + vpxor 1 * 16(%rdx), %xmm0, %xmm13; + vpxor 2 * 16(%rdx), %xmm0, %xmm12; + vpxor 3 * 16(%rdx), %xmm0, %xmm11; + vpxor 4 * 16(%rdx), %xmm0, %xmm10; + vpxor 5 * 16(%rdx), %xmm0, %xmm9; + vpxor 6 * 16(%rdx), %xmm0, %xmm8; + vpxor 7 * 16(%rdx), %xmm0, %xmm7; + vpxor 8 * 16(%rdx), %xmm0, %xmm6; + vpxor 9 * 16(%rdx), %xmm0, %xmm5; + vpxor 10 * 16(%rdx), %xmm0, %xmm4; + vpxor 11 * 16(%rdx), %xmm0, %xmm3; + vpxor 12 * 16(%rdx), %xmm0, %xmm2; + vpxor 13 * 16(%rdx), %xmm0, %xmm1; + vpxor 14 * 16(%rdx), %xmm0, %xmm0; + + call __camellia_enc_blk16; + + vpxor 0 * 16(%rdx), %xmm7, %xmm7; + vpxor 1 * 16(%rdx), %xmm6, %xmm6; + vpxor 2 * 16(%rdx), %xmm5, %xmm5; + vpxor 3 * 16(%rdx), %xmm4, %xmm4; + vpxor 4 * 16(%rdx), %xmm3, %xmm3; + vpxor 5 * 16(%rdx), %xmm2, %xmm2; + vpxor 6 * 16(%rdx), %xmm1, %xmm1; + vpxor 7 * 16(%rdx), %xmm0, %xmm0; + vpxor 8 * 16(%rdx), %xmm15, %xmm15; + vpxor 9 * 16(%rdx), %xmm14, %xmm14; + vpxor 10 * 16(%rdx), %xmm13, %xmm13; + vpxor 11 * 16(%rdx), %xmm12, %xmm12; + vpxor 12 * 16(%rdx), %xmm11, %xmm11; + vpxor 13 * 16(%rdx), %xmm10, %xmm10; + vpxor 14 * 16(%rdx), %xmm9, %xmm9; + vpxor 15 * 16(%rdx), %xmm8, %xmm8; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, 
%xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_enc +ELF(.type _gcry_camellia_aesni_avx_ocb_enc,@function;) + +_gcry_camellia_aesni_avx_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rax); + movq %r11, (16 * 16 + 1 * 8)(%rax); + movq %r12, (16 * 16 + 2 * 8)(%rax); + movq %r13, (16 * 16 + 3 * 8)(%rax); + + vmovdqu (%rcx), %xmm14; + vmovdqu (%r8), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rdx), xreg; \ + vpxor (lreg), %xmm14, %xmm14; \ + vpxor xreg, %xmm15, %xmm15; \ + vpxor xreg, %xmm14, xreg; \ + vmovdqu %xmm14, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 * 16)(%rax); + OCB_INPUT(1, %r11, %xmm0); + vmovdqu %xmm0, (14 * 16)(%rax); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rcx); + vmovdqu %xmm15, (%r8); + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm15; + vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor 14 * 16(%rax), %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_enc_blk16; + + vpxor 0 * 16(%rsi), %xmm7, %xmm7; + vpxor 1 * 16(%rsi), %xmm6, %xmm6; + vpxor 2 * 16(%rsi), %xmm5, %xmm5; + vpxor 3 * 16(%rsi), %xmm4, %xmm4; + vpxor 4 * 16(%rsi), %xmm3, %xmm3; + vpxor 5 * 16(%rsi), %xmm2, %xmm2; + vpxor 6 * 16(%rsi), %xmm1, %xmm1; + vpxor 7 * 16(%rsi), %xmm0, %xmm0; + vpxor 8 * 16(%rsi), %xmm15, %xmm15; + vpxor 9 * 16(%rsi), %xmm14, %xmm14; + vpxor 10 * 16(%rsi), %xmm13, %xmm13; + vpxor 11 * 16(%rsi), %xmm12, %xmm12; + vpxor 12 * 16(%rsi), %xmm11, %xmm11; + vpxor 13 * 16(%rsi), %xmm10, %xmm10; + vpxor 14 * 16(%rsi), %xmm9, %xmm9; + vpxor 15 * 16(%rsi), %xmm8, %xmm8; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, 
%xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rax), %r10; + movq (16 * 16 + 1 * 8)(%rax), %r11; + movq (16 * 16 + 2 * 8)(%rax), %r12; + movq (16 * 16 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_dec +ELF(.type _gcry_camellia_aesni_avx_ocb_dec,@function;) + +_gcry_camellia_aesni_avx_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rax); + movq %r11, (16 * 16 + 1 * 8)(%rax); + movq %r12, (16 * 16 + 2 * 8)(%rax); + movq %r13, (16 * 16 + 3 * 8)(%rax); + + vmovdqu (%rcx), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rdx), xreg; \ + vpxor (lreg), %xmm15, %xmm15; \ + vpxor xreg, %xmm15, xreg; \ + vmovdqu %xmm15, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 * 16)(%rax); + OCB_INPUT(1, %r11, %xmm14); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + vmovdqu %xmm15, (%rcx); + + movq %r8, %r10; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r9d; + cmovel %r9d, %r8d; /* max */ + + /* inpack16_pre: */ + vmovq (key_table)(CTX, %r8, 8), %xmm15; + vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor %xmm14, %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_dec_blk16; + + vpxor 0 * 16(%rsi), %xmm7, %xmm7; + vpxor 1 * 16(%rsi), %xmm6, %xmm6; + vpxor 2 * 16(%rsi), %xmm5, %xmm5; + vpxor 3 * 16(%rsi), %xmm4, %xmm4; + vpxor 4 * 16(%rsi), %xmm3, %xmm3; + vpxor 5 * 16(%rsi), %xmm2, %xmm2; + vpxor 6 * 16(%rsi), %xmm1, %xmm1; + vpxor 7 * 16(%rsi), %xmm0, %xmm0; + vmovdqu %xmm7, (7 * 16)(%rax); + vpxor 8 * 16(%rsi), %xmm15, %xmm15; + vpxor 9 * 16(%rsi), %xmm14, %xmm14; + vpxor 10 * 16(%rsi), %xmm13, %xmm13; + vpxor 11 * 16(%rsi), %xmm12, %xmm12; + vpxor 12 * 16(%rsi), %xmm11, %xmm11; + vpxor 13 * 16(%rsi), %xmm10, %xmm10; + vpxor 14 * 16(%rsi), %xmm9, %xmm9; + vpxor 15 * 16(%rsi), 
%xmm8, %xmm8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vpxor (%r10), %xmm7, %xmm7; + vpxor %xmm6, %xmm7, %xmm7; + vpxor %xmm5, %xmm7, %xmm7; + vpxor %xmm4, %xmm7, %xmm7; + vpxor %xmm3, %xmm7, %xmm7; + vpxor %xmm2, %xmm7, %xmm7; + vpxor %xmm1, %xmm7, %xmm7; + vpxor %xmm0, %xmm7, %xmm7; + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm14, %xmm7, %xmm7; + vpxor %xmm13, %xmm7, %xmm7; + vpxor %xmm12, %xmm7, %xmm7; + vpxor %xmm11, %xmm7, %xmm7; + vpxor %xmm10, %xmm7, %xmm7; + vpxor %xmm9, %xmm7, %xmm7; + vpxor %xmm8, %xmm7, %xmm7; + vmovdqu %xmm7, (%r10); + vmovdqu (7 * 16)(%rax), %xmm7; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rax), %r10; + movq (16 * 16 + 1 * 8)(%rax), %r11; + movq (16 * 16 + 2 * 8)(%rax), %r12; + movq (16 * 16 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_auth +ELF(.type _gcry_camellia_aesni_avx_ocb_auth,@function;) + +_gcry_camellia_aesni_avx_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (16 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[16]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rax); + movq %r11, (16 * 16 + 1 * 8)(%rax); + movq %r12, (16 * 16 + 2 * 8)(%rax); + movq %r13, (16 * 16 + 3 * 8)(%rax); + + vmovdqu (%rdx), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rsi), xreg; \ + vpxor (lreg), %xmm15, %xmm15; \ + vpxor xreg, %xmm15, xreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 * 16)(%rax); + OCB_INPUT(1, %r11, %xmm14); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(8, %r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + vmovdqu %xmm15, (%rdx); + + movq %rcx, %r10; + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm15; + vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor %xmm14, %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_enc_blk16; + + vpxor %xmm7, %xmm6, %xmm6; + vpxor %xmm5, %xmm4, %xmm4; + vpxor 
%xmm3, %xmm2, %xmm2; + vpxor %xmm1, %xmm0, %xmm0; + vpxor %xmm15, %xmm14, %xmm14; + vpxor %xmm13, %xmm12, %xmm12; + vpxor %xmm11, %xmm10, %xmm10; + vpxor %xmm9, %xmm8, %xmm8; + + vpxor %xmm6, %xmm4, %xmm4; + vpxor %xmm2, %xmm0, %xmm0; + vpxor %xmm14, %xmm12, %xmm12; + vpxor %xmm10, %xmm8, %xmm8; + + vpxor %xmm4, %xmm0, %xmm0; + vpxor %xmm12, %xmm8, %xmm8; + + vpxor %xmm0, %xmm8, %xmm0; + vpxor (%r10), %xmm0, %xmm0; + vmovdqu %xmm0, (%r10); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rax), %r10; + movq (16 * 16 + 1 * 8)(%rax), %r11; + movq (16 * 16 + 2 * 8)(%rax), %r12; + movq (16 * 16 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;) + +/* + * IN: + * ab: 64-bit AB state + * cd: 64-bit CD state + */ +#define camellia_f(ab, x, t0, t1, t2, t3, t4, inv_shift_row, sbox4mask, \ + _0f0f0f0fmask, pre_s1lo_mask, pre_s1hi_mask, key) \ + vmovq key, t0; \ + vpxor x, x, t3; \ + \ + vpxor ab, t0, x; \ + \ + /* \ + * S-function with AES subbytes \ + */ \ + \ + /* input rotation for sbox4 (<<< 1) */ \ + vpand x, sbox4mask, t0; \ + vpandn x, sbox4mask, x; \ + vpaddw t0, t0, t1; \ + vpsrlw $7, t0, t0; \ + vpor t0, t1, t0; \ + vpand sbox4mask, t0, t0; \ + vpor t0, x, x; \ + \ + vmovdqa .Lpost_tf_lo_s1 RIP, t0; \ + vmovdqa .Lpost_tf_hi_s1 RIP, t1; \ + \ + /* prefilter sboxes */ \ + filter_8bit(x, pre_s1lo_mask, pre_s1hi_mask, _0f0f0f0fmask, t2); \ + \ + /* AES subbytes + AES shift rows + AES inv shift rows */ \ + vaesenclast t3, x, x; \ + \ + /* postfilter sboxes */ \ + filter_8bit(x, t0, t1, _0f0f0f0fmask, t2); \ + \ + /* output rotation for sbox2 (<<< 1) */ \ + /* output rotation for sbox3 (>>> 1) */ \ + vpshufb inv_shift_row, x, t1; \ + vpshufb .Lsp0044440444044404mask RIP, x, t4; \ + vpshufb .Lsp1110111010011110mask RIP, x, x; \ + vpaddb t1, t1, t2; \ + vpsrlw $7, t1, t0; \ + vpsllw $7, t1, t3; \ + vpor t0, t2, t0; \ + vpsrlw $1, t1, t1; \ + vpshufb .Lsp0222022222000222mask RIP, t0, t0; \ + vpor t1, t3, t1; \ + \ + vpxor x, t4, t4; \ + vpshufb .Lsp3033303303303033mask RIP, t1, t1; \ + vpxor t4, t0, t0; \ + vpxor t1, t0, t0; \ + vpsrldq $8, t0, x; \ + vpxor t0, x, x; + +#define vec_rol128(in, out, nrol, t0) \ + vpshufd $0x4e, in, out; \ + vpsllq $(nrol), in, t0; \ + vpsrlq $(64-(nrol)), out, out; \ + vpaddd t0, out, out; + +#define vec_ror128(in, out, nror, t0) \ + vpshufd $0x4e, in, out; \ + vpsrlq $(nror), in, t0; \ + vpsllq $(64-(nror)), out, out; \ + vpaddd t0, out, out; + + +.align 16 +.Linv_shift_row_and_unpcklbw: + .byte 0x00, 0xff, 0x0d, 0xff, 0x0a, 0xff, 0x07, 0xff + .byte 0x04, 0xff, 0x01, 0xff, 0x0e, 0xff, 0x0b, 0xff +.Lsp0044440444044404mask: + .long 0xffff0404, 0x0404ff04; + .long 0x0d0dff0d, 0x0d0dff0d; +.Lsp1110111010011110mask: + .long 0x000000ff, 0x000000ff; + .long 0x0bffff0b, 0x0b0b0bff; +.Lsp0222022222000222mask: + .long 0xff060606, 0xff060606; + .long 0x0c0cffff, 0xff0c0c0c; +.Lsp3033303303303033mask: + .long 0x04ff0404, 0x04ff0404; + .long 0xff0a0aff, 0x0aff0a0a; +.Lsbox4_input_mask: + .byte 0x00, 0xff, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00; +.Lsigma1: + .long 0x3BCC908B, 0xA09E667F; +.Lsigma2: + .long 0x4CAA73B2, 0xB67AE858; +.Lsigma3: + .long 0xE94F82BE, 0xC6EF372F; +.Lsigma4: + .long 0xF1D36F1C, 0x54FF53A5; +.Lsigma5: + .long 0xDE682D1D, 0x10E527FA; +.Lsigma6: + .long 0xB3E6C1FD, 0xB05688C2; + + +.align 8 +ELF(.type __camellia_avx_setup128,@function;) +__camellia_avx_setup128: + /* input: + * %rdi: ctx, CTX; subkey storage at key_table(CTX) + * %xmm0: key + */ +#define cmll_sub(n, ctx) (key_table+((n)*8))(ctx) 
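+/*
+ * cmll_sub(n, ctx) is the address of the n-th 64-bit subkey slot in
+ * key_table.  The same accessor sketched in C, assuming key_table is
+ * viewed as uint64_t[CAMELLIA_TABLE_BYTE_LEN / 8]:
+ *
+ *   static inline uint64_t *cmll_sub(CAMELLIA_context *ctx, int n)
+ *   {
+ *     return (uint64_t *)((uint8_t *)ctx + key_table) + n;
+ *   }
+ */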
+#define KL128 %xmm0 +#define KA128 %xmm2 + + vpshufb .Lbswap128_mask RIP, KL128, KL128; + + vmovdqa .Linv_shift_row_and_unpcklbw RIP, %xmm11; + vmovq .Lsbox4_input_mask RIP, %xmm12; + vbroadcastss .L0f0f0f0f RIP, %xmm13; + vmovdqa .Lpre_tf_lo_s1 RIP, %xmm14; + vmovdqa .Lpre_tf_hi_s1 RIP, %xmm15; + + /* + * Generate KA + */ + vpsrldq $8, KL128, %xmm2; + vmovdqa KL128, %xmm3; + vpslldq $8, %xmm3, %xmm3; + vpsrldq $8, %xmm3, %xmm3; + + camellia_f(%xmm2, %xmm4, %xmm1, + %xmm5, %xmm6, %xmm7, %xmm8, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 RIP); + vpxor %xmm4, %xmm3, %xmm3; + camellia_f(%xmm3, %xmm2, %xmm1, + %xmm5, %xmm6, %xmm7, %xmm8, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 RIP); + camellia_f(%xmm2, %xmm3, %xmm1, + %xmm5, %xmm6, %xmm7, %xmm8, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 RIP); + vpxor %xmm4, %xmm3, %xmm3; + camellia_f(%xmm3, %xmm4, %xmm1, + %xmm5, %xmm6, %xmm7, %xmm8, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 RIP); + + vpslldq $8, %xmm3, %xmm3; + vpxor %xmm4, %xmm2, %xmm2; + vpsrldq $8, %xmm3, %xmm3; + vpslldq $8, %xmm2, KA128; + vpor %xmm3, KA128, KA128; + + /* + * Generate subkeys + */ + vmovdqu KA128, cmll_sub(24, CTX); + vec_rol128(KL128, %xmm3, 15, %xmm15); + vec_rol128(KA128, %xmm4, 15, %xmm15); + vec_rol128(KA128, %xmm5, 30, %xmm15); + vec_rol128(KL128, %xmm6, 45, %xmm15); + vec_rol128(KA128, %xmm7, 45, %xmm15); + vec_rol128(KL128, %xmm8, 60, %xmm15); + vec_rol128(KA128, %xmm9, 60, %xmm15); + vec_ror128(KL128, %xmm10, 128-77, %xmm15); + + /* absorb kw2 to other subkeys */ + vpslldq $8, KL128, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, KA128, KA128; + vpxor %xmm15, %xmm3, %xmm3; + vpxor %xmm15, %xmm4, %xmm4; + + /* subl(1) ^= subr(1) & ~subr(9); */ + vpandn %xmm15, %xmm5, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm5, %xmm14; + vpslld $1, %xmm14, %xmm11; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm11, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm6, %xmm6; + vpxor %xmm15, %xmm8, %xmm8; + vpxor %xmm15, %xmm9, %xmm9; + + /* subl(1) ^= subr(1) & ~subr(17); */ + vpandn %xmm15, %xmm10, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm10, %xmm14; + vpslld $1, %xmm14, %xmm11; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm11, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpshufd $0x1b, KL128, KL128; + vpshufd $0x1b, KA128, KA128; + vpshufd $0x1b, %xmm3, %xmm3; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm7, %xmm7; + vpshufd $0x1b, %xmm8, %xmm8; + vpshufd $0x1b, %xmm9, %xmm9; + vpshufd $0x1b, %xmm10, %xmm10; + + vmovdqu KL128, cmll_sub(0, CTX); + vpshufd $0x1b, KL128, KL128; + vmovdqu KA128, cmll_sub(2, CTX); + vmovdqu %xmm3, cmll_sub(4, CTX); + vmovdqu %xmm4, cmll_sub(6, CTX); + vmovdqu %xmm5, cmll_sub(8, CTX); + vmovdqu %xmm6, cmll_sub(10, CTX); + vpsrldq $8, %xmm8, %xmm8; + vmovq %xmm7, cmll_sub(12, CTX); + vmovq %xmm8, cmll_sub(13, CTX); + vmovdqu %xmm9, cmll_sub(14, CTX); + vmovdqu %xmm10, cmll_sub(16, CTX); + + vmovdqu cmll_sub(24, CTX), KA128; + + vec_ror128(KL128, %xmm3, 128 - 94, %xmm7); + vec_ror128(KA128, %xmm4, 128 - 94, %xmm7); + 
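+/*
+ * vec_ror128(in, out, n, t) rotates the 128-bit value right by n
+ * (0 < n < 64) using only 64-bit lane shifts: vpshufd $0x4e swaps the
+ * two quadwords so each half can pick up the bits shifted out of the
+ * other, and a right-rotation by 128 - k yields the key schedule's
+ * left rotation by k (e.g. 128 - 94 above gives KL <<< 94).  In C,
+ * with the value held as two 64-bit words (v[0] = low, v[1] = high):
+ *
+ *   void ror128(uint64_t v[2], unsigned n)   // 0 < n < 64
+ *   {
+ *     uint64_t lo = v[0], hi = v[1];
+ *     v[0] = (lo >> n) | (hi << (64 - n));
+ *     v[1] = (hi >> n) | (lo << (64 - n));
+ *   }
+ */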
vec_ror128(KL128, %xmm5, 128 - 111, %xmm7); + vec_ror128(KA128, %xmm6, 128 - 111, %xmm7); + + vpxor %xmm15, %xmm3, %xmm3; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm5, %xmm5; + vpslldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm6, %xmm6; + + /* absorb kw4 to other subkeys */ + vpslldq $8, %xmm6, %xmm15; + vpxor %xmm15, %xmm5, %xmm5; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm3, %xmm3; + + /* subl(25) ^= subr(25) & ~subr(16); */ + vpshufd $0x1b, cmll_sub(16, CTX), %xmm10; + vpandn %xmm15, %xmm10, %xmm13; + vpslldq $4, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(25) & subl(16), subr(25) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm10, %xmm14; + vpslld $1, %xmm14, %xmm11; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm11, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpshufd $0x1b, %xmm3, %xmm3; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + + vmovdqu %xmm3, cmll_sub(18, CTX); + vmovdqu %xmm4, cmll_sub(20, CTX); + vmovdqu %xmm5, cmll_sub(22, CTX); + vmovdqu %xmm6, cmll_sub(24, CTX); + + vpshufd $0x1b, cmll_sub(14, CTX), %xmm3; + vpshufd $0x1b, cmll_sub(12, CTX), %xmm4; + vpshufd $0x1b, cmll_sub(10, CTX), %xmm5; + vpshufd $0x1b, cmll_sub(8, CTX), %xmm6; + + vpxor %xmm15, %xmm3, %xmm3; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm5, %xmm5; + + /* subl(25) ^= subr(25) & ~subr(8); */ + vpandn %xmm15, %xmm6, %xmm13; + vpslldq $4, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(25) & subl(8), subr(25) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm6, %xmm14; + vpslld $1, %xmm14, %xmm11; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm11, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpshufd $0x1b, %xmm3, %xmm3; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + + vmovdqu %xmm3, cmll_sub(14, CTX); + vmovdqu %xmm4, cmll_sub(12, CTX); + vmovdqu %xmm5, cmll_sub(10, CTX); + + vpshufd $0x1b, cmll_sub(6, CTX), %xmm6; + vpshufd $0x1b, cmll_sub(4, CTX), %xmm4; + vpshufd $0x1b, cmll_sub(2, CTX), %xmm2; + vpshufd $0x1b, cmll_sub(0, CTX), %xmm0; + + vpxor %xmm15, %xmm6, %xmm6; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm2, %xmm2; + vpxor %xmm15, %xmm0, %xmm0; + + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm2, %xmm2; + vpshufd $0x1b, %xmm0, %xmm0; + + vpsrldq $8, %xmm2, %xmm3; + vpsrldq $8, %xmm4, %xmm5; + vpsrldq $8, %xmm6, %xmm7; + + /* + * key XOR is end of F-function. 
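+ * Moving that XOR out of the F-function lets the round macros use
+ * subkeys with the key addition already folded in: the vpxor chain
+ * below combines each subkey pair with its neighbour before the
+ * halves are stored back (roughly sub[i] ^= sub[i + 2] over the
+ * 64-bit key_table slots).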
+ */ + vpxor %xmm2, %xmm0, %xmm0; + vpxor %xmm4, %xmm2, %xmm2; + + vmovq %xmm0, cmll_sub(0, CTX); + vmovq %xmm3, cmll_sub(2, CTX); + vpxor %xmm5, %xmm3, %xmm3; + vpxor %xmm6, %xmm4, %xmm4; + vpxor %xmm7, %xmm5, %xmm5; + vmovq %xmm2, cmll_sub(3, CTX); + vmovq %xmm3, cmll_sub(4, CTX); + vmovq %xmm4, cmll_sub(5, CTX); + vmovq %xmm5, cmll_sub(6, CTX); + + vmovq cmll_sub(7, CTX), %xmm7; + vmovq cmll_sub(8, CTX), %xmm8; + vmovq cmll_sub(9, CTX), %xmm9; + vmovq cmll_sub(10, CTX), %xmm10; + /* tl = subl(10) ^ (subr(10) & ~subr(8)); */ + vpandn %xmm10, %xmm8, %xmm15; + vpsrldq $4, %xmm15, %xmm15; + vpxor %xmm15, %xmm10, %xmm0; + /* dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); */ + vpand %xmm8, %xmm0, %xmm15; + vpslld $1, %xmm15, %xmm14; + vpsrld $31, %xmm15, %xmm15; + vpaddd %xmm14, %xmm15, %xmm15; + vpslldq $12, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm0, %xmm0; + + vpxor %xmm0, %xmm6, %xmm6; + vmovq %xmm6, cmll_sub(7, CTX); + + vmovq cmll_sub(11, CTX), %xmm11; + vmovq cmll_sub(12, CTX), %xmm12; + vmovq cmll_sub(13, CTX), %xmm13; + vmovq cmll_sub(14, CTX), %xmm14; + vmovq cmll_sub(15, CTX), %xmm15; + /* tl = subl(7) ^ (subr(7) & ~subr(9)); */ + vpandn %xmm7, %xmm9, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm7, %xmm0; + /* dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); */ + vpand %xmm9, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vpxor %xmm11, %xmm0, %xmm0; + vpxor %xmm12, %xmm10, %xmm10; + vpxor %xmm13, %xmm11, %xmm11; + vpxor %xmm14, %xmm12, %xmm12; + vpxor %xmm15, %xmm13, %xmm13; + vmovq %xmm0, cmll_sub(10, CTX); + vmovq %xmm10, cmll_sub(11, CTX); + vmovq %xmm11, cmll_sub(12, CTX); + vmovq %xmm12, cmll_sub(13, CTX); + vmovq %xmm13, cmll_sub(14, CTX); + + vmovq cmll_sub(16, CTX), %xmm6; + vmovq cmll_sub(17, CTX), %xmm7; + vmovq cmll_sub(18, CTX), %xmm8; + vmovq cmll_sub(19, CTX), %xmm9; + vmovq cmll_sub(20, CTX), %xmm10; + /* tl = subl(18) ^ (subr(18) & ~subr(16)); */ + vpandn %xmm8, %xmm6, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm8, %xmm0; + /* dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); */ + vpand %xmm6, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vpxor %xmm14, %xmm0, %xmm0; + vmovq %xmm0, cmll_sub(15, CTX); + + /* tl = subl(15) ^ (subr(15) & ~subr(17)); */ + vpandn %xmm15, %xmm7, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm15, %xmm0; + /* dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); */ + vpand %xmm7, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vmovq cmll_sub(21, CTX), %xmm1; + vmovq cmll_sub(22, CTX), %xmm2; + vmovq cmll_sub(23, CTX), %xmm3; + vmovq cmll_sub(24, CTX), %xmm4; + + vpxor %xmm9, %xmm0, %xmm0; + vpxor %xmm10, %xmm8, %xmm8; + vpxor %xmm1, %xmm9, %xmm9; + vpxor %xmm2, %xmm10, %xmm10; + vpxor %xmm3, %xmm1, %xmm1; + vpxor %xmm4, %xmm3, %xmm3; + + vmovq %xmm0, cmll_sub(18, CTX); + vmovq %xmm8, cmll_sub(19, CTX); + vmovq %xmm9, cmll_sub(20, CTX); + vmovq %xmm10, cmll_sub(21, CTX); + vmovq %xmm1, cmll_sub(22, CTX); + vmovq %xmm2, cmll_sub(23, CTX); + vmovq %xmm3, cmll_sub(24, CTX); + + /* kw2 and kw4 are unused now. 
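+ * Both were absorbed into the neighbouring subkeys earlier, so their
+ * slots (key_table u64 indices 1 and 25 in the 128-bit schedule) are
+ * cleared below instead of being left holding key material.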
*/ + movq $0, cmll_sub(1, CTX); + movq $0, cmll_sub(25, CTX); + + vzeroall; + + ret; +ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;) + +.align 8 +ELF(.type __camellia_avx_setup256,@function;) + +__camellia_avx_setup256: + /* input: + * %rdi: ctx, CTX; subkey storage at key_table(CTX) + * %xmm0 & %xmm1: key + */ +#define KL128 %xmm0 +#define KR128 %xmm1 +#define KA128 %xmm2 +#define KB128 %xmm3 + + vpshufb .Lbswap128_mask RIP, KL128, KL128; + vpshufb .Lbswap128_mask RIP, KR128, KR128; + + vmovdqa .Linv_shift_row_and_unpcklbw RIP, %xmm11; + vmovq .Lsbox4_input_mask RIP, %xmm12; + vbroadcastss .L0f0f0f0f RIP, %xmm13; + vmovdqa .Lpre_tf_lo_s1 RIP, %xmm14; + vmovdqa .Lpre_tf_hi_s1 RIP, %xmm15; + + /* + * Generate KA + */ + vpxor KL128, KR128, %xmm3; + vpsrldq $8, KR128, %xmm6; + vpsrldq $8, %xmm3, %xmm2; + vpslldq $8, %xmm3, %xmm3; + vpsrldq $8, %xmm3, %xmm3; + + camellia_f(%xmm2, %xmm4, %xmm5, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 RIP); + vpxor %xmm4, %xmm3, %xmm3; + camellia_f(%xmm3, %xmm2, %xmm5, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 RIP); + vpxor %xmm6, %xmm2, %xmm2; + camellia_f(%xmm2, %xmm3, %xmm5, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 RIP); + vpxor %xmm4, %xmm3, %xmm3; + vpxor KR128, %xmm3, %xmm3; + camellia_f(%xmm3, %xmm4, %xmm5, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 RIP); + + vpslldq $8, %xmm3, %xmm3; + vpxor %xmm4, %xmm2, %xmm2; + vpsrldq $8, %xmm3, %xmm3; + vpslldq $8, %xmm2, KA128; + vpor %xmm3, KA128, KA128; + + /* + * Generate KB + */ + vpxor KA128, KR128, %xmm3; + vpsrldq $8, %xmm3, %xmm4; + vpslldq $8, %xmm3, %xmm3; + vpsrldq $8, %xmm3, %xmm3; + + camellia_f(%xmm4, %xmm5, %xmm6, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 RIP); + vpxor %xmm5, %xmm3, %xmm3; + + camellia_f(%xmm3, %xmm5, %xmm6, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 RIP); + vpslldq $8, %xmm3, %xmm3; + vpxor %xmm5, %xmm4, %xmm4; + vpsrldq $8, %xmm3, %xmm3; + vpslldq $8, %xmm4, %xmm4; + vpor %xmm3, %xmm4, KB128; + + /* + * Generate subkeys + */ + vmovdqu KB128, cmll_sub(32, CTX); + vec_rol128(KR128, %xmm4, 15, %xmm15); + vec_rol128(KA128, %xmm5, 15, %xmm15); + vec_rol128(KR128, %xmm6, 30, %xmm15); + vec_rol128(KB128, %xmm7, 30, %xmm15); + vec_rol128(KL128, %xmm8, 45, %xmm15); + vec_rol128(KA128, %xmm9, 45, %xmm15); + vec_rol128(KL128, %xmm10, 60, %xmm15); + vec_rol128(KR128, %xmm11, 60, %xmm15); + vec_rol128(KB128, %xmm12, 60, %xmm15); + + /* absorb kw2 to other subkeys */ + vpslldq $8, KL128, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, KB128, KB128; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm5, %xmm5; + + /* subl(1) ^= subr(1) & ~subr(9); */ + vpandn %xmm15, %xmm6, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm6, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm15, %xmm8, %xmm8; + vpxor %xmm15, %xmm9, %xmm9; + + vpshufd $0x1b, KL128, KL128; + vpshufd $0x1b, KB128, KB128; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm7, %xmm7; + vpshufd $0x1b, 
%xmm8, %xmm8; + vpshufd $0x1b, %xmm9, %xmm9; + + vmovdqu KL128, cmll_sub(0, CTX); + vpshufd $0x1b, KL128, KL128; + vmovdqu KB128, cmll_sub(2, CTX); + vmovdqu %xmm4, cmll_sub(4, CTX); + vmovdqu %xmm5, cmll_sub(6, CTX); + vmovdqu %xmm6, cmll_sub(8, CTX); + vmovdqu %xmm7, cmll_sub(10, CTX); + vmovdqu %xmm8, cmll_sub(12, CTX); + vmovdqu %xmm9, cmll_sub(14, CTX); + + vmovdqu cmll_sub(32, CTX), KB128; + + /* subl(1) ^= subr(1) & ~subr(17); */ + vpandn %xmm15, %xmm10, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm10, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm11, %xmm11; + vpxor %xmm15, %xmm12, %xmm12; + + vec_ror128(KL128, %xmm4, 128-77, %xmm14); + vec_ror128(KA128, %xmm5, 128-77, %xmm14); + vec_ror128(KR128, %xmm6, 128-94, %xmm14); + vec_ror128(KA128, %xmm7, 128-94, %xmm14); + vec_ror128(KL128, %xmm8, 128-111, %xmm14); + vec_ror128(KB128, %xmm9, 128-111, %xmm14); + + vpxor %xmm15, %xmm4, %xmm4; + + vpshufd $0x1b, %xmm10, %xmm10; + vpshufd $0x1b, %xmm11, %xmm11; + vpshufd $0x1b, %xmm12, %xmm12; + vpshufd $0x1b, %xmm4, %xmm4; + + vmovdqu %xmm10, cmll_sub(16, CTX); + vmovdqu %xmm11, cmll_sub(18, CTX); + vmovdqu %xmm12, cmll_sub(20, CTX); + vmovdqu %xmm4, cmll_sub(22, CTX); + + /* subl(1) ^= subr(1) & ~subr(25); */ + vpandn %xmm15, %xmm5, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm5, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm6, %xmm6; + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm15, %xmm8, %xmm8; + vpslldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm9, %xmm9; + + /* absorb kw4 to other subkeys */ + vpslldq $8, %xmm9, %xmm15; + vpxor %xmm15, %xmm8, %xmm8; + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm15, %xmm6, %xmm6; + + /* subl(33) ^= subr(33) & ~subr(24); */ + vpandn %xmm15, %xmm5, %xmm14; + vpslldq $4, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm5, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm7, %xmm7; + vpshufd $0x1b, %xmm8, %xmm8; + vpshufd $0x1b, %xmm9, %xmm9; + + vmovdqu %xmm5, cmll_sub(24, CTX); + vmovdqu %xmm6, cmll_sub(26, CTX); + vmovdqu %xmm7, cmll_sub(28, CTX); + vmovdqu %xmm8, cmll_sub(30, CTX); + vmovdqu %xmm9, cmll_sub(32, CTX); + + vpshufd $0x1b, cmll_sub(22, CTX), %xmm0; + vpshufd $0x1b, cmll_sub(20, CTX), %xmm1; + vpshufd $0x1b, cmll_sub(18, CTX), %xmm2; + vpshufd $0x1b, cmll_sub(16, CTX), %xmm3; + vpshufd $0x1b, cmll_sub(14, CTX), %xmm4; + vpshufd $0x1b, cmll_sub(12, CTX), %xmm5; + vpshufd $0x1b, cmll_sub(10, CTX), %xmm6; + vpshufd $0x1b, cmll_sub(8, CTX), %xmm7; + + vpxor %xmm15, %xmm0, %xmm0; + vpxor %xmm15, %xmm1, %xmm1; + vpxor %xmm15, %xmm2, %xmm2; + + /* subl(33) ^= subr(33) & ~subr(24); */ + vpandn %xmm15, %xmm3, %xmm14; + vpslldq $4, %xmm14, %xmm14; + vpxor 
%xmm14, %xmm15, %xmm15; + /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm3, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm5, %xmm5; + vpxor %xmm15, %xmm6, %xmm6; + + vpshufd $0x1b, %xmm0, %xmm0; + vpshufd $0x1b, %xmm1, %xmm1; + vpshufd $0x1b, %xmm2, %xmm2; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + + vmovdqu %xmm0, cmll_sub(22, CTX); + vmovdqu %xmm1, cmll_sub(20, CTX); + vmovdqu %xmm2, cmll_sub(18, CTX); + vmovdqu %xmm4, cmll_sub(14, CTX); + vmovdqu %xmm5, cmll_sub(12, CTX); + vmovdqu %xmm6, cmll_sub(10, CTX); + + vpshufd $0x1b, cmll_sub(6, CTX), %xmm6; + vpshufd $0x1b, cmll_sub(4, CTX), %xmm4; + vpshufd $0x1b, cmll_sub(2, CTX), %xmm2; + vpshufd $0x1b, cmll_sub(0, CTX), %xmm0; + + /* subl(33) ^= subr(33) & ~subr(24); */ + vpandn %xmm15, %xmm7, %xmm14; + vpslldq $4, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm7, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm6, %xmm6; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm2, %xmm2; + vpxor %xmm15, %xmm0, %xmm0; + + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm2, %xmm2; + vpshufd $0x1b, %xmm0, %xmm0; + + vpsrldq $8, %xmm2, %xmm3; + vpsrldq $8, %xmm4, %xmm5; + vpsrldq $8, %xmm6, %xmm7; + + /* + * key XOR is end of F-function. + */ + vpxor %xmm2, %xmm0, %xmm0; + vpxor %xmm4, %xmm2, %xmm2; + + vmovq %xmm0, cmll_sub(0, CTX); + vmovq %xmm3, cmll_sub(2, CTX); + vpxor %xmm5, %xmm3, %xmm3; + vpxor %xmm6, %xmm4, %xmm4; + vpxor %xmm7, %xmm5, %xmm5; + vmovq %xmm2, cmll_sub(3, CTX); + vmovq %xmm3, cmll_sub(4, CTX); + vmovq %xmm4, cmll_sub(5, CTX); + vmovq %xmm5, cmll_sub(6, CTX); + + vmovq cmll_sub(7, CTX), %xmm7; + vmovq cmll_sub(8, CTX), %xmm8; + vmovq cmll_sub(9, CTX), %xmm9; + vmovq cmll_sub(10, CTX), %xmm10; + /* tl = subl(10) ^ (subr(10) & ~subr(8)); */ + vpandn %xmm10, %xmm8, %xmm15; + vpsrldq $4, %xmm15, %xmm15; + vpxor %xmm15, %xmm10, %xmm0; + /* dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); */ + vpand %xmm8, %xmm0, %xmm15; + vpslld $1, %xmm15, %xmm14; + vpsrld $31, %xmm15, %xmm15; + vpaddd %xmm14, %xmm15, %xmm15; + vpslldq $12, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm0, %xmm0; + + vpxor %xmm0, %xmm6, %xmm6; + vmovq %xmm6, cmll_sub(7, CTX); + + vmovq cmll_sub(11, CTX), %xmm11; + vmovq cmll_sub(12, CTX), %xmm12; + vmovq cmll_sub(13, CTX), %xmm13; + vmovq cmll_sub(14, CTX), %xmm14; + vmovq cmll_sub(15, CTX), %xmm15; + /* tl = subl(7) ^ (subr(7) & ~subr(9)); */ + vpandn %xmm7, %xmm9, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm7, %xmm0; + /* dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); */ + vpand %xmm9, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vpxor %xmm11, %xmm0, %xmm0; + vpxor %xmm12, %xmm10, %xmm10; + vpxor %xmm13, %xmm11, %xmm11; + vpxor %xmm14, %xmm12, %xmm12; + vpxor %xmm15, %xmm13, %xmm13; + vmovq %xmm0, cmll_sub(10, CTX); + vmovq %xmm10, cmll_sub(11, CTX); + vmovq %xmm11, 
cmll_sub(12, CTX); + vmovq %xmm12, cmll_sub(13, CTX); + vmovq %xmm13, cmll_sub(14, CTX); + + vmovq cmll_sub(16, CTX), %xmm6; + vmovq cmll_sub(17, CTX), %xmm7; + vmovq cmll_sub(18, CTX), %xmm8; + vmovq cmll_sub(19, CTX), %xmm9; + vmovq cmll_sub(20, CTX), %xmm10; + /* tl = subl(18) ^ (subr(18) & ~subr(16)); */ + vpandn %xmm8, %xmm6, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm8, %xmm0; + /* dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); */ + vpand %xmm6, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vpxor %xmm14, %xmm0, %xmm0; + vmovq %xmm0, cmll_sub(15, CTX); + + /* tl = subl(15) ^ (subr(15) & ~subr(17)); */ + vpandn %xmm15, %xmm7, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm15, %xmm0; + /* dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); */ + vpand %xmm7, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vmovq cmll_sub(21, CTX), %xmm1; + vmovq cmll_sub(22, CTX), %xmm2; + vmovq cmll_sub(23, CTX), %xmm3; + vmovq cmll_sub(24, CTX), %xmm4; + + vpxor %xmm9, %xmm0, %xmm0; + vpxor %xmm10, %xmm8, %xmm8; + vpxor %xmm1, %xmm9, %xmm9; + vpxor %xmm2, %xmm10, %xmm10; + vpxor %xmm3, %xmm1, %xmm1; + + vmovq %xmm0, cmll_sub(18, CTX); + vmovq %xmm8, cmll_sub(19, CTX); + vmovq %xmm9, cmll_sub(20, CTX); + vmovq %xmm10, cmll_sub(21, CTX); + vmovq %xmm1, cmll_sub(22, CTX); + + vmovq cmll_sub(25, CTX), %xmm5; + vmovq cmll_sub(26, CTX), %xmm6; + vmovq cmll_sub(27, CTX), %xmm7; + vmovq cmll_sub(28, CTX), %xmm8; + vmovq cmll_sub(29, CTX), %xmm9; + vmovq cmll_sub(30, CTX), %xmm10; + vmovq cmll_sub(31, CTX), %xmm11; + vmovq cmll_sub(32, CTX), %xmm12; + + /* tl = subl(26) ^ (subr(26) & ~subr(24)); */ + vpandn %xmm6, %xmm4, %xmm15; + vpsrldq $4, %xmm15, %xmm15; + vpxor %xmm15, %xmm6, %xmm0; + /* dw = tl & subl(26), tr = subr(24) ^ CAMELLIA_RL1(dw); */ + vpand %xmm4, %xmm0, %xmm15; + vpslld $1, %xmm15, %xmm14; + vpsrld $31, %xmm15, %xmm15; + vpaddd %xmm14, %xmm15, %xmm15; + vpslldq $12, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm0, %xmm0; + + vpxor %xmm0, %xmm2, %xmm2; + vmovq %xmm2, cmll_sub(23, CTX); + + /* tl = subl(23) ^ (subr(23) & ~subr(25)); */ + vpandn %xmm3, %xmm5, %xmm15; + vpsrldq $4, %xmm15, %xmm15; + vpxor %xmm15, %xmm3, %xmm0; + /* dw = tl & subl(26), tr = subr(24) ^ CAMELLIA_RL1(dw); */ + vpand %xmm5, %xmm0, %xmm15; + vpslld $1, %xmm15, %xmm14; + vpsrld $31, %xmm15, %xmm15; + vpaddd %xmm14, %xmm15, %xmm15; + vpslldq $12, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm0, %xmm0; + + vpxor %xmm7, %xmm0, %xmm0; + vpxor %xmm8, %xmm6, %xmm6; + vpxor %xmm9, %xmm7, %xmm7; + vpxor %xmm10, %xmm8, %xmm8; + vpxor %xmm11, %xmm9, %xmm9; + vpxor %xmm12, %xmm11, %xmm11; + + vmovq %xmm0, cmll_sub(26, CTX); + vmovq %xmm6, cmll_sub(27, CTX); + vmovq %xmm7, cmll_sub(28, CTX); + vmovq %xmm8, cmll_sub(29, CTX); + vmovq %xmm9, cmll_sub(30, CTX); + vmovq %xmm10, cmll_sub(31, CTX); + vmovq %xmm11, cmll_sub(32, CTX); + + /* kw2 and kw4 are unused now. 
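+ * As in the 128-bit path, they were folded into their neighbours;
+ * the 256-bit schedule fills all 34 u64 slots of the 272-byte
+ * key_table, and the stale kw2/kw4 slots, indices 1 and 33, are
+ * zeroed below.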
*/ + movq $0, cmll_sub(1, CTX); + movq $0, cmll_sub(33, CTX); + + vzeroall; + + ret; +ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;) + +.align 8 +.globl _gcry_camellia_aesni_avx_keygen +ELF(.type _gcry_camellia_aesni_avx_keygen,@function;) + +_gcry_camellia_aesni_avx_keygen: + /* input: + * %rdi: ctx, CTX + * %rsi: key + * %rdx: keylen + */ + + vzeroupper; + + vmovdqu (%rsi), %xmm0; + cmpl $24, %edx; + jb __camellia_avx_setup128; + je .Lprepare_key192; + + vmovdqu 16(%rsi), %xmm1; + jmp __camellia_avx_setup256; + +.Lprepare_key192: + vpcmpeqd %xmm2, %xmm2, %xmm2; + vmovq 16(%rsi), %xmm1; + + vpxor %xmm1, %xmm2, %xmm2; + vpslldq $8, %xmm2, %xmm2; + vpor %xmm2, %xmm1, %xmm1; + + jmp __camellia_avx_setup256; +ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;) + +#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/camellia-aesni-avx2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/camellia-aesni-avx2-amd64.S new file mode 100644 index 0000000..897e4ae --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/camellia-aesni-avx2-amd64.S @@ -0,0 +1,1762 @@ +/* camellia-avx2-aesni-amd64.S - AES-NI/AVX2 implementation of Camellia cipher + * + * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +#define CAMELLIA_TABLE_BYTE_LEN 272 + +/* struct CAMELLIA_context: */ +#define key_table 0 +#define key_bitlength CAMELLIA_TABLE_BYTE_LEN + +/* register macros */ +#define CTX %rdi +#define RIO %r8 + +/********************************************************************** + helper macros + **********************************************************************/ +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +#define ymm0_x xmm0 +#define ymm1_x xmm1 +#define ymm2_x xmm2 +#define ymm3_x xmm3 +#define ymm4_x xmm4 +#define ymm5_x xmm5 +#define ymm6_x xmm6 +#define ymm7_x xmm7 +#define ymm8_x xmm8 +#define ymm9_x xmm9 +#define ymm10_x xmm10 +#define ymm11_x xmm11 +#define ymm12_x xmm12 +#define ymm13_x xmm13 +#define ymm14_x xmm14 +#define ymm15_x xmm15 + +/********************************************************************** + 32-way camellia + **********************************************************************/ + +/* + * IN: + * x0..x7: byte-sliced AB state + * mem_cd: register pointer storing CD state + * key: index for key material + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \ + t7, mem_cd, key) \ + /* \ + * S-function with AES subbytes \ + */ \ + vbroadcasti128 .Linv_shift_row RIP, t4; \ + vpbroadcastd .L0f0f0f0f RIP, t7; \ + vbroadcasti128 .Lpre_tf_lo_s1 RIP, t5; \ + vbroadcasti128 .Lpre_tf_hi_s1 RIP, t6; \ + vbroadcasti128 .Lpre_tf_lo_s4 RIP, t2; \ + vbroadcasti128 .Lpre_tf_hi_s4 RIP, t3; \ + \ + /* AES inverse shift rows */ \ + vpshufb t4, x0, x0; \ + vpshufb t4, x7, x7; \ + vpshufb t4, x3, x3; \ + vpshufb t4, x6, x6; \ + vpshufb t4, x2, x2; \ + vpshufb t4, x5, x5; \ + vpshufb t4, x1, x1; \ + vpshufb t4, x4, x4; \ + \ + /* prefilter sboxes 1, 2 and 3 */ \ + /* prefilter sbox 4 */ \ + filter_8bit(x0, t5, t6, t7, t4); \ + filter_8bit(x7, t5, t6, t7, t4); \ + vextracti128 $1, x0, t0##_x; \ + vextracti128 $1, x7, t1##_x; \ + filter_8bit(x3, t2, t3, t7, t4); \ + filter_8bit(x6, t2, t3, t7, t4); \ + vextracti128 $1, x3, t3##_x; \ + vextracti128 $1, x6, t2##_x; \ + filter_8bit(x2, t5, t6, t7, t4); \ + filter_8bit(x5, t5, t6, t7, t4); \ + filter_8bit(x1, t5, t6, t7, t4); \ + filter_8bit(x4, t5, t6, t7, t4); \ + \ + vpxor t4##_x, t4##_x, t4##_x; \ + \ + /* AES subbytes + AES shift rows */ \ + vextracti128 $1, x2, t6##_x; \ + vextracti128 $1, x5, t5##_x; \ + vaesenclast t4##_x, x0##_x, x0##_x; \ + vaesenclast t4##_x, t0##_x, t0##_x; \ + vaesenclast t4##_x, x7##_x, x7##_x; \ + vaesenclast t4##_x, t1##_x, t1##_x; \ + vaesenclast t4##_x, x3##_x, x3##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vaesenclast t4##_x, x6##_x, x6##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t0##_x, x0, x0; \ + vinserti128 $1, t1##_x, x7, x7; \ + vinserti128 $1, t3##_x, x3, x3; \ + vinserti128 $1, t2##_x, x6, x6; \ + vextracti128 $1, x1, t3##_x; \ + vextracti128 $1, x4, t2##_x; \ + vbroadcasti128 .Lpost_tf_lo_s1 RIP, t0; \ + vbroadcasti128 .Lpost_tf_hi_s1 RIP, t1; \ + vaesenclast t4##_x, x2##_x, x2##_x; \ + vaesenclast t4##_x, t6##_x, t6##_x; \ + vaesenclast t4##_x, x5##_x, x5##_x; \ + vaesenclast t4##_x, t5##_x, t5##_x; \ + vaesenclast t4##_x, x1##_x, x1##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vaesenclast t4##_x, x4##_x, x4##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t6##_x, x2, x2; \ + vinserti128 $1, 
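Editor's note on filter_8bit() above: it evaluates an 8-bit GF(2)-affine transform on every byte lane using two 16-entry vpshufb lookups, one per nibble. A scalar sketch of the identity it relies on; the table contents here are placeholders, not the real prefilter constants:

    #include <stdint.h>

    /* For an affine map f over GF(2), f(hi ^ lo) = f(hi) ^ f(lo) ^ f(0),
     * so an 8-bit map splits into two 4-bit tables with the constant
     * term folded into one of them.  This is what one vpshufb pair
     * computes for all 32 byte lanes at once. */
    static uint8_t filter_8bit(uint8_t x,
                               const uint8_t lo_t[16], const uint8_t hi_t[16])
    {
        return lo_t[x & 0x0f] ^ hi_t[x >> 4];
    }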
t5##_x, x5, x5; \ + vinserti128 $1, t3##_x, x1, x1; \ + vinserti128 $1, t2##_x, x4, x4; \ + \ + /* postfilter sboxes 1 and 4 */ \ + vbroadcasti128 .Lpost_tf_lo_s3 RIP, t2; \ + vbroadcasti128 .Lpost_tf_hi_s3 RIP, t3; \ + filter_8bit(x0, t0, t1, t7, t4); \ + filter_8bit(x7, t0, t1, t7, t4); \ + filter_8bit(x3, t0, t1, t7, t6); \ + filter_8bit(x6, t0, t1, t7, t6); \ + \ + /* postfilter sbox 3 */ \ + vbroadcasti128 .Lpost_tf_lo_s2 RIP, t4; \ + vbroadcasti128 .Lpost_tf_hi_s2 RIP, t5; \ + filter_8bit(x2, t2, t3, t7, t6); \ + filter_8bit(x5, t2, t3, t7, t6); \ + \ + vpbroadcastq key, t0; /* higher 64-bit duplicate ignored */ \ + \ + /* postfilter sbox 2 */ \ + filter_8bit(x1, t4, t5, t7, t2); \ + filter_8bit(x4, t4, t5, t7, t2); \ + vpxor t7, t7, t7; \ + \ + vpsrldq $1, t0, t1; \ + vpsrldq $2, t0, t2; \ + vpshufb t7, t1, t1; \ + vpsrldq $3, t0, t3; \ + \ + /* P-function */ \ + vpxor x5, x0, x0; \ + vpxor x6, x1, x1; \ + vpxor x7, x2, x2; \ + vpxor x4, x3, x3; \ + \ + vpshufb t7, t2, t2; \ + vpsrldq $4, t0, t4; \ + vpshufb t7, t3, t3; \ + vpsrldq $5, t0, t5; \ + vpshufb t7, t4, t4; \ + \ + vpxor x2, x4, x4; \ + vpxor x3, x5, x5; \ + vpxor x0, x6, x6; \ + vpxor x1, x7, x7; \ + \ + vpsrldq $6, t0, t6; \ + vpshufb t7, t5, t5; \ + vpshufb t7, t6, t6; \ + \ + vpxor x7, x0, x0; \ + vpxor x4, x1, x1; \ + vpxor x5, x2, x2; \ + vpxor x6, x3, x3; \ + \ + vpxor x3, x4, x4; \ + vpxor x0, x5, x5; \ + vpxor x1, x6, x6; \ + vpxor x2, x7, x7; /* note: high and low parts swapped */ \ + \ + /* Add key material and result to CD (x becomes new CD) */ \ + \ + vpxor t6, x1, x1; \ + vpxor 5 * 32(mem_cd), x1, x1; \ + \ + vpsrldq $7, t0, t6; \ + vpshufb t7, t0, t0; \ + vpshufb t7, t6, t7; \ + \ + vpxor t7, x0, x0; \ + vpxor 4 * 32(mem_cd), x0, x0; \ + \ + vpxor t5, x2, x2; \ + vpxor 6 * 32(mem_cd), x2, x2; \ + \ + vpxor t4, x3, x3; \ + vpxor 7 * 32(mem_cd), x3, x3; \ + \ + vpxor t3, x4, x4; \ + vpxor 0 * 32(mem_cd), x4, x4; \ + \ + vpxor t2, x5, x5; \ + vpxor 1 * 32(mem_cd), x5, x5; \ + \ + vpxor t1, x6, x6; \ + vpxor 2 * 32(mem_cd), x6, x6; \ + \ + vpxor t0, x7, x7; \ + vpxor 3 * 32(mem_cd), x7, x7; + +/* + * IN/OUT: + * x0..x7: byte-sliced AB state preloaded + * mem_ab: byte-sliced AB state in memory + * mem_cd: byte-sliced CD state in memory + */ +#define two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i, dir, store_ab) \ + roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_cd, (key_table + (i) * 8)(CTX)); \ + \ + vmovdqu x0, 4 * 32(mem_cd); \ + vmovdqu x1, 5 * 32(mem_cd); \ + vmovdqu x2, 6 * 32(mem_cd); \ + vmovdqu x3, 7 * 32(mem_cd); \ + vmovdqu x4, 0 * 32(mem_cd); \ + vmovdqu x5, 1 * 32(mem_cd); \ + vmovdqu x6, 2 * 32(mem_cd); \ + vmovdqu x7, 3 * 32(mem_cd); \ + \ + roundsm32(x4, x5, x6, x7, x0, x1, x2, x3, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, (key_table + ((i) + (dir)) * 8)(CTX)); \ + \ + store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab); + +#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */ + +#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \ + /* Store new AB state */ \ + vmovdqu x4, 4 * 32(mem_ab); \ + vmovdqu x5, 5 * 32(mem_ab); \ + vmovdqu x6, 6 * 32(mem_ab); \ + vmovdqu x7, 7 * 32(mem_ab); \ + vmovdqu x0, 0 * 32(mem_ab); \ + vmovdqu x1, 1 * 32(mem_ab); \ + vmovdqu x2, 2 * 32(mem_ab); \ + vmovdqu x3, 3 * 32(mem_ab); + +#define enc_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store); + +#define dec_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store); + +/* + * IN: + * v0..3: byte-sliced 32-bit integers + * OUT: + * v0..3: (IN <<< 1) + */ +#define rol32_1_32(v0, v1, v2, v3, t0, t1, t2, zero) \ + vpcmpgtb v0, zero, t0; \ + vpaddb v0, v0, v0; \ + vpabsb t0, t0; \ + \ + vpcmpgtb v1, zero, t1; \ + vpaddb v1, v1, v1; \ + vpabsb t1, t1; \ + \ + vpcmpgtb v2, zero, t2; \ + vpaddb v2, v2, v2; \ + vpabsb t2, t2; \ + \ + vpor t0, v1, v1; \ + \ + vpcmpgtb v3, zero, t0; \ + vpaddb v3, v3, v3; \ + vpabsb t0, t0; \ + \ + vpor t1, v2, v2; \ + vpor t2, v3, v3; \ + vpor t0, v0, v0; + +/* + * IN: + * l: byte-sliced AB state in memory + * r: byte-sliced CD state in memory + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define fls32(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \ + tt1, tt2, tt3, kll, klr, krl, krr) \ + /* \ + * t0 = kll; \ + * t0 &= ll; \ + * lr ^= rol32(t0, 1); \ + */ \ + vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ + vpxor tt0, tt0, tt0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand l0, t0, t0; \ + vpand l1, t1, t1; \ + vpand l2, t2, t2; \ + vpand l3, t3, t3; \ + \ + rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor l4, t0, l4; \ + vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ + vmovdqu l4, 4 * 32(l); \ + vpxor l5, t1, l5; \ + vmovdqu l5, 5 * 32(l); \ + vpxor l6, t2, l6; \ + vmovdqu l6, 6 * 32(l); \ + vpxor l7, t3, l7; \ + vmovdqu l7, 7 * 32(l); \ + \ + /* \ + * t2 = krr; \ + * t2 |= rr; \ + * rl ^= t2; \ + */ \ + \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor 4 * 32(r), t0, t0; \ + vpor 5 * 32(r), t1, t1; \ + vpor 6 * 32(r), t2, t2; \ + vpor 7 * 32(r), t3, t3; \ + \ + vpxor 0 * 32(r), t0, t0; \ + vpxor 1 * 32(r), t1, t1; \ + vpxor 2 * 32(r), t2, t2; \ + vpxor 3 * 32(r), t3, t3; \ + vmovdqu t0, 0 * 32(r); \ + vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ + vmovdqu t1, 1 * 32(r); \ + vmovdqu t2, 2 * 32(r); \ + vmovdqu t3, 3 * 32(r); \ + \ + /* \ + * t2 = krl; \ + * t2 &= rl; \ + * rr ^= rol32(t2, 1); \ + */ \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand 0 * 32(r), t0, t0; \ + vpand 1 * 32(r), t1, t1; \ + vpand 2 * 32(r), t2, t2; \ + vpand 3 * 32(r), t3, t3; \ + \ + rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor 4 * 32(r), t0, t0; \ + vpxor 5 * 32(r), t1, t1; \ + vpxor 6 * 32(r), t2, t2; \ + vpxor 7 * 32(r), t3, t3; \ + vmovdqu t0, 4 * 32(r); 
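Editor's note on fls32: it applies Camellia's FL function to one half of the byte-sliced state and FL⁻¹ to the other in a single fused pass; the interleaved scheduling hides the vpbroadcastd/vpshufb latencies. A plain C model of the four steps in the exact order the macro performs them, following its own pseudocode comments:

    #include <stdint.h>

    static uint32_t rol32(uint32_t x, int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* FL on (ll, lr) fused with FL^-1 on (rl, rr); kll/klr/krl/krr are
     * the four 32-bit subkey words named in the macro arguments. */
    static void camellia_fls(uint32_t *ll, uint32_t *lr,
                             uint32_t *rl, uint32_t *rr,
                             uint32_t kll, uint32_t klr,
                             uint32_t krl, uint32_t krr)
    {
        *lr ^= rol32(*ll & kll, 1);  /* FL, first step */
        *rl ^= (*rr | krr);          /* FL^-1, first step */
        *rr ^= rol32(*rl & krl, 1);  /* FL^-1, second step (uses updated rl) */
        *ll ^= (*lr | klr);          /* FL, second step (uses updated lr) */
    }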
\ + vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ + vmovdqu t1, 5 * 32(r); \ + vmovdqu t2, 6 * 32(r); \ + vmovdqu t3, 7 * 32(r); \ + \ + /* \ + * t0 = klr; \ + * t0 |= lr; \ + * ll ^= t0; \ + */ \ + \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor l4, t0, t0; \ + vpor l5, t1, t1; \ + vpor l6, t2, t2; \ + vpor l7, t3, t3; \ + \ + vpxor l0, t0, l0; \ + vmovdqu l0, 0 * 32(l); \ + vpxor l1, t1, l1; \ + vmovdqu l1, 1 * 32(l); \ + vpxor l2, t2, l2; \ + vmovdqu l2, 2 * 32(l); \ + vpxor l3, t3, l3; \ + vmovdqu l3, 3 * 32(l); + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \ + a3, b3, c3, d3, st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vbroadcasti128 .Lshufb_16x16b RIP, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +/* load blocks to registers and apply pre-whitening */ +#define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio, key) \ + vpbroadcastq key, x0; \ + vpshufb .Lpack_bswap RIP, x0, x0; \ + \ + vpxor 0 * 32(rio), x0, y7; \ + vpxor 1 * 32(rio), x0, y6; \ + vpxor 2 * 32(rio), x0, y5; \ + vpxor 3 * 32(rio), x0, y4; \ + vpxor 4 * 32(rio), x0, y3; \ + vpxor 5 * 32(rio), x0, y2; \ + vpxor 6 * 32(rio), x0, y1; \ + vpxor 7 * 32(rio), x0, y0; \ + vpxor 8 * 32(rio), x0, x7; \ + vpxor 9 * 32(rio), x0, x6; \ + vpxor 10 * 32(rio), x0, x5; \ + vpxor 11 * 32(rio), x0, x4; \ + vpxor 12 * 32(rio), x0, x3; \ + vpxor 13 * 32(rio), x0, x2; \ + vpxor 14 * 32(rio), x0, x1; \ + vpxor 15 * 32(rio), x0, x0; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack32_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd) \ + byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \ + y4, y5, y6, y7, (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 32(mem_ab); \ + vmovdqu x1, 1 * 32(mem_ab); \ + vmovdqu x2, 2 * 32(mem_ab); \ + vmovdqu x3, 3 * 32(mem_ab); \ + vmovdqu x4, 4 * 32(mem_ab); \ + vmovdqu x5, 5 * 32(mem_ab); \ + vmovdqu x6, 6 * 32(mem_ab); \ + vmovdqu x7, 7 * 32(mem_ab); \ + vmovdqu y0, 0 * 32(mem_cd); \ + 
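Editor's note on the byte-slicing machinery: byteslice_16x16b_fast and the transpose_4x4 network implement, with unpack and shuffle steps, the data rearrangement sketched below. Afterwards, vector register b holds byte position b of all 32 blocks, so each per-byte operation in the round function touches every block at once. A deliberately slow reference version for comparison:

    #include <stdint.h>

    /* Reference byte-slicer: planes[b][i] = byte b of block i.  The
     * assembly reaches the same layout with vpunpckh/vpunpckl and
     * vpshufb steps instead of scalar loops. */
    static void byteslice(const uint8_t blocks[32][16], uint8_t planes[16][32])
    {
        for (int b = 0; b < 16; b++)
            for (int i = 0; i < 32; i++)
                planes[b][i] = blocks[i][b];
    }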
vmovdqu y1, 1 * 32(mem_cd); \ + vmovdqu y2, 2 * 32(mem_cd); \ + vmovdqu y3, 3 * 32(mem_cd); \ + vmovdqu y4, 4 * 32(mem_cd); \ + vmovdqu y5, 5 * 32(mem_cd); \ + vmovdqu y6, 6 * 32(mem_cd); \ + vmovdqu y7, 7 * 32(mem_cd); + +/* de-byteslice, apply post-whitening and store blocks */ +#define outunpack32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ + y5, y6, y7, key, stack_tmp0, stack_tmp1) \ + byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \ + y3, y7, x3, x7, stack_tmp0, stack_tmp1); \ + \ + vmovdqu x0, stack_tmp0; \ + \ + vpbroadcastq key, x0; \ + vpshufb .Lpack_bswap RIP, x0, x0; \ + \ + vpxor x0, y7, y7; \ + vpxor x0, y6, y6; \ + vpxor x0, y5, y5; \ + vpxor x0, y4, y4; \ + vpxor x0, y3, y3; \ + vpxor x0, y2, y2; \ + vpxor x0, y1, y1; \ + vpxor x0, y0, y0; \ + vpxor x0, x7, x7; \ + vpxor x0, x6, x6; \ + vpxor x0, x5, x5; \ + vpxor x0, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x0, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor stack_tmp0, x0, x0; + +#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio) \ + vmovdqu x0, 0 * 32(rio); \ + vmovdqu x1, 1 * 32(rio); \ + vmovdqu x2, 2 * 32(rio); \ + vmovdqu x3, 3 * 32(rio); \ + vmovdqu x4, 4 * 32(rio); \ + vmovdqu x5, 5 * 32(rio); \ + vmovdqu x6, 6 * 32(rio); \ + vmovdqu x7, 7 * 32(rio); \ + vmovdqu y0, 8 * 32(rio); \ + vmovdqu y1, 9 * 32(rio); \ + vmovdqu y2, 10 * 32(rio); \ + vmovdqu y3, 11 * 32(rio); \ + vmovdqu y4, 12 * 32(rio); \ + vmovdqu y5, 13 * 32(rio); \ + vmovdqu y6, 14 * 32(rio); \ + vmovdqu y7, 15 * 32(rio); + +.text +.align 32 + +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) + +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + +.Lpack_bswap: + .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 + .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox1, sbox2, sbox3: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s1: + .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86 + .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88 +.Lpre_tf_hi_s1: + .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a + .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox4: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in <<< 1) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s4: + .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25 + .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74 +.Lpre_tf_hi_s4: + .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72 + .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf + +/* + * post-SubByte transform + * + * post-lookup for sbox1, sbox4: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s1: + .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31 + .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1 +.Lpost_tf_hi_s1: + .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8 + .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 
0x0c + +/* + * post-SubByte transform + * + * post-lookup for sbox2: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) <<< 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s2: + .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62 + .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3 +.Lpost_tf_hi_s2: + .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51 + .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18 + +/* + * post-SubByte transform + * + * post-lookup for sbox3: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) >>> 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s3: + .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98 + .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8 +.Lpost_tf_hi_s3: + .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54 + .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06 + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + + +.align 8 +ELF(.type __camellia_enc_blk32,@function;) + +__camellia_enc_blk32: + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 512 bytes + * %ymm0..%ymm15: 32 plaintext blocks + * output: + * %ymm0..%ymm15: 32 encrypted blocks, order swapped: + * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + + leaq 8 * 32(%rax), %rcx; + + inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx); + + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 0); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (8) * 8) + 0)(CTX), + ((key_table + (8) * 8) + 4)(CTX), + ((key_table + (8) * 8) + 8)(CTX), + ((key_table + (8) * 8) + 12)(CTX)); + + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 8); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (16) * 8) + 0)(CTX), + ((key_table + (16) * 8) + 4)(CTX), + ((key_table + (16) * 8) + 8)(CTX), + ((key_table + (16) * 8) + 12)(CTX)); + + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 16); + + movl $24, %r8d; + cmpl $128, key_bitlength(CTX); + jne .Lenc_max32; + +.Lenc_done: + /* load CD for output */ + vmovdqu 0 * 32(%rcx), %ymm8; + vmovdqu 1 * 32(%rcx), %ymm9; + vmovdqu 2 * 32(%rcx), %ymm10; + vmovdqu 3 * 32(%rcx), %ymm11; + vmovdqu 4 * 32(%rcx), %ymm12; + vmovdqu 5 * 32(%rcx), %ymm13; + vmovdqu 6 * 32(%rcx), %ymm14; + vmovdqu 7 * 32(%rcx), %ymm15; + + outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax)); + + ret; + +.align 8 +.Lenc_max32: + movl $32, %r8d; + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (24) * 8) + 0)(CTX), + ((key_table + (24) * 8) + 4)(CTX), + ((key_table + (24) * 8) + 8)(CTX), + ((key_table + (24) * 8) + 12)(CTX)); + + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 24); + + jmp .Lenc_done; +ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;) + +.align 8 +ELF(.type __camellia_dec_blk32,@function;) + +__camellia_dec_blk32: + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 512 bytes + * %r8d: 24 for 16 byte key, 32 for larger + * %ymm0..%ymm15: 32 encrypted blocks + * output: + * %ymm0..%ymm15: 32 plaintext blocks, order swapped: + * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + + leaq 8 * 32(%rax), %rcx; + + inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx); + + cmpl $32, %r8d; + je .Ldec_max32; + +.Ldec_max24: + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 16); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (16) * 8) + 8)(CTX), + ((key_table + (16) * 8) + 12)(CTX), + ((key_table + (16) * 8) + 0)(CTX), + ((key_table + (16) * 8) + 4)(CTX)); + + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 8); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (8) * 8) + 8)(CTX), + ((key_table + (8) * 8) + 12)(CTX), + ((key_table + (8) * 8) + 0)(CTX), + ((key_table + (8) * 8) + 4)(CTX)); + + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 0); + + /* load CD for output */ + vmovdqu 0 * 32(%rcx), %ymm8; + vmovdqu 1 * 32(%rcx), %ymm9; + vmovdqu 2 * 32(%rcx), %ymm10; + vmovdqu 3 * 32(%rcx), %ymm11; + vmovdqu 4 * 32(%rcx), %ymm12; + vmovdqu 5 * 32(%rcx), %ymm13; + vmovdqu 6 * 32(%rcx), %ymm14; + vmovdqu 7 * 32(%rcx), %ymm15; + + outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); + + ret; + +.align 8 +.Ldec_max32: + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 24); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (24) * 8) + 8)(CTX), + ((key_table + (24) * 8) + 12)(CTX), + ((key_table + (24) * 8) + 0)(CTX), + ((key_table + (24) * 8) + 4)(CTX)); + + jmp .Ldec_max24; +ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +.align 8 +.globl _gcry_camellia_aesni_avx2_ctr_enc +ELF(.type _gcry_camellia_aesni_avx2_ctr_enc,@function;) + +_gcry_camellia_aesni_avx2_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: iv (big endian, 128bit) + */ 
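Editor's note on the CTR entry point that follows: the IV is byteswapped once, then needs a 128-bit big-endian increment 32 times; the cmpq against 0xffffffffffffffff - 32 selects a fast path where only the low quadword is bumped, since no carry into the high quadword is possible. A C model of the two pieces, assuming nothing beyond the standard library:

    #include <stdint.h>

    /* Big-endian 128-bit increment: the scalar equivalent of what
     * inc_le128 does after the byteswap to little-endian. */
    static void be128_inc(uint8_t ctr[16])
    {
        for (int i = 15; i >= 0; i--)
            if (++ctr[i] != 0)
                break;
    }

    /* Fast-path test: if adding 32 cannot wrap the low 64-bit word,
     * the high word never changes and no carry handling is needed. */
    static int ctr_needs_carry(const uint8_t ctr[16])
    {
        uint64_t lo = 0;
        for (int i = 8; i < 16; i++)
            lo = (lo << 8) | ctr[i];
        return lo > UINT64_MAX - 32;
    }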
+ + pushq %rbp; + movq %rsp, %rbp; + + movq 8(%rcx), %r11; + bswapq %r11; + + vzeroupper; + + subq $(16 * 32), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + vpcmpeqd %ymm15, %ymm15, %ymm15; + vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), %xmm0; + vpshufb .Lbswap128_mask RIP, %xmm0, %xmm0; + vmovdqa %xmm0, %xmm1; + inc_le128(%xmm0, %xmm15, %xmm14); + vbroadcasti128 .Lbswap128_mask RIP, %ymm14; + vinserti128 $1, %xmm0, %ymm1, %ymm0; + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 15 * 32(%rax); + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 32), %r11; + ja .Lload_ctr_carry; + + /* construct IVs */ + vpaddq %ymm15, %ymm15, %ymm15; /* ab: -2:0 ; cd: -2:0 */ + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 14 * 32(%rax); + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 13 * 32(%rax); + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm12; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm11; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm10; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm9; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm8; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm7; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm6; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm5; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm4; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm3; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm2; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm1; + vpsubq %ymm15, %ymm0, %ymm0; /* +30 ; +31 */ + vpsubq %xmm15, %xmm0, %xmm13; /* +32 */ + vpshufb %ymm14, %ymm0, %ymm0; + vpshufb %xmm14, %xmm13, %xmm13; + vmovdqu %xmm13, (%rcx); + + jmp .Lload_ctr_done; + +.align 4 +.Lload_ctr_carry: + /* construct IVs */ + inc_le128(%ymm0, %ymm15, %ymm13); /* ab: le1 ; cd: le2 */ + inc_le128(%ymm0, %ymm15, %ymm13); /* ab: le2 ; cd: le3 */ + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 14 * 32(%rax); + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 13 * 32(%rax); + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm12; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm11; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm10; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm9; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm8; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm7; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm6; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm5; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm4; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm3; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm2; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm1; 
+ inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vextracti128 $1, %ymm0, %xmm13; + vpshufb %ymm14, %ymm0, %ymm0; + inc_le128(%xmm13, %xmm15, %xmm14); + vpshufb .Lbswap128_mask RIP, %xmm13, %xmm13; + vmovdqu %xmm13, (%rcx); + +.align 4 +.Lload_ctr_done: + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm15; + vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor 13 * 32(%rax), %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_enc_blk32; + + vpxor 0 * 32(%rdx), %ymm7, %ymm7; + vpxor 1 * 32(%rdx), %ymm6, %ymm6; + vpxor 2 * 32(%rdx), %ymm5, %ymm5; + vpxor 3 * 32(%rdx), %ymm4, %ymm4; + vpxor 4 * 32(%rdx), %ymm3, %ymm3; + vpxor 5 * 32(%rdx), %ymm2, %ymm2; + vpxor 6 * 32(%rdx), %ymm1, %ymm1; + vpxor 7 * 32(%rdx), %ymm0, %ymm0; + vpxor 8 * 32(%rdx), %ymm15, %ymm15; + vpxor 9 * 32(%rdx), %ymm14, %ymm14; + vpxor 10 * 32(%rdx), %ymm13, %ymm13; + vpxor 11 * 32(%rdx), %ymm12, %ymm12; + vpxor 12 * 32(%rdx), %ymm11, %ymm11; + vpxor 13 * 32(%rdx), %ymm10, %ymm10; + vpxor 14 * 32(%rdx), %ymm9, %ymm9; + vpxor 15 * 32(%rdx), %ymm8, %ymm8; + leaq 32 * 16(%rdx), %rdx; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_cbc_dec +ELF(.type _gcry_camellia_aesni_avx2_cbc_dec,@function;) + +_gcry_camellia_aesni_avx2_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: iv + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + movq %rcx, %r9; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + subq $(16 * 32), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx, (key_table)(CTX, %r8, 8)); + + call __camellia_dec_blk32; + + /* XOR output with IV */ + vmovdqu %ymm8, (%rax); + vmovdqu (%r9), %xmm8; + vinserti128 $1, (%rdx), %ymm8, %ymm8; + vpxor %ymm8, %ymm7, %ymm7; + vmovdqu (%rax), %ymm8; + vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6; + vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5; + vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4; + vpxor (3 * 32 + 16)(%rdx), %ymm3, %ymm3; + vpxor (4 * 32 + 16)(%rdx), %ymm2, %ymm2; + vpxor (5 * 32 + 16)(%rdx), %ymm1, %ymm1; + vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm0; + vpxor (7 * 32 + 16)(%rdx), %ymm15, %ymm15; + vpxor (8 * 32 + 16)(%rdx), %ymm14, %ymm14; + vpxor (9 * 32 + 16)(%rdx), %ymm13, %ymm13; + vpxor (10 * 32 + 16)(%rdx), %ymm12, %ymm12; + vpxor (11 * 32 + 16)(%rdx), %ymm11, %ymm11; + vpxor (12 * 32 + 16)(%rdx), %ymm10, %ymm10; + vpxor (13 * 32 + 16)(%rdx), %ymm9, %ymm9; + vpxor (14 * 32 + 16)(%rdx), %ymm8, %ymm8; + movq (15 * 32 + 16 + 0)(%rdx), %rax; + movq (15 * 32 + 16 + 8)(%rdx), %rcx; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + 
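Editor's note on the CBC routine above: CBC decryption has no dependency between block-cipher invocations; only the final XOR uses the previous ciphertext block, so all 32 decryptions can run first and the ciphertext chain is applied afterwards. The assembly is also careful to capture the last ciphertext block as the next IV before dst can overwrite src. A scalar model with a placeholder block-cipher callback (decrypt_blk stands in for the Camellia primitive):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    typedef void (*blk_fn)(uint8_t out[16], const uint8_t in[16]);

    static void cbc_dec(blk_fn decrypt_blk, uint8_t *dst, const uint8_t *src,
                        uint8_t iv[16], size_t nblocks)
    {
        uint8_t next_iv[16], buf[16];

        memcpy(next_iv, src + 16 * (nblocks - 1), 16); /* save before writes */
        for (size_t i = nblocks; i-- > 0; ) {          /* backwards: in-place safe */
            decrypt_blk(buf, src + 16 * i);
            const uint8_t *prev = i ? src + 16 * (i - 1) : iv;
            for (int j = 0; j < 16; j++)
                dst[16 * i + j] = buf[j] ^ prev[j];
        }
        memcpy(iv, next_iv, 16);
    }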
%ymm8, %rsi); + + /* store new IV */ + movq %rax, (0)(%r9); + movq %rcx, (8)(%r9); + + vzeroall; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_cfb_dec +ELF(.type _gcry_camellia_aesni_avx2_cfb_dec,@function;) + +_gcry_camellia_aesni_avx2_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: iv + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 32), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm0; + vpshufb .Lpack_bswap RIP, %ymm0, %ymm0; + vmovdqu (%rcx), %xmm15; + vinserti128 $1, (%rdx), %ymm15, %ymm15; + vpxor %ymm15, %ymm0, %ymm15; + vmovdqu (15 * 32 + 16)(%rdx), %xmm1; + vmovdqu %xmm1, (%rcx); /* store new IV */ + vpxor (0 * 32 + 16)(%rdx), %ymm0, %ymm14; + vpxor (1 * 32 + 16)(%rdx), %ymm0, %ymm13; + vpxor (2 * 32 + 16)(%rdx), %ymm0, %ymm12; + vpxor (3 * 32 + 16)(%rdx), %ymm0, %ymm11; + vpxor (4 * 32 + 16)(%rdx), %ymm0, %ymm10; + vpxor (5 * 32 + 16)(%rdx), %ymm0, %ymm9; + vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm8; + vpxor (7 * 32 + 16)(%rdx), %ymm0, %ymm7; + vpxor (8 * 32 + 16)(%rdx), %ymm0, %ymm6; + vpxor (9 * 32 + 16)(%rdx), %ymm0, %ymm5; + vpxor (10 * 32 + 16)(%rdx), %ymm0, %ymm4; + vpxor (11 * 32 + 16)(%rdx), %ymm0, %ymm3; + vpxor (12 * 32 + 16)(%rdx), %ymm0, %ymm2; + vpxor (13 * 32 + 16)(%rdx), %ymm0, %ymm1; + vpxor (14 * 32 + 16)(%rdx), %ymm0, %ymm0; + + call __camellia_enc_blk32; + + vpxor 0 * 32(%rdx), %ymm7, %ymm7; + vpxor 1 * 32(%rdx), %ymm6, %ymm6; + vpxor 2 * 32(%rdx), %ymm5, %ymm5; + vpxor 3 * 32(%rdx), %ymm4, %ymm4; + vpxor 4 * 32(%rdx), %ymm3, %ymm3; + vpxor 5 * 32(%rdx), %ymm2, %ymm2; + vpxor 6 * 32(%rdx), %ymm1, %ymm1; + vpxor 7 * 32(%rdx), %ymm0, %ymm0; + vpxor 8 * 32(%rdx), %ymm15, %ymm15; + vpxor 9 * 32(%rdx), %ymm14, %ymm14; + vpxor 10 * 32(%rdx), %ymm13, %ymm13; + vpxor 11 * 32(%rdx), %ymm12, %ymm12; + vpxor 12 * 32(%rdx), %ymm11, %ymm11; + vpxor 13 * 32(%rdx), %ymm10, %ymm10; + vpxor 14 * 32(%rdx), %ymm9, %ymm9; + vpxor 15 * 32(%rdx), %ymm8, %ymm8; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_enc +ELF(.type _gcry_camellia_aesni_avx2_ocb_enc,@function;) + +_gcry_camellia_aesni_avx2_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[32]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rax); + movq %r11, (16 * 32 + 1 * 8)(%rax); + movq %r12, (16 * 32 + 2 * 8)(%rax); + movq %r13, (16 * 32 + 3 * 8)(%rax); + + vmovdqu (%rcx), %xmm14; + vmovdqu (%r8), %xmm13; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128 $1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm13, %ymm13; \ + vpxor yreg, %ymm15, yreg; \ + vmovdqu %ymm15, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), 
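Editor's note on OCB_INPUT above: each invocation handles two blocks, chaining the running offset through two of the caller-precomputed L values (the %r9 pointer array), folding the plaintext into the checksum, and pre-whitening the block for the cipher call. A scalar model of the per-block bookkeeping; L_ntz points at the caller-supplied L_{ntz(i)} value for the block's index:

    #include <stdint.h>

    static void ocb_input(uint8_t offset[16], uint8_t checksum[16],
                          const uint8_t L_ntz[16],
                          const uint8_t pt[16], uint8_t whitened[16])
    {
        for (int j = 0; j < 16; j++) {
            offset[j]   ^= L_ntz[j];   /* Offset_i = Offset_{i-1} ^ L_{ntz(i)} */
            checksum[j] ^= pt[j];      /* Checksum_i = Checksum_{i-1} ^ P_i */
            whitened[j]  = pt[j] ^ offset[j];  /* block cipher input */
        }
    }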
%r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, %ymm0); + vmovdqu %ymm0, (13 * 32)(%rax); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r9), %r10; + movq (17 * 8)(%r9), %r11; + movq (18 * 8)(%r9), %r12; + movq (19 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r9), %r10; + movq (21 * 8)(%r9), %r11; + movq (22 * 8)(%r9), %r12; + movq (23 * 8)(%r9), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r9), %r10; + movq (25 * 8)(%r9), %r11; + movq (26 * 8)(%r9), %r12; + movq (27 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, %ymm2); + movq (28 * 8)(%r9), %r10; + movq (29 * 8)(%r9), %r11; + movq (30 * 8)(%r9), %r12; + movq (31 * 8)(%r9), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vextracti128 $1, %ymm13, %xmm15; + vmovdqu %xmm14, (%rcx); + vpxor %xmm13, %xmm15, %xmm15; + vmovdqu %xmm15, (%r8); + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm15; + vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor 13 * 32(%rax), %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_enc_blk32; + + vpxor 0 * 32(%rsi), %ymm7, %ymm7; + vpxor 1 * 32(%rsi), %ymm6, %ymm6; + vpxor 2 * 32(%rsi), %ymm5, %ymm5; + vpxor 3 * 32(%rsi), %ymm4, %ymm4; + vpxor 4 * 32(%rsi), %ymm3, %ymm3; + vpxor 5 * 32(%rsi), %ymm2, %ymm2; + vpxor 6 * 32(%rsi), %ymm1, %ymm1; + vpxor 7 * 32(%rsi), %ymm0, %ymm0; + vpxor 8 * 32(%rsi), %ymm15, %ymm15; + vpxor 9 * 32(%rsi), %ymm14, %ymm14; + vpxor 10 * 32(%rsi), %ymm13, %ymm13; + vpxor 11 * 32(%rsi), %ymm12, %ymm12; + vpxor 12 * 32(%rsi), %ymm11, %ymm11; + vpxor 13 * 32(%rsi), %ymm10, %ymm10; + vpxor 14 * 32(%rsi), %ymm9, %ymm9; + vpxor 15 * 32(%rsi), %ymm8, %ymm8; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + movq (16 * 32 + 0 * 8)(%rax), %r10; + movq (16 * 32 + 1 * 8)(%rax), %r11; + movq (16 * 32 + 2 * 8)(%rax), %r12; + movq (16 * 32 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_ocb_enc,.-_gcry_camellia_aesni_avx2_ocb_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_dec +ELF(.type _gcry_camellia_aesni_avx2_ocb_dec,@function;) + +_gcry_camellia_aesni_avx2_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: offset + * %r8 : checksum + 
* %r9 : L pointers (void *L[32]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rax); + movq %r11, (16 * 32 + 1 * 8)(%rax); + movq %r12, (16 * 32 + 2 * 8)(%rax); + movq %r13, (16 * 32 + 3 * 8)(%rax); + + vmovdqu (%rcx), %xmm14; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128 $1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm15, yreg; \ + vmovdqu %ymm15, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, %ymm13); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r9), %r10; + movq (17 * 8)(%r9), %r11; + movq (18 * 8)(%r9), %r12; + movq (19 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r9), %r10; + movq (21 * 8)(%r9), %r11; + movq (22 * 8)(%r9), %r12; + movq (23 * 8)(%r9), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r9), %r10; + movq (25 * 8)(%r9), %r11; + movq (26 * 8)(%r9), %r12; + movq (27 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, %ymm2); + movq (28 * 8)(%r9), %r10; + movq (29 * 8)(%r9), %r11; + movq (30 * 8)(%r9), %r12; + movq (31 * 8)(%r9), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rcx); + + movq %r8, %r10; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r9d; + cmovel %r9d, %r8d; /* max */ + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15; + vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor %ymm13, %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_dec_blk32; + + vpxor 0 * 32(%rsi), %ymm7, %ymm7; + vpxor 1 * 32(%rsi), %ymm6, %ymm6; + vpxor 2 * 32(%rsi), %ymm5, %ymm5; + vpxor 3 * 32(%rsi), %ymm4, %ymm4; + vpxor 4 * 32(%rsi), %ymm3, %ymm3; + vpxor 5 * 32(%rsi), %ymm2, %ymm2; + vpxor 6 * 32(%rsi), %ymm1, %ymm1; + vpxor 7 * 32(%rsi), %ymm0, %ymm0; + vmovdqu %ymm7, (7 * 32)(%rax); + vmovdqu %ymm6, (6 * 32)(%rax); + vpxor 8 * 32(%rsi), %ymm15, %ymm15; + vpxor 9 * 32(%rsi), %ymm14, %ymm14; + vpxor 10 * 32(%rsi), %ymm13, %ymm13; + vpxor 11 * 32(%rsi), 
%ymm12, %ymm12; + vpxor 12 * 32(%rsi), %ymm11, %ymm11; + vpxor 13 * 32(%rsi), %ymm10, %ymm10; + vpxor 14 * 32(%rsi), %ymm9, %ymm9; + vpxor 15 * 32(%rsi), %ymm8, %ymm8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vpxor %ymm5, %ymm7, %ymm7; + vpxor %ymm4, %ymm6, %ymm6; + vpxor %ymm3, %ymm7, %ymm7; + vpxor %ymm2, %ymm6, %ymm6; + vpxor %ymm1, %ymm7, %ymm7; + vpxor %ymm0, %ymm6, %ymm6; + vpxor %ymm15, %ymm7, %ymm7; + vpxor %ymm14, %ymm6, %ymm6; + vpxor %ymm13, %ymm7, %ymm7; + vpxor %ymm12, %ymm6, %ymm6; + vpxor %ymm11, %ymm7, %ymm7; + vpxor %ymm10, %ymm6, %ymm6; + vpxor %ymm9, %ymm7, %ymm7; + vpxor %ymm8, %ymm6, %ymm6; + vpxor %ymm7, %ymm6, %ymm7; + + vextracti128 $1, %ymm7, %xmm6; + vpxor %xmm6, %xmm7, %xmm7; + vpxor (%r10), %xmm7, %xmm7; + vmovdqu %xmm7, (%r10); + + vmovdqu 7 * 32(%rax), %ymm7; + vmovdqu 6 * 32(%rax), %ymm6; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + movq (16 * 32 + 0 * 8)(%rax), %r10; + movq (16 * 32 + 1 * 8)(%rax), %r11; + movq (16 * 32 + 2 * 8)(%rax), %r12; + movq (16 * 32 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_ocb_dec,.-_gcry_camellia_aesni_avx2_ocb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_auth +ELF(.type _gcry_camellia_aesni_avx2_ocb_auth,@function;) + +_gcry_camellia_aesni_avx2_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (32 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[32]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rax); + movq %r11, (16 * 32 + 1 * 8)(%rax); + movq %r12, (16 * 32 + 2 * 8)(%rax); + movq %r13, (16 * 32 + 3 * 8)(%rax); + + vmovdqu (%rdx), %xmm14; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rsi), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128 $1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm15, yreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(2, %r10, %r11, %ymm13); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r8), %r10; + movq (17 * 8)(%r8), %r11; + movq (18 * 8)(%r8), %r12; + movq (19 * 8)(%r8), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r8), %r10; + movq (21 * 8)(%r8), %r11; + movq (22 * 8)(%r8), %r12; + movq (23 * 8)(%r8), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r8), %r10; + movq (25 * 8)(%r8), %r11; + movq (26 * 8)(%r8), %r12; + movq (27 * 8)(%r8), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, 
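Editor's note on the checksum reductions used by ocb_dec above and ocb_auth below: the checksum is a flat XOR over all (plaintext) blocks; the register-level XOR tree merely reassociates that sum, and the vextracti128/vpxor pair at the end folds the two 128-bit ymm lanes into the final 16-byte value. Reference computation:

    #include <stddef.h>
    #include <stdint.h>

    /* XOR-accumulate nblocks 16-byte blocks into sum.  Associativity of
     * XOR is what lets the assembly sum 32 blocks as a tree across the
     * sixteen ymm registers and fold the two lanes last. */
    static void ocb_checksum(uint8_t sum[16], const uint8_t *blocks,
                             size_t nblocks)
    {
        for (size_t i = 0; i < nblocks; i++)
            for (int j = 0; j < 16; j++)
                sum[j] ^= blocks[16 * i + j];
    }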
%ymm2); + movq (28 * 8)(%r8), %r10; + movq (29 * 8)(%r8), %r11; + movq (30 * 8)(%r8), %r12; + movq (31 * 8)(%r8), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rdx); + + movq %rcx, %r10; + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm15; + vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor %ymm13, %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_enc_blk32; + + vpxor %ymm7, %ymm6, %ymm6; + vpxor %ymm5, %ymm4, %ymm4; + vpxor %ymm3, %ymm2, %ymm2; + vpxor %ymm1, %ymm0, %ymm0; + vpxor %ymm15, %ymm14, %ymm14; + vpxor %ymm13, %ymm12, %ymm12; + vpxor %ymm11, %ymm10, %ymm10; + vpxor %ymm9, %ymm8, %ymm8; + + vpxor %ymm6, %ymm4, %ymm4; + vpxor %ymm2, %ymm0, %ymm0; + vpxor %ymm14, %ymm12, %ymm12; + vpxor %ymm10, %ymm8, %ymm8; + + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm12, %ymm8, %ymm8; + + vpxor %ymm0, %ymm8, %ymm0; + + vextracti128 $1, %ymm0, %xmm1; + vpxor (%r10), %xmm0, %xmm0; + vpxor %xmm0, %xmm1, %xmm0; + vmovdqu %xmm0, (%r10); + + vzeroall; + + movq (16 * 32 + 0 * 8)(%rax), %r10; + movq (16 * 32 + 1 * 8)(%rax), %r11; + movq (16 * 32 + 2 * 8)(%rax), %r12; + movq (16 * 32 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_ocb_auth,.-_gcry_camellia_aesni_avx2_ocb_auth;) + +#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/camellia-arm.S b/libotr/libgcrypt-1.8.7/cipher/camellia-arm.S new file mode 100644 index 0000000..a3d87d1 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/camellia-arm.S @@ -0,0 +1,626 @@ +/* camellia-arm.S - ARM assembly implementation of Camellia cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#if defined(__ARMEL__) +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +.text + +.syntax unified +.arm + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + +/* struct camellia_ctx: */ +#define key_table 0 + +/* register macros */ +#define CTX %r0 +#define RTAB1 %ip +#define RTAB3 %r1 +#define RMASK %lr + +#define IL %r2 +#define IR %r3 + +#define XL %r4 +#define XR %r5 +#define YL %r6 +#define YR %r7 + +#define RT0 %r8 +#define RT1 %r9 +#define RT2 %r10 +#define RT3 %r11 + +/* helper macros */ +#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 3)]; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 0)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 3)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 2)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 1)]; \ + strb rtmp0, [rdst, #((offs) + 0)]; + +#ifdef __ARMEL__ +#ifdef HAVE_ARM_ARCH_V6 + #define host_to_be(reg, rtmp) \ + rev reg, reg; + #define be_to_host(reg, rtmp) \ + rev reg, reg; +#else + #define host_to_be(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; + #define be_to_host(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; +#endif +#else + /* nop on big-endian */ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ +#endif + +#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \ + ldr a, [rin, #0]; \ + ldr b, [rin, #4]; \ + be_to_host(a, rtmp); \ + ldr c, [rin, #8]; \ + be_to_host(b, rtmp); \ + ldr d, [rin, #12]; \ + be_to_host(c, rtmp); \ + be_to_host(d, rtmp); + +#define str_output_aligned_be(rout, a, b, c, d, rtmp) \ + be_to_host(a, rtmp); \ + be_to_host(b, rtmp); \ + str a, [rout, #0]; \ + be_to_host(c, rtmp); \ + str b, [rout, #4]; \ + be_to_host(d, rtmp); \ + str c, [rout, #8]; \ + str d, [rout, #12]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads/writes allowed */ + #define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \ + ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp) + + #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ + str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0) +#else + /* need to handle unaligned reads/writes by byte reads */ + #define ldr_input_be(rin, ra, rb, rc, rd, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_be(ra, rin, 0, rtmp0); \ + ldr_unaligned_be(rb, rin, 4, rtmp0); \ + ldr_unaligned_be(rc, rin, 8, rtmp0); \ + ldr_unaligned_be(rd, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp0); \ + 2:; + + #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_be(ra, rout, 0, rtmp0, rtmp1); \ + str_unaligned_be(rb, rout, 4, rtmp0, rtmp1); \ + str_unaligned_be(rc, rout, 8, rtmp0, rtmp1); \ + str_unaligned_be(rd, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0); \ + 2:; +#endif + 
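Editor's note on host_to_be/be_to_host above: pre-ARMv6 cores have no REV instruction, so the fallback synthesizes a 32-bit byteswap from two rotate-XORs and a BIC mask. The same computation in C, one statement per instruction of the fallback:

    #include <stdint.h>

    static uint32_t bswap32_no_rev(uint32_t x)
    {
        uint32_t ror16 = (x >> 16) | (x << 16);   /* reg, ror #16 */
        uint32_t t = (x ^ ror16) >> 8;            /* eor rtmp, ...; mov lsr #8 */
        t &= ~(uint32_t)0xff00;                   /* bic rtmp, rtmp, #65280 */
        return t ^ ((x >> 8) | (x << 24));        /* eor reg, rtmp, reg, ror #8 */
    }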
+/********************************************************************** + 1-way camellia + **********************************************************************/ +#define roundsm(xl, xr, kl, kr, yl, yr) \ + ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \ + and IR, RMASK, xr, lsl#(4); /*sp1110*/ \ + ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \ + and IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \ + and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \ + ldr IR, [RTAB1, IR]; \ + and RT1, RMASK, xl, lsr#(8 - 4); /*sp3033*/ \ + eor yl, RT2; \ + ldr IL, [RTAB1, IL]; \ + eor yr, RT3; \ + \ + ldr RT0, [RTAB3, RT0]; \ + add RTAB1, #4; \ + ldr RT1, [RTAB3, RT1]; \ + add RTAB3, #4; \ + \ + and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \ + and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \ + \ + eor IR, RT0; \ + eor IL, RT1; \ + \ + ldr RT2, [RTAB1, RT2]; \ + and RT0, RMASK, xr, lsr#(8 - 4); /*sp4404*/ \ + ldr RT3, [RTAB1, RT3]; \ + and RT1, RMASK, xl, lsl#(4); /*sp4404*/ \ + \ + ldr RT0, [RTAB3, RT0]; \ + sub RTAB1, #4; \ + ldr RT1, [RTAB3, RT1]; \ + sub RTAB3, #4; \ + \ + eor IR, RT2; \ + eor IL, RT3; \ + eor IR, RT0; \ + eor IL, RT1; \ + \ + eor IR, IL; \ + eor yr, yr, IL, ror#8; \ + eor yl, IR; \ + eor yr, IR; + +#define enc_rounds(n) \ + roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR); + +#define dec_rounds(n) \ + roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR); + +/* perform FL and FL⁻¹ */ +#define fls(ll, lr, rl, rr, kll, klr, krl, krr) \ + ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \ + ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \ + and RT0, ll; \ + ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \ + orr RT2, rr; \ + ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \ + eor rl, RT2; \ + eor lr, lr, RT0, ror#31; \ + and RT3, rl; \ + orr RT1, lr; \ + eor ll, RT1; \ + eor rr, rr, RT3, ror#31; + +#define enc_fls(n) \ + fls(XL, XR, YL, YR, \ + (n) * 2 + 0, (n) * 2 + 1, \ + (n) * 2 + 2, (n) * 2 + 3); + +#define dec_fls(n) \ + fls(XL, XR, YL, YR, \ + (n) * 2 + 2, (n) * 2 + 3, \ + (n) * 2 + 0, (n) * 2 + 1); + +#define inpack(n) \ + ldr_input_be(%r2, XL, XR, YL, YR, RT0); \ + ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ + ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ + eor XL, RT0; \ + eor XR, RT1; + +#define outunpack(n) \ + ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ + ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ + eor YL, RT0; \ + eor YR, RT1; \ + str_output_be(%r1, YL, YR, XL, XR, RT0, RT1); + +.align 3 +.globl _gcry_camellia_arm_encrypt_block +.type _gcry_camellia_arm_encrypt_block,%function; + +_gcry_camellia_arm_encrypt_block: + /* input: + * %r0: keytable + * %r1: dst + * %r2: src + * %r3: keybitlen + */ + push {%r1, %r4-%r11, %ip, %lr}; + + GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3); + mov RMASK, #0xff; + add RTAB3, RTAB1, #(2 * 4); + push {%r3}; + mov RMASK, RMASK, lsl#4 /* byte mask */ + + inpack(0); + + 
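Editor's note on the roundsm() addressing scheme above: the four lookup tables are interleaved in memory (see .Lcamellia_sp1110 below, where each .long row carries one entry of all four tables), so RTAB1/RTAB3 plus a transient add/sub of 4 can reach every column, and RMASK = 0xff << 4 turns a state byte directly into a 16-byte-stride index. The addressing model in C:

    #include <stdint.h>

    /* Entry b of the four tables sp1110/sp0222/sp3033/sp4404 lives in
     * one 16-byte row, so the index is (b << 4) and the table choice is
     * a 4-byte column offset on the base register. */
    static uint32_t sp_lookup(const uint32_t rows[256][4],
                              unsigned column /* 0..3 */, uint8_t b)
    {
        return rows[b][column];   /* address = base + (b << 4) + column * 4 */
    }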
enc_rounds(0); + enc_fls(8); + enc_rounds(8); + enc_fls(16); + enc_rounds(16); + + pop {RT0}; + cmp RT0, #(16 * 8); + bne .Lenc_256; + + pop {%r1}; + outunpack(24); + + pop {%r4-%r11, %ip, %pc}; +.ltorg + +.Lenc_256: + enc_fls(24); + enc_rounds(24); + + pop {%r1}; + outunpack(32); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; + +.align 3 +.globl _gcry_camellia_arm_decrypt_block +.type _gcry_camellia_arm_decrypt_block,%function; + +_gcry_camellia_arm_decrypt_block: + /* input: + * %r0: keytable + * %r1: dst + * %r2: src + * %r3: keybitlen + */ + push {%r1, %r4-%r11, %ip, %lr}; + + GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3); + mov RMASK, #0xff; + add RTAB3, RTAB1, #(2 * 4); + mov RMASK, RMASK, lsl#4 /* byte mask */ + + cmp %r3, #(16 * 8); + bne .Ldec_256; + + inpack(24); + +.Ldec_128: + dec_rounds(16); + dec_fls(16); + dec_rounds(8); + dec_fls(8); + dec_rounds(0); + + pop {%r1}; + outunpack(0); + + pop {%r4-%r11, %ip, %pc}; +.ltorg + +.Ldec_256: + inpack(32); + dec_rounds(24); + dec_fls(24); + + b .Ldec_128; +.ltorg +.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; + +.data + +/* Encryption/Decryption tables */ +.align 5 +.Lcamellia_sp1110: +.long 0x70707000 +.Lcamellia_sp0222: + .long 0x00e0e0e0 +.Lcamellia_sp3033: + .long 0x38003838 +.Lcamellia_sp4404: + .long 0x70700070 +.long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c +.long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3 +.long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0 +.long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4 +.long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057 +.long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea +.long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae +.long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023 +.long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b +.long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045 +.long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5 +.long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed +.long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f +.long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d +.long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092 +.long 0x23232300, 0x00464646, 0x91009191, 0x86860086 +.long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af +.long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c +.long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f +.long 0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e +.long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc +.long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e +.long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b +.long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6 +.long 0x0e0e0e00, 0x001c1c1c, 0x07000707, 0x39390039 +.long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5 +.long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d +.long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9 +.long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a +.long 0x92929200, 0x00252525, 0x49004949, 0x51510051 +.long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c +.long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b +.long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a +.long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb +.long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0 +.long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074 +.long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b +.long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0 +.long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084 +.long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df 
+.long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb +.long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034 +.long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076 +.long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d +.long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9 +.long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1 +.long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004 +.long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014 +.long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a +.long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de +.long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011 +.long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032 +.long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c +.long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053 +.long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2 +.long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe +.long 0x01010100, 0x00020202, 0x80008080, 0xcfcf00cf +.long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3 +.long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a +.long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024 +.long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8 +.long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060 +.long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069 +.long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa +.long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0 +.long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1 +.long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062 +.long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054 +.long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e +.long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0 +.long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064 +.long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010 +.long 0x12121200, 0x00242424, 0x09000909, 0x00000000 +.long 0x2b2b2b00, 0x00565656, 0x95009595, 0xa3a300a3 +.long 0x20202000, 0x00404040, 0x10001010, 0x75750075 +.long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a +.long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6 +.long 0x84848400, 0x00090909, 0x42004242, 0x09090009 +.long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd +.long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087 +.long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083 +.long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd +.long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090 +.long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073 +.long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6 +.long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d +.long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf +.long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052 +.long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8 +.long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8 +.long 0x31313100, 0x00626262, 0x98009898, 0xc6c600c6 +.long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081 +.long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f +.long 0x04040400, 0x00080808, 0x02000202, 0x13130013 +.long 0xd7d7d700, 0x00afafaf, 0xeb00ebeb, 0x63630063 +.long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9 +.long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7 +.long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f +.long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc +.long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029 +.long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9 +.long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f +.long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4 +.long 0x32323200, 0x00646464, 0x19001919, 0x78780078 +.long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006 +.long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 
0xe7e700e7 +.long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071 +.long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4 +.long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab +.long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088 +.long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d +.long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072 +.long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9 +.long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8 +.long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac +.long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036 +.long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a +.long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c +.long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1 +.long 0x24242400, 0x00484848, 0x12001212, 0x40400040 +.long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3 +.long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb +.long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043 +.long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015 +.long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad +.long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077 +.long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080 +.long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082 +.long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec +.long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027 +.long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5 +.long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085 +.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035 +.long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c +.long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041 +.long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef +.long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093 +.long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019 +.long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021 +.long 0xe0e0e000, 0x00c1c1c1, 0x70007070, 0x0e0e000e +.long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e +.long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065 +.long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd +.long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8 +.long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f +.long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb +.long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce +.long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030 +.long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f +.long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5 +.long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a +.long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1 +.long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca +.long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047 +.long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d +.long 0x09090900, 0x00121212, 0x84008484, 0x01010001 +.long 0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6 +.long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056 +.long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d +.long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d +.long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066 +.long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc +.long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d +.long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012 +.long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020 +.long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1 +.long 0x33333300, 0x00666666, 0x99009999, 0x99990099 +.long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c +.long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2 +.long 0xf6f6f600, 0x00ededed, 0x7b007b7b, 0x7e7e007e +.long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005 +.long 0x9d9d9d00, 0x003b3b3b, 
0xce00cece, 0xb7b700b7 +.long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031 +.long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017 +.long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7 +.long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058 +.long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061 +.long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b +.long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c +.long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f +.long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016 +.long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018 +.long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022 +.long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044 +.long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2 +.long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5 +.long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091 +.long 0x13131300, 0x00262626, 0x89008989, 0x08080008 +.long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8 +.long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc +.long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050 +.long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0 +.long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d +.long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089 +.long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097 +.long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b +.long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095 +.long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff +.long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2 +.long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4 +.long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048 +.long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7 +.long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db +.long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003 +.long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da +.long 0xb4b4b400, 0x00696969, 0x5a005a5a, 0x3f3f003f +.long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094 +.long 0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c +.long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002 +.long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a +.long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033 +.long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067 +.long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3 +.long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f +.long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2 +.long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b +.long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026 +.long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037 +.long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b +.long 0x88888800, 0x00111111, 0x44004444, 0x96960096 +.long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b +.long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 0xbebe00be +.long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e +.long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079 +.long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c +.long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e +.long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e +.long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5 +.long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6 +.long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd +.long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059 +.long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098 +.long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a +.long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046 +.long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba +.long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025 +.long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042 +.long 0xf1f1f100, 
0x00e3e3e3, 0xf800f8f8, 0xa2a200a2 +.long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa +.long 0x40404000, 0x00808080, 0x20002020, 0x07070007 +.long 0x28282800, 0x00505050, 0x14001414, 0x55550055 +.long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee +.long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a +.long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049 +.long 0xc9c9c900, 0x00939393, 0xe400e4e4, 0x68680068 +.long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038 +.long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4 +.long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028 +.long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b +.long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9 +.long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1 +.long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3 +.long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 +.long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 +.long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e + +#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ +#endif /*__ARM_ARCH >= 6*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/camellia-glue.c b/libotr/libgcrypt-1.8.7/cipher/camellia-glue.c new file mode 100644 index 0000000..7687094 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/camellia-glue.c @@ -0,0 +1,1080 @@ +/* camellia-glue.c - Glue for the Camellia cipher + * Copyright (C) 2007 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/* I put all the libgcrypt-specific stuff in this file to keep the + camellia.c/camellia.h files exactly as provided by NTT. If they + update their code, this should make it easier to bring the changes + in. - dshaw + + There is one small change which needs to be done: Include the + following code at the top of camellia.h: */ +#if 0 + +/* To use Camellia with libraries it is often useful to keep the name + * space of the library clean. The following macro is thus useful: + * + * #define CAMELLIA_EXT_SYM_PREFIX foo_ + * + * This prefixes all external symbols with "foo_". 
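+ *
+ * For example, Camellia_Ekeygen is then emitted and linked as
+ * foo_Camellia_Ekeygen.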
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#ifdef CAMELLIA_EXT_SYM_PREFIX
+#define CAMELLIA_PREFIX1(x,y) x ## y
+#define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y)
+#define CAMELLIA_PREFIX(x) CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x)
+#define Camellia_Ekeygen CAMELLIA_PREFIX(Camellia_Ekeygen)
+#define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock)
+#define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock)
+#define camellia_decrypt128 CAMELLIA_PREFIX(camellia_decrypt128)
+#define camellia_decrypt256 CAMELLIA_PREFIX(camellia_decrypt256)
+#define camellia_encrypt128 CAMELLIA_PREFIX(camellia_encrypt128)
+#define camellia_encrypt256 CAMELLIA_PREFIX(camellia_encrypt256)
+#define camellia_setup128 CAMELLIA_PREFIX(camellia_setup128)
+#define camellia_setup192 CAMELLIA_PREFIX(camellia_setup192)
+#define camellia_setup256 CAMELLIA_PREFIX(camellia_setup256)
+#endif /*CAMELLIA_EXT_SYM_PREFIX*/
+
+#endif /* Code sample. */
+
+
+#include <config.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "camellia.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+/* Helper macro to force alignment to 16 bytes.  */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
+
+/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
+#undef USE_AESNI_AVX
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_AESNI_AVX 1
+# endif
+#endif
+
+/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
+#undef USE_AESNI_AVX2
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_AESNI_AVX2 1
+# endif
+#endif
+
+typedef struct
+{
+  KEY_TABLE_TYPE keytable;
+  int keybitlength;
+#ifdef USE_AESNI_AVX
+  unsigned int use_aesni_avx:1;	/* AES-NI/AVX implementation shall be used.  */
+#endif /*USE_AESNI_AVX*/
+#ifdef USE_AESNI_AVX2
+  unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used.  */
+#endif /*USE_AESNI_AVX2*/
+} CAMELLIA_context;
+
+/* Assembly implementations use the SystemV ABI; on Win64 an ABI conversion
+ * and additional stack space to save XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+#ifdef USE_AESNI_AVX
+/* Assembler implementations of Camellia using AES-NI and AVX.  These
+   process data in chunks of 16 blocks at a time.
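+   Per call that is 16 * 16 = 256 bytes.  The s-box layer of these
+   implementations is computed with AES-NI instructions, exploiting the
+   affine equivalence between the Camellia and AES s-boxes.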
+ */
+extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx,
+					     unsigned char *out,
+					     const unsigned char *in,
+					     unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx,
+					     unsigned char *out,
+					     const unsigned char *in,
+					     unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx,
+					     unsigned char *out,
+					     const unsigned char *in,
+					     unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx,
+					     unsigned char *out,
+					     const unsigned char *in,
+					     unsigned char *offset,
+					     unsigned char *checksum,
+					     const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx,
+					     unsigned char *out,
+					     const unsigned char *in,
+					     unsigned char *offset,
+					     unsigned char *checksum,
+					     const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
+					      const unsigned char *abuf,
+					      unsigned char *offset,
+					      unsigned char *checksum,
+					      const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
+					    const unsigned char *key,
+					    unsigned int keylen) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AESNI_AVX2
+/* Assembler implementations of Camellia using AES-NI and AVX2.  These
+   process data in chunks of 32 blocks at a time.
+ */
+extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx,
+					      unsigned char *out,
+					      const unsigned char *in,
+					      unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx,
+					      unsigned char *out,
+					      const unsigned char *in,
+					      unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx,
+					      unsigned char *out,
+					      const unsigned char *in,
+					      unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx,
+					      unsigned char *out,
+					      const unsigned char *in,
+					      unsigned char *offset,
+					      unsigned char *checksum,
+					      const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx,
+					      unsigned char *out,
+					      const unsigned char *in,
+					      unsigned char *offset,
+					      unsigned char *checksum,
+					      const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx,
+					       const unsigned char *abuf,
+					       unsigned char *offset,
+					       unsigned char *checksum,
+					       const u64 Ls[32]) ASM_FUNC_ABI;
+#endif
+
+static const char *selftest(void);
+
+static gcry_err_code_t
+camellia_setkey(void *c, const byte *key, unsigned keylen)
+{
+  CAMELLIA_context *ctx=c;
+  static int initialized=0;
+  static const char *selftest_failed=NULL;
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+  unsigned int hwf = _gcry_get_hw_features ();
+#endif
+
+  if(keylen!=16 && keylen!=24 && keylen!=32)
+    return GPG_ERR_INV_KEYLEN;
+
+  if(!initialized)
+    {
+      initialized=1;
+      selftest_failed=selftest();
+      if(selftest_failed)
+	log_error("%s\n",selftest_failed);
+    }
+
+  if(selftest_failed)
+    return GPG_ERR_SELFTEST_FAILED;
+
+#ifdef USE_AESNI_AVX
+  ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
+#endif
+#ifdef USE_AESNI_AVX2
+  ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
+#endif
+
+  ctx->keybitlength=keylen*8;
+
+  if (0)
+    { }
+#ifdef USE_AESNI_AVX
+  else if (ctx->use_aesni_avx)
+    _gcry_camellia_aesni_avx_keygen(ctx, key, keylen);
+  else
+#endif
+    {
+      Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
+      _gcry_burn_stack
+        ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */
+
+(4+32)*sizeof(u32)+2*sizeof(void*) /* camellia_setup192 */ + +0+sizeof(int)+2*sizeof(void*) /* Camellia_Ekeygen */ + +3*2*sizeof(void*) /* Function calls. */ + ); + } + + return 0; +} + +#ifdef USE_ARM_ASM + +/* Assembly implementations of Camellia. */ +extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable, + byte *outbuf, const byte *inbuf, + const int keybits); + +extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable, + byte *outbuf, const byte *inbuf, + const int keybits); + +static void Camellia_EncryptBlock(const int keyBitLength, + const unsigned char *plaintext, + const KEY_TABLE_TYPE keyTable, + unsigned char *cipherText) +{ + _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext, + keyBitLength); +} + +static void Camellia_DecryptBlock(const int keyBitLength, + const unsigned char *cipherText, + const KEY_TABLE_TYPE keyTable, + unsigned char *plaintext) +{ + _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText, + keyBitLength); +} + +#ifdef __aarch64__ +# define CAMELLIA_encrypt_stack_burn_size (0) +# define CAMELLIA_decrypt_stack_burn_size (0) +#else +# define CAMELLIA_encrypt_stack_burn_size (15*4) +# define CAMELLIA_decrypt_stack_burn_size (15*4) +#endif + +static unsigned int +camellia_encrypt(void *c, byte *outbuf, const byte *inbuf) +{ + CAMELLIA_context *ctx = c; + Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); + return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size); +} + +static unsigned int +camellia_decrypt(void *c, byte *outbuf, const byte *inbuf) +{ + CAMELLIA_context *ctx=c; + Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); + return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size); +} + +#else /*USE_ARM_ASM*/ + +static unsigned int +camellia_encrypt(void *c, byte *outbuf, const byte *inbuf) +{ + CAMELLIA_context *ctx=c; + + Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); + +#define CAMELLIA_encrypt_stack_burn_size \ + (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \ + +4*sizeof(u32)+4*sizeof(u32) \ + +2*sizeof(u32*)+4*sizeof(u32) \ + +2*2*sizeof(void*) /* Function calls. */ \ + ) + + return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size); +} + +static unsigned int +camellia_decrypt(void *c, byte *outbuf, const byte *inbuf) +{ + CAMELLIA_context *ctx=c; + + Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); + +#define CAMELLIA_decrypt_stack_burn_size \ + (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \ + +4*sizeof(u32)+4*sizeof(u32) \ + +2*sizeof(u32*)+4*sizeof(u32) \ + +2*2*sizeof(void*) /* Function calls. */ \ + ) + + return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size); +} + +#endif /*!USE_ARM_ASM*/ + +/* Bulk encryption of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size CAMELLIA_BLOCK_SIZE. */ +void +_gcry_camellia_ctr_enc(void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + CAMELLIA_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[CAMELLIA_BLOCK_SIZE]; + int burn_stack_depth = CAMELLIA_encrypt_stack_burn_size; + int i; + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + + /* Process data in 32 block chunks. 
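Each chunk is 32 * 16 = 512 bytes.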
*/
+      while (nblocks >= 32)
+        {
+          _gcry_camellia_aesni_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+          nblocks -= 32;
+          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx2 = 1;
+        }
+
+      if (did_use_aesni_avx2)
+        {
+          int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
+                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx2_burn_stack_depth)
+            burn_stack_depth = avx2_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+      /* TODO: use caching instead? */
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      int did_use_aesni_avx = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+          nblocks -= 16;
+          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx = 1;
+        }
+
+      if (did_use_aesni_avx)
+        {
+          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx_burn_stack_depth)
+            burn_stack_depth = avx_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+      /* TODO: use caching instead? */
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. */
+      Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
+      /* XOR the input with the encrypted counter and store in output.  */
+      buf_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
+      outbuf += CAMELLIA_BLOCK_SIZE;
+      inbuf  += CAMELLIA_BLOCK_SIZE;
+      /* Increment the counter.  */
+      for (i = CAMELLIA_BLOCK_SIZE; i > 0; i--)
+        {
+          ctr[i-1]++;
+          if (ctr[i-1])
+            break;
+        }
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+void
+_gcry_camellia_cbc_dec(void *context, unsigned char *iv,
+                       void *outbuf_arg, const void *inbuf_arg,
+                       size_t nblocks)
+{
+  CAMELLIA_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[CAMELLIA_BLOCK_SIZE];
+  int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      int did_use_aesni_avx2 = 0;
+
+      /* Process data in 32 block chunks. */
+      while (nblocks >= 32)
+        {
+          _gcry_camellia_aesni_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 32;
+          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx2 = 1;
+        }
+
+      if (did_use_aesni_avx2)
+        {
+          int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
+                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx2_burn_stack_depth)
+            burn_stack_depth = avx2_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      int did_use_aesni_avx = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx = 1;
+        }
+
+      if (did_use_aesni_avx)
+        {
+          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx_burn_stack_depth)
+            burn_stack_depth = avx_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks...
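(fewer than 16 blocks fall through to the per-block loop below)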
*/ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf); + + buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAMELLIA_BLOCK_SIZE); + inbuf += CAMELLIA_BLOCK_SIZE; + outbuf += CAMELLIA_BLOCK_SIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_camellia_cfb_dec(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + CAMELLIA_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size; + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + _gcry_camellia_aesni_avx2_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 32; + outbuf += 32 * CAMELLIA_BLOCK_SIZE; + inbuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + _gcry_camellia_aesni_avx_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 16; + outbuf += 16 * CAMELLIA_BLOCK_SIZE; + inbuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv); + buf_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE); + outbuf += CAMELLIA_BLOCK_SIZE; + inbuf += CAMELLIA_BLOCK_SIZE; + } + + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +size_t +_gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) + CAMELLIA_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth; + u64 blkn = c->u_mode.ocb.data_nblocks; + + burn_stack_depth = encrypt ? CAMELLIA_encrypt_stack_burn_size : + CAMELLIA_decrypt_stack_burn_size; +#else + (void)c; + (void)outbuf_arg; + (void)inbuf_arg; + (void)encrypt; +#endif + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + u64 Ls[32]; + unsigned int n = 32 - (blkn % 32); + u64 *l; + int i; + + if (nblocks >= 32) + { + for (i = 0; i < 32; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). 
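  The pattern stored here,
+	      * L[0],L[1],L[0],L[2],L[0],L[1],L[0],..., follows OCB's
+	      * L_{ntz(i)} offset sequence; the 8th, 16th, 24th and 32nd
+	      * slots are filled in separately below.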
*/ + Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4]; + Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(31 + n) % 32]; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + blkn += 32; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32); + + if (encrypt) + _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 32; + outbuf += 32 * CAMELLIA_BLOCK_SIZE; + inbuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + u64 Ls[16]; + unsigned int n = 16 - (blkn % 16); + u64 *l; + int i; + + if (nblocks >= 16) + { + for (i = 0; i < 16; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(15 + n) % 16]; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + blkn += 16; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); + + if (encrypt) + _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 16; + outbuf += 16 * CAMELLIA_BLOCK_SIZE; + inbuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) + c->u_mode.ocb.data_nblocks = blkn; + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; +} + +/* Bulk authentication of complete blocks in OCB mode. 
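   Works like
+   _gcry_camellia_ocb_crypt, but only folds the AAD blocks into the OCB
+   checksum; no output is produced.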
*/ +size_t +_gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) + CAMELLIA_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + int burn_stack_depth; + u64 blkn = c->u_mode.ocb.aad_nblocks; + + burn_stack_depth = CAMELLIA_encrypt_stack_burn_size; +#else + (void)c; + (void)abuf_arg; +#endif + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + u64 Ls[32]; + unsigned int n = 32 - (blkn % 32); + u64 *l; + int i; + + if (nblocks >= 32) + { + for (i = 0; i < 32; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4]; + Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(31 + n) % 32]; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + blkn += 32; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32); + + _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf, + c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 32; + abuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + u64 Ls[16]; + unsigned int n = 16 - (blkn % 16); + u64 *l; + int i; + + if (nblocks >= 16) + { + for (i = 0; i < 16; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(15 + n) % 16]; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + blkn += 16; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); + + _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf, + c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 16; + abuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... 
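(the remaining block count is returned and handled one block at a time
+      by the caller)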
*/ + } +#endif + +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) + c->u_mode.ocb.aad_nblocks = blkn; + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; +} + +/* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR + encryption. Returns NULL on success. */ +static const char* +selftest_ctr_128 (void) +{ + const int nblocks = 32+16+1; + const int blocksize = CAMELLIA_BLOCK_SIZE; + const int context_size = sizeof(CAMELLIA_context); + + return _gcry_selftest_helper_ctr("CAMELLIA", &camellia_setkey, + &camellia_encrypt, &_gcry_camellia_ctr_enc, nblocks, blocksize, + context_size); +} + +/* Run the self-tests for CAMELLIA-CBC-128, tests bulk CBC decryption. + Returns NULL on success. */ +static const char* +selftest_cbc_128 (void) +{ + const int nblocks = 32+16+2; + const int blocksize = CAMELLIA_BLOCK_SIZE; + const int context_size = sizeof(CAMELLIA_context); + + return _gcry_selftest_helper_cbc("CAMELLIA", &camellia_setkey, + &camellia_encrypt, &_gcry_camellia_cbc_dec, nblocks, blocksize, + context_size); +} + +/* Run the self-tests for CAMELLIA-CFB-128, tests bulk CFB decryption. + Returns NULL on success. */ +static const char* +selftest_cfb_128 (void) +{ + const int nblocks = 32+16+2; + const int blocksize = CAMELLIA_BLOCK_SIZE; + const int context_size = sizeof(CAMELLIA_context); + + return _gcry_selftest_helper_cfb("CAMELLIA", &camellia_setkey, + &camellia_encrypt, &_gcry_camellia_cfb_dec, nblocks, blocksize, + context_size); +} + +static const char * +selftest(void) +{ + CAMELLIA_context ctx; + byte scratch[16]; + const char *r; + + /* These test vectors are from RFC-3713 */ + static const byte plaintext[]= + { + 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef, + 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 + }; + static const byte key_128[]= + { + 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef, + 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 + }; + static const byte ciphertext_128[]= + { + 0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73, + 0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43 + }; + static const byte key_192[]= + { + 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98, + 0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77 + }; + static const byte ciphertext_192[]= + { + 0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8, + 0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9 + }; + static const byte key_256[]= + { + 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba, + 0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55, + 0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff + }; + static const byte ciphertext_256[]= + { + 0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c, + 0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09 + }; + + camellia_setkey(&ctx,key_128,sizeof(key_128)); + camellia_encrypt(&ctx,scratch,plaintext); + if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0) + return "CAMELLIA-128 test encryption failed."; + camellia_decrypt(&ctx,scratch,scratch); + if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) + return "CAMELLIA-128 test decryption failed."; + + camellia_setkey(&ctx,key_192,sizeof(key_192)); + camellia_encrypt(&ctx,scratch,plaintext); + if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0) + return "CAMELLIA-192 test encryption failed."; + camellia_decrypt(&ctx,scratch,scratch); + if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) + return "CAMELLIA-192 test decryption failed."; + + camellia_setkey(&ctx,key_256,sizeof(key_256)); + camellia_encrypt(&ctx,scratch,plaintext); + 
if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0) + return "CAMELLIA-256 test encryption failed."; + camellia_decrypt(&ctx,scratch,scratch); + if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) + return "CAMELLIA-256 test decryption failed."; + + if ( (r = selftest_ctr_128 ()) ) + return r; + + if ( (r = selftest_cbc_128 ()) ) + return r; + + if ( (r = selftest_cfb_128 ()) ) + return r; + + return NULL; +} + +/* These oids are from + <http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications_oid.html>, + retrieved May 1, 2007. */ + +static gcry_cipher_oid_spec_t camellia128_oids[] = + { + {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC}, + {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB}, + {"0.3.4401.5.3.1.9.3", GCRY_CIPHER_MODE_OFB}, + {"0.3.4401.5.3.1.9.4", GCRY_CIPHER_MODE_CFB}, + { NULL } + }; + +static gcry_cipher_oid_spec_t camellia192_oids[] = + { + {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC}, + {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB}, + {"0.3.4401.5.3.1.9.23", GCRY_CIPHER_MODE_OFB}, + {"0.3.4401.5.3.1.9.24", GCRY_CIPHER_MODE_CFB}, + { NULL } + }; + +static gcry_cipher_oid_spec_t camellia256_oids[] = + { + {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC}, + {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB}, + {"0.3.4401.5.3.1.9.43", GCRY_CIPHER_MODE_OFB}, + {"0.3.4401.5.3.1.9.44", GCRY_CIPHER_MODE_CFB}, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_camellia128 = + { + GCRY_CIPHER_CAMELLIA128, {0, 0}, + "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128, + sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt + }; + +gcry_cipher_spec_t _gcry_cipher_spec_camellia192 = + { + GCRY_CIPHER_CAMELLIA192, {0, 0}, + "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192, + sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt + }; + +gcry_cipher_spec_t _gcry_cipher_spec_camellia256 = + { + GCRY_CIPHER_CAMELLIA256, {0, 0}, + "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256, + sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/camellia.c b/libotr/libgcrypt-1.8.7/cipher/camellia.c new file mode 100644 index 0000000..e7085a7 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/camellia.c @@ -0,0 +1,1413 @@ +/* camellia.h ver 1.2.0 + * + * Copyright (C) 2006,2007 + * NTT (Nippon Telegraph and Telephone Corporation). + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Algorithm Specification + * http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html + */ + +#include <config.h> +#include <string.h> +#include <stdlib.h> + +#include "types.h" +#include "bufhelp.h" +#include "camellia.h" + +typedef byte u8; + +/* key constants */ + +#define CAMELLIA_SIGMA1L (0xA09E667FL) +#define CAMELLIA_SIGMA1R (0x3BCC908BL) +#define CAMELLIA_SIGMA2L (0xB67AE858L) +#define CAMELLIA_SIGMA2R (0x4CAA73B2L) +#define CAMELLIA_SIGMA3L (0xC6EF372FL) +#define CAMELLIA_SIGMA3R (0xE94F82BEL) +#define CAMELLIA_SIGMA4L (0x54FF53A5L) +#define CAMELLIA_SIGMA4R (0xF1D36F1CL) +#define CAMELLIA_SIGMA5L (0x10E527FAL) +#define CAMELLIA_SIGMA5R (0xDE682D1DL) +#define CAMELLIA_SIGMA6L (0xB05688C2L) +#define CAMELLIA_SIGMA6R (0xB3E6C1FDL) + +/* + * macros + */ + + +#if defined(_MSC_VER) + +# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00) +# define GETU32(p) SWAP(*((u32 *)(p))) +# define PUTU32(ct, st) {*((u32 *)(ct)) = SWAP((st));} + +#else /* not MS-VC */ + +# define GETU32(pt) buf_get_be32(pt) +# define PUTU32(ct, st) buf_put_be32(ct, st) + +#endif + +#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2]) +#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1]) + +/* rotation right shift 1byte */ +#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24)) +/* rotation left shift 1bit */ +#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31)) +/* rotation left shift 1byte */ +#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24)) + +#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits) \ + do { \ + w0 = ll; \ + ll = (ll << bits) + (lr >> (32 - bits)); \ + lr = (lr << bits) + (rl >> (32 - bits)); \ + rl = (rl << bits) + (rr >> (32 - bits)); \ + rr = (rr << bits) + (w0 >> (32 - bits)); \ + } while(0) + +#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits) \ + do { \ + w0 = ll; \ + w1 = lr; \ + ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \ + lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \ + rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \ + rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \ + } while(0) + +#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)]) +#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)]) +#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)]) +#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)]) + +#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \ + do { \ + il = xl ^ kl; \ + ir = xr ^ kr; \ + t0 = il >> 16; \ + t1 = ir >> 16; \ + yl = CAMELLIA_SP1110(ir & 0xff) \ + ^ CAMELLIA_SP0222((t1 >> 8) & 0xff) \ + ^ CAMELLIA_SP3033(t1 & 0xff) \ + ^ CAMELLIA_SP4404((ir >> 8) & 0xff); \ + yr = CAMELLIA_SP1110((t0 >> 8) & 0xff) \ + ^ CAMELLIA_SP0222(t0 & 0xff) \ + ^ CAMELLIA_SP3033((il >> 8) & 0xff) \ + ^ CAMELLIA_SP4404(il & 0xff); \ + yl ^= yr; \ + yr = CAMELLIA_RR8(yr); \ + yr ^= yl; \ + } while(0) + + +/* + * for speed up + * + */ +#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \ + do { \ + t0 = kll; \ + t0 &= ll; \ + lr ^= CAMELLIA_RL1(t0); \ + t1 = klr; \ + t1 |= lr; \ + ll ^= t1; \ + \ + t2 = krr; \ + t2 |= rr; \ + rl ^= t2; \ + t3 = krl; \ + t3 &= rl; \ + rr ^= CAMELLIA_RL1(t3); \ + } while(0) + +#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \ + do { \ + yl ^= kl; \ + yr ^= kr; \ + ir = CAMELLIA_SP1110(xr & 0xff) \ + ^ CAMELLIA_SP0222((xr >> 24) & 0xff) \ + ^ CAMELLIA_SP3033((xr >> 16) & 0xff) \ + ^ CAMELLIA_SP4404((xr >> 8) & 0xff); \ + il = CAMELLIA_SP1110((xl >> 24) & 0xff) \ + ^ CAMELLIA_SP0222((xl >> 16) & 0xff) \ + ^ CAMELLIA_SP3033((xl >> 8) & 0xff) \ + ^ 
CAMELLIA_SP4404(xl & 0xff); \ + ir ^= il; \ + il = CAMELLIA_RR8(il); \ + il ^= ir; \ + yl ^= ir; \ + yr ^= il; \ + } while(0) + + +static const u32 camellia_sp1110[256] = { + 0x70707000,0x82828200,0x2c2c2c00,0xececec00, + 0xb3b3b300,0x27272700,0xc0c0c000,0xe5e5e500, + 0xe4e4e400,0x85858500,0x57575700,0x35353500, + 0xeaeaea00,0x0c0c0c00,0xaeaeae00,0x41414100, + 0x23232300,0xefefef00,0x6b6b6b00,0x93939300, + 0x45454500,0x19191900,0xa5a5a500,0x21212100, + 0xededed00,0x0e0e0e00,0x4f4f4f00,0x4e4e4e00, + 0x1d1d1d00,0x65656500,0x92929200,0xbdbdbd00, + 0x86868600,0xb8b8b800,0xafafaf00,0x8f8f8f00, + 0x7c7c7c00,0xebebeb00,0x1f1f1f00,0xcecece00, + 0x3e3e3e00,0x30303000,0xdcdcdc00,0x5f5f5f00, + 0x5e5e5e00,0xc5c5c500,0x0b0b0b00,0x1a1a1a00, + 0xa6a6a600,0xe1e1e100,0x39393900,0xcacaca00, + 0xd5d5d500,0x47474700,0x5d5d5d00,0x3d3d3d00, + 0xd9d9d900,0x01010100,0x5a5a5a00,0xd6d6d600, + 0x51515100,0x56565600,0x6c6c6c00,0x4d4d4d00, + 0x8b8b8b00,0x0d0d0d00,0x9a9a9a00,0x66666600, + 0xfbfbfb00,0xcccccc00,0xb0b0b000,0x2d2d2d00, + 0x74747400,0x12121200,0x2b2b2b00,0x20202000, + 0xf0f0f000,0xb1b1b100,0x84848400,0x99999900, + 0xdfdfdf00,0x4c4c4c00,0xcbcbcb00,0xc2c2c200, + 0x34343400,0x7e7e7e00,0x76767600,0x05050500, + 0x6d6d6d00,0xb7b7b700,0xa9a9a900,0x31313100, + 0xd1d1d100,0x17171700,0x04040400,0xd7d7d700, + 0x14141400,0x58585800,0x3a3a3a00,0x61616100, + 0xdedede00,0x1b1b1b00,0x11111100,0x1c1c1c00, + 0x32323200,0x0f0f0f00,0x9c9c9c00,0x16161600, + 0x53535300,0x18181800,0xf2f2f200,0x22222200, + 0xfefefe00,0x44444400,0xcfcfcf00,0xb2b2b200, + 0xc3c3c300,0xb5b5b500,0x7a7a7a00,0x91919100, + 0x24242400,0x08080800,0xe8e8e800,0xa8a8a800, + 0x60606000,0xfcfcfc00,0x69696900,0x50505000, + 0xaaaaaa00,0xd0d0d000,0xa0a0a000,0x7d7d7d00, + 0xa1a1a100,0x89898900,0x62626200,0x97979700, + 0x54545400,0x5b5b5b00,0x1e1e1e00,0x95959500, + 0xe0e0e000,0xffffff00,0x64646400,0xd2d2d200, + 0x10101000,0xc4c4c400,0x00000000,0x48484800, + 0xa3a3a300,0xf7f7f700,0x75757500,0xdbdbdb00, + 0x8a8a8a00,0x03030300,0xe6e6e600,0xdadada00, + 0x09090900,0x3f3f3f00,0xdddddd00,0x94949400, + 0x87878700,0x5c5c5c00,0x83838300,0x02020200, + 0xcdcdcd00,0x4a4a4a00,0x90909000,0x33333300, + 0x73737300,0x67676700,0xf6f6f600,0xf3f3f300, + 0x9d9d9d00,0x7f7f7f00,0xbfbfbf00,0xe2e2e200, + 0x52525200,0x9b9b9b00,0xd8d8d800,0x26262600, + 0xc8c8c800,0x37373700,0xc6c6c600,0x3b3b3b00, + 0x81818100,0x96969600,0x6f6f6f00,0x4b4b4b00, + 0x13131300,0xbebebe00,0x63636300,0x2e2e2e00, + 0xe9e9e900,0x79797900,0xa7a7a700,0x8c8c8c00, + 0x9f9f9f00,0x6e6e6e00,0xbcbcbc00,0x8e8e8e00, + 0x29292900,0xf5f5f500,0xf9f9f900,0xb6b6b600, + 0x2f2f2f00,0xfdfdfd00,0xb4b4b400,0x59595900, + 0x78787800,0x98989800,0x06060600,0x6a6a6a00, + 0xe7e7e700,0x46464600,0x71717100,0xbababa00, + 0xd4d4d400,0x25252500,0xababab00,0x42424200, + 0x88888800,0xa2a2a200,0x8d8d8d00,0xfafafa00, + 0x72727200,0x07070700,0xb9b9b900,0x55555500, + 0xf8f8f800,0xeeeeee00,0xacacac00,0x0a0a0a00, + 0x36363600,0x49494900,0x2a2a2a00,0x68686800, + 0x3c3c3c00,0x38383800,0xf1f1f100,0xa4a4a400, + 0x40404000,0x28282800,0xd3d3d300,0x7b7b7b00, + 0xbbbbbb00,0xc9c9c900,0x43434300,0xc1c1c100, + 0x15151500,0xe3e3e300,0xadadad00,0xf4f4f400, + 0x77777700,0xc7c7c700,0x80808000,0x9e9e9e00, +}; + +static const u32 camellia_sp0222[256] = { + 0x00e0e0e0,0x00050505,0x00585858,0x00d9d9d9, + 0x00676767,0x004e4e4e,0x00818181,0x00cbcbcb, + 0x00c9c9c9,0x000b0b0b,0x00aeaeae,0x006a6a6a, + 0x00d5d5d5,0x00181818,0x005d5d5d,0x00828282, + 0x00464646,0x00dfdfdf,0x00d6d6d6,0x00272727, + 0x008a8a8a,0x00323232,0x004b4b4b,0x00424242, + 
0x00dbdbdb,0x001c1c1c,0x009e9e9e,0x009c9c9c, + 0x003a3a3a,0x00cacaca,0x00252525,0x007b7b7b, + 0x000d0d0d,0x00717171,0x005f5f5f,0x001f1f1f, + 0x00f8f8f8,0x00d7d7d7,0x003e3e3e,0x009d9d9d, + 0x007c7c7c,0x00606060,0x00b9b9b9,0x00bebebe, + 0x00bcbcbc,0x008b8b8b,0x00161616,0x00343434, + 0x004d4d4d,0x00c3c3c3,0x00727272,0x00959595, + 0x00ababab,0x008e8e8e,0x00bababa,0x007a7a7a, + 0x00b3b3b3,0x00020202,0x00b4b4b4,0x00adadad, + 0x00a2a2a2,0x00acacac,0x00d8d8d8,0x009a9a9a, + 0x00171717,0x001a1a1a,0x00353535,0x00cccccc, + 0x00f7f7f7,0x00999999,0x00616161,0x005a5a5a, + 0x00e8e8e8,0x00242424,0x00565656,0x00404040, + 0x00e1e1e1,0x00636363,0x00090909,0x00333333, + 0x00bfbfbf,0x00989898,0x00979797,0x00858585, + 0x00686868,0x00fcfcfc,0x00ececec,0x000a0a0a, + 0x00dadada,0x006f6f6f,0x00535353,0x00626262, + 0x00a3a3a3,0x002e2e2e,0x00080808,0x00afafaf, + 0x00282828,0x00b0b0b0,0x00747474,0x00c2c2c2, + 0x00bdbdbd,0x00363636,0x00222222,0x00383838, + 0x00646464,0x001e1e1e,0x00393939,0x002c2c2c, + 0x00a6a6a6,0x00303030,0x00e5e5e5,0x00444444, + 0x00fdfdfd,0x00888888,0x009f9f9f,0x00656565, + 0x00878787,0x006b6b6b,0x00f4f4f4,0x00232323, + 0x00484848,0x00101010,0x00d1d1d1,0x00515151, + 0x00c0c0c0,0x00f9f9f9,0x00d2d2d2,0x00a0a0a0, + 0x00555555,0x00a1a1a1,0x00414141,0x00fafafa, + 0x00434343,0x00131313,0x00c4c4c4,0x002f2f2f, + 0x00a8a8a8,0x00b6b6b6,0x003c3c3c,0x002b2b2b, + 0x00c1c1c1,0x00ffffff,0x00c8c8c8,0x00a5a5a5, + 0x00202020,0x00898989,0x00000000,0x00909090, + 0x00474747,0x00efefef,0x00eaeaea,0x00b7b7b7, + 0x00151515,0x00060606,0x00cdcdcd,0x00b5b5b5, + 0x00121212,0x007e7e7e,0x00bbbbbb,0x00292929, + 0x000f0f0f,0x00b8b8b8,0x00070707,0x00040404, + 0x009b9b9b,0x00949494,0x00212121,0x00666666, + 0x00e6e6e6,0x00cecece,0x00ededed,0x00e7e7e7, + 0x003b3b3b,0x00fefefe,0x007f7f7f,0x00c5c5c5, + 0x00a4a4a4,0x00373737,0x00b1b1b1,0x004c4c4c, + 0x00919191,0x006e6e6e,0x008d8d8d,0x00767676, + 0x00030303,0x002d2d2d,0x00dedede,0x00969696, + 0x00262626,0x007d7d7d,0x00c6c6c6,0x005c5c5c, + 0x00d3d3d3,0x00f2f2f2,0x004f4f4f,0x00191919, + 0x003f3f3f,0x00dcdcdc,0x00797979,0x001d1d1d, + 0x00525252,0x00ebebeb,0x00f3f3f3,0x006d6d6d, + 0x005e5e5e,0x00fbfbfb,0x00696969,0x00b2b2b2, + 0x00f0f0f0,0x00313131,0x000c0c0c,0x00d4d4d4, + 0x00cfcfcf,0x008c8c8c,0x00e2e2e2,0x00757575, + 0x00a9a9a9,0x004a4a4a,0x00575757,0x00848484, + 0x00111111,0x00454545,0x001b1b1b,0x00f5f5f5, + 0x00e4e4e4,0x000e0e0e,0x00737373,0x00aaaaaa, + 0x00f1f1f1,0x00dddddd,0x00595959,0x00141414, + 0x006c6c6c,0x00929292,0x00545454,0x00d0d0d0, + 0x00787878,0x00707070,0x00e3e3e3,0x00494949, + 0x00808080,0x00505050,0x00a7a7a7,0x00f6f6f6, + 0x00777777,0x00939393,0x00868686,0x00838383, + 0x002a2a2a,0x00c7c7c7,0x005b5b5b,0x00e9e9e9, + 0x00eeeeee,0x008f8f8f,0x00010101,0x003d3d3d, +}; + +static const u32 camellia_sp3033[256] = { + 0x38003838,0x41004141,0x16001616,0x76007676, + 0xd900d9d9,0x93009393,0x60006060,0xf200f2f2, + 0x72007272,0xc200c2c2,0xab00abab,0x9a009a9a, + 0x75007575,0x06000606,0x57005757,0xa000a0a0, + 0x91009191,0xf700f7f7,0xb500b5b5,0xc900c9c9, + 0xa200a2a2,0x8c008c8c,0xd200d2d2,0x90009090, + 0xf600f6f6,0x07000707,0xa700a7a7,0x27002727, + 0x8e008e8e,0xb200b2b2,0x49004949,0xde00dede, + 0x43004343,0x5c005c5c,0xd700d7d7,0xc700c7c7, + 0x3e003e3e,0xf500f5f5,0x8f008f8f,0x67006767, + 0x1f001f1f,0x18001818,0x6e006e6e,0xaf00afaf, + 0x2f002f2f,0xe200e2e2,0x85008585,0x0d000d0d, + 0x53005353,0xf000f0f0,0x9c009c9c,0x65006565, + 0xea00eaea,0xa300a3a3,0xae00aeae,0x9e009e9e, + 0xec00ecec,0x80008080,0x2d002d2d,0x6b006b6b, + 0xa800a8a8,0x2b002b2b,0x36003636,0xa600a6a6, + 
0xc500c5c5,0x86008686,0x4d004d4d,0x33003333, + 0xfd00fdfd,0x66006666,0x58005858,0x96009696, + 0x3a003a3a,0x09000909,0x95009595,0x10001010, + 0x78007878,0xd800d8d8,0x42004242,0xcc00cccc, + 0xef00efef,0x26002626,0xe500e5e5,0x61006161, + 0x1a001a1a,0x3f003f3f,0x3b003b3b,0x82008282, + 0xb600b6b6,0xdb00dbdb,0xd400d4d4,0x98009898, + 0xe800e8e8,0x8b008b8b,0x02000202,0xeb00ebeb, + 0x0a000a0a,0x2c002c2c,0x1d001d1d,0xb000b0b0, + 0x6f006f6f,0x8d008d8d,0x88008888,0x0e000e0e, + 0x19001919,0x87008787,0x4e004e4e,0x0b000b0b, + 0xa900a9a9,0x0c000c0c,0x79007979,0x11001111, + 0x7f007f7f,0x22002222,0xe700e7e7,0x59005959, + 0xe100e1e1,0xda00dada,0x3d003d3d,0xc800c8c8, + 0x12001212,0x04000404,0x74007474,0x54005454, + 0x30003030,0x7e007e7e,0xb400b4b4,0x28002828, + 0x55005555,0x68006868,0x50005050,0xbe00bebe, + 0xd000d0d0,0xc400c4c4,0x31003131,0xcb00cbcb, + 0x2a002a2a,0xad00adad,0x0f000f0f,0xca00caca, + 0x70007070,0xff00ffff,0x32003232,0x69006969, + 0x08000808,0x62006262,0x00000000,0x24002424, + 0xd100d1d1,0xfb00fbfb,0xba00baba,0xed00eded, + 0x45004545,0x81008181,0x73007373,0x6d006d6d, + 0x84008484,0x9f009f9f,0xee00eeee,0x4a004a4a, + 0xc300c3c3,0x2e002e2e,0xc100c1c1,0x01000101, + 0xe600e6e6,0x25002525,0x48004848,0x99009999, + 0xb900b9b9,0xb300b3b3,0x7b007b7b,0xf900f9f9, + 0xce00cece,0xbf00bfbf,0xdf00dfdf,0x71007171, + 0x29002929,0xcd00cdcd,0x6c006c6c,0x13001313, + 0x64006464,0x9b009b9b,0x63006363,0x9d009d9d, + 0xc000c0c0,0x4b004b4b,0xb700b7b7,0xa500a5a5, + 0x89008989,0x5f005f5f,0xb100b1b1,0x17001717, + 0xf400f4f4,0xbc00bcbc,0xd300d3d3,0x46004646, + 0xcf00cfcf,0x37003737,0x5e005e5e,0x47004747, + 0x94009494,0xfa00fafa,0xfc00fcfc,0x5b005b5b, + 0x97009797,0xfe00fefe,0x5a005a5a,0xac00acac, + 0x3c003c3c,0x4c004c4c,0x03000303,0x35003535, + 0xf300f3f3,0x23002323,0xb800b8b8,0x5d005d5d, + 0x6a006a6a,0x92009292,0xd500d5d5,0x21002121, + 0x44004444,0x51005151,0xc600c6c6,0x7d007d7d, + 0x39003939,0x83008383,0xdc00dcdc,0xaa00aaaa, + 0x7c007c7c,0x77007777,0x56005656,0x05000505, + 0x1b001b1b,0xa400a4a4,0x15001515,0x34003434, + 0x1e001e1e,0x1c001c1c,0xf800f8f8,0x52005252, + 0x20002020,0x14001414,0xe900e9e9,0xbd00bdbd, + 0xdd00dddd,0xe400e4e4,0xa100a1a1,0xe000e0e0, + 0x8a008a8a,0xf100f1f1,0xd600d6d6,0x7a007a7a, + 0xbb00bbbb,0xe300e3e3,0x40004040,0x4f004f4f, +}; + +static const u32 camellia_sp4404[256] = { + 0x70700070,0x2c2c002c,0xb3b300b3,0xc0c000c0, + 0xe4e400e4,0x57570057,0xeaea00ea,0xaeae00ae, + 0x23230023,0x6b6b006b,0x45450045,0xa5a500a5, + 0xeded00ed,0x4f4f004f,0x1d1d001d,0x92920092, + 0x86860086,0xafaf00af,0x7c7c007c,0x1f1f001f, + 0x3e3e003e,0xdcdc00dc,0x5e5e005e,0x0b0b000b, + 0xa6a600a6,0x39390039,0xd5d500d5,0x5d5d005d, + 0xd9d900d9,0x5a5a005a,0x51510051,0x6c6c006c, + 0x8b8b008b,0x9a9a009a,0xfbfb00fb,0xb0b000b0, + 0x74740074,0x2b2b002b,0xf0f000f0,0x84840084, + 0xdfdf00df,0xcbcb00cb,0x34340034,0x76760076, + 0x6d6d006d,0xa9a900a9,0xd1d100d1,0x04040004, + 0x14140014,0x3a3a003a,0xdede00de,0x11110011, + 0x32320032,0x9c9c009c,0x53530053,0xf2f200f2, + 0xfefe00fe,0xcfcf00cf,0xc3c300c3,0x7a7a007a, + 0x24240024,0xe8e800e8,0x60600060,0x69690069, + 0xaaaa00aa,0xa0a000a0,0xa1a100a1,0x62620062, + 0x54540054,0x1e1e001e,0xe0e000e0,0x64640064, + 0x10100010,0x00000000,0xa3a300a3,0x75750075, + 0x8a8a008a,0xe6e600e6,0x09090009,0xdddd00dd, + 0x87870087,0x83830083,0xcdcd00cd,0x90900090, + 0x73730073,0xf6f600f6,0x9d9d009d,0xbfbf00bf, + 0x52520052,0xd8d800d8,0xc8c800c8,0xc6c600c6, + 0x81810081,0x6f6f006f,0x13130013,0x63630063, + 0xe9e900e9,0xa7a700a7,0x9f9f009f,0xbcbc00bc, + 0x29290029,0xf9f900f9,0x2f2f002f,0xb4b400b4, + 
+    0x78780078,0x06060006,0xe7e700e7,0x71710071,
+    0xd4d400d4,0xabab00ab,0x88880088,0x8d8d008d,
+    0x72720072,0xb9b900b9,0xf8f800f8,0xacac00ac,
+    0x36360036,0x2a2a002a,0x3c3c003c,0xf1f100f1,
+    0x40400040,0xd3d300d3,0xbbbb00bb,0x43430043,
+    0x15150015,0xadad00ad,0x77770077,0x80800080,
+    0x82820082,0xecec00ec,0x27270027,0xe5e500e5,
+    0x85850085,0x35350035,0x0c0c000c,0x41410041,
+    0xefef00ef,0x93930093,0x19190019,0x21210021,
+    0x0e0e000e,0x4e4e004e,0x65650065,0xbdbd00bd,
+    0xb8b800b8,0x8f8f008f,0xebeb00eb,0xcece00ce,
+    0x30300030,0x5f5f005f,0xc5c500c5,0x1a1a001a,
+    0xe1e100e1,0xcaca00ca,0x47470047,0x3d3d003d,
+    0x01010001,0xd6d600d6,0x56560056,0x4d4d004d,
+    0x0d0d000d,0x66660066,0xcccc00cc,0x2d2d002d,
+    0x12120012,0x20200020,0xb1b100b1,0x99990099,
+    0x4c4c004c,0xc2c200c2,0x7e7e007e,0x05050005,
+    0xb7b700b7,0x31310031,0x17170017,0xd7d700d7,
+    0x58580058,0x61610061,0x1b1b001b,0x1c1c001c,
+    0x0f0f000f,0x16160016,0x18180018,0x22220022,
+    0x44440044,0xb2b200b2,0xb5b500b5,0x91910091,
+    0x08080008,0xa8a800a8,0xfcfc00fc,0x50500050,
+    0xd0d000d0,0x7d7d007d,0x89890089,0x97970097,
+    0x5b5b005b,0x95950095,0xffff00ff,0xd2d200d2,
+    0xc4c400c4,0x48480048,0xf7f700f7,0xdbdb00db,
+    0x03030003,0xdada00da,0x3f3f003f,0x94940094,
+    0x5c5c005c,0x02020002,0x4a4a004a,0x33330033,
+    0x67670067,0xf3f300f3,0x7f7f007f,0xe2e200e2,
+    0x9b9b009b,0x26260026,0x37370037,0x3b3b003b,
+    0x96960096,0x4b4b004b,0xbebe00be,0x2e2e002e,
+    0x79790079,0x8c8c008c,0x6e6e006e,0x8e8e008e,
+    0xf5f500f5,0xb6b600b6,0xfdfd00fd,0x59590059,
+    0x98980098,0x6a6a006a,0x46460046,0xbaba00ba,
+    0x25250025,0x42420042,0xa2a200a2,0xfafa00fa,
+    0x07070007,0x55550055,0xeeee00ee,0x0a0a000a,
+    0x49490049,0x68680068,0x38380038,0xa4a400a4,
+    0x28280028,0x7b7b007b,0xc9c900c9,0xc1c100c1,
+    0xe3e300e3,0xf4f400f4,0xc7c700c7,0x9e9e009e,
+};
+
+/**
+ * Stuff related to the Camellia key schedule
+ */
+#define subl(x) subL[(x)]
+#define subr(x) subR[(x)]
+
+void camellia_setup128(const unsigned char *key, u32 *subkey)
+{
+    u32 kll, klr, krl, krr;
+    u32 il, ir, t0, t1, w0, w1;
+    u32 kw4l, kw4r, dw, tl, tr;
+    u32 subL[26];
+    u32 subR[26];
+
+    /**
+     *  k == kll || klr || krl || krr (|| is concatenation)
+     */
+    kll = GETU32(key     );
+    klr = GETU32(key +  4);
+    krl = GETU32(key +  8);
+    krr = GETU32(key + 12);
+    /**
+     * generate KL dependent subkeys
+     */
+    subl(0) = kll; subr(0) = klr;
+    subl(1) = krl; subr(1) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+    subl(4) = kll; subr(4) = klr;
+    subl(5) = krl; subr(5) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30);
+    subl(10) = kll; subr(10) = klr;
+    subl(11) = krl; subr(11) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+    subl(13) = krl; subr(13) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+    subl(16) = kll; subr(16) = klr;
+    subl(17) = krl; subr(17) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+    subl(18) = kll; subr(18) = klr;
+    subl(19) = krl; subr(19) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+    subl(22) = kll; subr(22) = klr;
+    subl(23) = krl; subr(23) = krr;
+
+    /* generate KA */
+    kll = subl(0); klr = subr(0);
+    krl = subl(1); krr = subr(1);
+    CAMELLIA_F(kll, klr,
+               CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+               w0, w1, il, ir, t0, t1);
+    krl ^= w0; krr ^= w1;
+    CAMELLIA_F(krl, krr,
+               CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+               kll, klr, il, ir, t0, t1);
+    CAMELLIA_F(kll, klr,
+               CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+               krl, krr, il, ir, t0, t1);
+    krl ^= w0; krr ^= w1;
+    CAMELLIA_F(krl, krr,
+               CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+               w0, w1, il, ir, t0, t1);
+    kll ^= w0; klr ^= w1;
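+    /* At this point (kll, klr, krl, krr) holds KA, the 128-bit
+     * intermediate key derived from KL by the four CAMELLIA_F
+     * applications above (cf. RFC 3713); the subkey table is filled
+     * from both KL and KA. */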
/* generate KA dependent subkeys */ + subl(2) = kll; subr(2) = klr; + subl(3) = krl; subr(3) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; subr(6) = klr; + subl(7) = krl; subr(7) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(8) = kll; subr(8) = klr; + subl(9) = krl; subr(9) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(12) = kll; subr(12) = klr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(14) = kll; subr(14) = klr; + subl(15) = krl; subr(15) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); + subl(20) = kll; subr(20) = klr; + subl(21) = krl; subr(21) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(24) = kll; subr(24) = klr; + subl(25) = krl; subr(25) = krr; + + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); subr(3) ^= subr(1); + subl(5) ^= subl(1); subr(5) ^= subr(1); + subl(7) ^= subl(1); subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); subr(11) ^= subr(1); + subl(13) ^= subl(1); subr(13) ^= subr(1); + subl(15) ^= subl(1); subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); subr(19) ^= subr(1); + subl(21) ^= subl(1); subr(21) ^= subr(1); + subl(23) ^= subl(1); subr(23) ^= subr(1); + subl(24) ^= subl(1); subr(24) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(25); kw4r = subr(25); + subl(22) ^= kw4l; subr(22) ^= kw4r; + subl(20) ^= kw4l; subr(20) ^= kw4r; + subl(18) ^= kw4l; subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; subr(14) ^= kw4r; + subl(12) ^= kw4l; subr(12) ^= kw4r; + subl(10) ^= kw4l; subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; subr(6) ^= kw4r; + subl(4) ^= kw4l; subr(4) ^= kw4r; + subl(2) ^= kw4l; subr(2) ^= kw4r; + subl(0) ^= kw4l; subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + 
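+    /* Entries 16,17 assigned below are the second FL/FLINV layer key
+     * pair; camellia_encrypt128 consumes them via CAMELLIA_FLS. */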
+    CamelliaSubkeyL(16) = subl(16);
+    CamelliaSubkeyR(16) = subr(16);
+    CamelliaSubkeyL(17) = subl(17);
+    CamelliaSubkeyR(17) = subr(17);
+    tl = subl(15) ^ (subr(15) & ~subr(17));
+    dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(18) = tl ^ subl(19);
+    CamelliaSubkeyR(18) = tr ^ subr(19);
+    CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+    CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+    CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+    CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+    CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+    CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+    CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+    CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+    CamelliaSubkeyL(23) = subl(22);
+    CamelliaSubkeyR(23) = subr(22);
+    CamelliaSubkeyL(24) = subl(24) ^ subl(23);
+    CamelliaSubkeyR(24) = subr(24) ^ subr(23);
+
+    return;
+}
+
+void camellia_setup256(const unsigned char *key, u32 *subkey)
+{
+    u32 kll, klr, krl, krr;        /* left half of key */
+    u32 krll, krlr, krrl, krrr;    /* right half of key */
+    u32 il, ir, t0, t1, w0, w1;    /* temporary variables */
+    u32 kw4l, kw4r, dw, tl, tr;
+    u32 subL[34];
+    u32 subR[34];
+
+    /**
+     *  key = (kll || klr || krl || krr || krll || krlr || krrl || krrr)
+     *  (|| is concatenation)
+     */
+
+    kll  = GETU32(key     );
+    klr  = GETU32(key +  4);
+    krl  = GETU32(key +  8);
+    krr  = GETU32(key + 12);
+    krll = GETU32(key + 16);
+    krlr = GETU32(key + 20);
+    krrl = GETU32(key + 24);
+    krrr = GETU32(key + 28);
+
+    /* generate KL dependent subkeys */
+    subl(0) = kll; subr(0) = klr;
+    subl(1) = krl; subr(1) = krr;
+    CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45);
+    subl(12) = kll; subr(12) = klr;
+    subl(13) = krl; subr(13) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+    subl(16) = kll; subr(16) = klr;
+    subl(17) = krl; subr(17) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+    subl(22) = kll; subr(22) = klr;
+    subl(23) = krl; subr(23) = krr;
+    CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34);
+    subl(30) = kll; subr(30) = klr;
+    subl(31) = krl; subr(31) = krr;
+
+    /* generate KR dependent subkeys */
+    CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15);
+    subl(4) = krll; subr(4) = krlr;
+    subl(5) = krrl; subr(5) = krrr;
+    CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15);
+    subl(8) = krll; subr(8) = krlr;
+    subl(9) = krrl; subr(9) = krrr;
+    CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30);
+    subl(18) = krll; subr(18) = krlr;
+    subl(19) = krrl; subr(19) = krrr;
+    CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34);
+    subl(26) = krll; subr(26) = krlr;
+    subl(27) = krrl; subr(27) = krrr;
+    CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34);
+
+    /* generate KA */
+    kll = subl(0) ^ krll; klr = subr(0) ^ krlr;
+    krl = subl(1) ^ krrl; krr = subr(1) ^ krrr;
+    CAMELLIA_F(kll, klr,
+               CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+               w0, w1, il, ir, t0, t1);
+    krl ^= w0; krr ^= w1;
+    CAMELLIA_F(krl, krr,
+               CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+               kll, klr, il, ir, t0, t1);
+    kll ^= krll; klr ^= krlr;
+    CAMELLIA_F(kll, klr,
+               CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+               krl, krr, il, ir, t0, t1);
+    krl ^= w0 ^ krrl; krr ^= w1 ^ krrr;
+    CAMELLIA_F(krl, krr,
+               CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+               w0, w1, il, ir, t0, t1);
+    kll ^= w0; klr ^= w1;
+
+    /* generate KB */
+    krll ^= kll; krlr ^= klr;
+    krrl ^= krl; krrr ^= krr;
+    CAMELLIA_F(krll, krlr,
+               CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R,
+               w0, w1, il, ir, t0, t1);
+    krrl ^= w0; krrr ^= w1;
+    CAMELLIA_F(krrl, krrr,
+               CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R,
+               w0, w1, il, ir, t0, t1);
+    krll ^= w0; krlr ^= w1;
+
+    /* generate KA dependent subkeys */
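+    /* KA supplies subkeys 6,7, 14,15, 24,25 and 28,29 below; the KB
+     * dependent subkeys 2,3, 10,11, 20,21 and 32,33 follow them. */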
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; subr(6) = klr; + subl(7) = krl; subr(7) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); + subl(14) = kll; subr(14) = klr; + subl(15) = krl; subr(15) = krr; + subl(24) = klr; subr(24) = krl; + subl(25) = krr; subr(25) = kll; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49); + subl(28) = kll; subr(28) = klr; + subl(29) = krl; subr(29) = krr; + + /* generate KB dependent subkeys */ + subl(2) = krll; subr(2) = krlr; + subl(3) = krrl; subr(3) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(10) = krll; subr(10) = krlr; + subl(11) = krrl; subr(11) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(20) = krll; subr(20) = krlr; + subl(21) = krrl; subr(21) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51); + subl(32) = krll; subr(32) = krlr; + subl(33) = krrl; subr(33) = krrr; + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); subr(3) ^= subr(1); + subl(5) ^= subl(1); subr(5) ^= subr(1); + subl(7) ^= subl(1); subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); subr(11) ^= subr(1); + subl(13) ^= subl(1); subr(13) ^= subr(1); + subl(15) ^= subl(1); subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); subr(19) ^= subr(1); + subl(21) ^= subl(1); subr(21) ^= subr(1); + subl(23) ^= subl(1); subr(23) ^= subr(1); + subl(1) ^= subr(1) & ~subr(25); + dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); + subl(27) ^= subl(1); subr(27) ^= subr(1); + subl(29) ^= subl(1); subr(29) ^= subr(1); + subl(31) ^= subl(1); subr(31) ^= subr(1); + subl(32) ^= subl(1); subr(32) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(33); kw4r = subr(33); + subl(30) ^= kw4l; subr(30) ^= kw4r; + subl(28) ^= kw4l; subr(28) ^= kw4r; + subl(26) ^= kw4l; subr(26) ^= kw4r; + kw4l ^= kw4r & ~subr(24); + dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw); + subl(22) ^= kw4l; subr(22) ^= kw4r; + subl(20) ^= kw4l; subr(20) ^= kw4r; + subl(18) ^= kw4l; subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; subr(14) ^= kw4r; + subl(12) ^= kw4l; subr(12) ^= kw4r; + subl(10) ^= kw4l; subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; subr(6) ^= kw4r; + subl(4) ^= kw4l; subr(4) ^= kw4r; + subl(2) ^= kw4l; subr(2) ^= kw4r; + subl(0) ^= kw4l; subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr 
^ subr(11);
+    CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+    CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+    CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+    CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+    CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+    CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+    CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+    CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+    tl = subl(18) ^ (subr(18) & ~subr(16));
+    dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(15) = subl(14) ^ tl;
+    CamelliaSubkeyR(15) = subr(14) ^ tr;
+    CamelliaSubkeyL(16) = subl(16);
+    CamelliaSubkeyR(16) = subr(16);
+    CamelliaSubkeyL(17) = subl(17);
+    CamelliaSubkeyR(17) = subr(17);
+    tl = subl(15) ^ (subr(15) & ~subr(17));
+    dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(18) = tl ^ subl(19);
+    CamelliaSubkeyR(18) = tr ^ subr(19);
+    CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+    CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+    CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+    CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+    CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+    CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+    CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+    CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+    tl = subl(26) ^ (subr(26) & ~subr(24));
+    dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(23) = subl(22) ^ tl;
+    CamelliaSubkeyR(23) = subr(22) ^ tr;
+    CamelliaSubkeyL(24) = subl(24);
+    CamelliaSubkeyR(24) = subr(24);
+    CamelliaSubkeyL(25) = subl(25);
+    CamelliaSubkeyR(25) = subr(25);
+    tl = subl(23) ^ (subr(23) & ~subr(25));
+    dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(26) = tl ^ subl(27);
+    CamelliaSubkeyR(26) = tr ^ subr(27);
+    CamelliaSubkeyL(27) = subl(26) ^ subl(28);
+    CamelliaSubkeyR(27) = subr(26) ^ subr(28);
+    CamelliaSubkeyL(28) = subl(27) ^ subl(29);
+    CamelliaSubkeyR(28) = subr(27) ^ subr(29);
+    CamelliaSubkeyL(29) = subl(28) ^ subl(30);
+    CamelliaSubkeyR(29) = subr(28) ^ subr(30);
+    CamelliaSubkeyL(30) = subl(29) ^ subl(31);
+    CamelliaSubkeyR(30) = subr(29) ^ subr(31);
+    CamelliaSubkeyL(31) = subl(30);
+    CamelliaSubkeyR(31) = subr(30);
+    CamelliaSubkeyL(32) = subl(32) ^ subl(31);
+    CamelliaSubkeyR(32) = subr(32) ^ subr(31);
+
+    return;
+}
+
+void camellia_setup192(const unsigned char *key, u32 *subkey)
+{
+    unsigned char kk[32];
+    u32 krll, krlr, krrl, krrr;
+
+    memcpy(kk, key, 24);
+    memcpy((unsigned char *)&krll, key+16, 4);
+    memcpy((unsigned char *)&krlr, key+20, 4);
+    krrl = ~krll;
+    krrr = ~krlr;
+    memcpy(kk+24, (unsigned char *)&krrl, 4);
+    memcpy(kk+28, (unsigned char *)&krrr, 4);
+    camellia_setup256(kk, subkey);
+    return;
+}
+
+
+#ifndef USE_ARM_ASM
+/**
+ * Stuff related to Camellia encryption/decryption
+ *
+ * "io" must be 4-byte aligned and big-endian data.
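+ *
+ * A minimal usage sketch of the compatibility API defined further
+ * below (the buffer names are illustrative only):
+ *
+ *   KEY_TABLE_TYPE kt;
+ *   unsigned char key[16], pt[16], ct[16];
+ *   Camellia_Ekeygen(128, key, kt);
+ *   Camellia_EncryptBlock(128, pt, kt, ct);
+ *   Camellia_DecryptBlock(128, ct, kt, pt);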
+ */
+void camellia_encrypt128(const u32 *subkey, u32 *blocks)
+{
+    u32 il, ir, t0, t1;
+    u32 io[4];
+
+    io[0] = blocks[0];
+    io[1] = blocks[1];
+    io[2] = blocks[2];
+    io[3] = blocks[3];
+
+    /* pre-whitening, but absorb kw2 */
+    io[0] ^= CamelliaSubkeyL(0);
+    io[1] ^= CamelliaSubkeyR(0);
+    /* main iteration */
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+                     io[0],io[1],il,ir,t0,t1);
+
+    CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+                 CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+                 CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+                 t0,t1,il,ir);
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+                     io[0],io[1],il,ir,t0,t1);
+
+    CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+                 CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+                 CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+                 t0,t1,il,ir);
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+                     io[0],io[1],il,ir,t0,t1);
+
+    /* post-whitening, but kw4 */
+    io[2] ^= CamelliaSubkeyL(24);
+    io[3] ^= CamelliaSubkeyR(24);
+
+    t0 = io[0];
+    t1 = io[1];
+    io[0] = io[2];
+    io[1] = io[3];
+    io[2] = t0;
+    io[3] = t1;
+
+    blocks[0] = io[0];
+    blocks[1] = io[1];
+    blocks[2] = io[2];
+    blocks[3] = io[3];
+
+    return;
+}
+
+void camellia_decrypt128(const u32 *subkey, u32 *blocks)
+{
+    u32 il,ir,t0,t1;               /* temporary variables */
+    u32 io[4];
+
+    io[0] = blocks[0];
+    io[1] = blocks[1];
+    io[2] = blocks[2];
+    io[3] = blocks[3];
+
+    /* pre-whitening, but absorb kw2 */
+    io[0] ^= CamelliaSubkeyL(24);
+    io[1] ^= CamelliaSubkeyR(24);
+
+    /* main iteration */
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+                     io[0],io[1],il,ir,t0,t1);
+
+    CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+                 CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+                 CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+                 t0,t1,il,ir);
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+                     io[0],io[1],il,ir,t0,t1);
+
+    CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+                 CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+                 CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+                 t0,t1,il,ir);
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+                     io[0],io[1],il,ir,t0,t1);
+
+    /* post-whitening, but kw4 */
+    io[2] ^= CamelliaSubkeyL(0);
+    io[3] ^= CamelliaSubkeyR(0);
+
+    t0 = io[0];
+    t1 = io[1];
+    io[0] = io[2];
+    io[1] = io[3];
+    io[2] = t0;
+    io[3] = t1;
+
+    blocks[0] = io[0];
+    blocks[1] = io[1];
+    blocks[2] = io[2];
+    blocks[3] = io[3];
+
+    return;
+}
+
+/**
+ * Stuff for 192- and 256-bit encryption/decryption
+ */
+void camellia_encrypt256(const u32 *subkey, u32 *blocks)
+{
+    u32 il,ir,t0,t1;               /* temporary variables */
+    u32 io[4];
+
+    io[0] = blocks[0];
+    io[1] = blocks[1];
+    io[2] = blocks[2];
+    io[3] = blocks[3];
+
+    /* pre-whitening, but absorb kw2 */
+    io[0] ^= CamelliaSubkeyL(0);
+    io[1] ^= CamelliaSubkeyR(0);
+
+    /* main iteration */
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+                     io[0],io[1],il,ir,t0,t1);
+
+    CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+                 CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+                 CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+                 t0,t1,il,ir);
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+                     io[0],io[1],il,ir,t0,t1);
+
+    CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+                 CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+                 CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+                 t0,t1,il,ir);
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+                     io[0],io[1],il,ir,t0,t1);
+
+    CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+                 CamelliaSubkeyL(24),CamelliaSubkeyR(24),
+                 CamelliaSubkeyL(25),CamelliaSubkeyR(25),
+                 t0,t1,il,ir);
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(26),CamelliaSubkeyR(26),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(27),CamelliaSubkeyR(27),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(28),CamelliaSubkeyR(28),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(29),CamelliaSubkeyR(29),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(30),CamelliaSubkeyR(30),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(31),CamelliaSubkeyR(31),
+                     io[0],io[1],il,ir,t0,t1);
+
+    /* post-whitening, but kw4 */
+    io[2] ^= CamelliaSubkeyL(32);
+    io[3] ^= CamelliaSubkeyR(32);
+
+    t0 = io[0];
+    t1 = io[1];
+    io[0] = io[2];
+    io[1] = io[3];
+    io[2] = t0;
+    io[3] = t1;
+
+    blocks[0] = io[0];
+    blocks[1] = io[1];
+    blocks[2] = io[2];
+    blocks[3] = io[3];
+
+    return;
+}
+
+void camellia_decrypt256(const u32 *subkey, u32 *blocks)
+{
+    u32 il,ir,t0,t1;               /* temporary variables */
+    u32 io[4];
+
+    io[0] = blocks[0];
+    io[1] = blocks[1];
+    io[2] = blocks[2];
+    io[3] = blocks[3];
+
+    /* pre-whitening, but absorb kw2 */
+    io[0] ^= CamelliaSubkeyL(32);
+    io[1] ^= CamelliaSubkeyR(32);
+
+    /* main iteration */
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(31),CamelliaSubkeyR(31),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(30),CamelliaSubkeyR(30),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(29),CamelliaSubkeyR(29),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(28),CamelliaSubkeyR(28),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(27),CamelliaSubkeyR(27),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(26),CamelliaSubkeyR(26),
+                     io[0],io[1],il,ir,t0,t1);
+
+    CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+                 CamelliaSubkeyL(25),CamelliaSubkeyR(25),
+                 CamelliaSubkeyL(24),CamelliaSubkeyR(24),
+                 t0,t1,il,ir);
+
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1],
+                     CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+                     io[2],io[3],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[2],io[3],
+                     CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+                     io[0],io[1],il,ir,t0,t1);
+    CAMELLIA_ROUNDSM(io[0],io[1], +
CamelliaSubkeyL(19),CamelliaSubkeyR(19), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(18),CamelliaSubkeyR(18), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(17),CamelliaSubkeyR(17), + CamelliaSubkeyL(16),CamelliaSubkeyR(16), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(15),CamelliaSubkeyR(15), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(14),CamelliaSubkeyR(14), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(13),CamelliaSubkeyR(13), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(12),CamelliaSubkeyR(12), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(11),CamelliaSubkeyR(11), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(10),CamelliaSubkeyR(10), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(9),CamelliaSubkeyR(9), + CamelliaSubkeyL(8),CamelliaSubkeyR(8), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(7),CamelliaSubkeyR(7), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(6),CamelliaSubkeyR(6), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(5),CamelliaSubkeyR(5), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(4),CamelliaSubkeyR(4), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(3),CamelliaSubkeyR(3), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(2),CamelliaSubkeyR(2), + io[0],io[1],il,ir,t0,t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(0); + io[3] ^= CamelliaSubkeyR(0); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + blocks[0] = io[0]; + blocks[1] = io[1]; + blocks[2] = io[2]; + blocks[3] = io[3]; + + return; +} +#endif /*!USE_ARM_ASM*/ + + +/*** + * + * API for compatibility + */ + +void Camellia_Ekeygen(const int keyBitLength, + const unsigned char *rawKey, + KEY_TABLE_TYPE keyTable) +{ + switch(keyBitLength) { + case 128: + camellia_setup128(rawKey, keyTable); + break; + case 192: + camellia_setup192(rawKey, keyTable); + break; + case 256: + camellia_setup256(rawKey, keyTable); + break; + default: + break; + } +} + + +#ifndef USE_ARM_ASM +void Camellia_EncryptBlock(const int keyBitLength, + const unsigned char *plaintext, + const KEY_TABLE_TYPE keyTable, + unsigned char *ciphertext) +{ + u32 tmp[4]; + + tmp[0] = GETU32(plaintext); + tmp[1] = GETU32(plaintext + 4); + tmp[2] = GETU32(plaintext + 8); + tmp[3] = GETU32(plaintext + 12); + + switch (keyBitLength) { + case 128: + camellia_encrypt128(keyTable, tmp); + break; + case 192: + /* fall through */ + case 256: + camellia_encrypt256(keyTable, tmp); + break; + default: + break; + } + + PUTU32(ciphertext, tmp[0]); + PUTU32(ciphertext + 4, tmp[1]); + PUTU32(ciphertext + 8, tmp[2]); + PUTU32(ciphertext + 12, tmp[3]); +} + +void Camellia_DecryptBlock(const int keyBitLength, + const unsigned char *ciphertext, + const KEY_TABLE_TYPE keyTable, + unsigned char *plaintext) +{ + u32 tmp[4]; + + tmp[0] = GETU32(ciphertext); + tmp[1] = GETU32(ciphertext + 4); + tmp[2] = GETU32(ciphertext + 8); + tmp[3] = GETU32(ciphertext + 12); + + switch (keyBitLength) { + case 128: + camellia_decrypt128(keyTable, tmp); + break; + case 192: + /* fall through */ + case 256: + camellia_decrypt256(keyTable, tmp); + break; + default: + break; 
+ } + PUTU32(plaintext, tmp[0]); + PUTU32(plaintext + 4, tmp[1]); + PUTU32(plaintext + 8, tmp[2]); + PUTU32(plaintext + 12, tmp[3]); +} +#endif /*!USE_ARM_ASM*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/camellia.h b/libotr/libgcrypt-1.8.7/cipher/camellia.h new file mode 100644 index 0000000..d7a1e6f --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/camellia.h @@ -0,0 +1,95 @@ +/* camellia.h ver 1.2.0 + * + * Copyright (C) 2006,2007 + * NTT (Nippon Telegraph and Telephone Corporation). + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef HEADER_CAMELLIA_H +#define HEADER_CAMELLIA_H + +/* To use Camellia with libraries it is often useful to keep the name + * space of the library clean. The following macro is thus useful: + * + * #define CAMELLIA_EXT_SYM_PREFIX foo_ + * + * This prefixes all external symbols with "foo_". + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +/* USE_ARM_ASM indicates whether to use ARM assembly code. */ +# undef USE_ARM_ASM +# if defined(__ARMEL__) +# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS +# define USE_ARM_ASM 1 +# endif +# endif +# if defined(__AARCH64EL__) +# ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS +# define USE_ARM_ASM 1 +# endif +# endif +#endif +#ifdef CAMELLIA_EXT_SYM_PREFIX +#define CAMELLIA_PREFIX1(x,y) x ## y +#define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y) +#define CAMELLIA_PREFIX(x) CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x) +#define Camellia_Ekeygen CAMELLIA_PREFIX(Camellia_Ekeygen) +#define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock) +#define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock) +#define camellia_decrypt128 CAMELLIA_PREFIX(camellia_decrypt128) +#define camellia_decrypt256 CAMELLIA_PREFIX(camellia_decrypt256) +#define camellia_encrypt128 CAMELLIA_PREFIX(camellia_encrypt128) +#define camellia_encrypt256 CAMELLIA_PREFIX(camellia_encrypt256) +#define camellia_setup128 CAMELLIA_PREFIX(camellia_setup128) +#define camellia_setup192 CAMELLIA_PREFIX(camellia_setup192) +#define camellia_setup256 CAMELLIA_PREFIX(camellia_setup256) +#endif /*CAMELLIA_EXT_SYM_PREFIX*/ + + +#ifdef __cplusplus +extern "C" { +#endif + +#define CAMELLIA_BLOCK_SIZE 16 +#define CAMELLIA_TABLE_BYTE_LEN 272 +#define CAMELLIA_TABLE_WORD_LEN (CAMELLIA_TABLE_BYTE_LEN / 4) + +typedef unsigned int KEY_TABLE_TYPE[CAMELLIA_TABLE_WORD_LEN]; + + +void Camellia_Ekeygen(const int keyBitLength, + const unsigned char *rawKey, + KEY_TABLE_TYPE keyTable); + +#ifndef USE_ARM_ASM +void Camellia_EncryptBlock(const int keyBitLength, + const unsigned char *plaintext, + const KEY_TABLE_TYPE keyTable, + unsigned char *cipherText); + +void Camellia_DecryptBlock(const int keyBitLength, + const unsigned char *cipherText, + const KEY_TABLE_TYPE keyTable, + unsigned char *plaintext); +#endif /*!USE_ARM_ASM*/ + + +#ifdef __cplusplus +} +#endif + +#endif /* 
HEADER_CAMELLIA_H */ diff --git a/libotr/libgcrypt-1.8.7/cipher/cast5-amd64.S b/libotr/libgcrypt-1.8.7/cipher/cast5-amd64.S new file mode 100644 index 0000000..c04015a --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cast5-amd64.S @@ -0,0 +1,605 @@ +/* cast5-amd64.S - AMD64 assembly implementation of CAST5 cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5) + +#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__) +# define GET_EXTERN_POINTER(name, reg) movabsq $name, reg +#else +# ifdef __code_model_large__ +# define GET_EXTERN_POINTER(name, reg) \ + pushq %r15; \ + pushq %r14; \ + 1: leaq 1b(%rip), reg; \ + movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \ + movabsq $name@GOT, %r15; \ + addq %r14, reg; \ + popq %r14; \ + movq (reg, %r15), reg; \ + popq %r15; +# else +# define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg +# endif +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +.text + +.extern _gcry_cast5_s1to4; + +#define s1 0 +#define s2 (s1 + (4 * 256)) +#define s3 (s2 + (4 * 256)) +#define s4 (s3 + (4 * 256)) + +/* structure of CAST5_context: */ +#define Km 0 +#define Kr (Km + (16 * 4)) + +/* register macros */ +#define CTX %rdi +#define RIO %rsi +#define RTAB %r8 + +#define RLR0 %r9 +#define RLR1 %r10 +#define RLR2 %r11 +#define RLR3 %r12 + +#define RLR0d %r9d +#define RLR1d %r10d +#define RLR2d %r11d +#define RLR3d %r12d + +#define RX0 %rax +#define RX1 %rbx +#define RX2 %rdx + +#define RX0d %eax +#define RX1d %ebx +#define RX2d %edx + +#define RX0bl %al +#define RX1bl %bl +#define RX2bl %dl + +#define RX0bh %ah +#define RX1bh %bh +#define RX2bh %dh + +#define RKR %rcx +#define RKRd %ecx +#define RKRbl %cl + +#define RT0 %rbp +#define RT1 %rsi + +#define RT0d %ebp +#define RT1d %esi + +#define RKM0d %r13d +#define RKM1d %r14d + +/*********************************************************************** + * 1-way cast5 + ***********************************************************************/ +#define dummy(x) + +#define shr_kr(none) \ + shrq $8, RKR; + +#define F(km, load_next_kr, op0, op1, op2, op3) \ + op0 ## l RLR0d, km ## d; \ + roll RKRbl, km ## d; \ + rorq $32, RLR0; \ + movzbl km ## bh, RT0d; \ + movzbl km ## bl, RT1d; \ + roll $16, km ## d; \ + movl s1(RTAB,RT0,4), RT0d; \ + op1 ## l s2(RTAB,RT1,4), RT0d; \ + load_next_kr(kr_next); \ + movzbl km ## bh, RT1d; \ + movzbl km ## bl, km ## d; \ + op2 ## l s3(RTAB,RT1,4), RT0d; \ + op3 ## l s4(RTAB,km,4), RT0d; \ + xorq RT0, RLR0; + +#define F1(km, load_next_kr) \ + F(##km, load_next_kr, add, xor, sub, add) +#define F2(km, load_next_kr) \ + F(##km, load_next_kr, xor, sub, add, xor) +#define F3(km, load_next_kr) \ + F(##km, load_next_kr, sub, add, xor, sub) + +#define get_round_km(n, km) \ + movl Km+4*(n)(CTX), km; + +#define get_round_kr_enc(n) \ + movq $0x1010101010101010, RKR; \ + \ + /* merge rorl rk and rorl $16 */ \ + xorq Kr+(n)(CTX), RKR; + +#define get_round_kr_dec(n) \ + movq $0x1010101010101010, RKR; \ + \ + /* merge rorl rk and rorl $16 */ \ + xorq Kr+(n - 7)(CTX), RKR; \ + bswapq RKR; + +#define round_enc(n, FA, FB, fn1, fn2) \ + get_round_km(n + 1, RX2d); \ + FA(RX0, fn1); \ + get_round_km(n + 2, RX0d); \ + FB(RX2, fn2); + +#define round_enc_last(n, FXA, FXB) \ + get_round_km(n + 1, RX2d); \ + \ + FXA(RX0, shr_kr); \ + FXB(RX2, dummy); + +#define round_enc_1(n, FA, FB) \ + round_enc(n, FA, FB, shr_kr, shr_kr) + +#define round_enc_2(n, FA, FB) \ + round_enc(n, FA, FB, shr_kr, dummy) + +#define round_dec(n, FA, FB, fn1, fn2) \ + get_round_km(n - 1, RX2d); \ + FA(RX0, fn1); \ + get_round_km(n - 2, RX0d); \ + FB(RX2, fn2); + +#define round_dec_last(n, FXA, FXB) \ + get_round_km(n - 1, RX2d); \ + FXA(RX0, shr_kr); \ + FXB(RX2, dummy); + +#define round_dec_1(n, FA, FB) \ + round_dec(n, FA, FB, shr_kr, shr_kr) + +#define round_dec_2(n, FA, FB) \ + round_dec(n, FA, FB, shr_kr, dummy) + +#define read_block() \ + movq (RIO), RLR0; \ + bswapq RLR0; + +#define write_block() \ + bswapq RLR0; \ + rorq $32, RLR0; \ + movq RLR0, (RIO); + +.align 8 +.globl _gcry_cast5_amd64_encrypt_block +ELF(.type _gcry_cast5_amd64_encrypt_block,@function;) + +_gcry_cast5_amd64_encrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + pushq %rbp; + pushq %rbx; + + movq %rsi, %r10; + + GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); + + movq %rdx, RIO; + read_block(); + + get_round_km(0, RX0d); + get_round_kr_enc(0); + round_enc_1(0, F1, F2); + round_enc_1(2, F3, F1); + round_enc_1(4, F2, 
F3); + round_enc_2(6, F1, F2); + get_round_kr_enc(8); + round_enc_1(8, F3, F1); + round_enc_1(10, F2, F3); + round_enc_1(12, F1, F2); + round_enc_last(14, F3, F1); + + movq %r10, RIO; + write_block(); + + popq %rbx; + popq %rbp; + ret; +ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;) + +.align 8 +.globl _gcry_cast5_amd64_decrypt_block +ELF(.type _gcry_cast5_amd64_decrypt_block,@function;) + +_gcry_cast5_amd64_decrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + pushq %rbp; + pushq %rbx; + + movq %rsi, %r10; + + GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); + + movq %rdx, RIO; + read_block(); + + get_round_km(15, RX0d); + get_round_kr_dec(15); + round_dec_1(15, F1, F3); + round_dec_1(13, F2, F1); + round_dec_1(11, F3, F2); + round_dec_2(9, F1, F3); + get_round_kr_dec(7); + round_dec_1(7, F2, F1); + round_dec_1(5, F3, F2); + round_dec_1(3, F1, F3); + round_dec_last(1, F2, F1); + + movq %r10, RIO; + write_block(); + + popq %rbx; + popq %rbp; + ret; +ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;) + +/********************************************************************** + 4-way cast5, four blocks parallel + **********************************************************************/ +#define F_tail(rlr, rx, op1, op2, op3) \ + movzbl rx ## bh, RT0d; \ + movzbl rx ## bl, RT1d; \ + roll $16, rx ## d; \ + movl s1(RTAB,RT0,4), RT0d; \ + op1 ## l s2(RTAB,RT1,4), RT0d; \ + movzbl rx ## bh, RT1d; \ + movzbl rx ## bl, rx ## d; \ + op2 ## l s3(RTAB,RT1,4), RT0d; \ + op3 ## l s4(RTAB,rx,4), RT0d; \ + xorq RT0, rlr; + +#define F4(km, load_next_kr, op0, op1, op2, op3) \ + movl km, RX0d; \ + op0 ## l RLR0d, RX0d; \ + roll RKRbl, RX0d; \ + rorq $32, RLR0; \ + \ + movl km, RX1d; \ + op0 ## l RLR1d, RX1d; \ + roll RKRbl, RX1d; \ + rorq $32, RLR1; \ + \ + movl km, RX2d; \ + op0 ## l RLR2d, RX2d; \ + roll RKRbl, RX2d; \ + rorq $32, RLR2; \ + \ + F_tail(RLR0, RX0, op1, op2, op3); \ + F_tail(RLR1, RX1, op1, op2, op3); \ + F_tail(RLR2, RX2, op1, op2, op3); \ + \ + movl km, RX0d; \ + op0 ## l RLR3d, RX0d; \ + roll RKRbl, RX0d; \ + load_next_kr(); \ + rorq $32, RLR3; \ + \ + F_tail(RLR3, RX0, op1, op2, op3); + +#define F4_1(km, load_next_kr) \ + F4(km, load_next_kr, add, xor, sub, add) +#define F4_2(km, load_next_kr) \ + F4(km, load_next_kr, xor, sub, add, xor) +#define F4_3(km, load_next_kr) \ + F4(km, load_next_kr, sub, add, xor, sub) + +#define round_enc4(n, FA, FB, fn1, fn2) \ + get_round_km(n + 1, RKM1d); \ + FA(RKM0d, fn1); \ + get_round_km(n + 2, RKM0d); \ + FB(RKM1d, fn2); + +#define round_enc_last4(n, FXA, FXB) \ + get_round_km(n + 1, RKM1d); \ + FXA(RKM0d, shr_kr); \ + FXB(RKM1d, dummy); + +#define round_enc4_1(n, FA, FB) \ + round_enc4(n, FA, FB, shr_kr, shr_kr); + +#define round_enc4_2(n, FA, FB) \ + round_enc4(n, FA, FB, shr_kr, dummy); + +#define round_dec4(n, FA, FB, fn1, fn2) \ + get_round_km(n - 1, RKM1d); \ + FA(RKM0d, fn1); \ + get_round_km(n - 2, RKM0d); \ + FB(RKM1d, fn2); + +#define round_dec_last4(n, FXA, FXB) \ + get_round_km(n - 1, RKM1d); \ + FXA(RKM0d, shr_kr); \ + FXB(RKM1d, dummy); + +#define round_dec4_1(n, FA, FB) \ + round_dec4(n, FA, FB, shr_kr, shr_kr); + +#define round_dec4_2(n, FA, FB) \ + round_dec4(n, FA, FB, shr_kr, dummy); + +#define inbswap_block4(a, b, c, d) \ + bswapq a; \ + bswapq b; \ + bswapq c; \ + bswapq d; + +#define outbswap_block4(a, b, c, d) \ + bswapq a; \ + bswapq b; \ + bswapq c; \ + bswapq d; \ + rorq $32, a; \ + rorq $32, b; \ + rorq $32, c; \ + rorq $32, d; + +.align 8 
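+/* __cast5_enc_blk4/__cast5_dec_blk4 keep one 64-bit block in each of
+ * RLR0..RLR3 and back the bulk CTR, CBC-decrypt and CFB-decrypt entry
+ * points below. */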
+ELF(.type __cast5_enc_blk4,@function;)
+
+__cast5_enc_blk4:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	RLR0,RLR1,RLR2,RLR3: four input plaintext blocks
+	 * output:
+	 *	RLR0,RLR1,RLR2,RLR3: four output ciphertext blocks
+	 */
+	GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+	get_round_km(0, RKM0d);
+	get_round_kr_enc(0);
+	round_enc4_1(0, F4_1, F4_2);
+	round_enc4_1(2, F4_3, F4_1);
+	round_enc4_1(4, F4_2, F4_3);
+	round_enc4_2(6, F4_1, F4_2);
+	get_round_kr_enc(8);
+	round_enc4_1(8, F4_3, F4_1);
+	round_enc4_1(10, F4_2, F4_3);
+	round_enc4_1(12, F4_1, F4_2);
+	round_enc_last4(14, F4_3, F4_1);
+
+	outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+	ret;
+ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
+
+.align 8
+ELF(.type __cast5_dec_blk4,@function;)
+
+__cast5_dec_blk4:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	RLR0,RLR1,RLR2,RLR3: four input ciphertext blocks
+	 * output:
+	 *	RLR0,RLR1,RLR2,RLR3: four output plaintext blocks
+	 */
+	GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+	inbswap_block4(RLR0, RLR1, RLR2, RLR3);
+
+	get_round_km(15, RKM0d);
+	get_round_kr_dec(15);
+	round_dec4_1(15, F4_1, F4_3);
+	round_dec4_1(13, F4_2, F4_1);
+	round_dec4_1(11, F4_3, F4_2);
+	round_dec4_2(9, F4_1, F4_3);
+	get_round_kr_dec(7);
+	round_dec4_1(7, F4_2, F4_1);
+	round_dec4_1(5, F4_3, F4_2);
+	round_dec4_1(3, F4_1, F4_3);
+	round_dec_last4(1, F4_2, F4_1);
+
+	outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+	ret;
+ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
+
+.align 8
+.globl _gcry_cast5_amd64_ctr_enc
+ELF(.type _gcry_cast5_amd64_ctr_enc,@function;)
+_gcry_cast5_amd64_ctr_enc:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (4 blocks)
+	 *	%rdx: src (4 blocks)
+	 *	%rcx: iv (big endian, 64bit)
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+
+	pushq %rsi;
+	pushq %rdx;
+
+	/* load IV and byteswap */
+	movq (%rcx), RX0;
+	bswapq RX0;
+	movq RX0, RLR0;
+
+	/* construct IVs */
+	leaq 1(RX0), RLR1;
+	leaq 2(RX0), RLR2;
+	leaq 3(RX0), RLR3;
+	leaq 4(RX0), RX0;
+	bswapq RX0;
+
+	/* store new IV */
+	movq RX0, (%rcx);
+
+	call __cast5_enc_blk4;
+
+	popq %r14; /*src*/
+	popq %r13; /*dst*/
+
+	/* XOR key-stream with plaintext */
+	xorq 0 * 8(%r14), RLR0;
+	xorq 1 * 8(%r14), RLR1;
+	xorq 2 * 8(%r14), RLR2;
+	xorq 3 * 8(%r14), RLR3;
+	movq RLR0, 0 * 8(%r13);
+	movq RLR1, 1 * 8(%r13);
+	movq RLR2, 2 * 8(%r13);
+	movq RLR3, 3 * 8(%r13);
+
+	popq %r14;
+	popq %r13;
+	popq %r12;
+	popq %rbx;
+	popq %rbp;
+	ret
+ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
+
+.align 8
+.globl _gcry_cast5_amd64_cbc_dec
+ELF(.type _gcry_cast5_amd64_cbc_dec,@function;)
+_gcry_cast5_amd64_cbc_dec:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (4 blocks)
+	 *	%rdx: src (4 blocks)
+	 *	%rcx: iv (64bit)
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+
+	pushq %rcx;
+	pushq %rsi;
+	pushq %rdx;
+
+	/* load input */
+	movq 0 * 8(%rdx), RLR0;
+	movq 1 * 8(%rdx), RLR1;
+	movq 2 * 8(%rdx), RLR2;
+	movq 3 * 8(%rdx), RLR3;
+
+	call __cast5_dec_blk4;
+
+	popq RX0; /*src*/
+	popq RX1; /*dst*/
+	popq RX2; /*iv*/
+
+	movq 3 * 8(RX0), %r14;
+	xorq (RX2), RLR0;
+	xorq 0 * 8(RX0), RLR1;
+	xorq 1 * 8(RX0), RLR2;
+	xorq 2 * 8(RX0), RLR3;
+	movq %r14, (RX2); /* store new IV */
+
+	movq RLR0, 0 * 8(RX1);
+	movq RLR1, 1 * 8(RX1);
+	movq RLR2, 2 * 8(RX1);
+	movq RLR3, 3 * 8(RX1);
+
+	popq %r14;
+	popq %r13;
+	popq %r12;
+	popq %rbx;
+	popq %rbp;
+	ret;
+
+ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_cast5_amd64_cfb_dec
+ELF(.type _gcry_cast5_amd64_cfb_dec,@function;)
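+/* CFB decryption: encrypt IV||C0||C1||C2 to obtain the key stream,
+ * XOR it with C0..C3, and make C3 the new IV. */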
+_gcry_cast5_amd64_cfb_dec:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (4 blocks)
+	 *	%rdx: src (4 blocks)
+	 *	%rcx: iv (64bit)
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+
+	pushq %rsi;
+	pushq %rdx;
+
+	/* Load input */
+	movq (%rcx), RLR0;
+	movq 0 * 8(%rdx), RLR1;
+	movq 1 * 8(%rdx), RLR2;
+	movq 2 * 8(%rdx), RLR3;
+
+	inbswap_block4(RLR0, RLR1, RLR2, RLR3);
+
+	/* Update IV */
+	movq 3 * 8(%rdx), %rdx;
+	movq %rdx, (%rcx);
+
+	call __cast5_enc_blk4;
+
+	popq %rdx; /*src*/
+	popq %rcx; /*dst*/
+
+	xorq 0 * 8(%rdx), RLR0;
+	xorq 1 * 8(%rdx), RLR1;
+	xorq 2 * 8(%rdx), RLR2;
+	xorq 3 * 8(%rdx), RLR3;
+	movq RLR0, 0 * 8(%rcx);
+	movq RLR1, 1 * 8(%rcx);
+	movq RLR2, 2 * 8(%rcx);
+	movq RLR3, 3 * 8(%rcx);
+
+	popq %r14;
+	popq %r13;
+	popq %r12;
+	popq %rbx;
+	popq %rbp;
+	ret;
+
+ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;)
+
+#endif /*defined(USE_CAST5)*/
+#endif /*__x86_64*/
diff --git a/libotr/libgcrypt-1.8.7/cipher/cast5-arm.S b/libotr/libgcrypt-1.8.7/cipher/cast5-arm.S
new file mode 100644
index 0000000..76ddd2e
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/cast5-arm.S
@@ -0,0 +1,728 @@
+/* cast5-arm.S - ARM assembly implementation of CAST5 cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include <config.h> + +#if defined(__ARMEL__) +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +.text + +.syntax unified +.arm + +.extern _gcry_cast5_s1to4; + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + +/* structure of crypto context */ +#define Km 0 +#define Kr (Km + (16 * 4)) +#define Kr_arm_enc (Kr + (16)) +#define Kr_arm_dec (Kr_arm_enc + (16)) + +/* register macros */ +#define CTX %r0 +#define Rs1 %r7 +#define Rs2 %r8 +#define Rs3 %r9 +#define Rs4 %r10 +#define RMASK %r11 +#define RKM %r1 +#define RKR %r2 + +#define RL0 %r3 +#define RR0 %r4 + +#define RL1 %r9 +#define RR1 %r10 + +#define RT0 %lr +#define RT1 %ip +#define RT2 %r5 +#define RT3 %r6 + +/* helper macros */ +#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 0)]; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 3)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 0)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 1)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 2)]; \ + strb rtmp0, [rdst, #((offs) + 3)]; + +#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 3)]; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 0)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 3)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 2)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 1)]; \ + strb rtmp0, [rdst, #((offs) + 0)]; + +#ifdef __ARMEL__ + #define ldr_unaligned_host ldr_unaligned_le + #define str_unaligned_host str_unaligned_le + + /* bswap on little-endian */ +#ifdef HAVE_ARM_ARCH_V6 + #define host_to_be(reg, rtmp) \ + rev reg, reg; + #define be_to_host(reg, rtmp) \ + rev reg, reg; +#else + #define host_to_be(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; + #define be_to_host(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; +#endif +#else + #define ldr_unaligned_host ldr_unaligned_be + #define str_unaligned_host str_unaligned_be + + /* nop on big-endian */ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ +#endif + +#define host_to_host(x, y) /*_*/ + +/********************************************************************** + 1-way cast5 + **********************************************************************/ + +#define dummy(n) /*_*/ + +#define load_kr(n) \ + ldr RKR, [CTX, #(Kr_arm_enc + (n))]; /* Kr[n] */ + +#define load_dec_kr(n) \ + ldr RKR, [CTX, #(Kr_arm_dec + (n) - 3)]; /* Kr[n] */ + +#define load_km(n) \ + ldr RKM, [CTX, #(Km + (n) * 4)]; /* Km[n] */ + +#define shift_kr(dummy) \ + mov RKR, RKR, lsr #8; + +#define F(n, rl, rr, op1, op2, op3, op4, dec, loadkm, shiftkr, loadkr) \ + op1 RKM, rr; 
\ + mov RKM, RKM, ror RKR; \ + \ + and RT0, RMASK, RKM, ror #(24); \ + and RT1, RMASK, RKM, lsr #(16); \ + and RT2, RMASK, RKM, lsr #(8); \ + ldr RT0, [Rs1, RT0]; \ + and RT3, RMASK, RKM; \ + ldr RT1, [Rs2, RT1]; \ + shiftkr(RKR); \ + \ + ldr RT2, [Rs3, RT2]; \ + \ + op2 RT0, RT1; \ + ldr RT3, [Rs4, RT3]; \ + op3 RT0, RT2; \ + loadkm((n) + (1 - ((dec) * 2))); \ + op4 RT0, RT3; \ + loadkr((n) + (1 - ((dec) * 2))); \ + eor rl, RT0; + +#define F1(n, rl, rr, dec, loadkm, shiftkr, loadkr) \ + F(n, rl, rr, add, eor, sub, add, dec, loadkm, shiftkr, loadkr) +#define F2(n, rl, rr, dec, loadkm, shiftkr, loadkr) \ + F(n, rl, rr, eor, sub, add, eor, dec, loadkm, shiftkr, loadkr) +#define F3(n, rl, rr, dec, loadkm, shiftkr, loadkr) \ + F(n, rl, rr, sub, add, eor, sub, dec, loadkm, shiftkr, loadkr) + +#define enc_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ + Fx(n, rl, rr, 0, loadkm, shiftkr, loadkr) + +#define dec_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ + Fx(n, rl, rr, 1, loadkm, shiftkr, loadkr) + +#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \ + ldr l0, [rin, #((offs) + 0)]; \ + ldr r0, [rin, #((offs) + 4)]; \ + convert(l0, rtmp); \ + convert(r0, rtmp); + +#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + str l0, [rout, #((offs) + 0)]; \ + str r0, [rout, #((offs) + 4)]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads allowed */ + #define read_block(rin, offs, l0, r0, rtmp0) \ + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0) + + #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \ + write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0) + + #define read_block_host(rin, offs, l0, r0, rtmp0) \ + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0) + + #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \ + write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0) +#else + /* need to handle unaligned reads by byte reads */ + #define read_block(rin, offs, l0, r0, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \ + ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \ + b 2f; \ + 1:;\ + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \ + 2:; + + #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \ + str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \ + 2:; + + #define read_block_host(rin, offs, l0, r0, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \ + ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \ + b 2f; \ + 1:;\ + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \ + 2:; + + #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); \ + str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block_aligned(rout, offs, l0, r0, host_to_host, rtmp0); \ + 2:; +#endif + +.align 3 +.globl _gcry_cast5_arm_encrypt_block +.type _gcry_cast5_arm_encrypt_block,%function; + +_gcry_cast5_arm_encrypt_block: + /* input: + * %r0: CTX + * %r1: dst + * %r2: src + */ + push {%r1, %r4-%r11, %ip, %lr}; + + GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); + mov RMASK, #(0xff << 2); + add Rs2, Rs1, #(0x100*4); + add Rs3, Rs1, #(0x100*4*2); + add Rs4, Rs1, #(0x100*4*3); + + read_block(%r2, 0, RL0, RR0, RT0); + + 
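+	/* 16 rounds follow; one Kr word packs four 8-bit rotation
+	 * amounts, so shift_kr consumes one byte per round and load_kr
+	 * refetches every fourth round. */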
load_km(0); + load_kr(0); + enc_round(0, F1, RL0, RR0, load_km, shift_kr, dummy); + enc_round(1, F2, RR0, RL0, load_km, shift_kr, dummy); + enc_round(2, F3, RL0, RR0, load_km, shift_kr, dummy); + enc_round(3, F1, RR0, RL0, load_km, dummy, load_kr); + enc_round(4, F2, RL0, RR0, load_km, shift_kr, dummy); + enc_round(5, F3, RR0, RL0, load_km, shift_kr, dummy); + enc_round(6, F1, RL0, RR0, load_km, shift_kr, dummy); + enc_round(7, F2, RR0, RL0, load_km, dummy, load_kr); + enc_round(8, F3, RL0, RR0, load_km, shift_kr, dummy); + enc_round(9, F1, RR0, RL0, load_km, shift_kr, dummy); + enc_round(10, F2, RL0, RR0, load_km, shift_kr, dummy); + enc_round(11, F3, RR0, RL0, load_km, dummy, load_kr); + enc_round(12, F1, RL0, RR0, load_km, shift_kr, dummy); + enc_round(13, F2, RR0, RL0, load_km, shift_kr, dummy); + enc_round(14, F3, RL0, RR0, load_km, shift_kr, dummy); + enc_round(15, F1, RR0, RL0, dummy, dummy, dummy); + + ldr %r1, [%sp], #4; + write_block(%r1, 0, RR0, RL0, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_encrypt_block,.-_gcry_cast5_arm_encrypt_block; + +.align 3 +.globl _gcry_cast5_arm_decrypt_block +.type _gcry_cast5_arm_decrypt_block,%function; + +_gcry_cast5_arm_decrypt_block: + /* input: + * %r0: CTX + * %r1: dst + * %r2: src + */ + push {%r1, %r4-%r11, %ip, %lr}; + + GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); + mov RMASK, #(0xff << 2); + add Rs2, Rs1, #(0x100 * 4); + add Rs3, Rs1, #(0x100 * 4 * 2); + add Rs4, Rs1, #(0x100 * 4 * 3); + + read_block(%r2, 0, RL0, RR0, RT0); + + load_km(15); + load_dec_kr(15); + dec_round(15, F1, RL0, RR0, load_km, shift_kr, dummy); + dec_round(14, F3, RR0, RL0, load_km, shift_kr, dummy); + dec_round(13, F2, RL0, RR0, load_km, shift_kr, dummy); + dec_round(12, F1, RR0, RL0, load_km, dummy, load_dec_kr); + dec_round(11, F3, RL0, RR0, load_km, shift_kr, dummy); + dec_round(10, F2, RR0, RL0, load_km, shift_kr, dummy); + dec_round(9, F1, RL0, RR0, load_km, shift_kr, dummy); + dec_round(8, F3, RR0, RL0, load_km, dummy, load_dec_kr); + dec_round(7, F2, RL0, RR0, load_km, shift_kr, dummy); + dec_round(6, F1, RR0, RL0, load_km, shift_kr, dummy); + dec_round(5, F3, RL0, RR0, load_km, shift_kr, dummy); + dec_round(4, F2, RR0, RL0, load_km, dummy, load_dec_kr); + dec_round(3, F1, RL0, RR0, load_km, shift_kr, dummy); + dec_round(2, F3, RR0, RL0, load_km, shift_kr, dummy); + dec_round(1, F2, RL0, RR0, load_km, shift_kr, dummy); + dec_round(0, F1, RR0, RL0, dummy, dummy, dummy); + + ldr %r1, [%sp], #4; + write_block(%r1, 0, RR0, RL0, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_decrypt_block,.-_gcry_cast5_arm_decrypt_block; + +/********************************************************************** + 2-way cast5 + **********************************************************************/ + +#define F_2w(n, rl0, rr0, rl1, rr1, op1, op2, op3, op4, dec, loadkm, shiftkr, \ + loadkr) \ + op1 RT3, RKM, rr0; \ + op1 RKM, RKM, rr1; \ + mov RT3, RT3, ror RKR; \ + mov RKM, RKM, ror RKR; \ + \ + and RT0, RMASK, RT3, ror #(24); \ + and RT1, RMASK, RT3, lsr #(16); \ + and RT2, RMASK, RT3, lsr #(8); \ + and RT3, RMASK, RT3; \ + \ + ldr RT0, [Rs1, RT0]; \ + add RT2, #(0x100 * 4); \ + ldr RT1, [Rs2, RT1]; \ + add RT3, #(0x100 * 4 * 2); \ + \ + ldr RT2, [Rs2, RT2]; \ + \ + op2 RT0, RT1; \ + ldr RT3, [Rs2, RT3]; \ + and RT1, RMASK, RKM, ror #(24); \ + op3 RT0, RT2; \ + and RT2, RMASK, RKM, lsr #(16); \ + op4 RT0, RT3; \ + and RT3, RMASK, RKM, lsr #(8); \ + eor rl0, RT0; \ + add RT3, #(0x100 * 4); \ + ldr RT1, [Rs1, RT1]; \ + and RT0, 
RMASK, RKM; \ + ldr RT2, [Rs2, RT2]; \ + add RT0, #(0x100 * 4 * 2); \ + \ + ldr RT3, [Rs2, RT3]; \ + \ + op2 RT1, RT2; \ + ldr RT0, [Rs2, RT0]; \ + op3 RT1, RT3; \ + loadkm((n) + (1 - ((dec) * 2))); \ + op4 RT1, RT0; \ + loadkr((n) + (1 - ((dec) * 2))); \ + shiftkr(RKR); \ + eor rl1, RT1; + +#define F1_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \ + F_2w(n, rl0, rr0, rl1, rr1, add, eor, sub, add, dec, \ + loadkm, shiftkr, loadkr) +#define F2_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \ + F_2w(n, rl0, rr0, rl1, rr1, eor, sub, add, eor, dec, \ + loadkm, shiftkr, loadkr) +#define F3_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \ + F_2w(n, rl0, rr0, rl1, rr1, sub, add, eor, sub, dec, \ + loadkm, shiftkr, loadkr) + +#define enc_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ + Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 0, loadkm, shiftkr, loadkr) + +#define dec_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ + Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 1, loadkm, shiftkr, loadkr) + +#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \ + ldr l0, [rin, #(0)]; \ + ldr r0, [rin, #(4)]; \ + convert(l0, rtmp); \ + ldr l1, [rin, #(8)]; \ + convert(r0, rtmp); \ + ldr r1, [rin, #(12)]; \ + convert(l1, rtmp); \ + convert(r1, rtmp); + +#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + convert(l1, rtmp); \ + str l0, [rout, #(0)]; \ + convert(r1, rtmp); \ + str r0, [rout, #(4)]; \ + str l1, [rout, #(8)]; \ + str r1, [rout, #(12)]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads allowed */ + #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0) + + #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0) + + #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0) + + #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0) +#else + /* need to handle unaligned reads by byte reads */ + #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_be(l0, rin, 0, rtmp0); \ + ldr_unaligned_be(r0, rin, 4, rtmp0); \ + ldr_unaligned_be(l1, rin, 8, rtmp0); \ + ldr_unaligned_be(r1, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \ + 2:; + + #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \ + str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \ + str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \ + str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \ + 2:; + + #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_host(l0, rin, 0, rtmp0); \ + ldr_unaligned_host(r0, rin, 4, rtmp0); \ + ldr_unaligned_host(l1, rin, 8, rtmp0); \ + ldr_unaligned_host(r1, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \ + 2:; + + #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \ + str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \ + str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \ + str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 
1:;\ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \ + 2:; +#endif + +.align 3 +.type _gcry_cast5_arm_enc_blk2,%function; + +_gcry_cast5_arm_enc_blk2: + /* input: + * preloaded: CTX + * [RL0, RR0], [RL1, RR1]: src + * output: + * [RR0, RL0], [RR1, RL1]: dst + */ + push {%lr}; + + GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); + mov RMASK, #(0xff << 2); + add Rs2, Rs1, #(0x100 * 4); + + load_km(0); + load_kr(0); + enc_round2(0, F1, RL, RR, load_km, shift_kr, dummy); + enc_round2(1, F2, RR, RL, load_km, shift_kr, dummy); + enc_round2(2, F3, RL, RR, load_km, shift_kr, dummy); + enc_round2(3, F1, RR, RL, load_km, dummy, load_kr); + enc_round2(4, F2, RL, RR, load_km, shift_kr, dummy); + enc_round2(5, F3, RR, RL, load_km, shift_kr, dummy); + enc_round2(6, F1, RL, RR, load_km, shift_kr, dummy); + enc_round2(7, F2, RR, RL, load_km, dummy, load_kr); + enc_round2(8, F3, RL, RR, load_km, shift_kr, dummy); + enc_round2(9, F1, RR, RL, load_km, shift_kr, dummy); + enc_round2(10, F2, RL, RR, load_km, shift_kr, dummy); + enc_round2(11, F3, RR, RL, load_km, dummy, load_kr); + enc_round2(12, F1, RL, RR, load_km, shift_kr, dummy); + enc_round2(13, F2, RR, RL, load_km, shift_kr, dummy); + enc_round2(14, F3, RL, RR, load_km, shift_kr, dummy); + enc_round2(15, F1, RR, RL, dummy, dummy, dummy); + + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); + + pop {%pc}; +.ltorg +.size _gcry_cast5_arm_enc_blk2,.-_gcry_cast5_arm_enc_blk2; + +.align 3 +.globl _gcry_cast5_arm_cfb_dec; +.type _gcry_cast5_arm_cfb_dec,%function; + +_gcry_cast5_arm_cfb_dec: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit) + */ + push {%r1, %r2, %r4-%r11, %ip, %lr}; + + mov %lr, %r3; + + /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ + ldm %r3, {RL0, RR0}; + host_to_be(RL0, RT1); + host_to_be(RR0, RT1); + read_block(%r2, 0, RL1, RR1, %ip); + + /* Update IV, load src[1] and save to iv[0] */ + read_block_host(%r2, 8, %r5, %r6, %r7); + stm %lr, {%r5, %r6}; + + bl _gcry_cast5_arm_enc_blk2; + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r0: dst, %r1: %src */ + pop {%r0, %r1}; + + /* dst = src ^ result */ + read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr); + eor %r5, %r4; + eor %r6, %r3; + eor %r7, %r10; + eor %r8, %r9; + write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_cfb_dec,.-_gcry_cast5_arm_cfb_dec; + +.align 3 +.globl _gcry_cast5_arm_ctr_enc; +.type _gcry_cast5_arm_ctr_enc,%function; + +_gcry_cast5_arm_ctr_enc: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit, big-endian) + */ + push {%r1, %r2, %r4-%r11, %ip, %lr}; + + mov %lr, %r3; + + /* Load IV (big => host endian) */ + read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT1); + + /* Construct IVs */ + adds RR1, RR0, #1; /* +1 */ + adc RL1, RL0, #0; + adds %r6, RR1, #1; /* +2 */ + adc %r5, RL1, #0; + + /* Store new IV (host => big-endian) */ + write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT1); + + bl _gcry_cast5_arm_enc_blk2; + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r0: dst, %r1: %src */ + pop {%r0, %r1}; + + /* XOR key-stream with plaintext */ + read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr); + eor %r5, %r4; + eor %r6, %r3; + eor %r7, %r10; + eor %r8, %r9; + write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_ctr_enc,.-_gcry_cast5_arm_ctr_enc; + +.align 3 +.type 
_gcry_cast5_arm_dec_blk2,%function; + +_gcry_cast5_arm_dec_blk2: + /* input: + * preloaded: CTX + * [RL0, RR0], [RL1, RR1]: src + * output: + * [RR0, RL0], [RR1, RL1]: dst + */ + + GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); + mov RMASK, #(0xff << 2); + add Rs2, Rs1, #(0x100 * 4); + + load_km(15); + load_dec_kr(15); + dec_round2(15, F1, RL, RR, load_km, shift_kr, dummy); + dec_round2(14, F3, RR, RL, load_km, shift_kr, dummy); + dec_round2(13, F2, RL, RR, load_km, shift_kr, dummy); + dec_round2(12, F1, RR, RL, load_km, dummy, load_dec_kr); + dec_round2(11, F3, RL, RR, load_km, shift_kr, dummy); + dec_round2(10, F2, RR, RL, load_km, shift_kr, dummy); + dec_round2(9, F1, RL, RR, load_km, shift_kr, dummy); + dec_round2(8, F3, RR, RL, load_km, dummy, load_dec_kr); + dec_round2(7, F2, RL, RR, load_km, shift_kr, dummy); + dec_round2(6, F1, RR, RL, load_km, shift_kr, dummy); + dec_round2(5, F3, RL, RR, load_km, shift_kr, dummy); + dec_round2(4, F2, RR, RL, load_km, dummy, load_dec_kr); + dec_round2(3, F1, RL, RR, load_km, shift_kr, dummy); + dec_round2(2, F3, RR, RL, load_km, shift_kr, dummy); + dec_round2(1, F2, RL, RR, load_km, shift_kr, dummy); + dec_round2(0, F1, RR, RL, dummy, dummy, dummy); + + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); + + b .Ldec_cbc_tail; +.ltorg +.size _gcry_cast5_arm_dec_blk2,.-_gcry_cast5_arm_dec_blk2; + +.align 3 +.globl _gcry_cast5_arm_cbc_dec; +.type _gcry_cast5_arm_cbc_dec,%function; + +_gcry_cast5_arm_cbc_dec: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit) + */ + push {%r1-%r11, %ip, %lr}; + + read_block2(%r2, RL0, RR0, RL1, RR1, RT0); + + /* dec_blk2 is only used by cbc_dec, jump directly in/out instead + * of function call. */ + b _gcry_cast5_arm_dec_blk2; +.Ldec_cbc_tail: + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r0: dst, %r1: %src, %r2: iv */ + pop {%r0-%r2}; + + /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */ + read_block_host(%r1, 0, %r7, %r8, %r5); + /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */ + ldm %r2, {%r5, %r6}; + + /* out[1] ^= IV+1 */ + eor %r10, %r7; + eor %r9, %r8; + /* out[0] ^= IV */ + eor %r4, %r5; + eor %r3, %r6; + + /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */ + read_block_host(%r1, 8, %r7, %r8, %r5); + /* store IV+2 to iv[0] (aligned). */ + stm %r2, {%r7, %r8}; + + /* store result to dst[0-3]. Might be unaligned. */ + write_block2_host(%r0, %r4, %r3, %r10, %r9, %r5, %r6); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_cbc_dec,.-_gcry_cast5_arm_cbc_dec; + +#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ +#endif /*__ARM_ARCH >= 6*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/cast5.c b/libotr/libgcrypt-1.8.7/cipher/cast5.c new file mode 100644 index 0000000..94dcee7 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cast5.c @@ -0,0 +1,1045 @@ +/* cast5.c - CAST5 cipher (RFC2144) + * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/* Test vectors: + * + * 128-bit key = 01 23 45 67 12 34 56 78 23 45 67 89 34 56 78 9A + * plaintext = 01 23 45 67 89 AB CD EF + * ciphertext = 23 8B 4F E5 84 7E 44 B2 + * + * 80-bit key = 01 23 45 67 12 34 56 78 23 45 + * = 01 23 45 67 12 34 56 78 23 45 00 00 00 00 00 00 + * plaintext = 01 23 45 67 89 AB CD EF + * ciphertext = EB 6A 71 1A 2C 02 27 1B + * + * 40-bit key = 01 23 45 67 12 + * = 01 23 45 67 12 00 00 00 00 00 00 00 00 00 00 00 + * plaintext = 01 23 45 67 89 AB CD EF + * ciphertext = 7A C8 16 D1 6E 9B 30 2E + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "g10lib.h" +#include "types.h" +#include "cipher.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher-selftest.h" + +/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ +#undef USE_AMD64_ASM +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AMD64_ASM 1 +#endif + +/* USE_ARM_ASM indicates whether to use ARM assembly code. */ +#undef USE_ARM_ASM +#if defined(__ARMEL__) +# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS +# define USE_ARM_ASM 1 +# endif +#endif + +#define CAST5_BLOCKSIZE 8 + +typedef struct { + u32 Km[16]; + byte Kr[16]; +#ifdef USE_ARM_ASM + u32 Kr_arm_enc[16 / sizeof(u32)]; + u32 Kr_arm_dec[16 / sizeof(u32)]; +#endif +} CAST5_context; + +static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen); +static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf); +static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf); + + + +#define s1 _gcry_cast5_s1to4[0] +#define s2 _gcry_cast5_s1to4[1] +#define s3 _gcry_cast5_s1to4[2] +#define s4 _gcry_cast5_s1to4[3] + +const u32 _gcry_cast5_s1to4[4][256] = { { +0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949, +0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e, +0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d, +0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, 0x22568e3a, 0xa2d341d0, +0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7, +0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935, +0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d, +0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50, +0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe, +0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, 0xe63d37e0, 0x2a54f6b3, +0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167, +0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291, +0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779, +0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6c2, +0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf, 0x380782d5, 
0xc7fa5cf6, 0x8ac31511, +0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d, +0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5, +0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324, +0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c, +0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc, +0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d, +0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96, +0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a, +0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d, +0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd, +0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6, +0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9, +0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872, +0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814c, +0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e, +0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9, +0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf +}, { +0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651, +0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3, +0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb, +0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, 0x25a1ff41, 0xe180f806, +0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b, +0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359, +0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, 0x361e3084, 0xe4eb573b, +0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c, +0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34, +0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, 0x721d9bfd, 0xa58684bb, +0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd, +0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860, +0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, 0x83ca6b94, 0x2d6ed23b, +0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304, +0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b, +0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf, +0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c, +0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13, +0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, 0x7cbad9a2, 0x2180036f, +0x50d99c08, 0xcb3f4861, 
0xc26bd765, 0x64a3f6ab, 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6, +0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6, +0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, 0xef8579cc, 0xd152de58, +0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906, +0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d, +0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc, 0x301e16e6, 0x273be979, 0xb0ffeaa6, +0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4, +0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6, +0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, 0x145892f5, 0x91584f7f, +0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249, +0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa, +0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9, +0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1 +}, { +0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90, +0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5, +0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e, +0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240, +0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5, +0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b, +0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, 0xc5884a28, 0xccc36f71, +0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04, +0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82, +0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, 0x796fb449, 0x8252dc15, +0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2, +0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176, +0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, 0x96bbb682, 0x93b4b148, +0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc, +0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341, +0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e, +0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51, +0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f, +0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, 0x27627545, 0x825cf47a, +0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b, +0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b, +0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, 0x12deca4d, 0x2c3f8cc5, +0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45, +0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40, 0x7c34671c, 
0x02717ef6, 0x4feb5536, +0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, 0xa2e53f55, 0xb9e6d4bc, +0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0, +0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69, +0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, 0xf1ac2571, 0xcc8239c2, +0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49, +0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d, +0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a, +0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783 +}, { +0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1, +0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf, +0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15, +0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, 0x081b08ca, 0x05170121, +0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25, +0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5, +0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb, +0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5, +0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d, +0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6, +0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23, +0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003, +0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de, 0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6, +0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119, +0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24, +0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, 0x041afa32, 0x1d16625a, +0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79, +0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df, +0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, 0x213d42f6, 0x2c1c7c26, +0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab, +0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7, +0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, 0xcfcbd12f, 0xc1de8417, +0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2, +0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2, +0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a, +0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919, +0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef, +0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb, 0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876, +0x39e4460c, 0x1fda8538, 
0x1987832f, 0xca007367, 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab, +0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04, +0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 0xb657c34d, 0x4edfd282, +0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2 +} }; +static const u32 s5[256] = { +0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f, +0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a, +0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff, +0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02, +0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a, +0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7, +0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9, +0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981, +0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774, +0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, 0xdf65001f, 0x0ec50966, 0xdfdd55bc, 0x29de0655, +0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2, +0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 0x01c94910, +0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1, +0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da, +0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049, +0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f, +0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba, +0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be, +0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3, +0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840, +0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4, +0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2, +0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7, +0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5, +0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e, +0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e, +0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801, +0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad, +0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0, +0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20, +0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8, +0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4 +}; +static const u32 s6[256] = { +0xf6fa8f9d, 
0x2cac6ce1, 0x4ca34867, 0xe2337f7c, 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac, +0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, 0x1ab6a6b8, 0xde5ebe39, 0xf38ff732, 0x8989b138, +0x33f14961, 0xc01937bd, 0xf506c6da, 0xe4625e7e, 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367, +0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98, +0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072, +0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3, +0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd, +0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8, +0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9, +0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54, +0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387, +0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc, +0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf, +0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf, +0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f, +0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289, +0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, 0x20951063, 0x4576698d, 0xb6fad407, 0x592af950, +0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f, +0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b, +0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be, +0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13, +0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976, +0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0, +0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891, +0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da, +0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc, +0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084, +0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25, +0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121, +0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5, +0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd, +0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f +}; +static const u32 s7[256] = { +0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f, +0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de, +0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43, +0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19, +0x2660c200, 0x7565bde4, 0x64241f7a, 
0x8248dca9, 0xc3b3ad66, 0x28136086, 0x0bd8dfa8, 0x356d1cf2, +0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516, +0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, 0x334266ce, 0x8c9341b7, 0xd0d854c0, 0xcb3a6c88, +0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816, +0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756, +0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a, +0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264, +0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688, +0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28, +0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3, +0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7, +0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06, +0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033, +0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a, +0x2711fd60, 0x438050e3, 0x069908a8, 0x3d7fedc4, 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566, +0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509, +0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962, +0x2ec01b9c, 0x15488aa9, 0xd716e740, 0x40055a2c, 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e, +0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c, +0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c, +0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285, +0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301, +0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be, +0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767, +0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647, +0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914, +0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c, +0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3 +}; +static const u32 s8[256] = { +0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5, +0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc, +0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd, +0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d, +0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2, +0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862, +0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc, +0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 0x70108c0c, +0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, 0x9e7edd53, 
0x06918548, 0x58cb7e07, 0x3b74ef2e, +0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039, +0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8, +0x7170c608, 0x2d5e3354, 0xd4de495a, 0x64c6d006, 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42, +0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5, +0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472, +0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225, +0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c, +0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb, +0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054, +0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70, +0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc, +0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c, +0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3, +0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, 0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4, +0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101, +0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f, +0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, 0x81f47c9f, 0xad1163ed, 0xea7b5965, 0x1a00726e, +0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a, +0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c, +0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384, +0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c, +0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82, +0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e +}; + + +#ifdef USE_AMD64_ASM + +/* Assembly implementations of CAST5. */ +extern void _gcry_cast5_amd64_encrypt_block(CAST5_context *c, byte *outbuf, + const byte *inbuf); + +extern void _gcry_cast5_amd64_decrypt_block(CAST5_context *c, byte *outbuf, + const byte *inbuf); + +/* These assembly implementations process four blocks in parallel. */ +extern void _gcry_cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, + const byte *in, byte *iv); + +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +static inline void +call_sysv_fn (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. 
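+ * Under Win64 the incoming arguments arrive in %rcx, %rdx, %r8 and
+ * %r9, while the SysV callee expects them in %rdi, %rsi, %rdx and
+ * %rcx:
+ *   arg1: %rcx -> %rdi    arg3: %r8 -> %rdx
+ *   arg2: %rdx -> %rsi    arg4: %r9 -> %rcx
+ * The "+a", "+D", "+S", "+d" and "+c" constraints place FN and the
+ * arguments into the SysV registers and mark them as modified, and
+ * %r8-%r11 sit in the clobber list because the SysV callee is free
+ * to trash them.  Skipping the save of %xmm6-%xmm15 (non-volatile
+ * under Win64, volatile under SysV) is safe only as long as the
+ * called assembly never touches vector registers.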
*/ + asm volatile ("callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : + : "cc", "memory", "r8", "r9", "r10", "r11"); +} +#endif + +static void +do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_cast5_amd64_encrypt_block, context, outbuf, inbuf, NULL); +#else + _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf); +#endif +} + +static void +do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_cast5_amd64_decrypt_block, context, outbuf, inbuf, NULL); +#else + _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf); +#endif +} + +static void +cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_cast5_amd64_ctr_enc, ctx, out, in, ctr); +#else + _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr); +#endif +} + +static void +cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_cast5_amd64_cbc_dec, ctx, out, in, iv); +#else + _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv); +#endif +} + +static void +cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_cast5_amd64_cfb_dec, ctx, out, in, iv); +#else + _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv); +#endif +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (2*8); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (2*8); +} + +#elif defined(USE_ARM_ASM) + +/* ARM assembly implementations of CAST5. */ +extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf, + const byte *inbuf); + +extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf, + const byte *inbuf); + +/* These assembly implementations process two blocks in parallel. 
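+ * Each call consumes exactly two CAST5_BLOCKSIZE blocks and updates
+ * the counter/IV buffer in place.  Two block states are interleaved
+ * through the rounds so that the S-box loads of one block overlap
+ * the ALU work of the other; the bulk helpers below loop while
+ * nblocks >= 2 and hand an odd trailing block to the single-block
+ * routines.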
*/ +extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out, + const byte *in, byte *iv); + +static void +do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf); +} + +static void +do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf); +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (10*4); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (10*4); +} + +#else /*USE_ARM_ASM*/ + +#define F1(D,m,r) ( (I = ((m) + (D))), (I=rol(I,(r))), \ + (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) ) +#define F2(D,m,r) ( (I = ((m) ^ (D))), (I=rol(I,(r))), \ + (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]) ) +#define F3(D,m,r) ( (I = ((m) - (D))), (I=rol(I,(r))), \ + (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]) ) + +static void +do_encrypt_block( CAST5_context *c, byte *outbuf, const byte *inbuf ) +{ + u32 l, r, t; + u32 I; /* used by the Fx macros */ + u32 *Km; + byte *Kr; + + Km = c->Km; + Kr = c->Kr; + + /* (L0,R0) <-- (m1...m64). (Split the plaintext into left and + * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.) + */ + l = buf_get_be32(inbuf + 0); + r = buf_get_be32(inbuf + 4); + + /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows: + * Li = Ri-1; + * Ri = Li-1 ^ f(Ri-1,Kmi,Kri), where f is defined in Section 2.2 + * Rounds 1, 4, 7, 10, 13, and 16 use f function Type 1. + * Rounds 2, 5, 8, 11, and 14 use f function Type 2. + * Rounds 3, 6, 9, 12, and 15 use f function Type 3. + */ + + t = l; l = r; r = t ^ F1(r, Km[ 0], Kr[ 0]); + t = l; l = r; r = t ^ F2(r, Km[ 1], Kr[ 1]); + t = l; l = r; r = t ^ F3(r, Km[ 2], Kr[ 2]); + t = l; l = r; r = t ^ F1(r, Km[ 3], Kr[ 3]); + t = l; l = r; r = t ^ F2(r, Km[ 4], Kr[ 4]); + t = l; l = r; r = t ^ F3(r, Km[ 5], Kr[ 5]); + t = l; l = r; r = t ^ F1(r, Km[ 6], Kr[ 6]); + t = l; l = r; r = t ^ F2(r, Km[ 7], Kr[ 7]); + t = l; l = r; r = t ^ F3(r, Km[ 8], Kr[ 8]); + t = l; l = r; r = t ^ F1(r, Km[ 9], Kr[ 9]); + t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); + t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); + t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); + t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); + t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); + t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); + + /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and + * concatenate to form the ciphertext.) 
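+   *
+   * The final swap is the only asymmetry between the two directions:
+   * decryption below runs the identical Feistel network with the
+   * (Km, Kr) pairs applied in reverse order, 15 down to 0.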
*/ + buf_put_be32(outbuf + 0, r); + buf_put_be32(outbuf + 4, l); +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (20+4*sizeof(void*)); +} + + +static void +do_decrypt_block (CAST5_context *c, byte *outbuf, const byte *inbuf ) +{ + u32 l, r, t; + u32 I; + u32 *Km; + byte *Kr; + + Km = c->Km; + Kr = c->Kr; + + l = buf_get_be32(inbuf + 0); + r = buf_get_be32(inbuf + 4); + + t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); + t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); + t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); + t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); + t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); + t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); + t = l; l = r; r = t ^ F1(r, Km[ 9], Kr[ 9]); + t = l; l = r; r = t ^ F3(r, Km[ 8], Kr[ 8]); + t = l; l = r; r = t ^ F2(r, Km[ 7], Kr[ 7]); + t = l; l = r; r = t ^ F1(r, Km[ 6], Kr[ 6]); + t = l; l = r; r = t ^ F3(r, Km[ 5], Kr[ 5]); + t = l; l = r; r = t ^ F2(r, Km[ 4], Kr[ 4]); + t = l; l = r; r = t ^ F1(r, Km[ 3], Kr[ 3]); + t = l; l = r; r = t ^ F3(r, Km[ 2], Kr[ 2]); + t = l; l = r; r = t ^ F2(r, Km[ 1], Kr[ 1]); + t = l; l = r; r = t ^ F1(r, Km[ 0], Kr[ 0]); + + buf_put_be32(outbuf + 0, r); + buf_put_be32(outbuf + 4, l); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (20+4*sizeof(void*)); +} + +#endif /*!USE_ARM_ASM*/ + + +/* Bulk encryption of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size CAST5_BLOCKSIZE. */ +void +_gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + CAST5_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[CAST5_BLOCKSIZE]; + int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE; + + int i; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 8 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 4; + outbuf += 4 * CAST5_BLOCKSIZE; + inbuf += 4 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_cast5_arm_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 2; + outbuf += 2 * CAST5_BLOCKSIZE; + inbuf += 2 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + do_encrypt_block(ctx, tmpbuf, ctr); + /* XOR the input with the encrypted counter and store in output. */ + buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE); + outbuf += CAST5_BLOCKSIZE; + inbuf += CAST5_BLOCKSIZE; + /* Increment the counter. */ + for (i = CAST5_BLOCKSIZE; i > 0; i--) + { + ctr[i-1]++; + if (ctr[i-1]) + break; + } + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CBC mode. This function is only + intended for the bulk encryption feature of cipher.c. 
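+   IV is expected to be of size CAST5_BLOCKSIZE.  CBC decryption is
+   inherently parallel because each output block depends only on
+   ciphertext, P[i] = D(C[i]) ^ C[i-1] with C[-1] = IV, which is what
+   permits the 4-way (AMD64) and 2-way (ARM) assembly paths below.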
*/ +void +_gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + CAST5_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char savebuf[CAST5_BLOCKSIZE]; + int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 8 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 4; + outbuf += 4 * CAST5_BLOCKSIZE; + inbuf += 4 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_cast5_arm_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * CAST5_BLOCKSIZE; + inbuf += 2 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + do_decrypt_block (ctx, savebuf, inbuf); + + buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE); + inbuf += CAST5_BLOCKSIZE; + outbuf += CAST5_BLOCKSIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + CAST5_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 8 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 4; + outbuf += 4 * CAST5_BLOCKSIZE; + inbuf += 4 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_cast5_arm_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * CAST5_BLOCKSIZE; + inbuf += 2 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + do_encrypt_block(ctx, iv, iv); + buf_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE); + outbuf += CAST5_BLOCKSIZE; + inbuf += CAST5_BLOCKSIZE; + } + + _gcry_burn_stack(burn_stack_depth); +} + + +/* Run the self-tests for CAST5-CTR, tests IV increment of bulk CTR + encryption. Returns NULL on success. */ +static const char * +selftest_ctr (void) +{ + const int nblocks = 4+1; + const int blocksize = CAST5_BLOCKSIZE; + const int context_size = sizeof(CAST5_context); + + return _gcry_selftest_helper_ctr("CAST5", &cast_setkey, + &encrypt_block, &_gcry_cast5_ctr_enc, nblocks, blocksize, + context_size); +} + + +/* Run the self-tests for CAST5-CBC, tests bulk CBC decryption. + Returns NULL on success. 
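+   The 4+2 block count pushes one full four-block chunk through the
+   AMD64 bulk path and leaves a two-block remainder for the generic
+   loop; on ARM the same count exercises three two-block chunks.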
*/ +static const char * +selftest_cbc (void) +{ + const int nblocks = 4+2; + const int blocksize = CAST5_BLOCKSIZE; + const int context_size = sizeof(CAST5_context); + + return _gcry_selftest_helper_cbc("CAST5", &cast_setkey, + &encrypt_block, &_gcry_cast5_cbc_dec, nblocks, blocksize, + context_size); +} + + +/* Run the self-tests for CAST5-CFB, tests bulk CBC decryption. + Returns NULL on success. */ +static const char * +selftest_cfb (void) +{ + const int nblocks = 4+2; + const int blocksize = CAST5_BLOCKSIZE; + const int context_size = sizeof(CAST5_context); + + return _gcry_selftest_helper_cfb("CAST5", &cast_setkey, + &encrypt_block, &_gcry_cast5_cfb_dec, nblocks, blocksize, + context_size); +} + + +static const char* +selftest(void) +{ + CAST5_context c; + static const byte key[16] = + { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78, + 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A }; + static const byte plain[8] = + { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF }; + static const byte cipher[8] = + { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 }; + byte buffer[8]; + const char *r; + + cast_setkey( &c, key, 16 ); + encrypt_block( &c, buffer, plain ); + if( memcmp( buffer, cipher, 8 ) ) + return "1"; + decrypt_block( &c, buffer, buffer ); + if( memcmp( buffer, plain, 8 ) ) + return "2"; + +#if 0 /* full maintenance test */ + { + int i; + byte a0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78, + 0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A }; + byte b0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78, + 0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A }; + byte a1[16] = { 0xEE,0xA9,0xD0,0xA2,0x49,0xFD,0x3B,0xA6, + 0xB3,0x43,0x6F,0xB8,0x9D,0x6D,0xCA,0x92 }; + byte b1[16] = { 0xB2,0xC9,0x5E,0xB0,0x0C,0x31,0xAD,0x71, + 0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E }; + + for(i=0; i < 1000000; i++ ) { + cast_setkey( &c, b0, 16 ); + encrypt_block( &c, a0, a0 ); + encrypt_block( &c, a0+8, a0+8 ); + cast_setkey( &c, a0, 16 ); + encrypt_block( &c, b0, b0 ); + encrypt_block( &c, b0+8, b0+8 ); + } + if( memcmp( a0, a1, 16 ) || memcmp( b0, b1, 16 ) ) + return "3"; + + } +#endif + + if ( (r = selftest_cbc ()) ) + return r; + + if ( (r = selftest_cfb ()) ) + return r; + + if ( (r = selftest_ctr ()) ) + return r; + + return NULL; +} + + +static void +key_schedule( u32 *x, u32 *z, u32 *k ) +{ + +#define xi(i) ((x[(i)/4] >> (8*(3-((i)%4)))) & 0xff) +#define zi(i) ((z[(i)/4] >> (8*(3-((i)%4)))) & 0xff) + + z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)]; + z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)]; + z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)]; + z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)]; + k[0] = s5[zi( 8)]^s6[zi( 9)]^s7[zi( 7)]^s8[zi( 6)]^s5[zi( 2)]; + k[1] = s5[zi(10)]^s6[zi(11)]^s7[zi( 5)]^s8[zi( 4)]^s6[zi( 6)]; + k[2] = s5[zi(12)]^s6[zi(13)]^s7[zi( 3)]^s8[zi( 2)]^s7[zi( 9)]; + k[3] = s5[zi(14)]^s6[zi(15)]^s7[zi( 1)]^s8[zi( 0)]^s8[zi(12)]; + + x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)]; + x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)]; + x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)]; + x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)]; + k[4] = s5[xi( 3)]^s6[xi( 2)]^s7[xi(12)]^s8[xi(13)]^s5[xi( 8)]; + k[5] = s5[xi( 1)]^s6[xi( 0)]^s7[xi(14)]^s8[xi(15)]^s6[xi(13)]; + k[6] = s5[xi( 7)]^s6[xi( 6)]^s7[xi( 8)]^s8[xi( 9)]^s7[xi( 3)]; + k[7] = s5[xi( 5)]^s6[xi( 4)]^s7[xi(10)]^s8[xi(11)]^s8[xi( 7)]; + + z[0] = x[0] ^ 
s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)]; + z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)]; + z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)]; + z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)]; + k[8] = s5[zi( 3)]^s6[zi( 2)]^s7[zi(12)]^s8[zi(13)]^s5[zi( 9)]; + k[9] = s5[zi( 1)]^s6[zi( 0)]^s7[zi(14)]^s8[zi(15)]^s6[zi(12)]; + k[10]= s5[zi( 7)]^s6[zi( 6)]^s7[zi( 8)]^s8[zi( 9)]^s7[zi( 2)]; + k[11]= s5[zi( 5)]^s6[zi( 4)]^s7[zi(10)]^s8[zi(11)]^s8[zi( 6)]; + + x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)]; + x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)]; + x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)]; + x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)]; + k[12]= s5[xi( 8)]^s6[xi( 9)]^s7[xi( 7)]^s8[xi( 6)]^s5[xi( 3)]; + k[13]= s5[xi(10)]^s6[xi(11)]^s7[xi( 5)]^s8[xi( 4)]^s6[xi( 7)]; + k[14]= s5[xi(12)]^s6[xi(13)]^s7[xi( 3)]^s8[xi( 2)]^s7[xi( 8)]; + k[15]= s5[xi(14)]^s6[xi(15)]^s7[xi( 1)]^s8[xi( 0)]^s8[xi(13)]; + +#undef xi +#undef zi +} + + +static gcry_err_code_t +do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen ) +{ + static int initialized; + static const char* selftest_failed; + int i; + u32 x[4]; + u32 z[4]; + u32 k[16]; + + if( !initialized ) + { + initialized = 1; + selftest_failed = selftest(); + if( selftest_failed ) + log_error ("CAST5 selftest failed (%s).\n", selftest_failed ); + } + if( selftest_failed ) + return GPG_ERR_SELFTEST_FAILED; + + if( keylen != 16 ) + return GPG_ERR_INV_KEYLEN; + + x[0] = buf_get_be32(key + 0); + x[1] = buf_get_be32(key + 4); + x[2] = buf_get_be32(key + 8); + x[3] = buf_get_be32(key + 12); + + key_schedule( x, z, k ); + for(i=0; i < 16; i++ ) + c->Km[i] = k[i]; + key_schedule( x, z, k ); + for(i=0; i < 16; i++ ) + c->Kr[i] = k[i] & 0x1f; + +#ifdef USE_ARM_ASM + for (i = 0; i < 4; i++) + { + byte Kr_arm[4]; + + /* Convert rotate left to rotate right and add shift left + * by 2. */ + Kr_arm[0] = ((32 - c->Kr[4 * i + 0]) - 2) & 0x1f; + Kr_arm[1] = ((32 - c->Kr[4 * i + 1]) - 2) & 0x1f; + Kr_arm[2] = ((32 - c->Kr[4 * i + 2]) - 2) & 0x1f; + Kr_arm[3] = ((32 - c->Kr[4 * i + 3]) - 2) & 0x1f; + + /* Endian friendly store. */ + c->Kr_arm_enc[i] = Kr_arm[0] | + (Kr_arm[1] << 8) | + (Kr_arm[2] << 16) | + (Kr_arm[3] << 24); + c->Kr_arm_dec[i] = Kr_arm[3] | + (Kr_arm[2] << 8) | + (Kr_arm[1] << 16) | + (Kr_arm[0] << 24); + + wipememory(Kr_arm, sizeof(Kr_arm)); + } +#endif + + wipememory(x, sizeof x); + wipememory(z, sizeof z); + wipememory(k, sizeof k); + +#undef xi +#undef zi + return GPG_ERR_NO_ERROR; +} + +static gcry_err_code_t +cast_setkey (void *context, const byte *key, unsigned keylen ) +{ + CAST5_context *c = (CAST5_context *) context; + gcry_err_code_t rc = do_cast_setkey (c, key, keylen); + return rc; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_cast5 = + { + GCRY_CIPHER_CAST5, {0, 0}, + "CAST5", NULL, NULL, CAST5_BLOCKSIZE, 128, sizeof (CAST5_context), + cast_setkey, encrypt_block, decrypt_block + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/chacha20-armv7-neon.S b/libotr/libgcrypt-1.8.7/cipher/chacha20-armv7-neon.S new file mode 100644 index 0000000..c1971fc --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/chacha20-armv7-neon.S @@ -0,0 +1,750 @@ +/* chacha20-armv7-neon.S - ARM/NEON accelerated chacha20 blocks function + * + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on public domain implementation by Andrew Moon at + * https://github.com/floodyberry/chacha-opt + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) && defined(USE_CHACHA20) + +.syntax unified +.fpu neon +.arm + +#define UNALIGNED_STMIA8(ptr, l0, l1, l2, l3, l4, l5, l6, l7) \ + tst ptr, #3; \ + beq 1f; \ + vpush {d0-d3}; \ + vmov s0, l0; \ + vmov s1, l1; \ + vmov s2, l2; \ + vmov s3, l3; \ + vmov s4, l4; \ + vmov s5, l5; \ + vmov s6, l6; \ + vmov s7, l7; \ + vst1.32 {d0-d3}, [ptr]; \ + add ptr, #32; \ + vpop {d0-d3}; \ + b 2f; \ + 1: stmia ptr!, {l0-l7}; \ + 2: ; + +#define UNALIGNED_LDMIA4(ptr, l0, l1, l2, l3) \ + tst ptr, #3; \ + beq 1f; \ + vpush {d0-d1}; \ + vld1.32 {d0-d1}, [ptr]; \ + add ptr, #16; \ + vmov l0, s0; \ + vmov l1, s1; \ + vmov l2, s2; \ + vmov l3, s3; \ + vpop {d0-d1}; \ + b 2f; \ + 1: ldmia ptr!, {l0-l3}; \ + 2: ; + +.text + +.globl _gcry_chacha20_armv7_neon_blocks +.type _gcry_chacha20_armv7_neon_blocks,%function; +_gcry_chacha20_armv7_neon_blocks: +.Lchacha_blocks_neon_local: + tst r3, r3 + beq .Lchacha_blocks_neon_nobytes + vstmdb sp!, {q4,q5,q6,q7} + stmfd sp!, {r4-r12, r14} + mov r8, sp + sub sp, sp, #196 + and sp, sp, #0xffffffe0 + str r0, [sp, #60] + str r1, [sp, #48] + str r2, [sp, #40] + str r3, [sp, #52] + str r8, [sp, #192] + add r1, sp, #64 + ldmia r0!, {r4-r11} + stmia r1!, {r4-r11} + ldmia r0!, {r4-r11} + stmia r1!, {r4-r11} + mov r4, #20 + str r4, [sp, #44] + cmp r3, #256 + blo .Lchacha_blocks_neon_mainloop2 +.Lchacha_blocks_neon_mainloop1: + ldr r0, [sp, #44] + str r0, [sp, #0] + add r1, sp, #(64) + mov r2, #1 + veor q12, q12 + vld1.32 {q0,q1}, [r1,:128]! 
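+ /* Each 256-byte iteration computes four blocks: q0-q3, q4-q7 and q8-q11 carry three states whose 64-bit counters are bumped by 1, 2 and 3 via the q12 increment, while a fourth block (counter +0, kept at sp+64) is computed in the scalar registers, interleaved with the NEON code so scalar and vector work can overlap. */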
+ vld1.32 {q2,q3}, [r1,:128] + vmov.32 d24[0], r2 + vadd.u64 q3, q3, q12 + vmov q4, q0 + vmov q5, q1 + vmov q6, q2 + vadd.u64 q7, q3, q12 + vmov q8, q0 + vmov q9, q1 + vmov q10, q2 + vadd.u64 q11, q7, q12 + add r0, sp, #64 + ldm r0, {r0-r12} + ldr r14, [sp, #(64 +60)] + str r6, [sp, #8] + str r11, [sp, #12] + str r14, [sp, #28] + ldr r11, [sp, #(64 +52)] + ldr r14, [sp, #(64 +56)] +.Lchacha_blocks_neon_rounds1: + ldr r6, [sp, #0] + vadd.i32 q0, q0, q1 + add r0, r0, r4 + vadd.i32 q4, q4, q5 + add r1, r1, r5 + vadd.i32 q8, q8, q9 + eor r12, r12, r0 + veor q12, q3, q0 + eor r11, r11, r1 + veor q13, q7, q4 + ror r12, r12, #16 + veor q14, q11, q8 + ror r11, r11, #16 + vrev32.16 q3, q12 + subs r6, r6, #2 + vrev32.16 q7, q13 + add r8, r8, r12 + vrev32.16 q11, q14 + add r9, r9, r11 + vadd.i32 q2, q2, q3 + eor r4, r4, r8 + vadd.i32 q6, q6, q7 + eor r5, r5, r9 + vadd.i32 q10, q10, q11 + str r6, [sp, #0] + veor q12, q1, q2 + ror r4, r4, #20 + veor q13, q5, q6 + ror r5, r5, #20 + veor q14, q9, q10 + add r0, r0, r4 + vshl.i32 q1, q12, #12 + add r1, r1, r5 + vshl.i32 q5, q13, #12 + ldr r6, [sp, #8] + vshl.i32 q9, q14, #12 + eor r12, r12, r0 + vsri.u32 q1, q12, #20 + eor r11, r11, r1 + vsri.u32 q5, q13, #20 + ror r12, r12, #24 + vsri.u32 q9, q14, #20 + ror r11, r11, #24 + vadd.i32 q0, q0, q1 + add r8, r8, r12 + vadd.i32 q4, q4, q5 + add r9, r9, r11 + vadd.i32 q8, q8, q9 + eor r4, r4, r8 + veor q12, q3, q0 + eor r5, r5, r9 + veor q13, q7, q4 + str r11, [sp, #20] + veor q14, q11, q8 + ror r4, r4, #25 + vshl.i32 q3, q12, #8 + ror r5, r5, #25 + vshl.i32 q7, q13, #8 + str r4, [sp, #4] + vshl.i32 q11, q14, #8 + ldr r4, [sp, #28] + vsri.u32 q3, q12, #24 + add r2, r2, r6 + vsri.u32 q7, q13, #24 + add r3, r3, r7 + vsri.u32 q11, q14, #24 + ldr r11, [sp, #12] + vadd.i32 q2, q2, q3 + eor r14, r14, r2 + vadd.i32 q6, q6, q7 + eor r4, r4, r3 + vadd.i32 q10, q10, q11 + ror r14, r14, #16 + veor q12, q1, q2 + ror r4, r4, #16 + veor q13, q5, q6 + add r10, r10, r14 + veor q14, q9, q10 + add r11, r11, r4 + vshl.i32 q1, q12, #7 + eor r6, r6, r10 + vshl.i32 q5, q13, #7 + eor r7, r7, r11 + vshl.i32 q9, q14, #7 + ror r6, r6, #20 + vsri.u32 q1, q12, #25 + ror r7, r7, #20 + vsri.u32 q5, q13, #25 + add r2, r2, r6 + vsri.u32 q9, q14, #25 + add r3, r3, r7 + vext.32 q3, q3, q3, #3 + eor r14, r14, r2 + vext.32 q7, q7, q7, #3 + eor r4, r4, r3 + vext.32 q11, q11, q11, #3 + ror r14, r14, #24 + vext.32 q1, q1, q1, #1 + ror r4, r4, #24 + vext.32 q5, q5, q5, #1 + add r10, r10, r14 + vext.32 q9, q9, q9, #1 + add r11, r11, r4 + vext.32 q2, q2, q2, #2 + eor r6, r6, r10 + vext.32 q6, q6, q6, #2 + eor r7, r7, r11 + vext.32 q10, q10, q10, #2 + ror r6, r6, #25 + vadd.i32 q0, q0, q1 + ror r7, r7, #25 + vadd.i32 q4, q4, q5 + add r0, r0, r5 + vadd.i32 q8, q8, q9 + add r1, r1, r6 + veor q12, q3, q0 + eor r4, r4, r0 + veor q13, q7, q4 + eor r12, r12, r1 + veor q14, q11, q8 + ror r4, r4, #16 + vrev32.16 q3, q12 + ror r12, r12, #16 + vrev32.16 q7, q13 + add r10, r10, r4 + vrev32.16 q11, q14 + add r11, r11, r12 + vadd.i32 q2, q2, q3 + eor r5, r5, r10 + vadd.i32 q6, q6, q7 + eor r6, r6, r11 + vadd.i32 q10, q10, q11 + ror r5, r5, #20 + veor q12, q1, q2 + ror r6, r6, #20 + veor q13, q5, q6 + add r0, r0, r5 + veor q14, q9, q10 + add r1, r1, r6 + vshl.i32 q1, q12, #12 + eor r4, r4, r0 + vshl.i32 q5, q13, #12 + eor r12, r12, r1 + vshl.i32 q9, q14, #12 + ror r4, r4, #24 + vsri.u32 q1, q12, #20 + ror r12, r12, #24 + vsri.u32 q5, q13, #20 + add r10, r10, r4 + vsri.u32 q9, q14, #20 + add r11, r11, r12 + vadd.i32 q0, q0, q1 + eor r5, r5, r10 + vadd.i32 q4, q4, q5 + 
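/* Quarter-round rotations on the NEON side: <<<16 via vrev32.16, and <<<12, <<<8, <<<7 via vshl.i32 plus vsri.u32; the interleaved scalar half uses plain ror with the complementary right-rotate counts. */ +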
eor r6, r6, r11 + vadd.i32 q8, q8, q9 + str r11, [sp, #12] + veor q12, q3, q0 + ror r5, r5, #25 + veor q13, q7, q4 + ror r6, r6, #25 + veor q14, q11, q8 + str r4, [sp, #28] + vshl.i32 q3, q12, #8 + ldr r4, [sp, #4] + vshl.i32 q7, q13, #8 + add r2, r2, r7 + vshl.i32 q11, q14, #8 + add r3, r3, r4 + vsri.u32 q3, q12, #24 + ldr r11, [sp, #20] + vsri.u32 q7, q13, #24 + eor r11, r11, r2 + vsri.u32 q11, q14, #24 + eor r14, r14, r3 + vadd.i32 q2, q2, q3 + ror r11, r11, #16 + vadd.i32 q6, q6, q7 + ror r14, r14, #16 + vadd.i32 q10, q10, q11 + add r8, r8, r11 + veor q12, q1, q2 + add r9, r9, r14 + veor q13, q5, q6 + eor r7, r7, r8 + veor q14, q9, q10 + eor r4, r4, r9 + vshl.i32 q1, q12, #7 + ror r7, r7, #20 + vshl.i32 q5, q13, #7 + ror r4, r4, #20 + vshl.i32 q9, q14, #7 + str r6, [sp, #8] + vsri.u32 q1, q12, #25 + add r2, r2, r7 + vsri.u32 q5, q13, #25 + add r3, r3, r4 + vsri.u32 q9, q14, #25 + eor r11, r11, r2 + vext.32 q3, q3, q3, #1 + eor r14, r14, r3 + vext.32 q7, q7, q7, #1 + ror r11, r11, #24 + vext.32 q11, q11, q11, #1 + ror r14, r14, #24 + vext.32 q1, q1, q1, #3 + add r8, r8, r11 + vext.32 q5, q5, q5, #3 + add r9, r9, r14 + vext.32 q9, q9, q9, #3 + eor r7, r7, r8 + vext.32 q2, q2, q2, #2 + eor r4, r4, r9 + vext.32 q6, q6, q6, #2 + ror r7, r7, #25 + vext.32 q10, q10, q10, #2 + ror r4, r4, #25 + bne .Lchacha_blocks_neon_rounds1 + str r8, [sp, #0] + str r9, [sp, #4] + str r10, [sp, #8] + str r12, [sp, #16] + str r11, [sp, #20] + str r14, [sp, #24] + add r9, sp, #64 + vld1.32 {q12,q13}, [r9,:128]! + ldr r12, [sp, #48] + vld1.32 {q14,q15}, [r9,:128] + ldr r14, [sp, #40] + vadd.i32 q0, q0, q12 + ldr r8, [sp, #(64 +0)] + vadd.i32 q4, q4, q12 + ldr r9, [sp, #(64 +4)] + vadd.i32 q8, q8, q12 + ldr r10, [sp, #(64 +8)] + vadd.i32 q1, q1, q13 + ldr r11, [sp, #(64 +12)] + vadd.i32 q5, q5, q13 + add r0, r0, r8 + vadd.i32 q9, q9, q13 + add r1, r1, r9 + vadd.i32 q2, q2, q14 + add r2, r2, r10 + vadd.i32 q6, q6, q14 + ldr r8, [sp, #(64 +16)] + vadd.i32 q10, q10, q14 + add r3, r3, r11 + veor q14, q14, q14 + ldr r9, [sp, #(64 +20)] + mov r11, #1 + add r4, r4, r8 + vmov.32 d28[0], r11 + ldr r10, [sp, #(64 +24)] + vadd.u64 q12, q14, q15 + add r5, r5, r9 + vadd.u64 q13, q14, q12 + ldr r11, [sp, #(64 +28)] + vadd.u64 q14, q14, q13 + add r6, r6, r10 + vadd.i32 q3, q3, q12 + tst r12, r12 + vadd.i32 q7, q7, q13 + add r7, r7, r11 + vadd.i32 q11, q11, q14 + beq .Lchacha_blocks_neon_nomessage11 + UNALIGNED_LDMIA4(r12, r8, r9, r10, r11) + tst r12, r12 + eor r0, r0, r8 + eor r1, r1, r9 + eor r2, r2, r10 + ldr r8, [r12, #0] + eor r3, r3, r11 + ldr r9, [r12, #4] + eor r4, r4, r8 + ldr r10, [r12, #8] + eor r5, r5, r9 + ldr r11, [r12, #12] + eor r6, r6, r10 + add r12, r12, #16 + eor r7, r7, r11 +.Lchacha_blocks_neon_nomessage11: + UNALIGNED_STMIA8(r14, r0, r1, r2, r3, r4, r5, r6, r7) + tst r12, r12 + ldm sp, {r0-r7} + ldr r8, [sp, #(64 +32)] + ldr r9, [sp, #(64 +36)] + ldr r10, [sp, #(64 +40)] + ldr r11, [sp, #(64 +44)] + add r0, r0, r8 + add r1, r1, r9 + add r2, r2, r10 + ldr r8, [sp, #(64 +48)] + add r3, r3, r11 + ldr r9, [sp, #(64 +52)] + add r4, r4, r8 + ldr r10, [sp, #(64 +56)] + add r5, r5, r9 + ldr r11, [sp, #(64 +60)] + add r6, r6, r10 + adds r8, r8, #4 + add r7, r7, r11 + adc r9, r9, #0 + str r8, [sp, #(64 +48)] + tst r12, r12 + str r9, [sp, #(64 +52)] + beq .Lchacha_blocks_neon_nomessage12 + UNALIGNED_LDMIA4(r12, r8, r9, r10, r11) + tst r12, r12 + eor r0, r0, r8 + eor r1, r1, r9 + eor r2, r2, r10 + ldr r8, [r12, #0] + eor r3, r3, r11 + ldr r9, [r12, #4] + eor r4, r4, r8 + ldr r10, [r12, #8] + eor r5, r5, r9 + ldr r11, 
[r12, #12] + eor r6, r6, r10 + add r12, r12, #16 + eor r7, r7, r11 +.Lchacha_blocks_neon_nomessage12: + UNALIGNED_STMIA8(r14, r0, r1, r2, r3, r4, r5, r6, r7) + tst r12, r12 + beq .Lchacha_blocks_neon_nomessage13 + vld1.32 {q12,q13}, [r12]! + vld1.32 {q14,q15}, [r12]! + veor q0, q0, q12 + veor q1, q1, q13 + veor q2, q2, q14 + veor q3, q3, q15 +.Lchacha_blocks_neon_nomessage13: + vst1.32 {q0,q1}, [r14]! + vst1.32 {q2,q3}, [r14]! + beq .Lchacha_blocks_neon_nomessage14 + vld1.32 {q12,q13}, [r12]! + vld1.32 {q14,q15}, [r12]! + veor q4, q4, q12 + veor q5, q5, q13 + veor q6, q6, q14 + veor q7, q7, q15 +.Lchacha_blocks_neon_nomessage14: + vst1.32 {q4,q5}, [r14]! + vst1.32 {q6,q7}, [r14]! + beq .Lchacha_blocks_neon_nomessage15 + vld1.32 {q12,q13}, [r12]! + vld1.32 {q14,q15}, [r12]! + veor q8, q8, q12 + veor q9, q9, q13 + veor q10, q10, q14 + veor q11, q11, q15 +.Lchacha_blocks_neon_nomessage15: + vst1.32 {q8,q9}, [r14]! + vst1.32 {q10,q11}, [r14]! + str r12, [sp, #48] + str r14, [sp, #40] + ldr r3, [sp, #52] + sub r3, r3, #256 + cmp r3, #256 + str r3, [sp, #52] + bhs .Lchacha_blocks_neon_mainloop1 + tst r3, r3 + beq .Lchacha_blocks_neon_done +.Lchacha_blocks_neon_mainloop2: + ldr r3, [sp, #52] + ldr r1, [sp, #48] + cmp r3, #64 + bhs .Lchacha_blocks_neon_noswap1 + add r4, sp, #128 + mov r5, r4 + tst r1, r1 + beq .Lchacha_blocks_neon_nocopy1 +.Lchacha_blocks_neon_copyinput1: + subs r3, r3, #1 + ldrb r0, [r1], #1 + strb r0, [r4], #1 + bne .Lchacha_blocks_neon_copyinput1 + str r5, [sp, #48] +.Lchacha_blocks_neon_nocopy1: + ldr r4, [sp, #40] + str r5, [sp, #40] + str r4, [sp, #56] +.Lchacha_blocks_neon_noswap1: + ldr r0, [sp, #44] + str r0, [sp, #0] + add r0, sp, #64 + ldm r0, {r0-r12} + ldr r14, [sp, #(64 +60)] + str r6, [sp, #8] + str r11, [sp, #12] + str r14, [sp, #28] + ldr r11, [sp, #(64 +52)] + ldr r14, [sp, #(64 +56)] +.Lchacha_blocks_neon_rounds2: + ldr r6, [sp, #0] + add r0, r0, r4 + add r1, r1, r5 + eor r12, r12, r0 + eor r11, r11, r1 + ror r12, r12, #16 + ror r11, r11, #16 + subs r6, r6, #2 + add r8, r8, r12 + add r9, r9, r11 + eor r4, r4, r8 + eor r5, r5, r9 + str r6, [sp, #0] + ror r4, r4, #20 + ror r5, r5, #20 + add r0, r0, r4 + add r1, r1, r5 + ldr r6, [sp, #8] + eor r12, r12, r0 + eor r11, r11, r1 + ror r12, r12, #24 + ror r11, r11, #24 + add r8, r8, r12 + add r9, r9, r11 + eor r4, r4, r8 + eor r5, r5, r9 + str r11, [sp, #20] + ror r4, r4, #25 + ror r5, r5, #25 + str r4, [sp, #4] + ldr r4, [sp, #28] + add r2, r2, r6 + add r3, r3, r7 + ldr r11, [sp, #12] + eor r14, r14, r2 + eor r4, r4, r3 + ror r14, r14, #16 + ror r4, r4, #16 + add r10, r10, r14 + add r11, r11, r4 + eor r6, r6, r10 + eor r7, r7, r11 + ror r6, r6, #20 + ror r7, r7, #20 + add r2, r2, r6 + add r3, r3, r7 + eor r14, r14, r2 + eor r4, r4, r3 + ror r14, r14, #24 + ror r4, r4, #24 + add r10, r10, r14 + add r11, r11, r4 + eor r6, r6, r10 + eor r7, r7, r11 + ror r6, r6, #25 + ror r7, r7, #25 + add r0, r0, r5 + add r1, r1, r6 + eor r4, r4, r0 + eor r12, r12, r1 + ror r4, r4, #16 + ror r12, r12, #16 + add r10, r10, r4 + add r11, r11, r12 + eor r5, r5, r10 + eor r6, r6, r11 + ror r5, r5, #20 + ror r6, r6, #20 + add r0, r0, r5 + add r1, r1, r6 + eor r4, r4, r0 + eor r12, r12, r1 + ror r4, r4, #24 + ror r12, r12, #24 + add r10, r10, r4 + add r11, r11, r12 + eor r5, r5, r10 + eor r6, r6, r11 + str r11, [sp, #12] + ror r5, r5, #25 + ror r6, r6, #25 + str r4, [sp, #28] + ldr r4, [sp, #4] + add r2, r2, r7 + add r3, r3, r4 + ldr r11, [sp, #20] + eor r11, r11, r2 + eor r14, r14, r3 + ror r11, r11, #16 + ror r14, r14, #16 + add r8, r8, r11 + 
add r9, r9, r14 + eor r7, r7, r8 + eor r4, r4, r9 + ror r7, r7, #20 + ror r4, r4, #20 + str r6, [sp, #8] + add r2, r2, r7 + add r3, r3, r4 + eor r11, r11, r2 + eor r14, r14, r3 + ror r11, r11, #24 + ror r14, r14, #24 + add r8, r8, r11 + add r9, r9, r14 + eor r7, r7, r8 + eor r4, r4, r9 + ror r7, r7, #25 + ror r4, r4, #25 + bne .Lchacha_blocks_neon_rounds2 + str r8, [sp, #0] + str r9, [sp, #4] + str r10, [sp, #8] + str r12, [sp, #16] + str r11, [sp, #20] + str r14, [sp, #24] + ldr r12, [sp, #48] + ldr r14, [sp, #40] + ldr r8, [sp, #(64 +0)] + ldr r9, [sp, #(64 +4)] + ldr r10, [sp, #(64 +8)] + ldr r11, [sp, #(64 +12)] + add r0, r0, r8 + add r1, r1, r9 + add r2, r2, r10 + ldr r8, [sp, #(64 +16)] + add r3, r3, r11 + ldr r9, [sp, #(64 +20)] + add r4, r4, r8 + ldr r10, [sp, #(64 +24)] + add r5, r5, r9 + ldr r11, [sp, #(64 +28)] + add r6, r6, r10 + tst r12, r12 + add r7, r7, r11 + beq .Lchacha_blocks_neon_nomessage21 + UNALIGNED_LDMIA4(r12, r8, r9, r10, r11) + tst r12, r12 + eor r0, r0, r8 + eor r1, r1, r9 + eor r2, r2, r10 + ldr r8, [r12, #0] + eor r3, r3, r11 + ldr r9, [r12, #4] + eor r4, r4, r8 + ldr r10, [r12, #8] + eor r5, r5, r9 + ldr r11, [r12, #12] + eor r6, r6, r10 + add r12, r12, #16 + eor r7, r7, r11 +.Lchacha_blocks_neon_nomessage21: + UNALIGNED_STMIA8(r14, r0, r1, r2, r3, r4, r5, r6, r7) + ldm sp, {r0-r7} + ldr r8, [sp, #(64 +32)] + ldr r9, [sp, #(64 +36)] + ldr r10, [sp, #(64 +40)] + ldr r11, [sp, #(64 +44)] + add r0, r0, r8 + add r1, r1, r9 + add r2, r2, r10 + ldr r8, [sp, #(64 +48)] + add r3, r3, r11 + ldr r9, [sp, #(64 +52)] + add r4, r4, r8 + ldr r10, [sp, #(64 +56)] + add r5, r5, r9 + ldr r11, [sp, #(64 +60)] + add r6, r6, r10 + adds r8, r8, #1 + add r7, r7, r11 + adc r9, r9, #0 + str r8, [sp, #(64 +48)] + tst r12, r12 + str r9, [sp, #(64 +52)] + beq .Lchacha_blocks_neon_nomessage22 + UNALIGNED_LDMIA4(r12, r8, r9, r10, r11) + tst r12, r12 + eor r0, r0, r8 + eor r1, r1, r9 + eor r2, r2, r10 + ldr r8, [r12, #0] + eor r3, r3, r11 + ldr r9, [r12, #4] + eor r4, r4, r8 + ldr r10, [r12, #8] + eor r5, r5, r9 + ldr r11, [r12, #12] + eor r6, r6, r10 + add r12, r12, #16 + eor r7, r7, r11 +.Lchacha_blocks_neon_nomessage22: + UNALIGNED_STMIA8(r14, r0, r1, r2, r3, r4, r5, r6, r7) + str r12, [sp, #48] + str r14, [sp, #40] + ldr r3, [sp, #52] + cmp r3, #64 + sub r4, r3, #64 + str r4, [sp, #52] + bhi .Lchacha_blocks_neon_mainloop2 + cmp r3, #64 + beq .Lchacha_blocks_neon_nocopy2 + ldr r1, [sp, #56] + sub r14, r14, #64 +.Lchacha_blocks_neon_copyinput2: + subs r3, r3, #1 + ldrb r0, [r14], #1 + strb r0, [r1], #1 + bne .Lchacha_blocks_neon_copyinput2 +.Lchacha_blocks_neon_nocopy2: +.Lchacha_blocks_neon_done: + ldr r7, [sp, #60] + ldr r8, [sp, #(64 +48)] + ldr r9, [sp, #(64 +52)] + str r8, [r7, #(48 + 0)] + str r9, [r7, #(48 + 4)] + mov r12, sp + stmia r12!, {r0-r7} + add r12, r12, #48 + stmia r12!, {r0-r7} + sub r0, sp, #8 + ldr sp, [sp, #192] + ldmfd sp!, {r4-r12, r14} + vldm sp!, {q4-q7} + sub r0, sp, r0 + bx lr +.Lchacha_blocks_neon_nobytes: + mov r0, #0; + bx lr +.ltorg +.size _gcry_chacha20_armv7_neon_blocks,.-_gcry_chacha20_armv7_neon_blocks; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/chacha20-avx2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/chacha20-avx2-amd64.S new file mode 100644 index 0000000..8c085ba --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/chacha20-avx2-amd64.S @@ -0,0 +1,956 @@ +/* chacha20-avx2-amd64.S - AMD64/AVX2 implementation of ChaCha20 + * + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on public domain implementation by Andrew Moon at + * https://github.com/floodyberry/chacha-opt + */ + +#ifdef __x86_64__ +#include <config.h> + +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(ENABLE_AVX2_SUPPORT) && USE_CHACHA20 + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +.text + +.align 8 +.globl _gcry_chacha20_amd64_avx2_blocks +ELF(.type _gcry_chacha20_amd64_avx2_blocks,@function;) +_gcry_chacha20_amd64_avx2_blocks: +.Lchacha_blocks_avx2_local: + vzeroupper + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + movq %rsp, %rbp + andq $~63, %rsp + subq $512, %rsp + leaq .LC RIP, %rax + vmovdqu 0(%rax), %xmm6 + vmovdqu 16(%rax), %xmm7 + vmovdqu 0(%rdi), %xmm8 + vmovdqu 16(%rdi), %xmm9 + vmovdqu 32(%rdi), %xmm10 + vmovdqu 48(%rdi), %xmm11 + movl $20, %eax + movq $1, %r9 + vmovdqa %xmm8, 0(%rsp) + vmovdqa %xmm9, 16(%rsp) + vmovdqa %xmm10, 32(%rsp) + vmovdqa %xmm11, 48(%rsp) + movq %rax, 64(%rsp) + vmovdqa %xmm6, 448(%rsp) + vmovdqa %xmm6, 464(%rsp) + vmovdqa %xmm7, 480(%rsp) + vmovdqa %xmm7, 496(%rsp) + cmpq $512, %rcx + jae .Lchacha_blocks_avx2_atleast512 + cmp $256, %rcx + jae .Lchacha_blocks_avx2_atleast256 + jmp .Lchacha_blocks_avx2_below256 + .p2align 6,,63 +.Lchacha_blocks_avx2_atleast512: + movq 48(%rsp), %rax + leaq 1(%rax), %r8 + leaq 2(%rax), %r9 + leaq 3(%rax), %r10 + leaq 4(%rax), %rbx + leaq 5(%rax), %r11 + leaq 6(%rax), %r12 + leaq 7(%rax), %r13 + leaq 8(%rax), %r14 + movl %eax, 128(%rsp) + movl %r8d, 4+128(%rsp) + movl %r9d, 8+128(%rsp) + movl %r10d, 12+128(%rsp) + movl %ebx, 16+128(%rsp) + movl %r11d, 20+128(%rsp) + movl %r12d, 24+128(%rsp) + movl %r13d, 28+128(%rsp) + shrq $32, %rax + shrq $32, %r8 + shrq $32, %r9 + shrq $32, %r10 + shrq $32, %rbx + shrq $32, %r11 + shrq $32, %r12 + shrq $32, %r13 + movl %eax, 160(%rsp) + movl %r8d, 4+160(%rsp) + movl %r9d, 8+160(%rsp) + movl %r10d, 12+160(%rsp) + movl %ebx, 16+160(%rsp) + movl %r11d, 20+160(%rsp) + movl %r12d, 24+160(%rsp) + movl %r13d, 28+160(%rsp) + movq %r14, 48(%rsp) + movq 64(%rsp), %rax + vpbroadcastd 0(%rsp), %ymm0 + vpbroadcastd 4+0(%rsp), %ymm1 + vpbroadcastd 8+0(%rsp), %ymm2 + vpbroadcastd 12+0(%rsp), %ymm3 + vpbroadcastd 16(%rsp), %ymm4 + vpbroadcastd 4+16(%rsp), %ymm5 + vpbroadcastd 8+16(%rsp), %ymm6 + vpbroadcastd 12+16(%rsp), %ymm7 + vpbroadcastd 32(%rsp), %ymm8 + vpbroadcastd 4+32(%rsp), %ymm9 + vpbroadcastd 8+32(%rsp), %ymm10 + vpbroadcastd 12+32(%rsp), %ymm11 + vpbroadcastd 8+48(%rsp), %ymm14 + vpbroadcastd 12+48(%rsp), %ymm15 + vmovdqa 128(%rsp), %ymm12 + vmovdqa 160(%rsp), %ymm13 +.Lchacha_blocks_avx2_mainloop1: + vpaddd %ymm0, %ymm4, %ymm0 + vpaddd %ymm1, %ymm5, %ymm1 + vpxor %ymm12, 
%ymm0, %ymm12 + vpxor %ymm13, %ymm1, %ymm13 + vpaddd %ymm2, %ymm6, %ymm2 + vpaddd %ymm3, %ymm7, %ymm3 + vpxor %ymm14, %ymm2, %ymm14 + vpxor %ymm15, %ymm3, %ymm15 + vpshufb 448(%rsp), %ymm12, %ymm12 + vpshufb 448(%rsp), %ymm13, %ymm13 + vpaddd %ymm8, %ymm12, %ymm8 + vpaddd %ymm9, %ymm13, %ymm9 + vpshufb 448(%rsp), %ymm14, %ymm14 + vpshufb 448(%rsp), %ymm15, %ymm15 + vpaddd %ymm10, %ymm14, %ymm10 + vpaddd %ymm11, %ymm15, %ymm11 + vmovdqa %ymm12, 96(%rsp) + vpxor %ymm4, %ymm8, %ymm4 + vpxor %ymm5, %ymm9, %ymm5 + vpslld $ 12, %ymm4, %ymm12 + vpsrld $20, %ymm4, %ymm4 + vpxor %ymm4, %ymm12, %ymm4 + vpslld $ 12, %ymm5, %ymm12 + vpsrld $20, %ymm5, %ymm5 + vpxor %ymm5, %ymm12, %ymm5 + vpxor %ymm6, %ymm10, %ymm6 + vpxor %ymm7, %ymm11, %ymm7 + vpslld $ 12, %ymm6, %ymm12 + vpsrld $20, %ymm6, %ymm6 + vpxor %ymm6, %ymm12, %ymm6 + vpslld $ 12, %ymm7, %ymm12 + vpsrld $20, %ymm7, %ymm7 + vpxor %ymm7, %ymm12, %ymm7 + vpaddd %ymm0, %ymm4, %ymm0 + vpaddd %ymm1, %ymm5, %ymm1 + vpxor 96(%rsp), %ymm0, %ymm12 + vpxor %ymm13, %ymm1, %ymm13 + vpaddd %ymm2, %ymm6, %ymm2 + vpaddd %ymm3, %ymm7, %ymm3 + vpxor %ymm14, %ymm2, %ymm14 + vpxor %ymm15, %ymm3, %ymm15 + vpshufb 480(%rsp), %ymm12, %ymm12 + vpshufb 480(%rsp), %ymm13, %ymm13 + vpaddd %ymm8, %ymm12, %ymm8 + vpaddd %ymm9, %ymm13, %ymm9 + vpshufb 480(%rsp), %ymm14, %ymm14 + vpshufb 480(%rsp), %ymm15, %ymm15 + vpaddd %ymm10, %ymm14, %ymm10 + vpaddd %ymm11, %ymm15, %ymm11 + vmovdqa %ymm12, 96(%rsp) + vpxor %ymm4, %ymm8, %ymm4 + vpxor %ymm5, %ymm9, %ymm5 + vpslld $ 7, %ymm4, %ymm12 + vpsrld $25, %ymm4, %ymm4 + vpxor %ymm4, %ymm12, %ymm4 + vpslld $ 7, %ymm5, %ymm12 + vpsrld $25, %ymm5, %ymm5 + vpxor %ymm5, %ymm12, %ymm5 + vpxor %ymm6, %ymm10, %ymm6 + vpxor %ymm7, %ymm11, %ymm7 + vpslld $ 7, %ymm6, %ymm12 + vpsrld $25, %ymm6, %ymm6 + vpxor %ymm6, %ymm12, %ymm6 + vpslld $ 7, %ymm7, %ymm12 + vpsrld $25, %ymm7, %ymm7 + vpxor %ymm7, %ymm12, %ymm7 + vpaddd %ymm0, %ymm5, %ymm0 + vpaddd %ymm1, %ymm6, %ymm1 + vpxor %ymm15, %ymm0, %ymm15 + vpxor 96(%rsp), %ymm1, %ymm12 + vpaddd %ymm2, %ymm7, %ymm2 + vpaddd %ymm3, %ymm4, %ymm3 + vpxor %ymm13, %ymm2, %ymm13 + vpxor %ymm14, %ymm3, %ymm14 + vpshufb 448(%rsp), %ymm15, %ymm15 + vpshufb 448(%rsp), %ymm12, %ymm12 + vpaddd %ymm10, %ymm15, %ymm10 + vpaddd %ymm11, %ymm12, %ymm11 + vpshufb 448(%rsp), %ymm13, %ymm13 + vpshufb 448(%rsp), %ymm14, %ymm14 + vpaddd %ymm8, %ymm13, %ymm8 + vpaddd %ymm9, %ymm14, %ymm9 + vmovdqa %ymm15, 96(%rsp) + vpxor %ymm5, %ymm10, %ymm5 + vpxor %ymm6, %ymm11, %ymm6 + vpslld $ 12, %ymm5, %ymm15 + vpsrld $20, %ymm5, %ymm5 + vpxor %ymm5, %ymm15, %ymm5 + vpslld $ 12, %ymm6, %ymm15 + vpsrld $20, %ymm6, %ymm6 + vpxor %ymm6, %ymm15, %ymm6 + vpxor %ymm7, %ymm8, %ymm7 + vpxor %ymm4, %ymm9, %ymm4 + vpslld $ 12, %ymm7, %ymm15 + vpsrld $20, %ymm7, %ymm7 + vpxor %ymm7, %ymm15, %ymm7 + vpslld $ 12, %ymm4, %ymm15 + vpsrld $20, %ymm4, %ymm4 + vpxor %ymm4, %ymm15, %ymm4 + vpaddd %ymm0, %ymm5, %ymm0 + vpaddd %ymm1, %ymm6, %ymm1 + vpxor 96(%rsp), %ymm0, %ymm15 + vpxor %ymm12, %ymm1, %ymm12 + vpaddd %ymm2, %ymm7, %ymm2 + vpaddd %ymm3, %ymm4, %ymm3 + vpxor %ymm13, %ymm2, %ymm13 + vpxor %ymm14, %ymm3, %ymm14 + vpshufb 480(%rsp), %ymm15, %ymm15 + vpshufb 480(%rsp), %ymm12, %ymm12 + vpaddd %ymm10, %ymm15, %ymm10 + vpaddd %ymm11, %ymm12, %ymm11 + vpshufb 480(%rsp), %ymm13, %ymm13 + vpshufb 480(%rsp), %ymm14, %ymm14 + vpaddd %ymm8, %ymm13, %ymm8 + vpaddd %ymm9, %ymm14, %ymm9 + vmovdqa %ymm15, 96(%rsp) + vpxor %ymm5, %ymm10, %ymm5 + vpxor %ymm6, %ymm11, %ymm6 + vpslld $ 7, %ymm5, %ymm15 + vpsrld $25, %ymm5, %ymm5 + vpxor %ymm5, %ymm15, 
%ymm5 + vpslld $ 7, %ymm6, %ymm15 + vpsrld $25, %ymm6, %ymm6 + vpxor %ymm6, %ymm15, %ymm6 + vpxor %ymm7, %ymm8, %ymm7 + vpxor %ymm4, %ymm9, %ymm4 + vpslld $ 7, %ymm7, %ymm15 + vpsrld $25, %ymm7, %ymm7 + vpxor %ymm7, %ymm15, %ymm7 + vpslld $ 7, %ymm4, %ymm15 + vpsrld $25, %ymm4, %ymm4 + vpxor %ymm4, %ymm15, %ymm4 + vmovdqa 96(%rsp), %ymm15 + subq $2, %rax + jnz .Lchacha_blocks_avx2_mainloop1 + vmovdqa %ymm8, 192(%rsp) + vmovdqa %ymm9, 224(%rsp) + vmovdqa %ymm10, 256(%rsp) + vmovdqa %ymm11, 288(%rsp) + vmovdqa %ymm12, 320(%rsp) + vmovdqa %ymm13, 352(%rsp) + vmovdqa %ymm14, 384(%rsp) + vmovdqa %ymm15, 416(%rsp) + vpbroadcastd 0(%rsp), %ymm8 + vpbroadcastd 4+0(%rsp), %ymm9 + vpbroadcastd 8+0(%rsp), %ymm10 + vpbroadcastd 12+0(%rsp), %ymm11 + vpbroadcastd 16(%rsp), %ymm12 + vpbroadcastd 4+16(%rsp), %ymm13 + vpbroadcastd 8+16(%rsp), %ymm14 + vpbroadcastd 12+16(%rsp), %ymm15 + vpaddd %ymm8, %ymm0, %ymm0 + vpaddd %ymm9, %ymm1, %ymm1 + vpaddd %ymm10, %ymm2, %ymm2 + vpaddd %ymm11, %ymm3, %ymm3 + vpaddd %ymm12, %ymm4, %ymm4 + vpaddd %ymm13, %ymm5, %ymm5 + vpaddd %ymm14, %ymm6, %ymm6 + vpaddd %ymm15, %ymm7, %ymm7 + vpunpckldq %ymm1, %ymm0, %ymm8 + vpunpckldq %ymm3, %ymm2, %ymm9 + vpunpckhdq %ymm1, %ymm0, %ymm12 + vpunpckhdq %ymm3, %ymm2, %ymm13 + vpunpckldq %ymm5, %ymm4, %ymm10 + vpunpckldq %ymm7, %ymm6, %ymm11 + vpunpckhdq %ymm5, %ymm4, %ymm14 + vpunpckhdq %ymm7, %ymm6, %ymm15 + vpunpcklqdq %ymm9, %ymm8, %ymm0 + vpunpcklqdq %ymm11, %ymm10, %ymm1 + vpunpckhqdq %ymm9, %ymm8, %ymm2 + vpunpckhqdq %ymm11, %ymm10, %ymm3 + vpunpcklqdq %ymm13, %ymm12, %ymm4 + vpunpcklqdq %ymm15, %ymm14, %ymm5 + vpunpckhqdq %ymm13, %ymm12, %ymm6 + vpunpckhqdq %ymm15, %ymm14, %ymm7 + vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 + vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 + vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 + vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 + vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 + vperm2i128 $0x20, %ymm7, %ymm6, %ymm11 + vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 + vperm2i128 $0x31, %ymm7, %ymm6, %ymm15 + andq %rsi, %rsi + jz .Lchacha_blocks_avx2_noinput1 + vpxor 0(%rsi), %ymm8, %ymm8 + vpxor 64(%rsi), %ymm9, %ymm9 + vpxor 128(%rsi), %ymm10, %ymm10 + vpxor 192(%rsi), %ymm11, %ymm11 + vpxor 256(%rsi), %ymm12, %ymm12 + vpxor 320(%rsi), %ymm13, %ymm13 + vpxor 384(%rsi), %ymm14, %ymm14 + vpxor 448(%rsi), %ymm15, %ymm15 + vmovdqu %ymm8, 0(%rdx) + vmovdqu %ymm9, 64(%rdx) + vmovdqu %ymm10, 128(%rdx) + vmovdqu %ymm11, 192(%rdx) + vmovdqu %ymm12, 256(%rdx) + vmovdqu %ymm13, 320(%rdx) + vmovdqu %ymm14, 384(%rdx) + vmovdqu %ymm15, 448(%rdx) + vmovdqa 192(%rsp), %ymm0 + vmovdqa 224(%rsp), %ymm1 + vmovdqa 256(%rsp), %ymm2 + vmovdqa 288(%rsp), %ymm3 + vmovdqa 320(%rsp), %ymm4 + vmovdqa 352(%rsp), %ymm5 + vmovdqa 384(%rsp), %ymm6 + vmovdqa 416(%rsp), %ymm7 + vpbroadcastd 32(%rsp), %ymm8 + vpbroadcastd 4+32(%rsp), %ymm9 + vpbroadcastd 8+32(%rsp), %ymm10 + vpbroadcastd 12+32(%rsp), %ymm11 + vmovdqa 128(%rsp), %ymm12 + vmovdqa 160(%rsp), %ymm13 + vpbroadcastd 8+48(%rsp), %ymm14 + vpbroadcastd 12+48(%rsp), %ymm15 + vpaddd %ymm8, %ymm0, %ymm0 + vpaddd %ymm9, %ymm1, %ymm1 + vpaddd %ymm10, %ymm2, %ymm2 + vpaddd %ymm11, %ymm3, %ymm3 + vpaddd %ymm12, %ymm4, %ymm4 + vpaddd %ymm13, %ymm5, %ymm5 + vpaddd %ymm14, %ymm6, %ymm6 + vpaddd %ymm15, %ymm7, %ymm7 + vpunpckldq %ymm1, %ymm0, %ymm8 + vpunpckldq %ymm3, %ymm2, %ymm9 + vpunpckhdq %ymm1, %ymm0, %ymm12 + vpunpckhdq %ymm3, %ymm2, %ymm13 + vpunpckldq %ymm5, %ymm4, %ymm10 + vpunpckldq %ymm7, %ymm6, %ymm11 + vpunpckhdq %ymm5, %ymm4, %ymm14 + vpunpckhdq %ymm7, %ymm6, %ymm15 + vpunpcklqdq %ymm9, %ymm8, %ymm0 + 
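/* The vpunpck(l,h)dq, vpunpck(l,h)qdq and vperm2i128 sequence is an 8x8 32-bit transpose: each ymm register holds one state word across eight parallel blocks, and transposing turns them into eight contiguous 64-byte keystream blocks for output. */ +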
vpunpcklqdq %ymm11, %ymm10, %ymm1 + vpunpckhqdq %ymm9, %ymm8, %ymm2 + vpunpckhqdq %ymm11, %ymm10, %ymm3 + vpunpcklqdq %ymm13, %ymm12, %ymm4 + vpunpcklqdq %ymm15, %ymm14, %ymm5 + vpunpckhqdq %ymm13, %ymm12, %ymm6 + vpunpckhqdq %ymm15, %ymm14, %ymm7 + vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 + vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 + vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 + vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 + vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 + vperm2i128 $0x20, %ymm7, %ymm6, %ymm11 + vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 + vperm2i128 $0x31, %ymm7, %ymm6, %ymm15 + vpxor 32(%rsi), %ymm8, %ymm8 + vpxor 96(%rsi), %ymm9, %ymm9 + vpxor 160(%rsi), %ymm10, %ymm10 + vpxor 224(%rsi), %ymm11, %ymm11 + vpxor 288(%rsi), %ymm12, %ymm12 + vpxor 352(%rsi), %ymm13, %ymm13 + vpxor 416(%rsi), %ymm14, %ymm14 + vpxor 480(%rsi), %ymm15, %ymm15 + vmovdqu %ymm8, 32(%rdx) + vmovdqu %ymm9, 96(%rdx) + vmovdqu %ymm10, 160(%rdx) + vmovdqu %ymm11, 224(%rdx) + vmovdqu %ymm12, 288(%rdx) + vmovdqu %ymm13, 352(%rdx) + vmovdqu %ymm14, 416(%rdx) + vmovdqu %ymm15, 480(%rdx) + addq $512, %rsi + jmp .Lchacha_blocks_avx2_mainloop1_cont +.Lchacha_blocks_avx2_noinput1: + vmovdqu %ymm8, 0(%rdx) + vmovdqu %ymm9, 64(%rdx) + vmovdqu %ymm10, 128(%rdx) + vmovdqu %ymm11, 192(%rdx) + vmovdqu %ymm12, 256(%rdx) + vmovdqu %ymm13, 320(%rdx) + vmovdqu %ymm14, 384(%rdx) + vmovdqu %ymm15, 448(%rdx) + vmovdqa 192(%rsp), %ymm0 + vmovdqa 224(%rsp), %ymm1 + vmovdqa 256(%rsp), %ymm2 + vmovdqa 288(%rsp), %ymm3 + vmovdqa 320(%rsp), %ymm4 + vmovdqa 352(%rsp), %ymm5 + vmovdqa 384(%rsp), %ymm6 + vmovdqa 416(%rsp), %ymm7 + vpbroadcastd 32(%rsp), %ymm8 + vpbroadcastd 4+32(%rsp), %ymm9 + vpbroadcastd 8+32(%rsp), %ymm10 + vpbroadcastd 12+32(%rsp), %ymm11 + vmovdqa 128(%rsp), %ymm12 + vmovdqa 160(%rsp), %ymm13 + vpbroadcastd 8+48(%rsp), %ymm14 + vpbroadcastd 12+48(%rsp), %ymm15 + vpaddd %ymm8, %ymm0, %ymm0 + vpaddd %ymm9, %ymm1, %ymm1 + vpaddd %ymm10, %ymm2, %ymm2 + vpaddd %ymm11, %ymm3, %ymm3 + vpaddd %ymm12, %ymm4, %ymm4 + vpaddd %ymm13, %ymm5, %ymm5 + vpaddd %ymm14, %ymm6, %ymm6 + vpaddd %ymm15, %ymm7, %ymm7 + vpunpckldq %ymm1, %ymm0, %ymm8 + vpunpckldq %ymm3, %ymm2, %ymm9 + vpunpckhdq %ymm1, %ymm0, %ymm12 + vpunpckhdq %ymm3, %ymm2, %ymm13 + vpunpckldq %ymm5, %ymm4, %ymm10 + vpunpckldq %ymm7, %ymm6, %ymm11 + vpunpckhdq %ymm5, %ymm4, %ymm14 + vpunpckhdq %ymm7, %ymm6, %ymm15 + vpunpcklqdq %ymm9, %ymm8, %ymm0 + vpunpcklqdq %ymm11, %ymm10, %ymm1 + vpunpckhqdq %ymm9, %ymm8, %ymm2 + vpunpckhqdq %ymm11, %ymm10, %ymm3 + vpunpcklqdq %ymm13, %ymm12, %ymm4 + vpunpcklqdq %ymm15, %ymm14, %ymm5 + vpunpckhqdq %ymm13, %ymm12, %ymm6 + vpunpckhqdq %ymm15, %ymm14, %ymm7 + vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 + vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 + vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 + vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 + vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 + vperm2i128 $0x20, %ymm7, %ymm6, %ymm11 + vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 + vperm2i128 $0x31, %ymm7, %ymm6, %ymm15 + vmovdqu %ymm8, 32(%rdx) + vmovdqu %ymm9, 96(%rdx) + vmovdqu %ymm10, 160(%rdx) + vmovdqu %ymm11, 224(%rdx) + vmovdqu %ymm12, 288(%rdx) + vmovdqu %ymm13, 352(%rdx) + vmovdqu %ymm14, 416(%rdx) + vmovdqu %ymm15, 480(%rdx) +.Lchacha_blocks_avx2_mainloop1_cont: + addq $512, %rdx + subq $512, %rcx + cmp $512, %rcx + jae .Lchacha_blocks_avx2_atleast512 + cmp $256, %rcx + jb .Lchacha_blocks_avx2_below256_fixup +.Lchacha_blocks_avx2_atleast256: + movq 48(%rsp), %rax + leaq 1(%rax), %r8 + leaq 2(%rax), %r9 + leaq 3(%rax), %r10 + leaq 4(%rax), %rbx + movl %eax, 128(%rsp) + movl %r8d, 
4+128(%rsp) + movl %r9d, 8+128(%rsp) + movl %r10d, 12+128(%rsp) + shrq $32, %rax + shrq $32, %r8 + shrq $32, %r9 + shrq $32, %r10 + movl %eax, 160(%rsp) + movl %r8d, 4+160(%rsp) + movl %r9d, 8+160(%rsp) + movl %r10d, 12+160(%rsp) + movq %rbx, 48(%rsp) + movq 64(%rsp), %rax + vpbroadcastd 0(%rsp), %xmm0 + vpbroadcastd 4+0(%rsp), %xmm1 + vpbroadcastd 8+0(%rsp), %xmm2 + vpbroadcastd 12+0(%rsp), %xmm3 + vpbroadcastd 16(%rsp), %xmm4 + vpbroadcastd 4+16(%rsp), %xmm5 + vpbroadcastd 8+16(%rsp), %xmm6 + vpbroadcastd 12+16(%rsp), %xmm7 + vpbroadcastd 32(%rsp), %xmm8 + vpbroadcastd 4+32(%rsp), %xmm9 + vpbroadcastd 8+32(%rsp), %xmm10 + vpbroadcastd 12+32(%rsp), %xmm11 + vmovdqa 128(%rsp), %xmm12 + vmovdqa 160(%rsp), %xmm13 + vpbroadcastd 8+48(%rsp), %xmm14 + vpbroadcastd 12+48(%rsp), %xmm15 +.Lchacha_blocks_avx2_mainloop2: + vpaddd %xmm0, %xmm4, %xmm0 + vpaddd %xmm1, %xmm5, %xmm1 + vpxor %xmm12, %xmm0, %xmm12 + vpxor %xmm13, %xmm1, %xmm13 + vpaddd %xmm2, %xmm6, %xmm2 + vpaddd %xmm3, %xmm7, %xmm3 + vpxor %xmm14, %xmm2, %xmm14 + vpxor %xmm15, %xmm3, %xmm15 + vpshufb 448(%rsp), %xmm12, %xmm12 + vpshufb 448(%rsp), %xmm13, %xmm13 + vpaddd %xmm8, %xmm12, %xmm8 + vpaddd %xmm9, %xmm13, %xmm9 + vpshufb 448(%rsp), %xmm14, %xmm14 + vpshufb 448(%rsp), %xmm15, %xmm15 + vpaddd %xmm10, %xmm14, %xmm10 + vpaddd %xmm11, %xmm15, %xmm11 + vmovdqa %xmm12, 96(%rsp) + vpxor %xmm4, %xmm8, %xmm4 + vpxor %xmm5, %xmm9, %xmm5 + vpslld $ 12, %xmm4, %xmm12 + vpsrld $20, %xmm4, %xmm4 + vpxor %xmm4, %xmm12, %xmm4 + vpslld $ 12, %xmm5, %xmm12 + vpsrld $20, %xmm5, %xmm5 + vpxor %xmm5, %xmm12, %xmm5 + vpxor %xmm6, %xmm10, %xmm6 + vpxor %xmm7, %xmm11, %xmm7 + vpslld $ 12, %xmm6, %xmm12 + vpsrld $20, %xmm6, %xmm6 + vpxor %xmm6, %xmm12, %xmm6 + vpslld $ 12, %xmm7, %xmm12 + vpsrld $20, %xmm7, %xmm7 + vpxor %xmm7, %xmm12, %xmm7 + vpaddd %xmm0, %xmm4, %xmm0 + vpaddd %xmm1, %xmm5, %xmm1 + vpxor 96(%rsp), %xmm0, %xmm12 + vpxor %xmm13, %xmm1, %xmm13 + vpaddd %xmm2, %xmm6, %xmm2 + vpaddd %xmm3, %xmm7, %xmm3 + vpxor %xmm14, %xmm2, %xmm14 + vpxor %xmm15, %xmm3, %xmm15 + vpshufb 480(%rsp), %xmm12, %xmm12 + vpshufb 480(%rsp), %xmm13, %xmm13 + vpaddd %xmm8, %xmm12, %xmm8 + vpaddd %xmm9, %xmm13, %xmm9 + vpshufb 480(%rsp), %xmm14, %xmm14 + vpshufb 480(%rsp), %xmm15, %xmm15 + vpaddd %xmm10, %xmm14, %xmm10 + vpaddd %xmm11, %xmm15, %xmm11 + vmovdqa %xmm12, 96(%rsp) + vpxor %xmm4, %xmm8, %xmm4 + vpxor %xmm5, %xmm9, %xmm5 + vpslld $ 7, %xmm4, %xmm12 + vpsrld $25, %xmm4, %xmm4 + vpxor %xmm4, %xmm12, %xmm4 + vpslld $ 7, %xmm5, %xmm12 + vpsrld $25, %xmm5, %xmm5 + vpxor %xmm5, %xmm12, %xmm5 + vpxor %xmm6, %xmm10, %xmm6 + vpxor %xmm7, %xmm11, %xmm7 + vpslld $ 7, %xmm6, %xmm12 + vpsrld $25, %xmm6, %xmm6 + vpxor %xmm6, %xmm12, %xmm6 + vpslld $ 7, %xmm7, %xmm12 + vpsrld $25, %xmm7, %xmm7 + vpxor %xmm7, %xmm12, %xmm7 + vpaddd %xmm0, %xmm5, %xmm0 + vpaddd %xmm1, %xmm6, %xmm1 + vpxor %xmm15, %xmm0, %xmm15 + vpxor 96(%rsp), %xmm1, %xmm12 + vpaddd %xmm2, %xmm7, %xmm2 + vpaddd %xmm3, %xmm4, %xmm3 + vpxor %xmm13, %xmm2, %xmm13 + vpxor %xmm14, %xmm3, %xmm14 + vpshufb 448(%rsp), %xmm15, %xmm15 + vpshufb 448(%rsp), %xmm12, %xmm12 + vpaddd %xmm10, %xmm15, %xmm10 + vpaddd %xmm11, %xmm12, %xmm11 + vpshufb 448(%rsp), %xmm13, %xmm13 + vpshufb 448(%rsp), %xmm14, %xmm14 + vpaddd %xmm8, %xmm13, %xmm8 + vpaddd %xmm9, %xmm14, %xmm9 + vmovdqa %xmm15, 96(%rsp) + vpxor %xmm5, %xmm10, %xmm5 + vpxor %xmm6, %xmm11, %xmm6 + vpslld $ 12, %xmm5, %xmm15 + vpsrld $20, %xmm5, %xmm5 + vpxor %xmm5, %xmm15, %xmm5 + vpslld $ 12, %xmm6, %xmm15 + vpsrld $20, %xmm6, %xmm6 + vpxor %xmm6, %xmm15, %xmm6 + 
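/* AVX2 has no 32-bit vector rotate, so rotl(x,n) is built from vpslld/vpsrld plus vpxor (XOR equals OR here because the two shifted halves share no set bits); the rotations by 16 and 8 instead use vpshufb with the byte-shuffle masks from .LC kept at 448(%rsp) and 480(%rsp). */ +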
vpxor %xmm7, %xmm8, %xmm7 + vpxor %xmm4, %xmm9, %xmm4 + vpslld $ 12, %xmm7, %xmm15 + vpsrld $20, %xmm7, %xmm7 + vpxor %xmm7, %xmm15, %xmm7 + vpslld $ 12, %xmm4, %xmm15 + vpsrld $20, %xmm4, %xmm4 + vpxor %xmm4, %xmm15, %xmm4 + vpaddd %xmm0, %xmm5, %xmm0 + vpaddd %xmm1, %xmm6, %xmm1 + vpxor 96(%rsp), %xmm0, %xmm15 + vpxor %xmm12, %xmm1, %xmm12 + vpaddd %xmm2, %xmm7, %xmm2 + vpaddd %xmm3, %xmm4, %xmm3 + vpxor %xmm13, %xmm2, %xmm13 + vpxor %xmm14, %xmm3, %xmm14 + vpshufb 480(%rsp), %xmm15, %xmm15 + vpshufb 480(%rsp), %xmm12, %xmm12 + vpaddd %xmm10, %xmm15, %xmm10 + vpaddd %xmm11, %xmm12, %xmm11 + vpshufb 480(%rsp), %xmm13, %xmm13 + vpshufb 480(%rsp), %xmm14, %xmm14 + vpaddd %xmm8, %xmm13, %xmm8 + vpaddd %xmm9, %xmm14, %xmm9 + vmovdqa %xmm15, 96(%rsp) + vpxor %xmm5, %xmm10, %xmm5 + vpxor %xmm6, %xmm11, %xmm6 + vpslld $ 7, %xmm5, %xmm15 + vpsrld $25, %xmm5, %xmm5 + vpxor %xmm5, %xmm15, %xmm5 + vpslld $ 7, %xmm6, %xmm15 + vpsrld $25, %xmm6, %xmm6 + vpxor %xmm6, %xmm15, %xmm6 + vpxor %xmm7, %xmm8, %xmm7 + vpxor %xmm4, %xmm9, %xmm4 + vpslld $ 7, %xmm7, %xmm15 + vpsrld $25, %xmm7, %xmm7 + vpxor %xmm7, %xmm15, %xmm7 + vpslld $ 7, %xmm4, %xmm15 + vpsrld $25, %xmm4, %xmm4 + vpxor %xmm4, %xmm15, %xmm4 + vmovdqa 96(%rsp), %xmm15 + subq $2, %rax + jnz .Lchacha_blocks_avx2_mainloop2 + vmovdqa %xmm8, 192(%rsp) + vmovdqa %xmm9, 208(%rsp) + vmovdqa %xmm10, 224(%rsp) + vmovdqa %xmm11, 240(%rsp) + vmovdqa %xmm12, 256(%rsp) + vmovdqa %xmm13, 272(%rsp) + vmovdqa %xmm14, 288(%rsp) + vmovdqa %xmm15, 304(%rsp) + vpbroadcastd 0(%rsp), %xmm8 + vpbroadcastd 4+0(%rsp), %xmm9 + vpbroadcastd 8+0(%rsp), %xmm10 + vpbroadcastd 12+0(%rsp), %xmm11 + vpbroadcastd 16(%rsp), %xmm12 + vpbroadcastd 4+16(%rsp), %xmm13 + vpbroadcastd 8+16(%rsp), %xmm14 + vpbroadcastd 12+16(%rsp), %xmm15 + vpaddd %xmm8, %xmm0, %xmm0 + vpaddd %xmm9, %xmm1, %xmm1 + vpaddd %xmm10, %xmm2, %xmm2 + vpaddd %xmm11, %xmm3, %xmm3 + vpaddd %xmm12, %xmm4, %xmm4 + vpaddd %xmm13, %xmm5, %xmm5 + vpaddd %xmm14, %xmm6, %xmm6 + vpaddd %xmm15, %xmm7, %xmm7 + vpunpckldq %xmm1, %xmm0, %xmm8 + vpunpckldq %xmm3, %xmm2, %xmm9 + vpunpckhdq %xmm1, %xmm0, %xmm12 + vpunpckhdq %xmm3, %xmm2, %xmm13 + vpunpckldq %xmm5, %xmm4, %xmm10 + vpunpckldq %xmm7, %xmm6, %xmm11 + vpunpckhdq %xmm5, %xmm4, %xmm14 + vpunpckhdq %xmm7, %xmm6, %xmm15 + vpunpcklqdq %xmm9, %xmm8, %xmm0 + vpunpcklqdq %xmm11, %xmm10, %xmm1 + vpunpckhqdq %xmm9, %xmm8, %xmm2 + vpunpckhqdq %xmm11, %xmm10, %xmm3 + vpunpcklqdq %xmm13, %xmm12, %xmm4 + vpunpcklqdq %xmm15, %xmm14, %xmm5 + vpunpckhqdq %xmm13, %xmm12, %xmm6 + vpunpckhqdq %xmm15, %xmm14, %xmm7 + andq %rsi, %rsi + jz .Lchacha_blocks_avx2_noinput2 + vpxor 0(%rsi), %xmm0, %xmm0 + vpxor 16(%rsi), %xmm1, %xmm1 + vpxor 64(%rsi), %xmm2, %xmm2 + vpxor 80(%rsi), %xmm3, %xmm3 + vpxor 128(%rsi), %xmm4, %xmm4 + vpxor 144(%rsi), %xmm5, %xmm5 + vpxor 192(%rsi), %xmm6, %xmm6 + vpxor 208(%rsi), %xmm7, %xmm7 + vmovdqu %xmm0, 0(%rdx) + vmovdqu %xmm1, 16(%rdx) + vmovdqu %xmm2, 64(%rdx) + vmovdqu %xmm3, 80(%rdx) + vmovdqu %xmm4, 128(%rdx) + vmovdqu %xmm5, 144(%rdx) + vmovdqu %xmm6, 192(%rdx) + vmovdqu %xmm7, 208(%rdx) + vmovdqa 192(%rsp), %xmm0 + vmovdqa 208(%rsp), %xmm1 + vmovdqa 224(%rsp), %xmm2 + vmovdqa 240(%rsp), %xmm3 + vmovdqa 256(%rsp), %xmm4 + vmovdqa 272(%rsp), %xmm5 + vmovdqa 288(%rsp), %xmm6 + vmovdqa 304(%rsp), %xmm7 + vpbroadcastd 32(%rsp), %xmm8 + vpbroadcastd 4+32(%rsp), %xmm9 + vpbroadcastd 8+32(%rsp), %xmm10 + vpbroadcastd 12+32(%rsp), %xmm11 + vmovdqa 128(%rsp), %xmm12 + vmovdqa 160(%rsp), %xmm13 + vpbroadcastd 8+48(%rsp), %xmm14 + vpbroadcastd 12+48(%rsp), %xmm15 
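+ /* Feed-forward: re-broadcast the original input words and add them back into the rounds output, the final (irreversible) step of the ChaCha20 block function. */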
+ vpaddd %xmm8, %xmm0, %xmm0 + vpaddd %xmm9, %xmm1, %xmm1 + vpaddd %xmm10, %xmm2, %xmm2 + vpaddd %xmm11, %xmm3, %xmm3 + vpaddd %xmm12, %xmm4, %xmm4 + vpaddd %xmm13, %xmm5, %xmm5 + vpaddd %xmm14, %xmm6, %xmm6 + vpaddd %xmm15, %xmm7, %xmm7 + vpunpckldq %xmm1, %xmm0, %xmm8 + vpunpckldq %xmm3, %xmm2, %xmm9 + vpunpckhdq %xmm1, %xmm0, %xmm12 + vpunpckhdq %xmm3, %xmm2, %xmm13 + vpunpckldq %xmm5, %xmm4, %xmm10 + vpunpckldq %xmm7, %xmm6, %xmm11 + vpunpckhdq %xmm5, %xmm4, %xmm14 + vpunpckhdq %xmm7, %xmm6, %xmm15 + vpunpcklqdq %xmm9, %xmm8, %xmm0 + vpunpcklqdq %xmm11, %xmm10, %xmm1 + vpunpckhqdq %xmm9, %xmm8, %xmm2 + vpunpckhqdq %xmm11, %xmm10, %xmm3 + vpunpcklqdq %xmm13, %xmm12, %xmm4 + vpunpcklqdq %xmm15, %xmm14, %xmm5 + vpunpckhqdq %xmm13, %xmm12, %xmm6 + vpunpckhqdq %xmm15, %xmm14, %xmm7 + vpxor 32(%rsi), %xmm0, %xmm0 + vpxor 48(%rsi), %xmm1, %xmm1 + vpxor 96(%rsi), %xmm2, %xmm2 + vpxor 112(%rsi), %xmm3, %xmm3 + vpxor 160(%rsi), %xmm4, %xmm4 + vpxor 176(%rsi), %xmm5, %xmm5 + vpxor 224(%rsi), %xmm6, %xmm6 + vpxor 240(%rsi), %xmm7, %xmm7 + vmovdqu %xmm0, 32(%rdx) + vmovdqu %xmm1, 48(%rdx) + vmovdqu %xmm2, 96(%rdx) + vmovdqu %xmm3, 112(%rdx) + vmovdqu %xmm4, 160(%rdx) + vmovdqu %xmm5, 176(%rdx) + vmovdqu %xmm6, 224(%rdx) + vmovdqu %xmm7, 240(%rdx) + addq $256, %rsi + jmp .Lchacha_blocks_avx2_mainloop2_cont +.Lchacha_blocks_avx2_noinput2: + vmovdqu %xmm0, 0(%rdx) + vmovdqu %xmm1, 16(%rdx) + vmovdqu %xmm2, 64(%rdx) + vmovdqu %xmm3, 80(%rdx) + vmovdqu %xmm4, 128(%rdx) + vmovdqu %xmm5, 144(%rdx) + vmovdqu %xmm6, 192(%rdx) + vmovdqu %xmm7, 208(%rdx) + vmovdqa 192(%rsp), %xmm0 + vmovdqa 208(%rsp), %xmm1 + vmovdqa 224(%rsp), %xmm2 + vmovdqa 240(%rsp), %xmm3 + vmovdqa 256(%rsp), %xmm4 + vmovdqa 272(%rsp), %xmm5 + vmovdqa 288(%rsp), %xmm6 + vmovdqa 304(%rsp), %xmm7 + vpbroadcastd 32(%rsp), %xmm8 + vpbroadcastd 4+32(%rsp), %xmm9 + vpbroadcastd 8+32(%rsp), %xmm10 + vpbroadcastd 12+32(%rsp), %xmm11 + vmovdqa 128(%rsp), %xmm12 + vmovdqa 160(%rsp), %xmm13 + vpbroadcastd 8+48(%rsp), %xmm14 + vpbroadcastd 12+48(%rsp), %xmm15 + vpaddd %xmm8, %xmm0, %xmm0 + vpaddd %xmm9, %xmm1, %xmm1 + vpaddd %xmm10, %xmm2, %xmm2 + vpaddd %xmm11, %xmm3, %xmm3 + vpaddd %xmm12, %xmm4, %xmm4 + vpaddd %xmm13, %xmm5, %xmm5 + vpaddd %xmm14, %xmm6, %xmm6 + vpaddd %xmm15, %xmm7, %xmm7 + vpunpckldq %xmm1, %xmm0, %xmm8 + vpunpckldq %xmm3, %xmm2, %xmm9 + vpunpckhdq %xmm1, %xmm0, %xmm12 + vpunpckhdq %xmm3, %xmm2, %xmm13 + vpunpckldq %xmm5, %xmm4, %xmm10 + vpunpckldq %xmm7, %xmm6, %xmm11 + vpunpckhdq %xmm5, %xmm4, %xmm14 + vpunpckhdq %xmm7, %xmm6, %xmm15 + vpunpcklqdq %xmm9, %xmm8, %xmm0 + vpunpcklqdq %xmm11, %xmm10, %xmm1 + vpunpckhqdq %xmm9, %xmm8, %xmm2 + vpunpckhqdq %xmm11, %xmm10, %xmm3 + vpunpcklqdq %xmm13, %xmm12, %xmm4 + vpunpcklqdq %xmm15, %xmm14, %xmm5 + vpunpckhqdq %xmm13, %xmm12, %xmm6 + vpunpckhqdq %xmm15, %xmm14, %xmm7 + vmovdqu %xmm0, 32(%rdx) + vmovdqu %xmm1, 48(%rdx) + vmovdqu %xmm2, 96(%rdx) + vmovdqu %xmm3, 112(%rdx) + vmovdqu %xmm4, 160(%rdx) + vmovdqu %xmm5, 176(%rdx) + vmovdqu %xmm6, 224(%rdx) + vmovdqu %xmm7, 240(%rdx) +.Lchacha_blocks_avx2_mainloop2_cont: + addq $256, %rdx + subq $256, %rcx + cmp $256, %rcx + jae .Lchacha_blocks_avx2_atleast256 +.Lchacha_blocks_avx2_below256_fixup: + vmovdqa 448(%rsp), %xmm6 + vmovdqa 480(%rsp), %xmm7 + vmovdqa 0(%rsp), %xmm8 + vmovdqa 16(%rsp), %xmm9 + vmovdqa 32(%rsp), %xmm10 + vmovdqa 48(%rsp), %xmm11 + movq $1, %r9 +.Lchacha_blocks_avx2_below256: + vmovq %r9, %xmm5 + andq %rcx, %rcx + jz .Lchacha_blocks_avx2_done + cmpq $64, %rcx + jae .Lchacha_blocks_avx2_above63 + movq %rdx, %r9 + 
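/* Fewer than 64 bytes left: park the real dst pointer in %r9 and bounce the partial block through the aligned stack buffer. */ +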
andq %rsi, %rsi + jz .Lchacha_blocks_avx2_noinput3 + movq %rcx, %r10 + movq %rsp, %rdx + addq %r10, %rsi + addq %r10, %rdx + negq %r10 +.Lchacha_blocks_avx2_copyinput: + movb (%rsi, %r10), %al + movb %al, (%rdx, %r10) + incq %r10 + jnz .Lchacha_blocks_avx2_copyinput + movq %rsp, %rsi +.Lchacha_blocks_avx2_noinput3: + movq %rsp, %rdx +.Lchacha_blocks_avx2_above63: + vmovdqa %xmm8, %xmm0 + vmovdqa %xmm9, %xmm1 + vmovdqa %xmm10, %xmm2 + vmovdqa %xmm11, %xmm3 + movq 64(%rsp), %rax +.Lchacha_blocks_avx2_mainloop3: + vpaddd %xmm0, %xmm1, %xmm0 + vpxor %xmm3, %xmm0, %xmm3 + vpshufb %xmm6, %xmm3, %xmm3 + vpaddd %xmm2, %xmm3, %xmm2 + vpxor %xmm1, %xmm2, %xmm1 + vpslld $12, %xmm1, %xmm4 + vpsrld $20, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm1 + vpaddd %xmm0, %xmm1, %xmm0 + vpxor %xmm3, %xmm0, %xmm3 + vpshufb %xmm7, %xmm3, %xmm3 + vpshufd $0x93, %xmm0, %xmm0 + vpaddd %xmm2, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm1, %xmm2, %xmm1 + vpshufd $0x39, %xmm2, %xmm2 + vpslld $7, %xmm1, %xmm4 + vpsrld $25, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm1 + vpaddd %xmm0, %xmm1, %xmm0 + vpxor %xmm3, %xmm0, %xmm3 + vpshufb %xmm6, %xmm3, %xmm3 + vpaddd %xmm2, %xmm3, %xmm2 + vpxor %xmm1, %xmm2, %xmm1 + vpslld $12, %xmm1, %xmm4 + vpsrld $20, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm1 + vpaddd %xmm0, %xmm1, %xmm0 + vpxor %xmm3, %xmm0, %xmm3 + vpshufb %xmm7, %xmm3, %xmm3 + vpshufd $0x39, %xmm0, %xmm0 + vpaddd %xmm2, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm1, %xmm2, %xmm1 + vpshufd $0x93, %xmm2, %xmm2 + vpslld $7, %xmm1, %xmm4 + vpsrld $25, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm1 + subq $2, %rax + jnz .Lchacha_blocks_avx2_mainloop3 + vpaddd %xmm0, %xmm8, %xmm0 + vpaddd %xmm1, %xmm9, %xmm1 + vpaddd %xmm2, %xmm10, %xmm2 + vpaddd %xmm3, %xmm11, %xmm3 + andq %rsi, %rsi + jz .Lchacha_blocks_avx2_noinput4 + vpxor 0(%rsi), %xmm0, %xmm0 + vpxor 16(%rsi), %xmm1, %xmm1 + vpxor 32(%rsi), %xmm2, %xmm2 + vpxor 48(%rsi), %xmm3, %xmm3 + addq $64, %rsi +.Lchacha_blocks_avx2_noinput4: + vmovdqu %xmm0, 0(%rdx) + vmovdqu %xmm1, 16(%rdx) + vmovdqu %xmm2, 32(%rdx) + vmovdqu %xmm3, 48(%rdx) + vpaddq %xmm11, %xmm5, %xmm11 + cmpq $64, %rcx + jbe .Lchacha_blocks_avx2_mainloop3_finishup + addq $64, %rdx + subq $64, %rcx + jmp .Lchacha_blocks_avx2_below256 +.Lchacha_blocks_avx2_mainloop3_finishup: + cmpq $64, %rcx + je .Lchacha_blocks_avx2_done + addq %rcx, %r9 + addq %rcx, %rdx + negq %rcx +.Lchacha_blocks_avx2_copyoutput: + movb (%rdx, %rcx), %al + movb %al, (%r9, %rcx) + incq %rcx + jnz .Lchacha_blocks_avx2_copyoutput +.Lchacha_blocks_avx2_done: + vmovdqu %xmm11, 48(%rdi) + movq %rbp, %rsp + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + vzeroall + movl $(63 + 512), %eax + ret +ELF(.size _gcry_chacha20_amd64_avx2_blocks,.-_gcry_chacha20_amd64_avx2_blocks;) + +.align 16 +.LC: +.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 /* pshufb rotate by 16 */ +.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 /* pshufb rotate by 8 */ + +#endif /*defined(USE_CHACHA20)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/chacha20-sse2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/chacha20-sse2-amd64.S new file mode 100644 index 0000000..2b9842c --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/chacha20-sse2-amd64.S @@ -0,0 +1,659 @@ +/* chacha20-sse2-amd64.S - AMD64/SSE2 implementation of ChaCha20 + * + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on public domain implementation by Andrew Moon at + * https://github.com/floodyberry/chacha-opt + */ + +#ifdef __x86_64__ +#include <config.h> + +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && USE_CHACHA20 + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +.text + +.align 8 +.globl _gcry_chacha20_amd64_sse2_blocks +ELF(.type _gcry_chacha20_amd64_sse2_blocks,@function;) +_gcry_chacha20_amd64_sse2_blocks: +.Lchacha_blocks_sse2_local: + pushq %rbx + pushq %rbp + movq %rsp, %rbp + andq $~63, %rsp + subq $512, %rsp + movdqu (%rdi), %xmm8 + movdqu 16(%rdi), %xmm9 + movdqu 32(%rdi), %xmm10 + movdqu 48(%rdi), %xmm11 + movq $20, %rax + movq $1, %r9 + movdqa %xmm8, 0(%rsp) + movdqa %xmm9, 16(%rsp) + movdqa %xmm10, 32(%rsp) + movdqa %xmm11, 48(%rsp) + movq %rax, 64(%rsp) + cmpq $256, %rcx + jb .Lchacha_blocks_sse2_below256 + pshufd $0x00, %xmm8, %xmm0 + pshufd $0x55, %xmm8, %xmm1 + pshufd $0xaa, %xmm8, %xmm2 + pshufd $0xff, %xmm8, %xmm3 + movdqa %xmm0, 128(%rsp) + movdqa %xmm1, 144(%rsp) + movdqa %xmm2, 160(%rsp) + movdqa %xmm3, 176(%rsp) + pshufd $0x00, %xmm9, %xmm0 + pshufd $0x55, %xmm9, %xmm1 + pshufd $0xaa, %xmm9, %xmm2 + pshufd $0xff, %xmm9, %xmm3 + movdqa %xmm0, 192(%rsp) + movdqa %xmm1, 208(%rsp) + movdqa %xmm2, 224(%rsp) + movdqa %xmm3, 240(%rsp) + pshufd $0x00, %xmm10, %xmm0 + pshufd $0x55, %xmm10, %xmm1 + pshufd $0xaa, %xmm10, %xmm2 + pshufd $0xff, %xmm10, %xmm3 + movdqa %xmm0, 256(%rsp) + movdqa %xmm1, 272(%rsp) + movdqa %xmm2, 288(%rsp) + movdqa %xmm3, 304(%rsp) + pshufd $0xaa, %xmm11, %xmm0 + pshufd $0xff, %xmm11, %xmm1 + movdqa %xmm0, 352(%rsp) + movdqa %xmm1, 368(%rsp) + jmp .Lchacha_blocks_sse2_atleast256 +.p2align 6,,63 +.Lchacha_blocks_sse2_atleast256: + movq 48(%rsp), %rax + leaq 1(%rax), %r8 + leaq 2(%rax), %r9 + leaq 3(%rax), %r10 + leaq 4(%rax), %rbx + movl %eax, 320(%rsp) + movl %r8d, 4+320(%rsp) + movl %r9d, 8+320(%rsp) + movl %r10d, 12+320(%rsp) + shrq $32, %rax + shrq $32, %r8 + shrq $32, %r9 + shrq $32, %r10 + movl %eax, 336(%rsp) + movl %r8d, 4+336(%rsp) + movl %r9d, 8+336(%rsp) + movl %r10d, 12+336(%rsp) + movq %rbx, 48(%rsp) + movq 64(%rsp), %rax + movdqa 128(%rsp), %xmm0 + movdqa 144(%rsp), %xmm1 + movdqa 160(%rsp), %xmm2 + movdqa 176(%rsp), %xmm3 + movdqa 192(%rsp), %xmm4 + movdqa 208(%rsp), %xmm5 + movdqa 224(%rsp), %xmm6 + movdqa 240(%rsp), %xmm7 + movdqa 256(%rsp), %xmm8 + movdqa 272(%rsp), %xmm9 + movdqa 288(%rsp), %xmm10 + movdqa 304(%rsp), %xmm11 + movdqa 320(%rsp), %xmm12 + movdqa 336(%rsp), %xmm13 + movdqa 352(%rsp), %xmm14 + movdqa 368(%rsp), %xmm15 +.Lchacha_blocks_sse2_mainloop1: + paddd %xmm4, %xmm0 + paddd %xmm5, %xmm1 + pxor %xmm0, %xmm12 + pxor %xmm1, %xmm13 + paddd %xmm6, %xmm2 + paddd %xmm7, %xmm3 + movdqa %xmm6, 96(%rsp) + pxor %xmm2, 
%xmm14 + pxor %xmm3, %xmm15 + pshuflw $0xb1,%xmm12,%xmm12 + pshufhw $0xb1,%xmm12,%xmm12 + pshuflw $0xb1,%xmm13,%xmm13 + pshufhw $0xb1,%xmm13,%xmm13 + pshuflw $0xb1,%xmm14,%xmm14 + pshufhw $0xb1,%xmm14,%xmm14 + pshuflw $0xb1,%xmm15,%xmm15 + pshufhw $0xb1,%xmm15,%xmm15 + paddd %xmm12, %xmm8 + paddd %xmm13, %xmm9 + paddd %xmm14, %xmm10 + paddd %xmm15, %xmm11 + movdqa %xmm12, 112(%rsp) + pxor %xmm8, %xmm4 + pxor %xmm9, %xmm5 + movdqa 96(%rsp), %xmm6 + movdqa %xmm4, %xmm12 + pslld $ 12, %xmm4 + psrld $20, %xmm12 + pxor %xmm12, %xmm4 + movdqa %xmm5, %xmm12 + pslld $ 12, %xmm5 + psrld $20, %xmm12 + pxor %xmm12, %xmm5 + pxor %xmm10, %xmm6 + pxor %xmm11, %xmm7 + movdqa %xmm6, %xmm12 + pslld $ 12, %xmm6 + psrld $20, %xmm12 + pxor %xmm12, %xmm6 + movdqa %xmm7, %xmm12 + pslld $ 12, %xmm7 + psrld $20, %xmm12 + pxor %xmm12, %xmm7 + movdqa 112(%rsp), %xmm12 + paddd %xmm4, %xmm0 + paddd %xmm5, %xmm1 + pxor %xmm0, %xmm12 + pxor %xmm1, %xmm13 + paddd %xmm6, %xmm2 + paddd %xmm7, %xmm3 + movdqa %xmm6, 96(%rsp) + pxor %xmm2, %xmm14 + pxor %xmm3, %xmm15 + movdqa %xmm12, %xmm6 + pslld $ 8, %xmm12 + psrld $24, %xmm6 + pxor %xmm6, %xmm12 + movdqa %xmm13, %xmm6 + pslld $ 8, %xmm13 + psrld $24, %xmm6 + pxor %xmm6, %xmm13 + paddd %xmm12, %xmm8 + paddd %xmm13, %xmm9 + movdqa %xmm14, %xmm6 + pslld $ 8, %xmm14 + psrld $24, %xmm6 + pxor %xmm6, %xmm14 + movdqa %xmm15, %xmm6 + pslld $ 8, %xmm15 + psrld $24, %xmm6 + pxor %xmm6, %xmm15 + paddd %xmm14, %xmm10 + paddd %xmm15, %xmm11 + movdqa %xmm12, 112(%rsp) + pxor %xmm8, %xmm4 + pxor %xmm9, %xmm5 + movdqa 96(%rsp), %xmm6 + movdqa %xmm4, %xmm12 + pslld $ 7, %xmm4 + psrld $25, %xmm12 + pxor %xmm12, %xmm4 + movdqa %xmm5, %xmm12 + pslld $ 7, %xmm5 + psrld $25, %xmm12 + pxor %xmm12, %xmm5 + pxor %xmm10, %xmm6 + pxor %xmm11, %xmm7 + movdqa %xmm6, %xmm12 + pslld $ 7, %xmm6 + psrld $25, %xmm12 + pxor %xmm12, %xmm6 + movdqa %xmm7, %xmm12 + pslld $ 7, %xmm7 + psrld $25, %xmm12 + pxor %xmm12, %xmm7 + movdqa 112(%rsp), %xmm12 + paddd %xmm5, %xmm0 + paddd %xmm6, %xmm1 + pxor %xmm0, %xmm15 + pxor %xmm1, %xmm12 + paddd %xmm7, %xmm2 + paddd %xmm4, %xmm3 + movdqa %xmm7, 96(%rsp) + pxor %xmm2, %xmm13 + pxor %xmm3, %xmm14 + pshuflw $0xb1,%xmm15,%xmm15 + pshufhw $0xb1,%xmm15,%xmm15 + pshuflw $0xb1,%xmm12,%xmm12 + pshufhw $0xb1,%xmm12,%xmm12 + pshuflw $0xb1,%xmm13,%xmm13 + pshufhw $0xb1,%xmm13,%xmm13 + pshuflw $0xb1,%xmm14,%xmm14 + pshufhw $0xb1,%xmm14,%xmm14 + paddd %xmm15, %xmm10 + paddd %xmm12, %xmm11 + paddd %xmm13, %xmm8 + paddd %xmm14, %xmm9 + movdqa %xmm15, 112(%rsp) + pxor %xmm10, %xmm5 + pxor %xmm11, %xmm6 + movdqa 96(%rsp), %xmm7 + movdqa %xmm5, %xmm15 + pslld $ 12, %xmm5 + psrld $20, %xmm15 + pxor %xmm15, %xmm5 + movdqa %xmm6, %xmm15 + pslld $ 12, %xmm6 + psrld $20, %xmm15 + pxor %xmm15, %xmm6 + pxor %xmm8, %xmm7 + pxor %xmm9, %xmm4 + movdqa %xmm7, %xmm15 + pslld $ 12, %xmm7 + psrld $20, %xmm15 + pxor %xmm15, %xmm7 + movdqa %xmm4, %xmm15 + pslld $ 12, %xmm4 + psrld $20, %xmm15 + pxor %xmm15, %xmm4 + movdqa 112(%rsp), %xmm15 + paddd %xmm5, %xmm0 + paddd %xmm6, %xmm1 + pxor %xmm0, %xmm15 + pxor %xmm1, %xmm12 + paddd %xmm7, %xmm2 + paddd %xmm4, %xmm3 + movdqa %xmm7, 96(%rsp) + pxor %xmm2, %xmm13 + pxor %xmm3, %xmm14 + movdqa %xmm15, %xmm7 + pslld $ 8, %xmm15 + psrld $24, %xmm7 + pxor %xmm7, %xmm15 + movdqa %xmm12, %xmm7 + pslld $ 8, %xmm12 + psrld $24, %xmm7 + pxor %xmm7, %xmm12 + paddd %xmm15, %xmm10 + paddd %xmm12, %xmm11 + movdqa %xmm13, %xmm7 + pslld $ 8, %xmm13 + psrld $24, %xmm7 + pxor %xmm7, %xmm13 + movdqa %xmm14, %xmm7 + pslld $ 8, %xmm14 + psrld $24, %xmm7 + pxor %xmm7, %xmm14 
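+ /* SSE2 lacks pshufb, so <<<8 costs movdqa plus pslld/psrld/pxor; <<<16 is done with pshuflw/pshufhw $0xb1, which swaps the 16-bit halves of every dword. */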
+ paddd %xmm13, %xmm8 + paddd %xmm14, %xmm9 + movdqa %xmm15, 112(%rsp) + pxor %xmm10, %xmm5 + pxor %xmm11, %xmm6 + movdqa 96(%rsp), %xmm7 + movdqa %xmm5, %xmm15 + pslld $ 7, %xmm5 + psrld $25, %xmm15 + pxor %xmm15, %xmm5 + movdqa %xmm6, %xmm15 + pslld $ 7, %xmm6 + psrld $25, %xmm15 + pxor %xmm15, %xmm6 + pxor %xmm8, %xmm7 + pxor %xmm9, %xmm4 + movdqa %xmm7, %xmm15 + pslld $ 7, %xmm7 + psrld $25, %xmm15 + pxor %xmm15, %xmm7 + movdqa %xmm4, %xmm15 + pslld $ 7, %xmm4 + psrld $25, %xmm15 + pxor %xmm15, %xmm4 + movdqa 112(%rsp), %xmm15 + subq $2, %rax + jnz .Lchacha_blocks_sse2_mainloop1 + paddd 128(%rsp), %xmm0 + paddd 144(%rsp), %xmm1 + paddd 160(%rsp), %xmm2 + paddd 176(%rsp), %xmm3 + paddd 192(%rsp), %xmm4 + paddd 208(%rsp), %xmm5 + paddd 224(%rsp), %xmm6 + paddd 240(%rsp), %xmm7 + paddd 256(%rsp), %xmm8 + paddd 272(%rsp), %xmm9 + paddd 288(%rsp), %xmm10 + paddd 304(%rsp), %xmm11 + paddd 320(%rsp), %xmm12 + paddd 336(%rsp), %xmm13 + paddd 352(%rsp), %xmm14 + paddd 368(%rsp), %xmm15 + movdqa %xmm8, 384(%rsp) + movdqa %xmm9, 400(%rsp) + movdqa %xmm10, 416(%rsp) + movdqa %xmm11, 432(%rsp) + movdqa %xmm12, 448(%rsp) + movdqa %xmm13, 464(%rsp) + movdqa %xmm14, 480(%rsp) + movdqa %xmm15, 496(%rsp) + movdqa %xmm0, %xmm8 + movdqa %xmm2, %xmm9 + movdqa %xmm4, %xmm10 + movdqa %xmm6, %xmm11 + punpckhdq %xmm1, %xmm0 + punpckhdq %xmm3, %xmm2 + punpckhdq %xmm5, %xmm4 + punpckhdq %xmm7, %xmm6 + punpckldq %xmm1, %xmm8 + punpckldq %xmm3, %xmm9 + punpckldq %xmm5, %xmm10 + punpckldq %xmm7, %xmm11 + movdqa %xmm0, %xmm1 + movdqa %xmm4, %xmm3 + movdqa %xmm8, %xmm5 + movdqa %xmm10, %xmm7 + punpckhqdq %xmm2, %xmm0 + punpckhqdq %xmm6, %xmm4 + punpckhqdq %xmm9, %xmm8 + punpckhqdq %xmm11, %xmm10 + punpcklqdq %xmm2, %xmm1 + punpcklqdq %xmm6, %xmm3 + punpcklqdq %xmm9, %xmm5 + punpcklqdq %xmm11, %xmm7 + andq %rsi, %rsi + jz .Lchacha_blocks_sse2_noinput1 + movdqu 0(%rsi), %xmm2 + movdqu 16(%rsi), %xmm6 + movdqu 64(%rsi), %xmm9 + movdqu 80(%rsi), %xmm11 + movdqu 128(%rsi), %xmm12 + movdqu 144(%rsi), %xmm13 + movdqu 192(%rsi), %xmm14 + movdqu 208(%rsi), %xmm15 + pxor %xmm2, %xmm5 + pxor %xmm6, %xmm7 + pxor %xmm9, %xmm8 + pxor %xmm11, %xmm10 + pxor %xmm12, %xmm1 + pxor %xmm13, %xmm3 + pxor %xmm14, %xmm0 + pxor %xmm15, %xmm4 + movdqu %xmm5, 0(%rdx) + movdqu %xmm7, 16(%rdx) + movdqu %xmm8, 64(%rdx) + movdqu %xmm10, 80(%rdx) + movdqu %xmm1, 128(%rdx) + movdqu %xmm3, 144(%rdx) + movdqu %xmm0, 192(%rdx) + movdqu %xmm4, 208(%rdx) + movdqa 384(%rsp), %xmm0 + movdqa 400(%rsp), %xmm1 + movdqa 416(%rsp), %xmm2 + movdqa 432(%rsp), %xmm3 + movdqa 448(%rsp), %xmm4 + movdqa 464(%rsp), %xmm5 + movdqa 480(%rsp), %xmm6 + movdqa 496(%rsp), %xmm7 + movdqa %xmm0, %xmm8 + movdqa %xmm2, %xmm9 + movdqa %xmm4, %xmm10 + movdqa %xmm6, %xmm11 + punpckldq %xmm1, %xmm8 + punpckldq %xmm3, %xmm9 + punpckhdq %xmm1, %xmm0 + punpckhdq %xmm3, %xmm2 + punpckldq %xmm5, %xmm10 + punpckldq %xmm7, %xmm11 + punpckhdq %xmm5, %xmm4 + punpckhdq %xmm7, %xmm6 + movdqa %xmm8, %xmm1 + movdqa %xmm0, %xmm3 + movdqa %xmm10, %xmm5 + movdqa %xmm4, %xmm7 + punpcklqdq %xmm9, %xmm1 + punpcklqdq %xmm11, %xmm5 + punpckhqdq %xmm9, %xmm8 + punpckhqdq %xmm11, %xmm10 + punpcklqdq %xmm2, %xmm3 + punpcklqdq %xmm6, %xmm7 + punpckhqdq %xmm2, %xmm0 + punpckhqdq %xmm6, %xmm4 + movdqu 32(%rsi), %xmm2 + movdqu 48(%rsi), %xmm6 + movdqu 96(%rsi), %xmm9 + movdqu 112(%rsi), %xmm11 + movdqu 160(%rsi), %xmm12 + movdqu 176(%rsi), %xmm13 + movdqu 224(%rsi), %xmm14 + movdqu 240(%rsi), %xmm15 + pxor %xmm2, %xmm1 + pxor %xmm6, %xmm5 + pxor %xmm9, %xmm8 + pxor %xmm11, %xmm10 + pxor %xmm12, %xmm3 + pxor 
%xmm13, %xmm7 + pxor %xmm14, %xmm0 + pxor %xmm15, %xmm4 + movdqu %xmm1, 32(%rdx) + movdqu %xmm5, 48(%rdx) + movdqu %xmm8, 96(%rdx) + movdqu %xmm10, 112(%rdx) + movdqu %xmm3, 160(%rdx) + movdqu %xmm7, 176(%rdx) + movdqu %xmm0, 224(%rdx) + movdqu %xmm4, 240(%rdx) + addq $256, %rsi + jmp .Lchacha_blocks_sse2_mainloop_cont +.Lchacha_blocks_sse2_noinput1: + movdqu %xmm5, 0(%rdx) + movdqu %xmm7, 16(%rdx) + movdqu %xmm8, 64(%rdx) + movdqu %xmm10, 80(%rdx) + movdqu %xmm1, 128(%rdx) + movdqu %xmm3, 144(%rdx) + movdqu %xmm0, 192(%rdx) + movdqu %xmm4, 208(%rdx) + movdqa 384(%rsp), %xmm0 + movdqa 400(%rsp), %xmm1 + movdqa 416(%rsp), %xmm2 + movdqa 432(%rsp), %xmm3 + movdqa 448(%rsp), %xmm4 + movdqa 464(%rsp), %xmm5 + movdqa 480(%rsp), %xmm6 + movdqa 496(%rsp), %xmm7 + movdqa %xmm0, %xmm8 + movdqa %xmm2, %xmm9 + movdqa %xmm4, %xmm10 + movdqa %xmm6, %xmm11 + punpckldq %xmm1, %xmm8 + punpckldq %xmm3, %xmm9 + punpckhdq %xmm1, %xmm0 + punpckhdq %xmm3, %xmm2 + punpckldq %xmm5, %xmm10 + punpckldq %xmm7, %xmm11 + punpckhdq %xmm5, %xmm4 + punpckhdq %xmm7, %xmm6 + movdqa %xmm8, %xmm1 + movdqa %xmm0, %xmm3 + movdqa %xmm10, %xmm5 + movdqa %xmm4, %xmm7 + punpcklqdq %xmm9, %xmm1 + punpcklqdq %xmm11, %xmm5 + punpckhqdq %xmm9, %xmm8 + punpckhqdq %xmm11, %xmm10 + punpcklqdq %xmm2, %xmm3 + punpcklqdq %xmm6, %xmm7 + punpckhqdq %xmm2, %xmm0 + punpckhqdq %xmm6, %xmm4 + movdqu %xmm1, 32(%rdx) + movdqu %xmm5, 48(%rdx) + movdqu %xmm8, 96(%rdx) + movdqu %xmm10, 112(%rdx) + movdqu %xmm3, 160(%rdx) + movdqu %xmm7, 176(%rdx) + movdqu %xmm0, 224(%rdx) + movdqu %xmm4, 240(%rdx) +.Lchacha_blocks_sse2_mainloop_cont: + addq $256, %rdx + subq $256, %rcx + cmp $256, %rcx + jae .Lchacha_blocks_sse2_atleast256 + movdqa 0(%rsp), %xmm8 + movdqa 16(%rsp), %xmm9 + movdqa 32(%rsp), %xmm10 + movdqa 48(%rsp), %xmm11 + movq $1, %r9 +.Lchacha_blocks_sse2_below256: + movq %r9, %xmm5 + andq %rcx, %rcx + jz .Lchacha_blocks_sse2_done + cmpq $64, %rcx + jae .Lchacha_blocks_sse2_above63 + movq %rdx, %r9 + andq %rsi, %rsi + jz .Lchacha_blocks_sse2_noinput2 + movq %rcx, %r10 + movq %rsp, %rdx + addq %r10, %rsi + addq %r10, %rdx + negq %r10 +.Lchacha_blocks_sse2_copyinput: + movb (%rsi, %r10), %al + movb %al, (%rdx, %r10) + incq %r10 + jnz .Lchacha_blocks_sse2_copyinput + movq %rsp, %rsi +.Lchacha_blocks_sse2_noinput2: + movq %rsp, %rdx +.Lchacha_blocks_sse2_above63: + movdqa %xmm8, %xmm0 + movdqa %xmm9, %xmm1 + movdqa %xmm10, %xmm2 + movdqa %xmm11, %xmm3 + movq 64(%rsp), %rax +.Lchacha_blocks_sse2_mainloop2: + paddd %xmm1, %xmm0 + pxor %xmm0, %xmm3 + pshuflw $0xb1,%xmm3,%xmm3 + pshufhw $0xb1,%xmm3,%xmm3 + paddd %xmm3, %xmm2 + pxor %xmm2, %xmm1 + movdqa %xmm1,%xmm4 + pslld $12, %xmm1 + psrld $20, %xmm4 + pxor %xmm4, %xmm1 + paddd %xmm1, %xmm0 + pxor %xmm0, %xmm3 + movdqa %xmm3,%xmm4 + pslld $8, %xmm3 + psrld $24, %xmm4 + pshufd $0x93,%xmm0,%xmm0 + pxor %xmm4, %xmm3 + paddd %xmm3, %xmm2 + pshufd $0x4e,%xmm3,%xmm3 + pxor %xmm2, %xmm1 + pshufd $0x39,%xmm2,%xmm2 + movdqa %xmm1,%xmm4 + pslld $7, %xmm1 + psrld $25, %xmm4 + pxor %xmm4, %xmm1 + subq $2, %rax + paddd %xmm1, %xmm0 + pxor %xmm0, %xmm3 + pshuflw $0xb1,%xmm3,%xmm3 + pshufhw $0xb1,%xmm3,%xmm3 + paddd %xmm3, %xmm2 + pxor %xmm2, %xmm1 + movdqa %xmm1,%xmm4 + pslld $12, %xmm1 + psrld $20, %xmm4 + pxor %xmm4, %xmm1 + paddd %xmm1, %xmm0 + pxor %xmm0, %xmm3 + movdqa %xmm3,%xmm4 + pslld $8, %xmm3 + psrld $24, %xmm4 + pshufd $0x39,%xmm0,%xmm0 + pxor %xmm4, %xmm3 + paddd %xmm3, %xmm2 + pshufd $0x4e,%xmm3,%xmm3 + pxor %xmm2, %xmm1 + pshufd $0x93,%xmm2,%xmm2 + movdqa %xmm1,%xmm4 + pslld $7, %xmm1 + psrld $25, 
%xmm4 + pxor %xmm4, %xmm1 + jnz .Lchacha_blocks_sse2_mainloop2 + paddd %xmm8, %xmm0 + paddd %xmm9, %xmm1 + paddd %xmm10, %xmm2 + paddd %xmm11, %xmm3 + andq %rsi, %rsi + jz .Lchacha_blocks_sse2_noinput3 + movdqu 0(%rsi), %xmm12 + movdqu 16(%rsi), %xmm13 + movdqu 32(%rsi), %xmm14 + movdqu 48(%rsi), %xmm15 + pxor %xmm12, %xmm0 + pxor %xmm13, %xmm1 + pxor %xmm14, %xmm2 + pxor %xmm15, %xmm3 + addq $64, %rsi +.Lchacha_blocks_sse2_noinput3: + movdqu %xmm0, 0(%rdx) + movdqu %xmm1, 16(%rdx) + movdqu %xmm2, 32(%rdx) + movdqu %xmm3, 48(%rdx) + paddq %xmm5, %xmm11 + cmpq $64, %rcx + jbe .Lchacha_blocks_sse2_mainloop2_finishup + addq $64, %rdx + subq $64, %rcx + jmp .Lchacha_blocks_sse2_below256 +.Lchacha_blocks_sse2_mainloop2_finishup: + cmpq $64, %rcx + je .Lchacha_blocks_sse2_done + addq %rcx, %r9 + addq %rcx, %rdx + negq %rcx +.Lchacha_blocks_sse2_copyoutput: + movb (%rdx, %rcx), %al + movb %al, (%r9, %rcx) + incq %rcx + jnz .Lchacha_blocks_sse2_copyoutput +.Lchacha_blocks_sse2_done: + movdqu %xmm11, 48(%rdi) + movq %rbp, %rsp + pxor %xmm15, %xmm15 + pxor %xmm7, %xmm7 + pxor %xmm14, %xmm14 + pxor %xmm6, %xmm6 + pxor %xmm13, %xmm13 + pxor %xmm5, %xmm5 + pxor %xmm12, %xmm12 + pxor %xmm4, %xmm4 + popq %rbp + popq %rbx + movl $(63 + 512 + 16), %eax + pxor %xmm11, %xmm11 + pxor %xmm3, %xmm3 + pxor %xmm10, %xmm10 + pxor %xmm2, %xmm2 + pxor %xmm9, %xmm9 + pxor %xmm1, %xmm1 + pxor %xmm8, %xmm8 + pxor %xmm0, %xmm0 + ret +ELF(.size _gcry_chacha20_amd64_sse2_blocks,.-_gcry_chacha20_amd64_sse2_blocks;) + +#endif /*defined(USE_CHACHA20)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/chacha20-ssse3-amd64.S b/libotr/libgcrypt-1.8.7/cipher/chacha20-ssse3-amd64.S new file mode 100644 index 0000000..c04010e --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/chacha20-ssse3-amd64.S @@ -0,0 +1,632 @@ +/* chacha20-ssse3-amd64.S - AMD64/SSSE3 implementation of ChaCha20 + * + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on public domain implementation by Andrew Moon at + * https://github.com/floodyberry/chacha-opt + */ + +#ifdef __x86_64__ +#include <config.h> + +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_GCC_INLINE_ASM_SSSE3) && USE_CHACHA20 + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +.text + +.align 8 +.globl _gcry_chacha20_amd64_ssse3_blocks +ELF(.type _gcry_chacha20_amd64_ssse3_blocks,@function;) +_gcry_chacha20_amd64_ssse3_blocks: +.Lchacha_blocks_ssse3_local: + pushq %rbx + pushq %rbp + movq %rsp, %rbp + andq $~63, %rsp + subq $512, %rsp + leaq .LC RIP, %rax + movdqa 0(%rax), %xmm6 + movdqa 16(%rax), %xmm7 + movdqu 0(%rdi), %xmm8 + movdqu 16(%rdi), %xmm9 + movdqu 32(%rdi), %xmm10 + movdqu 48(%rdi), %xmm11 + movl $20, %eax + movq $1, %r9 + movdqa %xmm8, 0(%rsp) + movdqa %xmm9, 16(%rsp) + movdqa %xmm10, 32(%rsp) + movdqa %xmm11, 48(%rsp) + movdqa %xmm6, 80(%rsp) + movdqa %xmm7, 96(%rsp) + movq %rax, 64(%rsp) + cmpq $256, %rcx + jb .Lchacha_blocks_ssse3_below256 + pshufd $0x00, %xmm8, %xmm0 + pshufd $0x55, %xmm8, %xmm1 + pshufd $0xaa, %xmm8, %xmm2 + pshufd $0xff, %xmm8, %xmm3 + movdqa %xmm0, 128(%rsp) + movdqa %xmm1, 144(%rsp) + movdqa %xmm2, 160(%rsp) + movdqa %xmm3, 176(%rsp) + pshufd $0x00, %xmm9, %xmm0 + pshufd $0x55, %xmm9, %xmm1 + pshufd $0xaa, %xmm9, %xmm2 + pshufd $0xff, %xmm9, %xmm3 + movdqa %xmm0, 192(%rsp) + movdqa %xmm1, 208(%rsp) + movdqa %xmm2, 224(%rsp) + movdqa %xmm3, 240(%rsp) + pshufd $0x00, %xmm10, %xmm0 + pshufd $0x55, %xmm10, %xmm1 + pshufd $0xaa, %xmm10, %xmm2 + pshufd $0xff, %xmm10, %xmm3 + movdqa %xmm0, 256(%rsp) + movdqa %xmm1, 272(%rsp) + movdqa %xmm2, 288(%rsp) + movdqa %xmm3, 304(%rsp) + pshufd $0xaa, %xmm11, %xmm0 + pshufd $0xff, %xmm11, %xmm1 + movdqa %xmm0, 352(%rsp) + movdqa %xmm1, 368(%rsp) + jmp .Lchacha_blocks_ssse3_atleast256 +.p2align 6,,63 + # align to 4 mod 64 + nop;nop;nop;nop; +.Lchacha_blocks_ssse3_atleast256: + movq 48(%rsp), %rax + leaq 1(%rax), %r8 + leaq 2(%rax), %r9 + leaq 3(%rax), %r10 + leaq 4(%rax), %rbx + movl %eax, 320(%rsp) + movl %r8d, 4+320(%rsp) + movl %r9d, 8+320(%rsp) + movl %r10d, 12+320(%rsp) + shrq $32, %rax + shrq $32, %r8 + shrq $32, %r9 + shrq $32, %r10 + movl %eax, 336(%rsp) + movl %r8d, 4+336(%rsp) + movl %r9d, 8+336(%rsp) + movl %r10d, 12+336(%rsp) + movq %rbx, 48(%rsp) + movq 64(%rsp), %rax + movdqa 128(%rsp), %xmm0 + movdqa 144(%rsp), %xmm1 + movdqa 160(%rsp), %xmm2 + movdqa 176(%rsp), %xmm3 + movdqa 192(%rsp), %xmm4 + movdqa 208(%rsp), %xmm5 + movdqa 224(%rsp), %xmm6 + movdqa 240(%rsp), %xmm7 + movdqa 256(%rsp), %xmm8 + movdqa 272(%rsp), %xmm9 + movdqa 288(%rsp), %xmm10 + movdqa 304(%rsp), %xmm11 + movdqa 320(%rsp), %xmm12 + movdqa 336(%rsp), %xmm13 + movdqa 352(%rsp), %xmm14 + movdqa 368(%rsp), %xmm15 +.Lchacha_blocks_ssse3_mainloop1: + paddd %xmm4, %xmm0 + paddd %xmm5, %xmm1 + pxor %xmm0, %xmm12 + pxor %xmm1, %xmm13 + paddd %xmm6, %xmm2 + paddd %xmm7, %xmm3 + pxor %xmm2, %xmm14 + pxor %xmm3, %xmm15 + pshufb 80(%rsp), %xmm12 + pshufb 80(%rsp), %xmm13 + paddd %xmm12, %xmm8 + paddd %xmm13, %xmm9 + pshufb 80(%rsp), %xmm14 + pshufb 80(%rsp), %xmm15 + paddd %xmm14, %xmm10 + paddd %xmm15, %xmm11 + movdqa %xmm12, 112(%rsp) + pxor %xmm8, %xmm4 + pxor %xmm9, %xmm5 + movdqa %xmm4, %xmm12 + pslld $ 12, %xmm4 + psrld $20, %xmm12 + pxor %xmm12, %xmm4 + movdqa %xmm5, %xmm12 + pslld $ 12, %xmm5 + psrld $20, %xmm12 + pxor %xmm12, %xmm5 + pxor %xmm10, %xmm6 + pxor %xmm11, %xmm7 + movdqa %xmm6, %xmm12 + pslld $ 12, %xmm6 + psrld $20, %xmm12 + pxor %xmm12, %xmm6 + movdqa %xmm7, %xmm12 + pslld $ 12, %xmm7 + psrld $20, %xmm12 + pxor %xmm12, %xmm7 + movdqa 112(%rsp), %xmm12 + paddd %xmm4, %xmm0 + paddd %xmm5, %xmm1 + pxor %xmm0, %xmm12 + pxor %xmm1, %xmm13 + paddd %xmm6, %xmm2 + paddd %xmm7, %xmm3 + pxor %xmm2, %xmm14 + pxor %xmm3, %xmm15 + pshufb 96(%rsp), %xmm12 + pshufb 96(%rsp), %xmm13 + paddd 
%xmm12, %xmm8 + paddd %xmm13, %xmm9 + pshufb 96(%rsp), %xmm14 + pshufb 96(%rsp), %xmm15 + paddd %xmm14, %xmm10 + paddd %xmm15, %xmm11 + movdqa %xmm12, 112(%rsp) + pxor %xmm8, %xmm4 + pxor %xmm9, %xmm5 + movdqa %xmm4, %xmm12 + pslld $ 7, %xmm4 + psrld $25, %xmm12 + pxor %xmm12, %xmm4 + movdqa %xmm5, %xmm12 + pslld $ 7, %xmm5 + psrld $25, %xmm12 + pxor %xmm12, %xmm5 + pxor %xmm10, %xmm6 + pxor %xmm11, %xmm7 + movdqa %xmm6, %xmm12 + pslld $ 7, %xmm6 + psrld $25, %xmm12 + pxor %xmm12, %xmm6 + movdqa %xmm7, %xmm12 + pslld $ 7, %xmm7 + psrld $25, %xmm12 + pxor %xmm12, %xmm7 + movdqa 112(%rsp), %xmm12 + paddd %xmm5, %xmm0 + paddd %xmm6, %xmm1 + pxor %xmm0, %xmm15 + pxor %xmm1, %xmm12 + paddd %xmm7, %xmm2 + paddd %xmm4, %xmm3 + pxor %xmm2, %xmm13 + pxor %xmm3, %xmm14 + pshufb 80(%rsp), %xmm15 + pshufb 80(%rsp), %xmm12 + paddd %xmm15, %xmm10 + paddd %xmm12, %xmm11 + pshufb 80(%rsp), %xmm13 + pshufb 80(%rsp), %xmm14 + paddd %xmm13, %xmm8 + paddd %xmm14, %xmm9 + movdqa %xmm15, 112(%rsp) + pxor %xmm10, %xmm5 + pxor %xmm11, %xmm6 + movdqa %xmm5, %xmm15 + pslld $ 12, %xmm5 + psrld $20, %xmm15 + pxor %xmm15, %xmm5 + movdqa %xmm6, %xmm15 + pslld $ 12, %xmm6 + psrld $20, %xmm15 + pxor %xmm15, %xmm6 + pxor %xmm8, %xmm7 + pxor %xmm9, %xmm4 + movdqa %xmm7, %xmm15 + pslld $ 12, %xmm7 + psrld $20, %xmm15 + pxor %xmm15, %xmm7 + movdqa %xmm4, %xmm15 + pslld $ 12, %xmm4 + psrld $20, %xmm15 + pxor %xmm15, %xmm4 + movdqa 112(%rsp), %xmm15 + paddd %xmm5, %xmm0 + paddd %xmm6, %xmm1 + pxor %xmm0, %xmm15 + pxor %xmm1, %xmm12 + paddd %xmm7, %xmm2 + paddd %xmm4, %xmm3 + pxor %xmm2, %xmm13 + pxor %xmm3, %xmm14 + pshufb 96(%rsp), %xmm15 + pshufb 96(%rsp), %xmm12 + paddd %xmm15, %xmm10 + paddd %xmm12, %xmm11 + pshufb 96(%rsp), %xmm13 + pshufb 96(%rsp), %xmm14 + paddd %xmm13, %xmm8 + paddd %xmm14, %xmm9 + movdqa %xmm15, 112(%rsp) + pxor %xmm10, %xmm5 + pxor %xmm11, %xmm6 + movdqa %xmm5, %xmm15 + pslld $ 7, %xmm5 + psrld $25, %xmm15 + pxor %xmm15, %xmm5 + movdqa %xmm6, %xmm15 + pslld $ 7, %xmm6 + psrld $25, %xmm15 + pxor %xmm15, %xmm6 + pxor %xmm8, %xmm7 + pxor %xmm9, %xmm4 + movdqa %xmm7, %xmm15 + pslld $ 7, %xmm7 + psrld $25, %xmm15 + pxor %xmm15, %xmm7 + movdqa %xmm4, %xmm15 + pslld $ 7, %xmm4 + psrld $25, %xmm15 + pxor %xmm15, %xmm4 + subq $2, %rax + movdqa 112(%rsp), %xmm15 + jnz .Lchacha_blocks_ssse3_mainloop1 + paddd 128(%rsp), %xmm0 + paddd 144(%rsp), %xmm1 + paddd 160(%rsp), %xmm2 + paddd 176(%rsp), %xmm3 + paddd 192(%rsp), %xmm4 + paddd 208(%rsp), %xmm5 + paddd 224(%rsp), %xmm6 + paddd 240(%rsp), %xmm7 + paddd 256(%rsp), %xmm8 + paddd 272(%rsp), %xmm9 + paddd 288(%rsp), %xmm10 + paddd 304(%rsp), %xmm11 + paddd 320(%rsp), %xmm12 + paddd 336(%rsp), %xmm13 + paddd 352(%rsp), %xmm14 + paddd 368(%rsp), %xmm15 + movdqa %xmm8, 384(%rsp) + movdqa %xmm9, 400(%rsp) + movdqa %xmm10, 416(%rsp) + movdqa %xmm11, 432(%rsp) + movdqa %xmm12, 448(%rsp) + movdqa %xmm13, 464(%rsp) + movdqa %xmm14, 480(%rsp) + movdqa %xmm15, 496(%rsp) + movdqa %xmm0, %xmm8 + movdqa %xmm2, %xmm9 + movdqa %xmm4, %xmm10 + movdqa %xmm6, %xmm11 + punpckhdq %xmm1, %xmm0 + punpckhdq %xmm3, %xmm2 + punpckhdq %xmm5, %xmm4 + punpckhdq %xmm7, %xmm6 + punpckldq %xmm1, %xmm8 + punpckldq %xmm3, %xmm9 + punpckldq %xmm5, %xmm10 + punpckldq %xmm7, %xmm11 + movdqa %xmm0, %xmm1 + movdqa %xmm4, %xmm3 + movdqa %xmm8, %xmm5 + movdqa %xmm10, %xmm7 + punpckhqdq %xmm2, %xmm0 + punpckhqdq %xmm6, %xmm4 + punpckhqdq %xmm9, %xmm8 + punpckhqdq %xmm11, %xmm10 + punpcklqdq %xmm2, %xmm1 + punpcklqdq %xmm6, %xmm3 + punpcklqdq %xmm9, %xmm5 + punpcklqdq %xmm11, %xmm7 + andq %rsi, %rsi + jz 
.Lchacha_blocks_ssse3_noinput1 + movdqu 0(%rsi), %xmm2 + movdqu 16(%rsi), %xmm6 + movdqu 64(%rsi), %xmm9 + movdqu 80(%rsi), %xmm11 + movdqu 128(%rsi), %xmm12 + movdqu 144(%rsi), %xmm13 + movdqu 192(%rsi), %xmm14 + movdqu 208(%rsi), %xmm15 + pxor %xmm2, %xmm5 + pxor %xmm6, %xmm7 + pxor %xmm9, %xmm8 + pxor %xmm11, %xmm10 + pxor %xmm12, %xmm1 + pxor %xmm13, %xmm3 + pxor %xmm14, %xmm0 + pxor %xmm15, %xmm4 + movdqu %xmm5, 0(%rdx) + movdqu %xmm7, 16(%rdx) + movdqu %xmm8, 64(%rdx) + movdqu %xmm10, 80(%rdx) + movdqu %xmm1, 128(%rdx) + movdqu %xmm3, 144(%rdx) + movdqu %xmm0, 192(%rdx) + movdqu %xmm4, 208(%rdx) + movdqa 384(%rsp), %xmm0 + movdqa 400(%rsp), %xmm1 + movdqa 416(%rsp), %xmm2 + movdqa 432(%rsp), %xmm3 + movdqa 448(%rsp), %xmm4 + movdqa 464(%rsp), %xmm5 + movdqa 480(%rsp), %xmm6 + movdqa 496(%rsp), %xmm7 + movdqa %xmm0, %xmm8 + movdqa %xmm2, %xmm9 + movdqa %xmm4, %xmm10 + movdqa %xmm6, %xmm11 + punpckldq %xmm1, %xmm8 + punpckldq %xmm3, %xmm9 + punpckhdq %xmm1, %xmm0 + punpckhdq %xmm3, %xmm2 + punpckldq %xmm5, %xmm10 + punpckldq %xmm7, %xmm11 + punpckhdq %xmm5, %xmm4 + punpckhdq %xmm7, %xmm6 + movdqa %xmm8, %xmm1 + movdqa %xmm0, %xmm3 + movdqa %xmm10, %xmm5 + movdqa %xmm4, %xmm7 + punpcklqdq %xmm9, %xmm1 + punpcklqdq %xmm11, %xmm5 + punpckhqdq %xmm9, %xmm8 + punpckhqdq %xmm11, %xmm10 + punpcklqdq %xmm2, %xmm3 + punpcklqdq %xmm6, %xmm7 + punpckhqdq %xmm2, %xmm0 + punpckhqdq %xmm6, %xmm4 + movdqu 32(%rsi), %xmm2 + movdqu 48(%rsi), %xmm6 + movdqu 96(%rsi), %xmm9 + movdqu 112(%rsi), %xmm11 + movdqu 160(%rsi), %xmm12 + movdqu 176(%rsi), %xmm13 + movdqu 224(%rsi), %xmm14 + movdqu 240(%rsi), %xmm15 + pxor %xmm2, %xmm1 + pxor %xmm6, %xmm5 + pxor %xmm9, %xmm8 + pxor %xmm11, %xmm10 + pxor %xmm12, %xmm3 + pxor %xmm13, %xmm7 + pxor %xmm14, %xmm0 + pxor %xmm15, %xmm4 + movdqu %xmm1, 32(%rdx) + movdqu %xmm5, 48(%rdx) + movdqu %xmm8, 96(%rdx) + movdqu %xmm10, 112(%rdx) + movdqu %xmm3, 160(%rdx) + movdqu %xmm7, 176(%rdx) + movdqu %xmm0, 224(%rdx) + movdqu %xmm4, 240(%rdx) + addq $256, %rsi + jmp .Lchacha_blocks_ssse3_mainloop_cont +.Lchacha_blocks_ssse3_noinput1: + movdqu %xmm5, 0(%rdx) + movdqu %xmm7, 16(%rdx) + movdqu %xmm8, 64(%rdx) + movdqu %xmm10, 80(%rdx) + movdqu %xmm1, 128(%rdx) + movdqu %xmm3, 144(%rdx) + movdqu %xmm0, 192(%rdx) + movdqu %xmm4, 208(%rdx) + movdqa 384(%rsp), %xmm0 + movdqa 400(%rsp), %xmm1 + movdqa 416(%rsp), %xmm2 + movdqa 432(%rsp), %xmm3 + movdqa 448(%rsp), %xmm4 + movdqa 464(%rsp), %xmm5 + movdqa 480(%rsp), %xmm6 + movdqa 496(%rsp), %xmm7 + movdqa %xmm0, %xmm8 + movdqa %xmm2, %xmm9 + movdqa %xmm4, %xmm10 + movdqa %xmm6, %xmm11 + punpckldq %xmm1, %xmm8 + punpckldq %xmm3, %xmm9 + punpckhdq %xmm1, %xmm0 + punpckhdq %xmm3, %xmm2 + punpckldq %xmm5, %xmm10 + punpckldq %xmm7, %xmm11 + punpckhdq %xmm5, %xmm4 + punpckhdq %xmm7, %xmm6 + movdqa %xmm8, %xmm1 + movdqa %xmm0, %xmm3 + movdqa %xmm10, %xmm5 + movdqa %xmm4, %xmm7 + punpcklqdq %xmm9, %xmm1 + punpcklqdq %xmm11, %xmm5 + punpckhqdq %xmm9, %xmm8 + punpckhqdq %xmm11, %xmm10 + punpcklqdq %xmm2, %xmm3 + punpcklqdq %xmm6, %xmm7 + punpckhqdq %xmm2, %xmm0 + punpckhqdq %xmm6, %xmm4 + movdqu %xmm1, 32(%rdx) + movdqu %xmm5, 48(%rdx) + movdqu %xmm8, 96(%rdx) + movdqu %xmm10, 112(%rdx) + movdqu %xmm3, 160(%rdx) + movdqu %xmm7, 176(%rdx) + movdqu %xmm0, 224(%rdx) + movdqu %xmm4, 240(%rdx) +.Lchacha_blocks_ssse3_mainloop_cont: + addq $256, %rdx + subq $256, %rcx + cmp $256, %rcx + jae .Lchacha_blocks_ssse3_atleast256 + movdqa 80(%rsp), %xmm6 + movdqa 96(%rsp), %xmm7 + movdqa 0(%rsp), %xmm8 + movdqa 16(%rsp), %xmm9 + movdqa 32(%rsp), %xmm10 + 
movdqa 48(%rsp), %xmm11 + movq $1, %r9 +.Lchacha_blocks_ssse3_below256: + movq %r9, %xmm5 + andq %rcx, %rcx + jz .Lchacha_blocks_ssse3_done + cmpq $64, %rcx + jae .Lchacha_blocks_ssse3_above63 + movq %rdx, %r9 + andq %rsi, %rsi + jz .Lchacha_blocks_ssse3_noinput2 + movq %rcx, %r10 + movq %rsp, %rdx + addq %r10, %rsi + addq %r10, %rdx + negq %r10 +.Lchacha_blocks_ssse3_copyinput: + movb (%rsi, %r10), %al + movb %al, (%rdx, %r10) + incq %r10 + jnz .Lchacha_blocks_ssse3_copyinput + movq %rsp, %rsi +.Lchacha_blocks_ssse3_noinput2: + movq %rsp, %rdx +.Lchacha_blocks_ssse3_above63: + movdqa %xmm8, %xmm0 + movdqa %xmm9, %xmm1 + movdqa %xmm10, %xmm2 + movdqa %xmm11, %xmm3 + movq 64(%rsp), %rax +.Lchacha_blocks_ssse3_mainloop2: + paddd %xmm1, %xmm0 + pxor %xmm0, %xmm3 + pshufb %xmm6, %xmm3 + paddd %xmm3, %xmm2 + pxor %xmm2, %xmm1 + movdqa %xmm1, %xmm4 + pslld $12, %xmm4 + psrld $20, %xmm1 + pxor %xmm4, %xmm1 + paddd %xmm1, %xmm0 + pxor %xmm0, %xmm3 + pshufb %xmm7, %xmm3 + pshufd $0x93, %xmm0, %xmm0 + paddd %xmm3, %xmm2 + pshufd $0x4e, %xmm3, %xmm3 + pxor %xmm2, %xmm1 + pshufd $0x39, %xmm2, %xmm2 + movdqa %xmm1, %xmm4 + pslld $7, %xmm4 + psrld $25, %xmm1 + pxor %xmm4, %xmm1 + paddd %xmm1, %xmm0 + pxor %xmm0, %xmm3 + pshufb %xmm6, %xmm3 + paddd %xmm3, %xmm2 + pxor %xmm2, %xmm1 + movdqa %xmm1, %xmm4 + pslld $12, %xmm4 + psrld $20, %xmm1 + pxor %xmm4, %xmm1 + paddd %xmm1, %xmm0 + pxor %xmm0, %xmm3 + pshufb %xmm7, %xmm3 + pshufd $0x39, %xmm0, %xmm0 + paddd %xmm3, %xmm2 + pshufd $0x4e, %xmm3, %xmm3 + pxor %xmm2, %xmm1 + pshufd $0x93, %xmm2, %xmm2 + movdqa %xmm1, %xmm4 + pslld $7, %xmm4 + psrld $25, %xmm1 + pxor %xmm4, %xmm1 + subq $2, %rax + jnz .Lchacha_blocks_ssse3_mainloop2 + paddd %xmm8, %xmm0 + paddd %xmm9, %xmm1 + paddd %xmm10, %xmm2 + paddd %xmm11, %xmm3 + andq %rsi, %rsi + jz .Lchacha_blocks_ssse3_noinput3 + movdqu 0(%rsi), %xmm12 + movdqu 16(%rsi), %xmm13 + movdqu 32(%rsi), %xmm14 + movdqu 48(%rsi), %xmm15 + pxor %xmm12, %xmm0 + pxor %xmm13, %xmm1 + pxor %xmm14, %xmm2 + pxor %xmm15, %xmm3 + addq $64, %rsi +.Lchacha_blocks_ssse3_noinput3: + movdqu %xmm0, 0(%rdx) + movdqu %xmm1, 16(%rdx) + movdqu %xmm2, 32(%rdx) + movdqu %xmm3, 48(%rdx) + paddq %xmm5, %xmm11 + cmpq $64, %rcx + jbe .Lchacha_blocks_ssse3_mainloop2_finishup + addq $64, %rdx + subq $64, %rcx + jmp .Lchacha_blocks_ssse3_below256 +.Lchacha_blocks_ssse3_mainloop2_finishup: + cmpq $64, %rcx + je .Lchacha_blocks_ssse3_done + addq %rcx, %r9 + addq %rcx, %rdx + negq %rcx +.Lchacha_blocks_ssse3_copyoutput: + movb (%rdx, %rcx), %al + movb %al, (%r9, %rcx) + incq %rcx + jnz .Lchacha_blocks_ssse3_copyoutput +.Lchacha_blocks_ssse3_done: + movdqu %xmm11, 48(%rdi) + movq %rbp, %rsp + pxor %xmm15, %xmm15 + pxor %xmm7, %xmm7 + pxor %xmm14, %xmm14 + pxor %xmm6, %xmm6 + pxor %xmm13, %xmm13 + pxor %xmm5, %xmm5 + pxor %xmm12, %xmm12 + pxor %xmm4, %xmm4 + popq %rbp + popq %rbx + movl $(63 + 512 + 16), %eax + pxor %xmm11, %xmm11 + pxor %xmm3, %xmm3 + pxor %xmm10, %xmm10 + pxor %xmm2, %xmm2 + pxor %xmm9, %xmm9 + pxor %xmm1, %xmm1 + pxor %xmm8, %xmm8 + pxor %xmm0, %xmm0 + ret +ELF(.size _gcry_chacha20_amd64_ssse3_blocks,.-_gcry_chacha20_amd64_ssse3_blocks;) + +.align 16; +.LC: +.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 /* pshufb rotate by 16 */ +.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 /* pshufb rotate by 8 */ + +#endif /*defined(USE_CHACHA20)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/chacha20.c b/libotr/libgcrypt-1.8.7/cipher/chacha20.c new file mode 100644 index 0000000..613fa82 --- /dev/null +++ 
b/libotr/libgcrypt-1.8.7/cipher/chacha20.c @@ -0,0 +1,637 @@ +/* chacha20.c - Bernstein's ChaCha20 cipher + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * For a description of the algorithm, see: + * http://cr.yp.to/chacha.html + */ + +/* The code is based on salsa20.c and public-domain ChaCha implementations: + * chacha-ref.c version 20080118 + * D. J. Bernstein + * Public domain. + * and + * Andrew Moon + * https://github.com/floodyberry/chacha-opt + */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" + + +#define CHACHA20_MIN_KEY_SIZE 16 /* Bytes. */ +#define CHACHA20_MAX_KEY_SIZE 32 /* Bytes. */ +#define CHACHA20_BLOCK_SIZE 64 /* Bytes. */ +#define CHACHA20_MIN_IV_SIZE 8 /* Bytes. */ +#define CHACHA20_MAX_IV_SIZE 12 /* Bytes. */ +#define CHACHA20_CTR_SIZE 16 /* Bytes. */ +#define CHACHA20_INPUT_LENGTH (CHACHA20_BLOCK_SIZE / 4) + +/* USE_SSE2 indicates whether to compile with Intel SSE2 code. */ +#undef USE_SSE2 +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_SSE2 1 +#endif + +/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ +#undef USE_SSSE3 +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_GCC_INLINE_ASM_SSSE3) +# define USE_SSSE3 1 +#endif + +/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */ +#undef USE_AVX2 +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(ENABLE_AVX2_SUPPORT) +# define USE_AVX2 1 +#endif + +/* USE_NEON indicates whether to enable ARM NEON assembly code. */ +#undef USE_NEON +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_NEON 1 +# endif +#endif /*ENABLE_NEON_SUPPORT*/ + + +struct CHACHA20_context_s; + + +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. 
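+ * The SysV ABI treats xmm6..xmm15 as volatile while the Win64 ABI
+ * requires the callee to preserve them; declaring the asm entry points
+ * sysv_abi makes the compiler save those ten registers at the call
+ * site, which is the 10 * 16 = 160 byte ASM_EXTRA_STACK accounted below.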
*/ +#undef ASM_FUNC_ABI +#undef ASM_EXTRA_STACK +#if (defined(USE_SSE2) || defined(USE_SSSE3) || defined(USE_AVX2)) && \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_EXTRA_STACK (10 * 16) +#else +# define ASM_FUNC_ABI +# define ASM_EXTRA_STACK 0 +#endif + + +typedef unsigned int (* chacha20_blocks_t)(u32 *state, const byte *src, + byte *dst, + size_t bytes) ASM_FUNC_ABI; + +typedef struct CHACHA20_context_s +{ + u32 input[CHACHA20_INPUT_LENGTH]; + u32 pad[CHACHA20_INPUT_LENGTH]; + chacha20_blocks_t blocks; + unsigned int unused; /* bytes in the pad. */ +} CHACHA20_context_t; + + +#ifdef USE_SSE2 + +unsigned int _gcry_chacha20_amd64_sse2_blocks(u32 *state, const byte *in, + byte *out, + size_t bytes) ASM_FUNC_ABI; + +#endif /* USE_SSE2 */ + +#ifdef USE_SSSE3 + +unsigned int _gcry_chacha20_amd64_ssse3_blocks(u32 *state, const byte *in, + byte *out, + size_t bytes) ASM_FUNC_ABI; + +#endif /* USE_SSSE3 */ + +#ifdef USE_AVX2 + +unsigned int _gcry_chacha20_amd64_avx2_blocks(u32 *state, const byte *in, + byte *out, + size_t bytes) ASM_FUNC_ABI; + +#endif /* USE_AVX2 */ + +#ifdef USE_NEON + +unsigned int _gcry_chacha20_armv7_neon_blocks(u32 *state, const byte *in, + byte *out, + size_t bytes) ASM_FUNC_ABI; + +#endif /* USE_NEON */ + + +static void chacha20_setiv (void *context, const byte * iv, size_t ivlen); +static const char *selftest (void); + + + +#define QROUND(a,b,c,d) \ + do { \ + a += b; d = rol(d ^ a, 16); \ + c += d; b = rol(b ^ c, 12); \ + a += b; d = rol(d ^ a, 8); \ + c += d; b = rol(b ^ c, 7); \ + } while (0) + +#define QOUT(ai, bi, ci, di) \ + DO_OUT(ai); DO_OUT(bi); DO_OUT(ci); DO_OUT(di) + + +#ifndef USE_SSE2 +ASM_FUNC_ABI static unsigned int +chacha20_blocks (u32 *state, const byte *src, byte *dst, size_t bytes) +{ + u32 pad[CHACHA20_INPUT_LENGTH]; + u32 inp[CHACHA20_INPUT_LENGTH]; + unsigned int i; + + /* Note: 'bytes' must be multiple of 64 and not zero. */ + + inp[0] = state[0]; + inp[1] = state[1]; + inp[2] = state[2]; + inp[3] = state[3]; + inp[4] = state[4]; + inp[5] = state[5]; + inp[6] = state[6]; + inp[7] = state[7]; + inp[8] = state[8]; + inp[9] = state[9]; + inp[10] = state[10]; + inp[11] = state[11]; + inp[12] = state[12]; + inp[13] = state[13]; + inp[14] = state[14]; + inp[15] = state[15]; + + do + { + /* First round. 
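The unrolled quarter-rounds below form the first ChaCha double
+          round: a "column" round over (0,4,8,12)..(3,7,11,15) followed
+          by a "diagonal" round over (0,5,10,15)..(3,4,9,14). The first
+          and last double rounds are unrolled around the middle loop,
+          giving 20 rounds in total.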
*/ + pad[0] = inp[0]; + pad[4] = inp[4]; + pad[8] = inp[8]; + pad[12] = inp[12]; + QROUND (pad[0], pad[4], pad[8], pad[12]); + pad[1] = inp[1]; + pad[5] = inp[5]; + pad[9] = inp[9]; + pad[13] = inp[13]; + QROUND (pad[1], pad[5], pad[9], pad[13]); + pad[2] = inp[2]; + pad[6] = inp[6]; + pad[10] = inp[10]; + pad[14] = inp[14]; + QROUND (pad[2], pad[6], pad[10], pad[14]); + pad[3] = inp[3]; + pad[7] = inp[7]; + pad[11] = inp[11]; + pad[15] = inp[15]; + QROUND (pad[3], pad[7], pad[11], pad[15]); + + QROUND (pad[0], pad[5], pad[10], pad[15]); + QROUND (pad[1], pad[6], pad[11], pad[12]); + QROUND (pad[2], pad[7], pad[8], pad[13]); + QROUND (pad[3], pad[4], pad[9], pad[14]); + + for (i = 2; i < 20 - 2; i += 2) + { + QROUND (pad[0], pad[4], pad[8], pad[12]); + QROUND (pad[1], pad[5], pad[9], pad[13]); + QROUND (pad[2], pad[6], pad[10], pad[14]); + QROUND (pad[3], pad[7], pad[11], pad[15]); + + QROUND (pad[0], pad[5], pad[10], pad[15]); + QROUND (pad[1], pad[6], pad[11], pad[12]); + QROUND (pad[2], pad[7], pad[8], pad[13]); + QROUND (pad[3], pad[4], pad[9], pad[14]); + } + + QROUND (pad[0], pad[4], pad[8], pad[12]); + QROUND (pad[1], pad[5], pad[9], pad[13]); + QROUND (pad[2], pad[6], pad[10], pad[14]); + QROUND (pad[3], pad[7], pad[11], pad[15]); + + if (src) + { +#define DO_OUT(idx) buf_put_le32(dst + (idx) * 4, \ + (pad[idx] + inp[idx]) ^ \ + buf_get_le32(src + (idx) * 4)) + /* Last round. */ + QROUND (pad[0], pad[5], pad[10], pad[15]); + QOUT(0, 5, 10, 15); + QROUND (pad[1], pad[6], pad[11], pad[12]); + QOUT(1, 6, 11, 12); + QROUND (pad[2], pad[7], pad[8], pad[13]); + QOUT(2, 7, 8, 13); + QROUND (pad[3], pad[4], pad[9], pad[14]); + QOUT(3, 4, 9, 14); +#undef DO_OUT + } + else + { +#define DO_OUT(idx) buf_put_le32(dst + (idx) * 4, pad[idx] + inp[idx]) + /* Last round. */ + QROUND (pad[0], pad[5], pad[10], pad[15]); + QOUT(0, 5, 10, 15); + QROUND (pad[1], pad[6], pad[11], pad[12]); + QOUT(1, 6, 11, 12); + QROUND (pad[2], pad[7], pad[8], pad[13]); + QOUT(2, 7, 8, 13); + QROUND (pad[3], pad[4], pad[9], pad[14]); + QOUT(3, 4, 9, 14); +#undef DO_OUT + } + + /* Update counter. */ + inp[13] += (!++inp[12]); + + bytes -= CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + src += (src) ? CHACHA20_BLOCK_SIZE : 0; + } + while (bytes >= CHACHA20_BLOCK_SIZE); + + state[12] = inp[12]; + state[13] = inp[13]; + + /* burn_stack */ + return (2 * CHACHA20_INPUT_LENGTH * sizeof(u32) + 6 * sizeof(void *)); +} +#endif /*!USE_SSE2*/ + +#undef QROUND +#undef QOUT + + +static unsigned int +chacha20_core(u32 *dst, struct CHACHA20_context_s *ctx) +{ + return ctx->blocks(ctx->input, NULL, (byte *)dst, CHACHA20_BLOCK_SIZE) + + ASM_EXTRA_STACK; +} + + +static void +chacha20_keysetup (CHACHA20_context_t * ctx, const byte * key, + unsigned int keylen) +{ + /* These constants are the little endian encoding of the string + "expand 32-byte k". For the 128 bit variant, the "32" in that + string will be fixed up to "16". 
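For example, the first word is "expa" read little-endian:
+     0x61707865 = 'a'<<24 | 'p'<<16 | 'x'<<8 | 'e'. The fixups below
+     turn '3' into '1' (input[1] -= 0x02000000) and '2' into '6'
+     (input[2] += 4), rewriting "expand 32-byte k" as "expand 16-byte k".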
*/ + ctx->input[0] = 0x61707865; /* "apxe" */ + ctx->input[1] = 0x3320646e; /* "3 dn" */ + ctx->input[2] = 0x79622d32; /* "yb-2" */ + ctx->input[3] = 0x6b206574; /* "k et" */ + + ctx->input[4] = buf_get_le32 (key + 0); + ctx->input[5] = buf_get_le32 (key + 4); + ctx->input[6] = buf_get_le32 (key + 8); + ctx->input[7] = buf_get_le32 (key + 12); + + if (keylen == CHACHA20_MAX_KEY_SIZE) /* 256 bits */ + { + ctx->input[8] = buf_get_le32 (key + 16); + ctx->input[9] = buf_get_le32 (key + 20); + ctx->input[10] = buf_get_le32 (key + 24); + ctx->input[11] = buf_get_le32 (key + 28); + } + else /* 128 bits */ + { + ctx->input[8] = ctx->input[4]; + ctx->input[9] = ctx->input[5]; + ctx->input[10] = ctx->input[6]; + ctx->input[11] = ctx->input[7]; + + ctx->input[1] -= 0x02000000; /* Change to "1 dn". */ + ctx->input[2] += 0x00000004; /* Change to "yb-6". */ + } +} + + +static void +chacha20_ivsetup (CHACHA20_context_t * ctx, const byte * iv, size_t ivlen) +{ + if (ivlen == CHACHA20_CTR_SIZE) + { + ctx->input[12] = buf_get_le32 (iv + 0); + ctx->input[13] = buf_get_le32 (iv + 4); + ctx->input[14] = buf_get_le32 (iv + 8); + ctx->input[15] = buf_get_le32 (iv + 12); + } + else if (ivlen == CHACHA20_MAX_IV_SIZE) + { + ctx->input[12] = 0; + ctx->input[13] = buf_get_le32 (iv + 0); + ctx->input[14] = buf_get_le32 (iv + 4); + ctx->input[15] = buf_get_le32 (iv + 8); + } + else if (ivlen == CHACHA20_MIN_IV_SIZE) + { + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = buf_get_le32 (iv + 0); + ctx->input[15] = buf_get_le32 (iv + 4); + } + else + { + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = 0; + ctx->input[15] = 0; + } +} + + +static gcry_err_code_t +chacha20_do_setkey (CHACHA20_context_t * ctx, + const byte * key, unsigned int keylen) +{ + static int initialized; + static const char *selftest_failed; + unsigned int features = _gcry_get_hw_features (); + + if (!initialized) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("CHACHA20 selftest failed (%s)\n", selftest_failed); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + if (keylen != CHACHA20_MAX_KEY_SIZE && keylen != CHACHA20_MIN_KEY_SIZE) + return GPG_ERR_INV_KEYLEN; + +#ifdef USE_SSE2 + ctx->blocks = _gcry_chacha20_amd64_sse2_blocks; +#else + ctx->blocks = chacha20_blocks; +#endif + +#ifdef USE_SSSE3 + if (features & HWF_INTEL_SSSE3) + ctx->blocks = _gcry_chacha20_amd64_ssse3_blocks; +#endif +#ifdef USE_AVX2 + if (features & HWF_INTEL_AVX2) + ctx->blocks = _gcry_chacha20_amd64_avx2_blocks; +#endif +#ifdef USE_NEON + if (features & HWF_ARM_NEON) + ctx->blocks = _gcry_chacha20_armv7_neon_blocks; +#endif + + (void)features; + + chacha20_keysetup (ctx, key, keylen); + + /* We default to a zero nonce. */ + chacha20_setiv (ctx, NULL, 0); + + return 0; +} + + +static gcry_err_code_t +chacha20_setkey (void *context, const byte * key, unsigned int keylen) +{ + CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; + gcry_err_code_t rc = chacha20_do_setkey (ctx, key, keylen); + _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); + return rc; +} + + +static void +chacha20_setiv (void *context, const byte * iv, size_t ivlen) +{ + CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; + + /* draft-nir-cfrg-chacha20-poly1305-02 defines 96-bit and 64-bit nonce. 
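chacha20_ivsetup above maps the accepted lengths onto state words
+     12..15: a 16-byte IV supplies the full counter||nonce block, a
+     12-byte nonce leaves a 32-bit block counter (word 12), and an
+     8-byte nonce leaves a 64-bit block counter (words 12..13), with
+     the counters starting at zero.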
*/ + if (iv && ivlen != CHACHA20_MAX_IV_SIZE && ivlen != CHACHA20_MIN_IV_SIZE + && ivlen != CHACHA20_CTR_SIZE) + log_info ("WARNING: chacha20_setiv: bad ivlen=%u\n", (u32) ivlen); + + if (iv && (ivlen == CHACHA20_MAX_IV_SIZE || ivlen == CHACHA20_MIN_IV_SIZE + || ivlen == CHACHA20_CTR_SIZE)) + chacha20_ivsetup (ctx, iv, ivlen); + else + chacha20_ivsetup (ctx, NULL, 0); + + /* Reset the unused pad bytes counter. */ + ctx->unused = 0; +} + + + +/* Note: This function requires LENGTH > 0. */ +static void +chacha20_do_encrypt_stream (CHACHA20_context_t * ctx, + byte * outbuf, const byte * inbuf, size_t length) +{ + unsigned int nburn, burn = 0; + + if (ctx->unused) + { + unsigned char *p = (void *) ctx->pad; + size_t n; + + gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); + + n = ctx->unused; + if (n > length) + n = length; + buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); + length -= n; + outbuf += n; + inbuf += n; + ctx->unused -= n; + if (!length) + return; + gcry_assert (!ctx->unused); + } + + if (length >= CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + size_t bytes = nblocks * CHACHA20_BLOCK_SIZE; + burn = ctx->blocks(ctx->input, inbuf, outbuf, bytes); + length -= bytes; + outbuf += bytes; + inbuf += bytes; + } + + if (length > 0) + { + nburn = chacha20_core (ctx->pad, ctx); + burn = nburn > burn ? nburn : burn; + + buf_xor (outbuf, inbuf, ctx->pad, length); + ctx->unused = CHACHA20_BLOCK_SIZE - length; + } + + _gcry_burn_stack (burn); +} + + +static void +chacha20_encrypt_stream (void *context, byte * outbuf, const byte * inbuf, + size_t length) +{ + CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; + + if (length) + chacha20_do_encrypt_stream (ctx, outbuf, inbuf, length); +} + + +static const char * +selftest (void) +{ + byte ctxbuf[sizeof(CHACHA20_context_t) + 15]; + CHACHA20_context_t *ctx; + byte scratch[127 + 1]; + byte buf[512 + 64 + 4]; + int i; + + /* From draft-strombergson-chacha-test-vectors */ + static byte key_1[] = { + 0xc4, 0x6e, 0xc1, 0xb1, 0x8c, 0xe8, 0xa8, 0x78, + 0x72, 0x5a, 0x37, 0xe7, 0x80, 0xdf, 0xb7, 0x35, + 0x1f, 0x68, 0xed, 0x2e, 0x19, 0x4c, 0x79, 0xfb, + 0xc6, 0xae, 0xbe, 0xe1, 0xa6, 0x67, 0x97, 0x5d + }; + static const byte nonce_1[] = + { 0x1a, 0xda, 0x31, 0xd5, 0xcf, 0x68, 0x82, 0x21 }; + static const byte plaintext_1[127] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + static const byte ciphertext_1[127] = { + 0xf6, 0x3a, 0x89, 0xb7, 0x5c, 0x22, 0x71, 0xf9, + 0x36, 0x88, 0x16, 0x54, 0x2b, 0xa5, 0x2f, 0x06, + 0xed, 0x49, 0x24, 0x17, 0x92, 0x30, 0x2b, 0x00, + 0xb5, 0xe8, 0xf8, 0x0a, 0xe9, 0xa4, 0x73, 0xaf, + 0xc2, 0x5b, 0x21, 0x8f, 0x51, 0x9a, 0xf0, 0xfd, + 0xd4, 0x06, 0x36, 0x2e, 0x8d, 0x69, 0xde, 0x7f, + 0x54, 0xc6, 0x04, 0xa6, 0xe0, 0x0f, 0x35, 0x3f, + 0x11, 
0x0f, 0x77, 0x1b, 0xdc, 0xa8, 0xab, 0x92, + 0xe5, 0xfb, 0xc3, 0x4e, 0x60, 0xa1, 0xd9, 0xa9, + 0xdb, 0x17, 0x34, 0x5b, 0x0a, 0x40, 0x27, 0x36, + 0x85, 0x3b, 0xf9, 0x10, 0xb0, 0x60, 0xbd, 0xf1, + 0xf8, 0x97, 0xb6, 0x29, 0x0f, 0x01, 0xd1, 0x38, + 0xae, 0x2c, 0x4c, 0x90, 0x22, 0x5b, 0xa9, 0xea, + 0x14, 0xd5, 0x18, 0xf5, 0x59, 0x29, 0xde, 0xa0, + 0x98, 0xca, 0x7a, 0x6c, 0xcf, 0xe6, 0x12, 0x27, + 0x05, 0x3c, 0x84, 0xe4, 0x9a, 0x4a, 0x33 + }; + + /* 16-byte alignment required for amd64 implementation. */ + ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); + + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + scratch[sizeof (scratch) - 1] = 0; + chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); + if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) + return "ChaCha20 encryption test 1 failed."; + if (scratch[sizeof (scratch) - 1]) + return "ChaCha20 wrote too much."; + chacha20_setkey (ctx, key_1, sizeof (key_1)); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); + if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) + return "ChaCha20 decryption test 1 failed."; + + for (i = 0; i < sizeof buf; i++) + buf[i] = i; + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + /*encrypt */ + chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); + /*decrypt */ + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, buf, buf, 1); + chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1); + chacha20_encrypt_stream (ctx, buf + (sizeof buf) - 1, + buf + (sizeof buf) - 1, 1); + for (i = 0; i < sizeof buf; i++) + if (buf[i] != (byte) i) + return "ChaCha20 encryption test 2 failed."; + + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + /* encrypt */ + for (i = 0; i < sizeof buf; i++) + chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1); + /* decrypt */ + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); + for (i = 0; i < sizeof buf; i++) + if (buf[i] != (byte) i) + return "ChaCha20 encryption test 3 failed."; + + return NULL; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_chacha20 = { + GCRY_CIPHER_CHACHA20, + {0, 0}, /* flags */ + "CHACHA20", /* name */ + NULL, /* aliases */ + NULL, /* oids */ + 1, /* blocksize in bytes. */ + CHACHA20_MAX_KEY_SIZE * 8, /* standard key length in bits. */ + sizeof (CHACHA20_context_t), + chacha20_setkey, + NULL, + NULL, + chacha20_encrypt_stream, + chacha20_encrypt_stream, + NULL, + NULL, + chacha20_setiv +}; diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-aeswrap.c b/libotr/libgcrypt-1.8.7/cipher/cipher-aeswrap.c new file mode 100644 index 0000000..a8d0e03 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-aeswrap.c @@ -0,0 +1,209 @@ +/* cipher-aeswrap.c - Generic AESWRAP mode implementation + * Copyright (C) 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +/* Perform the AES-Wrap algorithm as specified by RFC3394. We + implement this as a mode usable with any cipher algorithm of + blocksize 128. */ +gcry_err_code_t +_gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen ) +{ + int j, x; + size_t n, i; + unsigned char *r, *a, *b; + unsigned char t[8]; + unsigned int burn, nburn; + +#if MAX_BLOCKSIZE < 8 +#error Invalid block size +#endif + /* We require a cipher with a 128 bit block length. */ + if (c->spec->blocksize != 16) + return GPG_ERR_INV_LENGTH; + + /* The output buffer must be able to hold the input data plus one + additional block. */ + if (outbuflen < inbuflen + 8) + return GPG_ERR_BUFFER_TOO_SHORT; + /* Input data must be multiple of 64 bits. */ + if (inbuflen % 8) + return GPG_ERR_INV_ARG; + + n = inbuflen / 8; + + /* We need at least two 64 bit blocks. */ + if (n < 2) + return GPG_ERR_INV_ARG; + + burn = 0; + + r = outbuf; + a = outbuf; /* We store A directly in OUTBUF. */ + b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ + + /* Copy the inbuf to the outbuf. */ + memmove (r+8, inbuf, inbuflen); + + /* If an IV has been set we use that IV as the Alternative Initial + Value; if it has not been set we use the standard value. */ + if (c->marks.iv) + memcpy (a, c->u_iv.iv, 8); + else + memset (a, 0xa6, 8); + + memset (t, 0, sizeof t); /* t := 0. */ + + for (j = 0; j <= 5; j++) + { + for (i = 1; i <= n; i++) + { + /* B := AES_k( A | R[i] ) */ + memcpy (b, a, 8); + memcpy (b+8, r+i*8, 8); + nburn = c->spec->encrypt (&c->context.c, b, b); + burn = nburn > burn ? nburn : burn; + /* t := t + 1 */ + for (x = 7; x >= 0; x--) + { + t[x]++; + if (t[x]) + break; + } + /* A := MSB_64(B) ^ t */ + buf_xor(a, b, t, 8); + /* R[i] := LSB_64(B) */ + memcpy (r+i*8, b+8, 8); + } + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + +/* Perform the AES-Unwrap algorithm as specified by RFC3394. We + implement this as a mode usable with any cipher algorithm of + blocksize 128. */ +gcry_err_code_t +_gcry_cipher_aeswrap_decrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + int j, x; + size_t n, i; + unsigned char *r, *a, *b; + unsigned char t[8]; + unsigned int burn, nburn; + +#if MAX_BLOCKSIZE < 8 +#error Invalid block size +#endif + /* We require a cipher with a 128 bit block length. */ + if (c->spec->blocksize != 16) + return GPG_ERR_INV_LENGTH; + + /* The output buffer must be able to hold the input data minus one + additional block. Fixme: The caller has more restrictive checks + - we may want to fix them for this mode. */ + if (outbuflen + 8 < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + /* Input data must be multiple of 64 bits. */ + if (inbuflen % 8) + return GPG_ERR_INV_ARG; + + n = inbuflen / 8; + + /* We need at least three 64 bit blocks. 
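(RFC 3394 wraps n >= 2 plaintext blocks into n+1 ciphertext
+      blocks, so a valid unwrap input is the A block plus at least two
+      wrapped data blocks.)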
*/ + if (n < 3) + return GPG_ERR_INV_ARG; + + burn = 0; + + r = outbuf; + a = c->lastiv; /* We use c->LASTIV as buffer for A. */ + b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ + + /* Copy the inbuf to the outbuf and save A. */ + memcpy (a, inbuf, 8); + memmove (r, inbuf+8, inbuflen-8); + n--; /* Reduce to actual number of data blocks. */ + + /* t := 6 * n */ + i = n * 6; /* The range is valid because: n = inbuflen / 8 - 1. */ + for (x=0; x < 8 && x < sizeof (i); x++) + t[7-x] = i >> (8*x); + for (; x < 8; x++) + t[7-x] = 0; + + for (j = 5; j >= 0; j--) + { + for (i = n; i >= 1; i--) + { + /* B := AES_k^1( (A ^ t)| R[i] ) */ + buf_xor(b, a, t, 8); + memcpy (b+8, r+(i-1)*8, 8); + nburn = c->spec->decrypt (&c->context.c, b, b); + burn = nburn > burn ? nburn : burn; + /* t := t - 1 */ + for (x = 7; x >= 0; x--) + { + t[x]--; + if (t[x] != 0xff) + break; + } + /* A := MSB_64(B) */ + memcpy (a, b, 8); + /* R[i] := LSB_64(B) */ + memcpy (r+(i-1)*8, b+8, 8); + } + } + + /* If an IV has been set we compare against this Alternative Initial + Value; if it has not been set we compare against the standard IV. */ + if (c->marks.iv) + j = memcmp (a, c->u_iv.iv, 8); + else + { + for (j=0, x=0; x < 8; x++) + if (a[x] != 0xa6) + { + j=1; + break; + } + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return j? GPG_ERR_CHECKSUM : 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-cbc.c b/libotr/libgcrypt-1.8.7/cipher/cipher-cbc.c new file mode 100644 index 0000000..95c49b2 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-cbc.c @@ -0,0 +1,214 @@ +/* cipher-cbc.c - Generic CBC mode implementation + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "./cipher-internal.h" +#include "bufhelp.h" + + + +gcry_err_code_t +_gcry_cipher_cbc_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + size_t n; + unsigned char *ivp; + int i; + size_t blocksize = c->spec->blocksize; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t nblocks = inbuflen / blocksize; + unsigned int burn, nburn; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. */ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return GPG_ERR_INV_LENGTH; + + if (outbuflen < ((c->flags & GCRY_CIPHER_CBC_MAC)? 
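/* with CBC-MAC only the final block is output */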
blocksize : inbuflen)) + return GPG_ERR_BUFFER_TOO_SHORT; + + if ((inbuflen % blocksize) + && !(inbuflen > blocksize + && (c->flags & GCRY_CIPHER_CBC_CTS))) + return GPG_ERR_INV_LENGTH; + + burn = 0; + + if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize) + { + if ((inbuflen % blocksize) == 0) + nblocks--; + } + + if (c->bulk.cbc_enc) + { + c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks, + (c->flags & GCRY_CIPHER_CBC_MAC)); + inbuf += nblocks * blocksize; + if (!(c->flags & GCRY_CIPHER_CBC_MAC)) + outbuf += nblocks * blocksize; + } + else + { + ivp = c->u_iv.iv; + + for (n=0; n < nblocks; n++ ) + { + buf_xor (outbuf, inbuf, ivp, blocksize); + nburn = enc_fn ( &c->context.c, outbuf, outbuf ); + burn = nburn > burn ? nburn : burn; + ivp = outbuf; + inbuf += blocksize; + if (!(c->flags & GCRY_CIPHER_CBC_MAC)) + outbuf += blocksize; + } + + if (ivp != c->u_iv.iv) + buf_cpy (c->u_iv.iv, ivp, blocksize ); + } + + if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize) + { + /* We have to be careful here, since outbuf might be equal to + inbuf. */ + size_t restbytes; + unsigned char b; + + if ((inbuflen % blocksize) == 0) + restbytes = blocksize; + else + restbytes = inbuflen % blocksize; + + outbuf -= blocksize; + for (ivp = c->u_iv.iv, i = 0; i < restbytes; i++) + { + b = inbuf[i]; + outbuf[blocksize + i] = outbuf[i]; + outbuf[i] = b ^ *ivp++; + } + for (; i < blocksize; i++) + outbuf[i] = 0 ^ *ivp++; + + nburn = enc_fn (&c->context.c, outbuf, outbuf); + burn = nburn > burn ? nburn : burn; + buf_cpy (c->u_iv.iv, outbuf, blocksize); + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + size_t n; + int i; + size_t blocksize = c->spec->blocksize; + gcry_cipher_decrypt_t dec_fn = c->spec->decrypt; + size_t nblocks = inbuflen / blocksize; + unsigned int burn, nburn; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. */ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return GPG_ERR_INV_LENGTH; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if ((inbuflen % blocksize) + && !(inbuflen > blocksize + && (c->flags & GCRY_CIPHER_CBC_CTS))) + return GPG_ERR_INV_LENGTH; + + burn = 0; + + if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize) + { + nblocks--; + if ((inbuflen % blocksize) == 0) + nblocks--; + buf_cpy (c->lastiv, c->u_iv.iv, blocksize); + } + + if (c->bulk.cbc_dec) + { + c->bulk.cbc_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + inbuf += nblocks * blocksize; + outbuf += nblocks * blocksize; + } + else + { + for (n=0; n < nblocks; n++ ) + { + /* Because outbuf and inbuf might be the same, we must not overwrite + the original ciphertext block. We use LASTIV as intermediate + storage here because it is not used otherwise. */ + nburn = dec_fn ( &c->context.c, c->lastiv, inbuf ); + burn = nburn > burn ? nburn : burn; + buf_xor_n_copy_2(outbuf, c->lastiv, c->u_iv.iv, inbuf, blocksize); + inbuf += blocksize; + outbuf += blocksize; + } + } + + if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize) + { + size_t restbytes; + + if ((inbuflen % blocksize) == 0) + restbytes = blocksize; + else + restbytes = inbuflen % blocksize; + + buf_cpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. 
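For CTS the last two blocks are Cn-1 and a short Cn:
+         Cn-1 is deciphered first, the stolen tail is restored from Cn,
+         and Cn-2 (saved here) is the IV for the final XOR.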
*/ + buf_cpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */ + + nburn = dec_fn ( &c->context.c, outbuf, inbuf ); + burn = nburn > burn ? nburn : burn; + buf_xor(outbuf, outbuf, c->u_iv.iv, restbytes); + + buf_cpy (outbuf + blocksize, outbuf, restbytes); + for(i=restbytes; i < blocksize; i++) + c->u_iv.iv[i] = outbuf[i]; + nburn = dec_fn (&c->context.c, outbuf, c->u_iv.iv); + burn = nburn > burn ? nburn : burn; + buf_xor(outbuf, outbuf, c->lastiv, blocksize); + /* c->lastiv is now really lastlastiv, does this matter? */ + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-ccm.c b/libotr/libgcrypt-1.8.7/cipher/cipher-ccm.c new file mode 100644 index 0000000..d7f14d8 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-ccm.c @@ -0,0 +1,366 @@ +/* cipher-ccm.c - CTR mode with CBC-MAC mode implementation + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +#define set_burn(burn, nburn) do { \ + unsigned int __nburn = (nburn); \ + (burn) = (burn) > __nburn ? (burn) : __nburn; } while (0) + + +static unsigned int +do_cbc_mac (gcry_cipher_hd_t c, const unsigned char *inbuf, size_t inlen, + int do_padding) +{ + const unsigned int blocksize = 16; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + unsigned char tmp[blocksize]; + unsigned int burn = 0; + unsigned int unused = c->u_mode.ccm.mac_unused; + size_t nblocks; + + if (inlen == 0 && (unused == 0 || !do_padding)) + return 0; + + do + { + if (inlen + unused < blocksize || unused > 0) + { + for (; inlen && unused < blocksize; inlen--) + c->u_mode.ccm.macbuf[unused++] = *inbuf++; + } + if (!inlen) + { + if (!do_padding) + break; + + while (unused < blocksize) + c->u_mode.ccm.macbuf[unused++] = 0; + } + + if (unused > 0) + { + /* Process one block from macbuf. 
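The CBC-MAC state lives in u_iv.iv and is updated as
+              X_i = E_k(X_{i-1} ^ B_i); here a buffered block is folded in.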
*/ + buf_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf, blocksize); + set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv )); + + unused = 0; + } + + if (c->bulk.cbc_enc) + { + nblocks = inlen / blocksize; + c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, tmp, inbuf, nblocks, 1); + inbuf += nblocks * blocksize; + inlen -= nblocks * blocksize; + + wipememory (tmp, sizeof(tmp)); + } + else + { + while (inlen >= blocksize) + { + buf_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize); + + set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv )); + + inlen -= blocksize; + inbuf += blocksize; + } + } + } + while (inlen > 0); + + c->u_mode.ccm.mac_unused = unused; + + if (burn) + burn += 4 * sizeof(void *); + + return burn; +} + + +gcry_err_code_t +_gcry_cipher_ccm_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce, + size_t noncelen) +{ + unsigned int marks_key; + size_t L = 15 - noncelen; + size_t L_; + + L_ = L - 1; + + if (!nonce) + return GPG_ERR_INV_ARG; + /* Length field must be 2, 3, ..., or 8. */ + if (L < 2 || L > 8) + return GPG_ERR_INV_LENGTH; + + /* Reset state */ + marks_key = c->marks.key; + memset (&c->u_mode, 0, sizeof(c->u_mode)); + memset (&c->marks, 0, sizeof(c->marks)); + memset (&c->u_iv, 0, sizeof(c->u_iv)); + memset (&c->u_ctr, 0, sizeof(c->u_ctr)); + memset (c->lastiv, 0, sizeof(c->lastiv)); + c->unused = 0; + c->marks.key = marks_key; + + /* Setup CTR */ + c->u_ctr.ctr[0] = L_; + memcpy (&c->u_ctr.ctr[1], nonce, noncelen); + memset (&c->u_ctr.ctr[1 + noncelen], 0, L); + + /* Setup IV */ + c->u_iv.iv[0] = L_; + memcpy (&c->u_iv.iv[1], nonce, noncelen); + /* Add (8 * M_ + 64 * flags) to iv[0] and set iv[noncelen + 1 ... 15] later + in set_aad. */ + memset (&c->u_iv.iv[1 + noncelen], 0, L); + + c->u_mode.ccm.nonce = 1; + + return GPG_ERR_NO_ERROR; +} + + +gcry_err_code_t +_gcry_cipher_ccm_set_lengths (gcry_cipher_hd_t c, u64 encryptlen, u64 aadlen, + u64 taglen) +{ + unsigned int burn = 0; + unsigned char b0[16]; + size_t noncelen = 15 - (c->u_iv.iv[0] + 1); + u64 M = taglen; + u64 M_; + int i; + + M_ = (M - 2) / 2; + + /* Authentication field must be 4, 6, 8, 10, 12, 14 or 16. */ + if ((M_ * 2 + 2) != M || M < 4 || M > 16) + return GPG_ERR_INV_LENGTH; + if (!c->u_mode.ccm.nonce || c->marks.tag) + return GPG_ERR_INV_STATE; + if (c->u_mode.ccm.lengths) + return GPG_ERR_INV_STATE; + + c->u_mode.ccm.authlen = taglen; + c->u_mode.ccm.encryptlen = encryptlen; + c->u_mode.ccm.aadlen = aadlen; + + /* Complete IV setup. */ + c->u_iv.iv[0] += (aadlen > 0) * 64 + M_ * 8; + for (i = 16 - 1; i >= 1 + noncelen; i--) + { + c->u_iv.iv[i] = encryptlen & 0xff; + encryptlen >>= 8; + } + + memcpy (b0, c->u_iv.iv, 16); + memset (c->u_iv.iv, 0, 16); + + set_burn (burn, do_cbc_mac (c, b0, 16, 0)); + + if (aadlen == 0) + { + /* Do nothing. */ + } + else if (aadlen > 0 && aadlen <= (unsigned int)0xfeff) + { + b0[0] = (aadlen >> 8) & 0xff; + b0[1] = aadlen & 0xff; + set_burn (burn, do_cbc_mac (c, b0, 2, 0)); + } + else if (aadlen > 0xfeff && aadlen <= (unsigned int)0xffffffff) + { + b0[0] = 0xff; + b0[1] = 0xfe; + buf_put_be32(&b0[2], aadlen); + set_burn (burn, do_cbc_mac (c, b0, 6, 0)); + } + else if (aadlen > (unsigned int)0xffffffff) + { + b0[0] = 0xff; + b0[1] = 0xff; + buf_put_be64(&b0[2], aadlen); + set_burn (burn, do_cbc_mac (c, b0, 10, 0)); + } + + /* Generate S_0 and increase counter. 
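S_0 = E_k(A_0) is the keystream block for counter value 0; it is
+      held back and XORed onto the final CBC-MAC in _gcry_cipher_ccm_tag,
+      while payload encryption starts with the counter at 1.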
*/ + set_burn (burn, c->spec->encrypt ( &c->context.c, c->u_mode.ccm.s0, + c->u_ctr.ctr )); + c->u_ctr.ctr[15]++; + + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + + c->u_mode.ccm.lengths = 1; + + return GPG_ERR_NO_ERROR; +} + + +gcry_err_code_t +_gcry_cipher_ccm_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, + size_t abuflen) +{ + unsigned int burn; + + if (abuflen > 0 && !abuf) + return GPG_ERR_INV_ARG; + if (!c->u_mode.ccm.nonce || !c->u_mode.ccm.lengths || c->marks.tag) + return GPG_ERR_INV_STATE; + if (abuflen > c->u_mode.ccm.aadlen) + return GPG_ERR_INV_LENGTH; + + c->u_mode.ccm.aadlen -= abuflen; + burn = do_cbc_mac (c, abuf, abuflen, c->u_mode.ccm.aadlen == 0); + + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + + return GPG_ERR_NO_ERROR; +} + + +gcry_err_code_t +_gcry_cipher_ccm_tag (gcry_cipher_hd_t c, unsigned char *outbuf, + size_t outbuflen, int check) +{ + unsigned int burn; + + if (!outbuf || outbuflen == 0) + return GPG_ERR_INV_ARG; + /* Tag length must be same as initial authlen. */ + if (c->u_mode.ccm.authlen != outbuflen) + return GPG_ERR_INV_LENGTH; + if (!c->u_mode.ccm.nonce || !c->u_mode.ccm.lengths || c->u_mode.ccm.aadlen > 0) + return GPG_ERR_INV_STATE; + /* Initial encrypt length must match with length of actual data processed. */ + if (c->u_mode.ccm.encryptlen > 0) + return GPG_ERR_UNFINISHED; + + if (!c->marks.tag) + { + burn = do_cbc_mac (c, NULL, 0, 1); /* Perform final padding. */ + + /* Add S_0 */ + buf_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16); + + wipememory (c->u_ctr.ctr, 16); + wipememory (c->u_mode.ccm.s0, 16); + wipememory (c->u_mode.ccm.macbuf, 16); + + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + + c->marks.tag = 1; + } + + if (!check) + { + memcpy (outbuf, c->u_iv.iv, outbuflen); + return GPG_ERR_NO_ERROR; + } + else + { + return buf_eq_const(outbuf, c->u_iv.iv, outbuflen) ? 
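+ /* buf_eq_const compares in constant time; no timing leak on mismatch. */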
+ GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM; + } +} + + +gcry_err_code_t +_gcry_cipher_ccm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen) +{ + return _gcry_cipher_ccm_tag (c, outtag, taglen, 0); +} + + +gcry_err_code_t +_gcry_cipher_ccm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen) +{ + return _gcry_cipher_ccm_tag (c, (unsigned char *)intag, taglen, 1); +} + + +gcry_err_code_t +_gcry_cipher_ccm_encrypt (gcry_cipher_hd_t c, unsigned char *outbuf, + size_t outbuflen, const unsigned char *inbuf, + size_t inbuflen) +{ + unsigned int burn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (!c->u_mode.ccm.nonce || c->marks.tag || !c->u_mode.ccm.lengths || + c->u_mode.ccm.aadlen > 0) + return GPG_ERR_INV_STATE; + if (inbuflen > c->u_mode.ccm.encryptlen) + return GPG_ERR_INV_LENGTH; + + c->u_mode.ccm.encryptlen -= inbuflen; + burn = do_cbc_mac (c, inbuf, inbuflen, 0); + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + + return _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); +} + + +gcry_err_code_t +_gcry_cipher_ccm_decrypt (gcry_cipher_hd_t c, unsigned char *outbuf, + size_t outbuflen, const unsigned char *inbuf, + size_t inbuflen) +{ + gcry_err_code_t err; + unsigned int burn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (!c->u_mode.ccm.nonce || c->marks.tag || !c->u_mode.ccm.lengths || + c->u_mode.ccm.aadlen > 0) + return GPG_ERR_INV_STATE; + if (inbuflen > c->u_mode.ccm.encryptlen) + return GPG_ERR_INV_LENGTH; + + err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); + if (err) + return err; + + c->u_mode.ccm.encryptlen -= inbuflen; + burn = do_cbc_mac (c, outbuf, inbuflen, 0); + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + + return err; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-cfb.c b/libotr/libgcrypt-1.8.7/cipher/cipher-cfb.c new file mode 100644 index 0000000..c888e70 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-cfb.c @@ -0,0 +1,325 @@ +/* cipher-cfb.c - Generic CFB mode implementation + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +gcry_err_code_t +_gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + unsigned char *ivp; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize = c->spec->blocksize; + size_t blocksize_x_2 = blocksize + blocksize; + unsigned int burn, nburn; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. */ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return GPG_ERR_INV_LENGTH; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if ( inbuflen <= c->unused ) + { + /* Short enough to be encoded by the remaining XOR mask. */ + /* XOR the input with the IV and store input into IV. */ + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor_2dst(outbuf, ivp, inbuf, inbuflen); + c->unused -= inbuflen; + return 0; + } + + burn = 0; + + if ( c->unused ) + { + /* XOR the input with the IV and store input into IV */ + inbuflen -= c->unused; + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor_2dst(outbuf, ivp, inbuf, c->unused); + outbuf += c->unused; + inbuf += c->unused; + c->unused = 0; + } + + /* Now we can process complete blocks. We use a loop as long as we + have at least 2 blocks and use conditions for the rest. This + also allows to use a bulk encryption function if available. */ + if (inbuflen >= blocksize_x_2 && c->bulk.cfb_enc) + { + size_t nblocks = inbuflen / blocksize; + c->bulk.cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + outbuf += nblocks * blocksize; + inbuf += nblocks * blocksize; + inbuflen -= nblocks * blocksize; + } + else + { + while ( inbuflen >= blocksize_x_2 ) + { + /* Encrypt the IV. */ + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + /* XOR the input with the IV and store input into IV. */ + buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + } + + if ( inbuflen >= blocksize ) + { + /* Save the current IV and then encrypt the IV. */ + buf_cpy( c->lastiv, c->u_iv.iv, blocksize ); + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + /* XOR the input with the IV and store input into IV */ + buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + if ( inbuflen ) + { + /* Save the current IV and then encrypt the IV. */ + buf_cpy( c->lastiv, c->u_iv.iv, blocksize ); + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + c->unused = blocksize; + /* Apply the XOR. 
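 Fewer than a full block remains, so only part of the freshly
         encrypted IV is consumed; c->unused records how many key-stream
         bytes stay buffered for the next call.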
*/ + c->unused -= inbuflen; + buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, inbuflen); + outbuf += inbuflen; + inbuf += inbuflen; + inbuflen = 0; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + unsigned char *ivp; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize = c->spec->blocksize; + size_t blocksize_x_2 = blocksize + blocksize; + unsigned int burn, nburn; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. */ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return GPG_ERR_INV_LENGTH; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if (inbuflen <= c->unused) + { + /* Short enough to be encoded by the remaining XOR mask. */ + /* XOR the input with the IV and store input into IV. */ + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor_n_copy(outbuf, ivp, inbuf, inbuflen); + c->unused -= inbuflen; + return 0; + } + + burn = 0; + + if (c->unused) + { + /* XOR the input with the IV and store input into IV. */ + inbuflen -= c->unused; + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor_n_copy(outbuf, ivp, inbuf, c->unused); + outbuf += c->unused; + inbuf += c->unused; + c->unused = 0; + } + + /* Now we can process complete blocks. We use a loop as long as we + have at least 2 blocks and use conditions for the rest. This + also allows to use a bulk encryption function if available. */ + if (inbuflen >= blocksize_x_2 && c->bulk.cfb_dec) + { + size_t nblocks = inbuflen / blocksize; + c->bulk.cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + outbuf += nblocks * blocksize; + inbuf += nblocks * blocksize; + inbuflen -= nblocks * blocksize; + } + else + { + while (inbuflen >= blocksize_x_2 ) + { + /* Encrypt the IV. */ + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + /* XOR the input with the IV and store input into IV. */ + buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + } + + if (inbuflen >= blocksize ) + { + /* Save the current IV and then encrypt the IV. */ + buf_cpy ( c->lastiv, c->u_iv.iv, blocksize); + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + /* XOR the input with the IV and store input into IV */ + buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + + if (inbuflen) + { + /* Save the current IV and then encrypt the IV. */ + buf_cpy ( c->lastiv, c->u_iv.iv, blocksize ); + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + c->unused = blocksize; + /* Apply the XOR. 
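 As in the encrypt path, the final partial block consumes only part
         of the encrypted IV; the unused key-stream bytes remain buffered
         for a subsequent call.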
*/ + c->unused -= inbuflen; + buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, inbuflen); + outbuf += inbuflen; + inbuf += inbuflen; + inbuflen = 0; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cfb8_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize = c->spec->blocksize; + unsigned int burn, nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + burn = 0; + + while ( inbuflen > 0) + { + int i; + + /* Encrypt the IV. */ + nburn = enc_fn ( &c->context.c, c->lastiv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + + outbuf[0] = c->lastiv[0] ^ inbuf[0]; + + /* Bitshift iv by 8 bit to the left */ + for (i = 0; i < blocksize-1; i++) + c->u_iv.iv[i] = c->u_iv.iv[i+1]; + + /* append cipher text to iv */ + c->u_iv.iv[blocksize-1] = outbuf[0]; + + outbuf += 1; + inbuf += 1; + inbuflen -= 1; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cfb8_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize = c->spec->blocksize; + unsigned int burn, nburn; + unsigned char appendee; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + burn = 0; + + while (inbuflen > 0) + { + int i; + + /* Encrypt the IV. */ + nburn = enc_fn ( &c->context.c, c->lastiv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + + /* inbuf might == outbuf, make sure we keep the value + so we can append it later */ + appendee = inbuf[0]; + + outbuf[0] = inbuf[0] ^ c->lastiv[0]; + + /* Bitshift iv by 8 bit to the left */ + for (i = 0; i < blocksize-1; i++) + c->u_iv.iv[i] = c->u_iv.iv[i+1]; + + c->u_iv.iv[blocksize-1] = appendee; + + outbuf += 1; + inbuf += 1; + inbuflen -= 1; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-cmac.c b/libotr/libgcrypt-1.8.7/cipher/cipher-cmac.c new file mode 100644 index 0000000..da3ef75 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-cmac.c @@ -0,0 +1,253 @@ +/* cmac.c - CMAC, Cipher-based MAC. + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "cipher.h" +#include "cipher-internal.h" +#include "bufhelp.h" + + +#define set_burn(burn, nburn) do { \ + unsigned int __nburn = (nburn); \ + (burn) = (burn) > __nburn ? 
(burn) : __nburn; } while (0) + + +static void +cmac_write (gcry_cipher_hd_t c, const byte * inbuf, size_t inlen) +{ + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + const unsigned int blocksize = c->spec->blocksize; + byte outbuf[MAX_BLOCKSIZE]; + unsigned int burn = 0; + unsigned int nblocks; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. */ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return; + + if (!inlen || !inbuf) + return; + + /* Last block is needed for cmac_final. */ + if (c->unused + inlen <= blocksize) + { + for (; inlen && c->unused < blocksize; inlen--) + c->lastiv[c->unused++] = *inbuf++; + return; + } + + if (c->unused) + { + for (; inlen && c->unused < blocksize; inlen--) + c->lastiv[c->unused++] = *inbuf++; + + buf_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize); + set_burn (burn, enc_fn (&c->context.c, c->u_iv.iv, c->u_iv.iv)); + + c->unused = 0; + } + + if (c->bulk.cbc_enc && inlen > blocksize) + { + nblocks = inlen / blocksize; + nblocks -= (nblocks * blocksize == inlen); + + c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks, 1); + inbuf += nblocks * blocksize; + inlen -= nblocks * blocksize; + + wipememory (outbuf, sizeof (outbuf)); + } + else + while (inlen > blocksize) + { + buf_xor (c->u_iv.iv, c->u_iv.iv, inbuf, blocksize); + set_burn (burn, enc_fn (&c->context.c, c->u_iv.iv, c->u_iv.iv)); + inlen -= blocksize; + inbuf += blocksize; + } + + /* Make sure that last block is passed to cmac_final. */ + if (inlen == 0) + BUG (); + + for (; inlen && c->unused < blocksize; inlen--) + c->lastiv[c->unused++] = *inbuf++; + + if (burn) + _gcry_burn_stack (burn + 4 * sizeof (void *)); +} + + +static void +cmac_generate_subkeys (gcry_cipher_hd_t c) +{ + const unsigned int blocksize = c->spec->blocksize; + byte rb, carry, t, bi; + unsigned int burn; + int i, j; + union + { + size_t _aligned; + byte buf[MAX_BLOCKSIZE]; + } u; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. */ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return; + + if (MAX_BLOCKSIZE < blocksize) + BUG (); + + /* encrypt zero block */ + memset (u.buf, 0, blocksize); + burn = c->spec->encrypt (&c->context.c, u.buf, u.buf); + + /* Currently supported blocksizes are 16 and 8. */ + rb = blocksize == 16 ? 0x87 : 0x1B /*blocksize == 8 */ ; + + for (j = 0; j < 2; j++) + { + /* Generate subkeys K1 and K2 */ + carry = 0; + for (i = blocksize - 1; i >= 0; i--) + { + bi = u.buf[i]; + t = carry | (bi << 1); + carry = bi >> 7; + u.buf[i] = t & 0xff; + c->u_mode.cmac.subkeys[j][i] = u.buf[i]; + } + u.buf[blocksize - 1] ^= carry ? rb : 0; + c->u_mode.cmac.subkeys[j][blocksize - 1] = u.buf[blocksize - 1]; + } + + wipememory (&u, sizeof (u)); + if (burn) + _gcry_burn_stack (burn + 4 * sizeof (void *)); +} + + +static void +cmac_final (gcry_cipher_hd_t c) +{ + const unsigned int blocksize = c->spec->blocksize; + unsigned int count = c->unused; + unsigned int burn; + byte *subkey; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. 
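 A complete final block is masked with subkey K1 below, while a
    partial block is padded with 0x80 and zeros and masked with K2, as
    specified for CMAC in NIST SP 800-38B.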
*/ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return; + + if (count == blocksize) + subkey = c->u_mode.cmac.subkeys[0]; /* K1 */ + else + { + subkey = c->u_mode.cmac.subkeys[1]; /* K2 */ + c->lastiv[count++] = 0x80; + while (count < blocksize) + c->lastiv[count++] = 0; + } + + buf_xor (c->lastiv, c->lastiv, subkey, blocksize); + + buf_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize); + burn = c->spec->encrypt (&c->context.c, c->u_iv.iv, c->u_iv.iv); + if (burn) + _gcry_burn_stack (burn + 4 * sizeof (void *)); + + c->unused = 0; +} + + +static gcry_err_code_t +cmac_tag (gcry_cipher_hd_t c, unsigned char *tag, size_t taglen, int check) +{ + if (!tag || taglen == 0 || taglen > c->spec->blocksize) + return GPG_ERR_INV_ARG; + + if (!c->u_mode.cmac.tag) + { + cmac_final (c); + c->u_mode.cmac.tag = 1; + } + + if (!check) + { + memcpy (tag, c->u_iv.iv, taglen); + return GPG_ERR_NO_ERROR; + } + else + { + return buf_eq_const (tag, c->u_iv.iv, taglen) ? + GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM; + } +} + + +gcry_err_code_t +_gcry_cipher_cmac_authenticate (gcry_cipher_hd_t c, + const unsigned char *abuf, size_t abuflen) +{ + if (abuflen > 0 && !abuf) + return GPG_ERR_INV_ARG; + if (c->u_mode.cmac.tag) + return GPG_ERR_INV_STATE; + /* To support new blocksize, update cmac_generate_subkeys() then add new + blocksize here. */ + if (c->spec->blocksize != 16 && c->spec->blocksize != 8) + return GPG_ERR_INV_CIPHER_MODE; + + cmac_write (c, abuf, abuflen); + + return GPG_ERR_NO_ERROR; +} + + +gcry_err_code_t +_gcry_cipher_cmac_get_tag (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen) +{ + return cmac_tag (c, outtag, taglen, 0); +} + + +gcry_err_code_t +_gcry_cipher_cmac_check_tag (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen) +{ + return cmac_tag (c, (unsigned char *) intag, taglen, 1); +} + +gcry_err_code_t +_gcry_cipher_cmac_set_subkeys (gcry_cipher_hd_t c) +{ + cmac_generate_subkeys (c); + + return GPG_ERR_NO_ERROR; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-ctr.c b/libotr/libgcrypt-1.8.7/cipher/cipher-ctr.c new file mode 100644 index 0000000..f9cb6b5 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-ctr.c @@ -0,0 +1,115 @@ +/* cipher-ctr.c - Generic CTR mode implementation + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +gcry_err_code_t +_gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + size_t n; + int i; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + unsigned int blocksize = c->spec->blocksize; + size_t nblocks; + unsigned int burn, nburn; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. */ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return GPG_ERR_INV_LENGTH; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + burn = 0; + + /* First process a left over encrypted counter. */ + if (c->unused) + { + gcry_assert (c->unused < blocksize); + i = blocksize - c->unused; + n = c->unused > inbuflen ? inbuflen : c->unused; + buf_xor(outbuf, inbuf, &c->lastiv[i], n); + c->unused -= n; + inbuf += n; + outbuf += n; + inbuflen -= n; + } + + /* Use a bulk method if available. */ + nblocks = inbuflen / blocksize; + if (nblocks && c->bulk.ctr_enc) + { + c->bulk.ctr_enc (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks); + inbuf += nblocks * blocksize; + outbuf += nblocks * blocksize; + inbuflen -= nblocks * blocksize; + } + + /* If we don't have a bulk method use the standard method. We also + use this method for the a remaining partial block. */ + if (inbuflen) + { + unsigned char tmp[MAX_BLOCKSIZE]; + + do { + nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr); + burn = nburn > burn ? nburn : burn; + + for (i = blocksize; i > 0; i--) + { + c->u_ctr.ctr[i-1]++; + if (c->u_ctr.ctr[i-1] != 0) + break; + } + + n = blocksize < inbuflen ? blocksize : inbuflen; + buf_xor(outbuf, inbuf, tmp, n); + + inbuflen -= n; + outbuf += n; + inbuf += n; + } while (inbuflen); + + /* Save the unused bytes of the counter. */ + c->unused = blocksize - n; + if (c->unused) + buf_cpy (c->lastiv+n, tmp+n, c->unused); + + wipememory (tmp, sizeof tmp); + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-armv8-aarch32-ce.S b/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-armv8-aarch32-ce.S new file mode 100644 index 0000000..1de66a1 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-armv8-aarch32-ce.S @@ -0,0 +1,433 @@ +/* cipher-gcm-armv8-aarch32-ce.S - ARM/CE accelerated GHASH + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) + +.syntax unified +.arch armv8-a +.fpu crypto-neon-fp-armv8 +.arm + +.text + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +/* Constants */ + +.align 4 +gcry_gcm_reduction_constant: +.Lrconst64: + .quad 0xc200000000000000 + + +/* Register macros */ + +#define rhash q0 +#define rhash_l d0 +#define rhash_h d1 + +#define rh1 q1 +#define rh1_l d2 +#define rh1_h d3 + +#define rbuf q2 +#define rbuf_l d4 +#define rbuf_h d5 + +#define rbuf1 q3 +#define rbuf1_l d6 +#define rbuf1_h d7 + +#define rbuf2 q4 +#define rbuf2_l d8 +#define rbuf2_h d9 + +#define rbuf3 q5 +#define rbuf3_l d10 +#define rbuf3_h d11 + +#define rh2 q6 +#define rh2_l d12 +#define rh2_h d13 + +#define rh3 q7 +#define rh3_l d14 +#define rh3_h d15 + +#define rh4 q8 +#define rh4_l d16 +#define rh4_h d17 + +#define rr2 q9 +#define rr2_l d18 +#define rr2_h d19 + +#define rr3 q10 +#define rr3_l d20 +#define rr3_h d21 + +#define rr0 q11 +#define rr0_l d22 +#define rr0_h d23 + +#define rr1 q12 +#define rr1_l d24 +#define rr1_h d25 + +#define rt0 q13 +#define rt0_l d26 +#define rt0_h d27 + +#define rt1 q14 +#define rt1_l d28 +#define rt1_h d29 + +#define rrconst q15 +#define rrconst_l d30 +#define rrconst_h d31 + +/* GHASH macros */ + +/* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in + * Cryptology — CT-RSA 2015" for details. + */ + +/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) + * Note: 'r1' may be 'a' or 'b', 'r0' must not be either 'a' or 'b'. + */ +#define PMUL_128x128(r0, r1, a, b, t, interleave_op) \ + veor t##_h, b##_l, b##_h; \ + veor t##_l, a##_l, a##_h; \ + vmull.p64 r0, a##_l, b##_l; \ + vmull.p64 r1, a##_h, b##_h; \ + vmull.p64 t, t##_h, t##_l; \ + interleave_op; \ + veor t, r0; \ + veor t, r1; \ + veor r0##_h, t##_l; \ + veor r1##_l, t##_h; + +/* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A) + * Note: 'r1A' may be 'aA' or 'bA', 'r0A' must not be either 'aA' or 'bA'. + * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B) + * Note: 'r1B' may be 'aB' or 'bB', 'r0B' must not be either 'aB' or 'bB'. + */ +#define PMUL_128x128_2(r0A, r1A, aA, bA, r0B, r1B, aB, bB, tA, tB, interleave_op) \ + veor tA##_h, bA##_l, bA##_h; \ + veor tA##_l, aA##_l, aA##_h; \ + veor tB##_h, bB##_l, bB##_h; \ + veor tB##_l, aB##_l, aB##_h; \ + vmull.p64 r0A, aA##_l, bA##_l; \ + vmull.p64 r1A, aA##_h, bA##_h; \ + vmull.p64 tA, tA##_h, tA##_l; \ + vmull.p64 r0B, aB##_l, bB##_l; \ + vmull.p64 r1B, aB##_h, bB##_h; \ + vmull.p64 tB, tB##_h, tB##_l; \ + interleave_op; \ + veor tA, r0A; \ + veor tA, r1A; \ + veor tB, r0B; \ + veor tB, r1B; \ + veor r0A##_h, tA##_l; \ + veor r1A##_l, tA##_h; \ + veor r0B##_h, tB##_l; \ + veor r1B##_l, tB##_h; \ + +/* Input: 'r0:r1', Output: 'a' */ +#define REDUCTION(a, r0, r1, rconst, t, interleave_op) \ + vmull.p64 t, r0##_l, rconst; \ + veor r0##_h, t##_l; \ + veor r1##_l, t##_h; \ + interleave_op; \ + vmull.p64 t, r0##_h, rconst; \ + veor r1, t; \ + veor a, r0, r1; + +#define _(...) 
__VA_ARGS__ +#define __ _() + +/* Other functional macros */ + +#define CLEAR_REG(reg) veor reg, reg; + + +/* + * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result, + * const byte *buf, size_t nblocks, + * void *gcm_table); + */ +.align 3 +.globl _gcry_ghash_armv8_ce_pmull +.type _gcry_ghash_armv8_ce_pmull,%function; +_gcry_ghash_armv8_ce_pmull: + /* input: + * r0: gcm_key + * r1: result/hash + * r2: buf + * r3: nblocks + * %st+0: gcm_table + */ + push {r4-r6, lr} + + cmp r3, #0 + beq .Ldo_nothing + + GET_DATA_POINTER(r4, .Lrconst64, lr) + + vld1.64 {rhash}, [r1] + vld1.64 {rh1}, [r0] + + vrev64.8 rhash, rhash /* byte-swap */ + vld1.64 {rrconst_h}, [r4] + vext.8 rhash, rhash, rhash, #8 + + cmp r3, #4 + blo .Less_than_4 + + /* Bulk processing of 4 blocks per loop iteration. */ + + ldr r5, [sp, #(4*4)]; + add r6, r5, #32 + + vpush {q4-q7} + + vld1.64 {rh2-rh3}, [r5] + vld1.64 {rh4}, [r6] + + vld1.64 {rbuf-rbuf1}, [r2]! + sub r3, r3, #4 + vld1.64 {rbuf2-rbuf3}, [r2]! + + cmp r3, #4 + vrev64.8 rbuf, rbuf /* byte-swap */ + vrev64.8 rbuf1, rbuf1 /* byte-swap */ + vrev64.8 rbuf2, rbuf2 /* byte-swap */ + vrev64.8 rbuf3, rbuf3 /* byte-swap */ + + vext.8 rbuf, rbuf, rbuf, #8 + vext.8 rbuf1, rbuf1, rbuf1, #8 + vext.8 rbuf2, rbuf2, rbuf2, #8 + vext.8 rbuf3, rbuf3, rbuf3, #8 + veor rhash, rhash, rbuf /* in0 ^ hash */ + + blo .Lend_4 + +.Loop_4: + /* (in0 ^ hash) * H⁴ => rr2:rr3 */ + /* (in1) * H³ => rr0:rr1 */ + PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __) + + vld1.64 {rbuf-rbuf1}, [r2]! + sub r3, r3, #4 + veor rr0, rr0, rr2 + veor rr1, rr1, rr3 + + /* (in2) * H² => rr2:rr3 */ + /* (in3) * H¹ => rhash:rbuf3 */ + PMUL_128x128_2(rr2, rr3, rbuf2, rh2, rhash, rbuf3, rbuf3, rh1, rt0, rt1, + _(vrev64.8 rbuf, rbuf)) + + vld1.64 {rbuf2}, [r2]! + + vrev64.8 rbuf1, rbuf1 + veor rr0, rr0, rr2 + veor rr1, rr1, rr3 + + cmp r3, #4 + vext.8 rbuf, rbuf, rbuf, #8 + vext.8 rbuf1, rbuf1, rbuf1, #8 + + veor rr0, rr0, rhash + veor rr1, rr1, rbuf3 + + vld1.64 {rbuf3}, [r2]! + + REDUCTION(rhash, rr0, rr1, rrconst_h, rt1, + _(vrev64.8 rbuf2, rbuf2; + vrev64.8 rbuf3, rbuf3)) + + vext.8 rbuf2, rbuf2, rbuf2, #8 + vext.8 rbuf3, rbuf3, rbuf3, #8 + veor rhash, rhash, rbuf /* in0 ^ hash */ + + bhs .Loop_4 + +.Lend_4: + /* (in0 ^ hash) * H⁴ => rr2:rr3 */ + /* (in1) * H³ => rr0:rr1 */ + PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __) + + /* (in2) * H² => rhash:rbuf */ + /* (in3) * H¹ => rbuf1:rbuf2 */ + PMUL_128x128_2(rhash, rbuf, rbuf2, rh2, rbuf1, rbuf2, rbuf3, rh1, rt0, rt1, + _(veor rr0, rr0, rr2; + veor rr1, rr1, rr3)) + + veor rr0, rr0, rhash + veor rr1, rr1, rbuf + + veor rr0, rr0, rbuf1 + veor rr1, rr1, rbuf2 + + REDUCTION(rhash, rr0, rr1, rrconst_h, rt1, + _(CLEAR_REG(rr2); + CLEAR_REG(rr3); + CLEAR_REG(rbuf1); + CLEAR_REG(rbuf2); + CLEAR_REG(rbuf3); + CLEAR_REG(rh2); + CLEAR_REG(rh3); + CLEAR_REG(rh4))) + + vpop {q4-q7} + + cmp r3, #0 + beq .Ldone + +.Less_than_4: + /* Handle remaining blocks. */ + + vld1.64 {rbuf}, [r2]! + subs r3, r3, #1 + + vrev64.8 rbuf, rbuf /* byte-swap */ + vext.8 rbuf, rbuf, rbuf, #8 + + veor rhash, rhash, rbuf + + beq .Lend + +.Loop: + vld1.64 {rbuf}, [r2]! 
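+        /* One block per iteration: multiply the accumulated value by H
+         * and reduce; the interleave slots byte-swap and rotate the
+         * freshly loaded block before it is folded into the hash. */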
+ subs r3, r3, #1 + PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(vrev64.8 rbuf, rbuf)) + REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(vext.8 rbuf, rbuf, rbuf, #8)) + veor rhash, rhash, rbuf + + bne .Loop + +.Lend: + PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(CLEAR_REG(rbuf))) + REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(CLEAR_REG(rh1))) + +.Ldone: + CLEAR_REG(rr1) + vrev64.8 rhash, rhash /* byte-swap */ + CLEAR_REG(rt0) + CLEAR_REG(rr0) + vext.8 rhash, rhash, rhash, #8 + CLEAR_REG(rt1) + vst1.64 {rhash}, [r1] + CLEAR_REG(rhash) + +.Ldo_nothing: + mov r0, #0 + pop {r4-r6, pc} +.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; + + +/* + * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table); + */ +.align 3 +.globl _gcry_ghash_setup_armv8_ce_pmull +.type _gcry_ghash_setup_armv8_ce_pmull,%function; +_gcry_ghash_setup_armv8_ce_pmull: + /* input: + * r0: gcm_key + * r1: gcm_table + */ + + vpush {q4-q7} + + GET_DATA_POINTER(r2, .Lrconst64, r3) + + vld1.64 {rrconst_h}, [r2] + +#define GCM_LSH_1(r_out, ia, ib, const_d, oa, ob, ma) \ + /* H <<< 1 */ \ + vshr.s64 ma, ib, #63; \ + vshr.u64 oa, ib, #63; \ + vshr.u64 ob, ia, #63; \ + vand ma, const_d; \ + vshl.u64 ib, ib, #1; \ + vshl.u64 ia, ia, #1; \ + vorr ob, ib; \ + vorr oa, ia; \ + veor ob, ma; \ + vst1.64 {oa, ob}, [r_out] + + vld1.64 {rhash}, [r0] + vrev64.8 rhash, rhash /* byte-swap */ + vext.8 rhash, rhash, rhash, #8 + + vmov rbuf1, rhash + GCM_LSH_1(r0, rhash_l, rhash_h, rrconst_h, rh1_l, rh1_h, rt1_l) /* H<<<1 */ + + /* H² */ + PMUL_128x128(rr0, rr1, rbuf1, rh1, rt0, __) + REDUCTION(rh2, rr0, rr1, rrconst_h, rt0, __) + vmov rhash, rh2 + GCM_LSH_1(r1, rh2_l, rh2_h, rrconst_h, rbuf1_l, rbuf1_h, rt1_l) /* H²<<<1 */ + add r1, r1, #16 + + /* H³ */ + PMUL_128x128(rr0, rr1, rhash, rh1, rt1, __) + REDUCTION(rh3, rr0, rr1, rrconst_h, rt1, __) + + /* H⁴ */ + PMUL_128x128(rr0, rr1, rhash, rbuf1, rt0, __) + REDUCTION(rh4, rr0, rr1, rrconst_h, rt0, __) + + GCM_LSH_1(r1, rh3_l, rh3_h, rrconst_h, rt0_l, rt0_h, rt1_l) /* H³<<<1 */ + add r1, r1, #16 + GCM_LSH_1(r1, rh4_l, rh4_h, rrconst_h, rt0_l, rt0_h, rt1_l) /* H⁴<<<1 */ + + CLEAR_REG(rt0) + CLEAR_REG(rt1) + CLEAR_REG(rr1) + CLEAR_REG(rr0) + CLEAR_REG(rh1) + CLEAR_REG(rh2) + CLEAR_REG(rh3) + CLEAR_REG(rh4) + CLEAR_REG(rhash) + CLEAR_REG(rbuf1) + CLEAR_REG(rrconst) + vpop {q4-q7} + bx lr +.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-armv8-aarch64-ce.S b/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-armv8-aarch64-ce.S new file mode 100644 index 0000000..21f6037 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -0,0 +1,415 @@ +/* cipher-gcm-armv8-aarch64-ce.S - ARM/CE accelerated GHASH + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#if defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) + +.cpu generic+simd+crypto + +.text + +#define GET_DATA_POINTER(reg, name) \ + adrp reg, :got:name ; \ + ldr reg, [reg, #:got_lo12:name] ; + + +/* Constants */ + +.align 4 +gcry_gcm_reduction_constant: +.Lrconst: + .quad 0x87 + + +/* Register macros */ + +#define rhash v0 +#define rr0 v1 +#define rr1 v2 +#define rbuf v3 +#define rbuf1 v4 +#define rbuf2 v5 +#define rbuf3 v6 +#define rbuf4 v7 +#define rbuf5 v8 +#define rr2 v9 +#define rr3 v10 +#define rr4 v11 +#define rr5 v12 +#define rr6 v13 +#define rr7 v14 +#define rr8 v15 +#define rr9 v16 + +#define rrconst v18 +#define rh1 v19 +#define rh2 v20 +#define rh3 v21 +#define rh4 v22 +#define rh5 v23 +#define rh6 v24 +#define t0 v25 +#define t1 v26 +#define t2 v27 +#define t3 v28 +#define t4 v29 +#define t5 v30 +#define vZZ v31 + +/* GHASH macros */ + +/* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in + * Cryptology — CT-RSA 2015" for details. + */ + +/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) */ +#define PMUL_128x128(r0, r1, a, b, T0, T1, interleave_op) \ + ext T0.16b, b.16b, b.16b, #8; \ + pmull r0.1q, a.1d, b.1d; \ + pmull2 r1.1q, a.2d, b.2d; \ + pmull T1.1q, a.1d, T0.1d; \ + pmull2 T0.1q, a.2d, T0.2d; \ + interleave_op; \ + eor T0.16b, T0.16b, T1.16b; \ + ext T1.16b, vZZ.16b, T0.16b, #8; \ + ext T0.16b, T0.16b, vZZ.16b, #8; \ + eor r0.16b, r0.16b, T1.16b; \ + eor r1.16b, r1.16b, T0.16b; + +/* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A) + * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B) + * Input: 'aC' and 'bC', Output: 'r0C:r1C' (low 128-bits in r0C, high in r1C) + */ +#define PMUL_128x128_3(r0A, r1A, aA, bA, t0A, t1A, \ + r0B, r1B, aB, bB, t0B, t1B, \ + r0C, r1C, aC, bC, t0C, t1C, interleave_op) \ + ext t0A.16b, bA.16b, bA.16b, #8; \ + pmull r0A.1q, aA.1d, bA.1d; \ + pmull2 r1A.1q, aA.2d, bA.2d; \ + ext t0B.16b, bB.16b, bB.16b, #8; \ + pmull r0B.1q, aB.1d, bB.1d; \ + pmull2 r1B.1q, aB.2d, bB.2d; \ + ext t0C.16b, bC.16b, bC.16b, #8; \ + pmull r0C.1q, aC.1d, bC.1d; \ + pmull2 r1C.1q, aC.2d, bC.2d; \ + pmull t1A.1q, aA.1d, t0A.1d; \ + pmull2 t0A.1q, aA.2d, t0A.2d; \ + pmull t1B.1q, aB.1d, t0B.1d; \ + pmull2 t0B.1q, aB.2d, t0B.2d; \ + pmull t1C.1q, aC.1d, t0C.1d; \ + pmull2 t0C.1q, aC.2d, t0C.2d; \ + eor t0A.16b, t0A.16b, t1A.16b; \ + eor t0B.16b, t0B.16b, t1B.16b; \ + eor t0C.16b, t0C.16b, t1C.16b; \ + interleave_op; \ + ext t1A.16b, vZZ.16b, t0A.16b, #8; \ + ext t0A.16b, t0A.16b, vZZ.16b, #8; \ + ext t1B.16b, vZZ.16b, t0B.16b, #8; \ + ext t0B.16b, t0B.16b, vZZ.16b, #8; \ + ext t1C.16b, vZZ.16b, t0C.16b, #8; \ + ext t0C.16b, t0C.16b, vZZ.16b, #8; \ + eor r0A.16b, r0A.16b, t1A.16b; \ + eor r1A.16b, r1A.16b, t0A.16b; \ + eor r0B.16b, r0B.16b, t1B.16b; \ + eor r1B.16b, r1B.16b, t0B.16b; \ + eor r0C.16b, r0C.16b, t1C.16b; \ + eor r1C.16b, r1C.16b, t0C.16b; \ + +/* Input: 'r0:r1', Output: 'a' */ +#define REDUCTION(a, r0, r1, rconst, T0, T1, interleave_op1, interleave_op2, \ + interleave_op3) \ + pmull2 T0.1q, r1.2d, rconst.2d; \ + interleave_op1; \ + ext T1.16b, T0.16b, vZZ.16b, #8; \ + ext T0.16b, vZZ.16b, T0.16b, #8; \ + interleave_op2; \ + eor r1.16b, r1.16b, T1.16b; \ + eor r0.16b, r0.16b, T0.16b; \ + pmull T0.1q, r1.1d, rconst.1d; \ + interleave_op3; \ + eor a.16b, r0.16b, T0.16b; + +/* Other functional macros */ + +#define _(...) 
__VA_ARGS__ +#define __ _() + +#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; + +#define VPUSH_ABI \ + stp d8, d9, [sp, #-16]!; \ + stp d10, d11, [sp, #-16]!; \ + stp d12, d13, [sp, #-16]!; \ + stp d14, d15, [sp, #-16]!; + +#define VPOP_ABI \ + ldp d14, d15, [sp], #16; \ + ldp d12, d13, [sp], #16; \ + ldp d10, d11, [sp], #16; \ + ldp d8, d9, [sp], #16; + +/* + * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result, + * const byte *buf, size_t nblocks, + * void *gcm_table); + */ +.align 3 +.globl _gcry_ghash_armv8_ce_pmull +.type _gcry_ghash_armv8_ce_pmull,%function; +_gcry_ghash_armv8_ce_pmull: + /* input: + * x0: gcm_key + * x1: result/hash + * x2: buf + * x3: nblocks + * x4: gcm_table + */ + cbz x3, .Ldo_nothing; + + GET_DATA_POINTER(x5, .Lrconst) + + eor vZZ.16b, vZZ.16b, vZZ.16b + ld1 {rhash.16b}, [x1] + ld1 {rh1.16b}, [x0] + + rbit rhash.16b, rhash.16b /* bit-swap */ + ld1r {rrconst.2d}, [x5] + + cmp x3, #6 + b.lo .Less_than_6 + + add x6, x4, #64 + VPUSH_ABI + + ld1 {rh2.16b-rh5.16b}, [x4] + ld1 {rh6.16b}, [x6] + + sub x3, x3, #6 + + ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16) + ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16) + + rbit rbuf.16b, rbuf.16b /* bit-swap */ + rbit rbuf1.16b, rbuf1.16b /* bit-swap */ + rbit rbuf2.16b, rbuf2.16b /* bit-swap */ + rbit rbuf3.16b, rbuf3.16b /* bit-swap */ + rbit rbuf4.16b, rbuf4.16b /* bit-swap */ + rbit rbuf5.16b, rbuf5.16b /* bit-swap */ + eor rhash.16b, rhash.16b, rbuf.16b + + cmp x3, #6 + b.lo .Lend_6 + +.Loop_6: + + /* (in1) * H⁵ => rr0:rr1 */ + /* (in2) * H⁴ => rr2:rr3 */ + /* (in0 ^ hash) * H⁶ => rr4:rr5 */ + PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1, + rr2, rr3, rbuf2, rh4, t2, t3, + rr4, rr5, rhash, rh6, t4, t5, + _(sub x3, x3, #6)) + + ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16) + cmp x3, #6 + + eor rr0.16b, rr0.16b, rr2.16b + eor rr1.16b, rr1.16b, rr3.16b + + /* (in3) * H³ => rr2:rr3 */ + /* (in4) * H² => rr6:rr7 */ + /* (in5) * H¹ => rr8:rr9 */ + PMUL_128x128_3(rr2, rr3, rbuf3, rh3, t0, t1, + rr6, rr7, rbuf4, rh2, t2, t3, + rr8, rr9, rbuf5, rh1, t4, t5, + _(eor rr0.16b, rr0.16b, rr4.16b; + eor rr1.16b, rr1.16b, rr5.16b)) + + eor rr0.16b, rr0.16b, rr2.16b + eor rr1.16b, rr1.16b, rr3.16b + rbit rbuf.16b, rbuf.16b + eor rr0.16b, rr0.16b, rr6.16b + eor rr1.16b, rr1.16b, rr7.16b + rbit rbuf1.16b, rbuf1.16b + eor rr0.16b, rr0.16b, rr8.16b + eor rr1.16b, rr1.16b, rr9.16b + ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16) + + REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, + _(rbit rbuf2.16b, rbuf2.16b), + _(rbit rbuf3.16b, rbuf3.16b), + _(rbit rbuf4.16b, rbuf4.16b)) + + rbit rbuf5.16b, rbuf5.16b + eor rhash.16b, rhash.16b, rbuf.16b + + b.hs .Loop_6 + +.Lend_6: + + /* (in1) * H⁵ => rr0:rr1 */ + /* (in0 ^ hash) * H⁶ => rr2:rr3 */ + /* (in2) * H⁴ => rr4:rr5 */ + PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1, + rr2, rr3, rhash, rh6, t2, t3, + rr4, rr5, rbuf2, rh4, t4, t5, + __) + eor rr0.16b, rr0.16b, rr2.16b + eor rr1.16b, rr1.16b, rr3.16b + eor rr0.16b, rr0.16b, rr4.16b + eor rr1.16b, rr1.16b, rr5.16b + + /* (in3) * H³ => rhash:rbuf */ + /* (in4) * H² => rr6:rr7 */ + /* (in5) * H¹ => rr8:rr9 */ + PMUL_128x128_3(rhash, rbuf, rbuf3, rh3, t0, t1, + rr6, rr7, rbuf4, rh2, t2, t3, + rr8, rr9, rbuf5, rh1, t4, t5, + _(CLEAR_REG(rh4); + CLEAR_REG(rh5); + CLEAR_REG(rh6))) + eor rr0.16b, rr0.16b, rhash.16b + eor rr1.16b, rr1.16b, rbuf.16b + eor rr0.16b, rr0.16b, rr6.16b + eor rr1.16b, rr1.16b, rr7.16b + eor rr0.16b, rr0.16b, rr8.16b + eor rr1.16b, rr1.16b, rr9.16b + + REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, + _(CLEAR_REG(rh2); + CLEAR_REG(rh3); + 
CLEAR_REG(rr2); + CLEAR_REG(rbuf2); + CLEAR_REG(rbuf3)), + _(CLEAR_REG(rr3); + CLEAR_REG(rr4); + CLEAR_REG(rr5); + CLEAR_REG(rr6); + CLEAR_REG(rr7)), + _(CLEAR_REG(rr8); + CLEAR_REG(rr9); + CLEAR_REG(rbuf1); + CLEAR_REG(rbuf2))) + + CLEAR_REG(rbuf4) + CLEAR_REG(rbuf5) + CLEAR_REG(t2) + CLEAR_REG(t3) + CLEAR_REG(t4) + CLEAR_REG(t5) + + VPOP_ABI + + cbz x3, .Ldone + +.Less_than_6: + /* Handle remaining blocks. */ + + ld1 {rbuf.16b}, [x2], #16 + sub x3, x3, #1 + + rbit rbuf.16b, rbuf.16b /* bit-swap */ + + eor rhash.16b, rhash.16b, rbuf.16b + + cbz x3, .Lend + +.Loop: + PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(ld1 {rbuf.16b}, [x2], #16)) + REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, + _(sub x3, x3, #1), + _(rbit rbuf.16b, rbuf.16b), + __) + eor rhash.16b, rhash.16b, rbuf.16b + + cbnz x3, .Loop + +.Lend: + PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(CLEAR_REG(rbuf))) + REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, __, _(CLEAR_REG(rh1)), __) + +.Ldone: + CLEAR_REG(rr1) + CLEAR_REG(rr0) + rbit rhash.16b, rhash.16b /* bit-swap */ + CLEAR_REG(t0) + CLEAR_REG(t1) + + st1 {rhash.2d}, [x1] + CLEAR_REG(rhash) + +.Ldo_nothing: + mov x0, #0 + ret +.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; + + +/* + * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table); + */ +.align 3 +.globl _gcry_ghash_setup_armv8_ce_pmull +.type _gcry_ghash_setup_armv8_ce_pmull,%function; +_gcry_ghash_setup_armv8_ce_pmull: + /* input: + * x0: gcm_key + * x1: gcm_table + */ + + GET_DATA_POINTER(x2, .Lrconst) + + eor vZZ.16b, vZZ.16b, vZZ.16b + + /* H¹ */ + ld1 {rh1.16b}, [x0] + rbit rh1.16b, rh1.16b + st1 {rh1.16b}, [x0] + + ld1r {rrconst.2d}, [x2] + + /* H² */ + PMUL_128x128(rr0, rr1, rh1, rh1, t0, t1, __) + REDUCTION(rh2, rr0, rr1, rrconst, t0, t1, __, __, __) + + /* H³ */ + PMUL_128x128(rr0, rr1, rh2, rh1, t0, t1, __) + REDUCTION(rh3, rr0, rr1, rrconst, t0, t1, __, __, __) + + /* H⁴ */ + PMUL_128x128(rr0, rr1, rh2, rh2, t0, t1, __) + REDUCTION(rh4, rr0, rr1, rrconst, t0, t1, __, __, __) + + /* H⁵ */ + PMUL_128x128(rr0, rr1, rh2, rh3, t0, t1, __) + REDUCTION(rh5, rr0, rr1, rrconst, t0, t1, __, __, __) + + /* H⁶ */ + PMUL_128x128(rr0, rr1, rh3, rh3, t0, t1, __) + REDUCTION(rh6, rr0, rr1, rrconst, t0, t1, __, __, __) + + st1 {rh2.16b-rh4.16b}, [x1], #(3*16) + st1 {rh5.16b-rh6.16b}, [x1] + + ret +.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-intel-pclmul.c b/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-intel-pclmul.c new file mode 100644 index 0000000..a327249 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-gcm-intel-pclmul.c @@ -0,0 +1,474 @@ +/* cipher-gcm-intel-pclmul.c - Intel PCLMUL accelerated Galois Counter Mode + * implementation + * Copyright (C) 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +#ifdef GCM_USE_INTEL_PCLMUL + + +#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ +/* Prevent compiler from issuing SSE instructions between asm blocks. */ +# pragma GCC target("no-sse") +#endif + + +/* + Intel PCLMUL ghash based on white paper: + "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the + GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis. + */ +static inline void gfmul_pclmul(void) +{ + /* Input: XMM0 and XMM1, Output: XMM1. Input XMM0 stays unmodified. + Input must be converted to little-endian. + */ + asm volatile (/* gfmul, xmm0 has operator a and xmm1 has operator b. */ + "pshufd $78, %%xmm0, %%xmm2\n\t" + "pshufd $78, %%xmm1, %%xmm4\n\t" + "pxor %%xmm0, %%xmm2\n\t" /* xmm2 holds a0+a1 */ + "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds b0+b1 */ + + "movdqa %%xmm0, %%xmm3\n\t" + "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds a0*b0 */ + "movdqa %%xmm0, %%xmm6\n\t" + "pclmulqdq $17, %%xmm1, %%xmm6\n\t" /* xmm6 holds a1*b1 */ + "movdqa %%xmm3, %%xmm5\n\t" + "pclmulqdq $0, %%xmm2, %%xmm4\n\t" /* xmm4 holds (a0+a1)*(b0+b1) */ + + "pxor %%xmm6, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ + "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ + "movdqa %%xmm4, %%xmm5\n\t" + "psrldq $8, %%xmm4\n\t" + "pslldq $8, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm4, %%xmm6\n\t" /* <xmm6:xmm3> holds the result of the + carry-less multiplication of xmm0 + by xmm1 */ + + /* shift the result by one bit position to the left cope for + the fact that bits are reversed */ + "movdqa %%xmm3, %%xmm4\n\t" + "movdqa %%xmm6, %%xmm5\n\t" + "pslld $1, %%xmm3\n\t" + "pslld $1, %%xmm6\n\t" + "psrld $31, %%xmm4\n\t" + "psrld $31, %%xmm5\n\t" + "movdqa %%xmm4, %%xmm1\n\t" + "pslldq $4, %%xmm5\n\t" + "pslldq $4, %%xmm4\n\t" + "psrldq $12, %%xmm1\n\t" + "por %%xmm4, %%xmm3\n\t" + "por %%xmm5, %%xmm6\n\t" + "por %%xmm6, %%xmm1\n\t" + + /* first phase of the reduction */ + "movdqa %%xmm3, %%xmm6\n\t" + "movdqa %%xmm3, %%xmm7\n\t" + "pslld $31, %%xmm6\n\t" /* packed right shifting << 31 */ + "movdqa %%xmm3, %%xmm5\n\t" + "pslld $30, %%xmm7\n\t" /* packed right shifting shift << 30 */ + "pslld $25, %%xmm5\n\t" /* packed right shifting shift << 25 */ + "pxor %%xmm7, %%xmm6\n\t" /* xor the shifted versions */ + "pxor %%xmm5, %%xmm6\n\t" + "movdqa %%xmm6, %%xmm7\n\t" + "pslldq $12, %%xmm6\n\t" + "psrldq $4, %%xmm7\n\t" + "pxor %%xmm6, %%xmm3\n\t" /* first phase of the reduction + complete */ + + /* second phase of the reduction */ + "movdqa %%xmm3, %%xmm2\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "psrld $1, %%xmm2\n\t" /* packed left shifting >> 1 */ + "movdqa %%xmm3, %%xmm5\n\t" + "psrld $2, %%xmm4\n\t" /* packed left shifting >> 2 */ + "psrld $7, %%xmm5\n\t" /* packed left shifting >> 7 */ + "pxor %%xmm4, %%xmm2\n\t" /* xor the shifted versions */ + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm7, %%xmm2\n\t" + "pxor %%xmm2, %%xmm3\n\t" + "pxor %%xmm3, %%xmm1\n\t" /* the result is in xmm1 */ + ::: "cc" ); +} + + +#ifdef __x86_64__ +static inline void gfmul_pclmul_aggr4(void) +{ + /* Input: + H¹: XMM0 X_i : XMM6 + H²: XMM8 X_(i-1) : XMM3 + H³: XMM9 X_(i-2) : XMM2 + H⁴: XMM10 X_(i-3)⊕Y_(i-4): XMM1 + Output: + Y_i: XMM1 + Inputs 
XMM0 stays unmodified.
+     Input must be converted to little-endian.
+   */
+  asm volatile (/* perform clmul and merge results... */
+                "pshufd $78, %%xmm10, %%xmm11\n\t"
+                "pshufd $78, %%xmm1, %%xmm12\n\t"
+                "pxor %%xmm10, %%xmm11\n\t" /* xmm11 holds 4:a0+a1 */
+                "pxor %%xmm1, %%xmm12\n\t" /* xmm12 holds 4:b0+b1 */
+
+                "pshufd $78, %%xmm9, %%xmm13\n\t"
+                "pshufd $78, %%xmm2, %%xmm14\n\t"
+                "pxor %%xmm9, %%xmm13\n\t" /* xmm13 holds 3:a0+a1 */
+                "pxor %%xmm2, %%xmm14\n\t" /* xmm14 holds 3:b0+b1 */
+
+                "pshufd $78, %%xmm8, %%xmm5\n\t"
+                "pshufd $78, %%xmm3, %%xmm15\n\t"
+                "pxor %%xmm8, %%xmm5\n\t" /* xmm5 holds 2:a0+a1 */
+                "pxor %%xmm3, %%xmm15\n\t" /* xmm15 holds 2:b0+b1 */
+
+                "movdqa %%xmm10, %%xmm4\n\t"
+                "movdqa %%xmm9, %%xmm7\n\t"
+                "pclmulqdq $0, %%xmm1, %%xmm4\n\t"   /* xmm4 holds 4:a0*b0 */
+                "pclmulqdq $0, %%xmm2, %%xmm7\n\t"   /* xmm7 holds 3:a0*b0 */
+                "pclmulqdq $17, %%xmm10, %%xmm1\n\t" /* xmm1 holds 4:a1*b1 */
+                "pclmulqdq $17, %%xmm9, %%xmm2\n\t"  /* xmm2 holds 3:a1*b1 */
+                "pclmulqdq $0, %%xmm11, %%xmm12\n\t" /* xmm12 holds 4:(a0+a1)*(b0+b1) */
+                "pclmulqdq $0, %%xmm13, %%xmm14\n\t" /* xmm14 holds 3:(a0+a1)*(b0+b1) */
+
+                "pshufd $78, %%xmm0, %%xmm10\n\t"
+                "pshufd $78, %%xmm6, %%xmm11\n\t"
+                "pxor %%xmm0, %%xmm10\n\t" /* xmm10 holds 1:a0+a1 */
+                "pxor %%xmm6, %%xmm11\n\t" /* xmm11 holds 1:b0+b1 */
+
+                "pxor %%xmm4, %%xmm7\n\t"   /* xmm7 holds 3+4:a0*b0 */
+                "pxor %%xmm2, %%xmm1\n\t"   /* xmm1 holds 3+4:a1*b1 */
+                "pxor %%xmm14, %%xmm12\n\t" /* xmm12 holds 3+4:(a0+a1)*(b0+b1) */
+
+                "movdqa %%xmm8, %%xmm13\n\t"
+                "pclmulqdq $0, %%xmm3, %%xmm13\n\t"  /* xmm13 holds 2:a0*b0 */
+                "pclmulqdq $17, %%xmm8, %%xmm3\n\t"  /* xmm3 holds 2:a1*b1 */
+                "pclmulqdq $0, %%xmm5, %%xmm15\n\t"  /* xmm15 holds 2:(a0+a1)*(b0+b1) */
+
+                "pxor %%xmm13, %%xmm7\n\t" /* xmm7 holds 2+3+4:a0*b0 */
+                "pxor %%xmm3, %%xmm1\n\t"  /* xmm1 holds 2+3+4:a1*b1 */
+                "pxor %%xmm15, %%xmm12\n\t" /* xmm12 holds 2+3+4:(a0+a1)*(b0+b1) */
+
+                "movdqa %%xmm0, %%xmm3\n\t"
+                "pclmulqdq $0, %%xmm6, %%xmm3\n\t"  /* xmm3 holds 1:a0*b0 */
+                "pclmulqdq $17, %%xmm0, %%xmm6\n\t" /* xmm6 holds 1:a1*b1 */
+                "movdqa %%xmm11, %%xmm4\n\t"
+                "pclmulqdq $0, %%xmm10, %%xmm4\n\t" /* xmm4 holds 1:(a0+a1)*(b0+b1) */
+
+                "pxor %%xmm7, %%xmm3\n\t"  /* xmm3 holds 1+2+3+4:a0*b0 */
+                "pxor %%xmm1, %%xmm6\n\t"  /* xmm6 holds 1+2+3+4:a1*b1 */
+                "pxor %%xmm12, %%xmm4\n\t" /* xmm4 holds 1+2+3+4:(a0+a1)*(b0+b1) */
+
+                /* aggregated reduction...
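 The four products above were formed with the Karatsuba split
                   (a0*b0, a1*b1, (a0+a1)*(b0+b1)), so a single 256-bit to
                   128-bit reduction now covers all four blocks at once.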
*/ + "movdqa %%xmm3, %%xmm5\n\t" + "pxor %%xmm6, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ + "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ + "movdqa %%xmm4, %%xmm5\n\t" + "psrldq $8, %%xmm4\n\t" + "pslldq $8, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm4, %%xmm6\n\t" /* <xmm6:xmm3> holds the result of the + carry-less multiplication of xmm0 + by xmm1 */ + + /* shift the result by one bit position to the left cope for + the fact that bits are reversed */ + "movdqa %%xmm3, %%xmm4\n\t" + "movdqa %%xmm6, %%xmm5\n\t" + "pslld $1, %%xmm3\n\t" + "pslld $1, %%xmm6\n\t" + "psrld $31, %%xmm4\n\t" + "psrld $31, %%xmm5\n\t" + "movdqa %%xmm4, %%xmm1\n\t" + "pslldq $4, %%xmm5\n\t" + "pslldq $4, %%xmm4\n\t" + "psrldq $12, %%xmm1\n\t" + "por %%xmm4, %%xmm3\n\t" + "por %%xmm5, %%xmm6\n\t" + "por %%xmm6, %%xmm1\n\t" + + /* first phase of the reduction */ + "movdqa %%xmm3, %%xmm6\n\t" + "movdqa %%xmm3, %%xmm7\n\t" + "pslld $31, %%xmm6\n\t" /* packed right shifting << 31 */ + "movdqa %%xmm3, %%xmm5\n\t" + "pslld $30, %%xmm7\n\t" /* packed right shifting shift << 30 */ + "pslld $25, %%xmm5\n\t" /* packed right shifting shift << 25 */ + "pxor %%xmm7, %%xmm6\n\t" /* xor the shifted versions */ + "pxor %%xmm5, %%xmm6\n\t" + "movdqa %%xmm6, %%xmm7\n\t" + "pslldq $12, %%xmm6\n\t" + "psrldq $4, %%xmm7\n\t" + "pxor %%xmm6, %%xmm3\n\t" /* first phase of the reduction + complete */ + + /* second phase of the reduction */ + "movdqa %%xmm3, %%xmm2\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "psrld $1, %%xmm2\n\t" /* packed left shifting >> 1 */ + "movdqa %%xmm3, %%xmm5\n\t" + "psrld $2, %%xmm4\n\t" /* packed left shifting >> 2 */ + "psrld $7, %%xmm5\n\t" /* packed left shifting >> 7 */ + "pxor %%xmm4, %%xmm2\n\t" /* xor the shifted versions */ + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm7, %%xmm2\n\t" + "pxor %%xmm2, %%xmm3\n\t" + "pxor %%xmm3, %%xmm1\n\t" /* the result is in xmm1 */ + :::"cc"); +} +#endif + + +void +_gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) +{ + u64 tmp[2]; +#if defined(__x86_64__) && defined(__WIN64__) + char win64tmp[3 * 16]; + + /* XMM6-XMM8 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + "movdqu %%xmm8, 2*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif + + /* Swap endianness of hsub. */ + tmp[0] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 8); + tmp[1] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 0); + buf_cpy (c->u_mode.gcm.u_ghash_key.key, tmp, GCRY_GCM_BLOCK_LEN); + +#ifdef __x86_64__ + asm volatile ("movdqu %[h_1], %%xmm0\n\t" + "movdqa %%xmm0, %%xmm1\n\t" + : + : [h_1] "m" (*tmp)); + + gfmul_pclmul (); /* H•H => H² */ + + asm volatile ("movdqu %%xmm1, 0*16(%[h_234])\n\t" + "movdqa %%xmm1, %%xmm8\n\t" + : + : [h_234] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + + gfmul_pclmul (); /* H•H² => H³ */ + + asm volatile ("movdqa %%xmm8, %%xmm0\n\t" + "movdqu %%xmm1, 1*16(%[h_234])\n\t" + "movdqa %%xmm8, %%xmm1\n\t" + : + : [h_234] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + + gfmul_pclmul (); /* H²•H² => H⁴ */ + + asm volatile ("movdqu %%xmm1, 2*16(%[h_234])\n\t" + : + : [h_234] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + +#ifdef __WIN64__ + /* Clear/restore used registers. 
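 xmm0..xmm5 are wiped so no key-derived material is left behind;
     the callee-saved xmm6..xmm8 are restored from the win64tmp save
     area.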
*/ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + "movdqu 2*16(%0), %%xmm8\n\t" + : + : "r" (win64tmp) + : "memory"); +#else + /* Clear used registers. */ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "pxor %%xmm6, %%xmm6\n\t" + "pxor %%xmm7, %%xmm7\n\t" + "pxor %%xmm8, %%xmm8\n\t" + ::: "cc" ); +#endif +#endif + + wipememory (tmp, sizeof(tmp)); +} + + +unsigned int +_gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, + size_t nblocks) +{ + static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; +#ifdef __WIN64__ + char win64tmp[10 * 16]; +#endif + + if (nblocks == 0) + return 0; + +#ifdef __WIN64__ + /* XMM8-XMM15 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + "movdqu %%xmm8, 2*16(%0)\n\t" + "movdqu %%xmm9, 3*16(%0)\n\t" + "movdqu %%xmm10, 4*16(%0)\n\t" + "movdqu %%xmm11, 5*16(%0)\n\t" + "movdqu %%xmm12, 6*16(%0)\n\t" + "movdqu %%xmm13, 7*16(%0)\n\t" + "movdqu %%xmm14, 8*16(%0)\n\t" + "movdqu %%xmm15, 9*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory" ); +#endif + + /* Preload hash and H1. */ + asm volatile ("movdqu %[hash], %%xmm1\n\t" + "movdqa %[hsub], %%xmm0\n\t" + "pshufb %[be_mask], %%xmm1\n\t" /* be => le */ + : + : [hash] "m" (*result), [be_mask] "m" (*be_mask), + [hsub] "m" (*c->u_mode.gcm.u_ghash_key.key)); + +#ifdef __x86_64__ + if (nblocks >= 4) + { + do + { + asm volatile ("movdqa %[be_mask], %%xmm4\n\t" + "movdqu 0*16(%[buf]), %%xmm5\n\t" + "movdqu 1*16(%[buf]), %%xmm2\n\t" + "movdqu 2*16(%[buf]), %%xmm3\n\t" + "movdqu 3*16(%[buf]), %%xmm6\n\t" + "pshufb %%xmm4, %%xmm5\n\t" /* be => le */ + + /* Load H2, H3, H4. */ + "movdqu 2*16(%[h_234]), %%xmm10\n\t" + "movdqu 1*16(%[h_234]), %%xmm9\n\t" + "movdqu 0*16(%[h_234]), %%xmm8\n\t" + + "pxor %%xmm5, %%xmm1\n\t" + "pshufb %%xmm4, %%xmm2\n\t" /* be => le */ + "pshufb %%xmm4, %%xmm3\n\t" /* be => le */ + "pshufb %%xmm4, %%xmm6\n\t" /* be => le */ + : + : [buf] "r" (buf), [be_mask] "m" (*be_mask), + [h_234] "r" (c->u_mode.gcm.gcm_table)); + + gfmul_pclmul_aggr4 (); + + buf += 4 * blocksize; + nblocks -= 4; + } + while (nblocks >= 4); + +#ifndef __WIN64__ + /* Clear used x86-64/XMM registers. */ + asm volatile( "pxor %%xmm8, %%xmm8\n\t" + "pxor %%xmm9, %%xmm9\n\t" + "pxor %%xmm10, %%xmm10\n\t" + "pxor %%xmm11, %%xmm11\n\t" + "pxor %%xmm12, %%xmm12\n\t" + "pxor %%xmm13, %%xmm13\n\t" + "pxor %%xmm14, %%xmm14\n\t" + "pxor %%xmm15, %%xmm15\n\t" + ::: "cc" ); +#endif + } +#endif + + while (nblocks--) + { + asm volatile ("movdqu %[buf], %%xmm2\n\t" + "pshufb %[be_mask], %%xmm2\n\t" /* be => le */ + "pxor %%xmm2, %%xmm1\n\t" + : + : [buf] "m" (*buf), [be_mask] "m" (*be_mask)); + + gfmul_pclmul (); + + buf += blocksize; + } + + /* Store hash. */ + asm volatile ("pshufb %[be_mask], %%xmm1\n\t" /* be => le */ + "movdqu %%xmm1, %[hash]\n\t" + : [hash] "=m" (*result) + : [be_mask] "m" (*be_mask)); + +#ifdef __WIN64__ + /* Clear/restore used registers. 
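 xmm0..xmm5 are wiped; the callee-saved xmm6..xmm15 are restored
     from win64tmp, as the Windows x64 ABI requires.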
*/ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + "movdqu 2*16(%0), %%xmm8\n\t" + "movdqu 3*16(%0), %%xmm9\n\t" + "movdqu 4*16(%0), %%xmm10\n\t" + "movdqu 5*16(%0), %%xmm11\n\t" + "movdqu 6*16(%0), %%xmm12\n\t" + "movdqu 7*16(%0), %%xmm13\n\t" + "movdqu 8*16(%0), %%xmm14\n\t" + "movdqu 9*16(%0), %%xmm15\n\t" + : + : "r" (win64tmp) + : "memory" ); +#else + /* Clear used registers. */ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "pxor %%xmm6, %%xmm6\n\t" + "pxor %%xmm7, %%xmm7\n\t" + ::: "cc" ); +#endif + + return 0; +} + +#endif /* GCM_USE_INTEL_PCLMUL */ diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-gcm.c b/libotr/libgcrypt-1.8.7/cipher/cipher-gcm.c new file mode 100644 index 0000000..6169d14 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-gcm.c @@ -0,0 +1,980 @@ +/* cipher-gcm.c - Generic Galois Counter Mode implementation + * Copyright (C) 2013 Dmitry Eremin-Solenikov + * Copyright (C) 2013, 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +#ifdef GCM_USE_INTEL_PCLMUL +extern void _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c); + +extern unsigned int _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, + const byte *buf, size_t nblocks); +#endif + +#ifdef GCM_USE_ARM_PMULL +extern void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table); + +extern unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result, + const byte *buf, size_t nblocks, + void *gcm_table); + +static void +ghash_setup_armv8_ce_pmull (gcry_cipher_hd_t c) +{ + _gcry_ghash_setup_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key, + c->u_mode.gcm.gcm_table); +} + +static unsigned int +ghash_armv8_ce_pmull (gcry_cipher_hd_t c, byte *result, const byte *buf, + size_t nblocks) +{ + return _gcry_ghash_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key, result, buf, + nblocks, c->u_mode.gcm.gcm_table); +} + +#endif + + +#ifdef GCM_USE_TABLES +static const u16 gcmR[256] = { + 0x0000, 0x01c2, 0x0384, 0x0246, 0x0708, 0x06ca, 0x048c, 0x054e, + 0x0e10, 0x0fd2, 0x0d94, 0x0c56, 0x0918, 0x08da, 0x0a9c, 0x0b5e, + 0x1c20, 0x1de2, 0x1fa4, 0x1e66, 0x1b28, 0x1aea, 0x18ac, 0x196e, + 0x1230, 0x13f2, 0x11b4, 0x1076, 0x1538, 0x14fa, 0x16bc, 0x177e, + 0x3840, 0x3982, 0x3bc4, 0x3a06, 0x3f48, 0x3e8a, 0x3ccc, 0x3d0e, + 0x3650, 0x3792, 0x35d4, 0x3416, 0x3158, 0x309a, 0x32dc, 0x331e, + 0x2460, 0x25a2, 0x27e4, 0x2626, 0x2368, 0x22aa, 0x20ec, 0x212e, + 0x2a70, 0x2bb2, 0x29f4, 0x2836, 0x2d78, 0x2cba, 0x2efc, 0x2f3e, + 0x7080, 0x7142, 0x7304, 0x72c6, 0x7788, 0x764a, 0x740c, 0x75ce, + 0x7e90, 0x7f52, 0x7d14, 0x7cd6, 0x7998, 0x785a, 0x7a1c, 0x7bde, + 0x6ca0, 0x6d62, 0x6f24, 0x6ee6, 0x6ba8, 0x6a6a, 0x682c, 0x69ee, + 0x62b0, 0x6372, 0x6134, 0x60f6, 0x65b8, 0x647a, 0x663c, 0x67fe, + 0x48c0, 0x4902, 0x4b44, 0x4a86, 0x4fc8, 0x4e0a, 0x4c4c, 0x4d8e, + 0x46d0, 0x4712, 0x4554, 0x4496, 0x41d8, 0x401a, 0x425c, 0x439e, + 0x54e0, 0x5522, 0x5764, 0x56a6, 0x53e8, 0x522a, 0x506c, 0x51ae, + 0x5af0, 0x5b32, 0x5974, 0x58b6, 0x5df8, 0x5c3a, 0x5e7c, 0x5fbe, + 0xe100, 0xe0c2, 0xe284, 0xe346, 0xe608, 0xe7ca, 0xe58c, 0xe44e, + 0xef10, 0xeed2, 0xec94, 0xed56, 0xe818, 0xe9da, 0xeb9c, 0xea5e, + 0xfd20, 0xfce2, 0xfea4, 0xff66, 0xfa28, 0xfbea, 0xf9ac, 0xf86e, + 0xf330, 0xf2f2, 0xf0b4, 0xf176, 0xf438, 0xf5fa, 0xf7bc, 0xf67e, + 0xd940, 0xd882, 0xdac4, 0xdb06, 0xde48, 0xdf8a, 0xddcc, 0xdc0e, + 0xd750, 0xd692, 0xd4d4, 0xd516, 0xd058, 0xd19a, 0xd3dc, 0xd21e, + 0xc560, 0xc4a2, 0xc6e4, 0xc726, 0xc268, 0xc3aa, 0xc1ec, 0xc02e, + 0xcb70, 0xcab2, 0xc8f4, 0xc936, 0xcc78, 0xcdba, 0xcffc, 0xce3e, + 0x9180, 0x9042, 0x9204, 0x93c6, 0x9688, 0x974a, 0x950c, 0x94ce, + 0x9f90, 0x9e52, 0x9c14, 0x9dd6, 0x9898, 0x995a, 0x9b1c, 0x9ade, + 0x8da0, 0x8c62, 0x8e24, 0x8fe6, 0x8aa8, 0x8b6a, 0x892c, 0x88ee, + 0x83b0, 0x8272, 0x8034, 0x81f6, 0x84b8, 0x857a, 0x873c, 0x86fe, + 0xa9c0, 0xa802, 0xaa44, 0xab86, 0xaec8, 0xaf0a, 0xad4c, 0xac8e, + 0xa7d0, 0xa612, 0xa454, 0xa596, 0xa0d8, 0xa11a, 0xa35c, 0xa29e, + 0xb5e0, 0xb422, 0xb664, 0xb7a6, 0xb2e8, 0xb32a, 0xb16c, 0xb0ae, + 0xbbf0, 0xba32, 0xb874, 0xb9b6, 0xbcf8, 0xbd3a, 0xbf7c, 0xbebe, +}; + +#ifdef GCM_TABLES_USE_U64 +static void +bshift (u64 * b0, u64 * b1) +{ + u64 t[2], mask; + + t[0] = *b0; + t[1] = *b1; + mask = t[1] & 1 ? 
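 /* if the low bit shifts out, fold the reduction
                          constant 0xE1 back in (moved to the top byte
                          just below) */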
0xe1 : 0; + mask <<= 56; + + *b1 = (t[1] >> 1) ^ (t[0] << 63); + *b0 = (t[0] >> 1) ^ mask; +} + +static void +do_fillM (unsigned char *h, u64 *M) +{ + int i, j; + + M[0 + 0] = 0; + M[0 + 16] = 0; + + M[8 + 0] = buf_get_be64 (h + 0); + M[8 + 16] = buf_get_be64 (h + 8); + + for (i = 4; i > 0; i /= 2) + { + M[i + 0] = M[2 * i + 0]; + M[i + 16] = M[2 * i + 16]; + + bshift (&M[i], &M[i + 16]); + } + + for (i = 2; i < 16; i *= 2) + for (j = 1; j < i; j++) + { + M[(i + j) + 0] = M[i + 0] ^ M[j + 0]; + M[(i + j) + 16] = M[i + 16] ^ M[j + 16]; + } +} + +static inline unsigned int +do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM) +{ + u64 V[2]; + u64 tmp[2]; + const u64 *M; + u64 T; + u32 A; + int i; + + buf_xor (V, result, buf, 16); + V[0] = be_bswap64 (V[0]); + V[1] = be_bswap64 (V[1]); + + /* First round can be manually tweaked based on fact that 'tmp' is zero. */ + i = 15; + + M = &gcmM[(V[1] & 0xf)]; + V[1] >>= 4; + tmp[0] = (M[0] >> 4) ^ ((u64) gcmR[(M[16] & 0xf) << 4] << 48); + tmp[1] = (M[16] >> 4) ^ (M[0] << 60); + tmp[0] ^= gcmM[(V[1] & 0xf) + 0]; + tmp[1] ^= gcmM[(V[1] & 0xf) + 16]; + V[1] >>= 4; + + --i; + while (1) + { + M = &gcmM[(V[1] & 0xf)]; + V[1] >>= 4; + + A = tmp[1] & 0xff; + T = tmp[0]; + tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[1] & 0xf) + 0]; + tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[1] & 0xf) + 16]; + + tmp[0] ^= (M[0] >> 4) ^ ((u64) gcmR[(M[16] & 0xf) << 4] << 48); + tmp[1] ^= (M[16] >> 4) ^ (M[0] << 60); + + if (i == 0) + break; + else if (i == 8) + V[1] = V[0]; + else + V[1] >>= 4; + --i; + } + + buf_put_be64 (result + 0, tmp[0]); + buf_put_be64 (result + 8, tmp[1]); + + return (sizeof(V) + sizeof(T) + sizeof(tmp) + + sizeof(int)*2 + sizeof(void*)*5); +} + +#else /*!GCM_TABLES_USE_U64*/ + +static void +bshift (u32 * M, int i) +{ + u32 t[4], mask; + + t[0] = M[i * 4 + 0]; + t[1] = M[i * 4 + 1]; + t[2] = M[i * 4 + 2]; + t[3] = M[i * 4 + 3]; + mask = t[3] & 1 ? 0xe1 : 0; + + M[i * 4 + 3] = (t[3] >> 1) ^ (t[2] << 31); + M[i * 4 + 2] = (t[2] >> 1) ^ (t[1] << 31); + M[i * 4 + 1] = (t[1] >> 1) ^ (t[0] << 31); + M[i * 4 + 0] = (t[0] >> 1) ^ (mask << 24); +} + +static void +do_fillM (unsigned char *h, u32 *M) +{ + int i, j; + + M[0 * 4 + 0] = 0; + M[0 * 4 + 1] = 0; + M[0 * 4 + 2] = 0; + M[0 * 4 + 3] = 0; + + M[8 * 4 + 0] = buf_get_be32 (h + 0); + M[8 * 4 + 1] = buf_get_be32 (h + 4); + M[8 * 4 + 2] = buf_get_be32 (h + 8); + M[8 * 4 + 3] = buf_get_be32 (h + 12); + + for (i = 4; i > 0; i /= 2) + { + M[i * 4 + 0] = M[2 * i * 4 + 0]; + M[i * 4 + 1] = M[2 * i * 4 + 1]; + M[i * 4 + 2] = M[2 * i * 4 + 2]; + M[i * 4 + 3] = M[2 * i * 4 + 3]; + + bshift (M, i); + } + + for (i = 2; i < 16; i *= 2) + for (j = 1; j < i; j++) + { + M[(i + j) * 4 + 0] = M[i * 4 + 0] ^ M[j * 4 + 0]; + M[(i + j) * 4 + 1] = M[i * 4 + 1] ^ M[j * 4 + 1]; + M[(i + j) * 4 + 2] = M[i * 4 + 2] ^ M[j * 4 + 2]; + M[(i + j) * 4 + 3] = M[i * 4 + 3] ^ M[j * 4 + 3]; + } +} + +static inline unsigned int +do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM) +{ + byte V[16]; + u32 tmp[4]; + u32 v; + const u32 *M, *m; + u32 T[3]; + int i; + + buf_xor (V, result, buf, 16); /* V is big-endian */ + + /* First round can be manually tweaked based on fact that 'tmp' is zero. 
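+   * Each later pass consumes one byte v of the operand and computes,
+   * roughly (table rows are four u32 words here, indices simplified):
+   *
+   *   tmp = fold8 (tmp) ^ gcmM[v >> 4] ^ shift4 (gcmM[v & 0xf])
+   *
+   * where fold8() shifts the accumulator right by 8 bits and feeds the
+   * byte shifted out back in through the gcmR reduction table.  As
+   * 'tmp' starts out all-zero, the shift and fold drop out for the
+   * first byte, leaving just the two table lookups below.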
*/ + i = 15; + + v = V[i]; + M = &gcmM[(v & 0xf) * 4]; + v = (v & 0xf0) >> 4; + m = &gcmM[v * 4]; + v = V[--i]; + + tmp[0] = (M[0] >> 4) ^ ((u64) gcmR[(M[3] << 4) & 0xf0] << 16) ^ m[0]; + tmp[1] = (M[1] >> 4) ^ (M[0] << 28) ^ m[1]; + tmp[2] = (M[2] >> 4) ^ (M[1] << 28) ^ m[2]; + tmp[3] = (M[3] >> 4) ^ (M[2] << 28) ^ m[3]; + + while (1) + { + M = &gcmM[(v & 0xf) * 4]; + v = (v & 0xf0) >> 4; + m = &gcmM[v * 4]; + + T[0] = tmp[0]; + T[1] = tmp[1]; + T[2] = tmp[2]; + tmp[0] = (T[0] >> 8) ^ ((u32) gcmR[tmp[3] & 0xff] << 16) ^ m[0]; + tmp[1] = (T[0] << 24) ^ (tmp[1] >> 8) ^ m[1]; + tmp[2] = (T[1] << 24) ^ (tmp[2] >> 8) ^ m[2]; + tmp[3] = (T[2] << 24) ^ (tmp[3] >> 8) ^ m[3]; + + tmp[0] ^= (M[0] >> 4) ^ ((u64) gcmR[(M[3] << 4) & 0xf0] << 16); + tmp[1] ^= (M[1] >> 4) ^ (M[0] << 28); + tmp[2] ^= (M[2] >> 4) ^ (M[1] << 28); + tmp[3] ^= (M[3] >> 4) ^ (M[2] << 28); + + if (i == 0) + break; + + v = V[--i]; + } + + buf_put_be32 (result + 0, tmp[0]); + buf_put_be32 (result + 4, tmp[1]); + buf_put_be32 (result + 8, tmp[2]); + buf_put_be32 (result + 12, tmp[3]); + + return (sizeof(V) + sizeof(T) + sizeof(tmp) + + sizeof(int)*2 + sizeof(void*)*6); +} +#endif /*!GCM_TABLES_USE_U64*/ + +#define fillM(c) \ + do_fillM (c->u_mode.gcm.u_ghash_key.key, c->u_mode.gcm.gcm_table) +#define GHASH(c, result, buf) do_ghash (result, buf, c->u_mode.gcm.gcm_table) + +#else + +static unsigned long +bshift (unsigned long *b) +{ + unsigned long c; + int i; + c = b[3] & 1; + for (i = 3; i > 0; i--) + { + b[i] = (b[i] >> 1) | (b[i - 1] << 31); + } + b[i] >>= 1; + return c; +} + +static unsigned int +do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf) +{ + unsigned long V[4]; + int i, j; + byte *p; + +#ifdef WORDS_BIGENDIAN + p = result; +#else + unsigned long T[4]; + + buf_xor (V, result, buf, 16); + for (i = 0; i < 4; i++) + { + V[i] = (V[i] & 0x00ff00ff) << 8 | (V[i] & 0xff00ff00) >> 8; + V[i] = (V[i] & 0x0000ffff) << 16 | (V[i] & 0xffff0000) >> 16; + } + p = (byte *) T; +#endif + + memset (p, 0, 16); + + for (i = 0; i < 16; i++) + { + for (j = 0x80; j; j >>= 1) + { + if (hsub[i] & j) + buf_xor (p, p, V, 16); + if (bshift (V)) + V[0] ^= 0xe1000000; + } + } +#ifndef WORDS_BIGENDIAN + for (i = 0, p = (byte *) T; i < 16; i += 4, p += 4) + { + result[i + 0] = p[3]; + result[i + 1] = p[2]; + result[i + 2] = p[1]; + result[i + 3] = p[0]; + } +#endif + + return (sizeof(V) + sizeof(T) + sizeof(int)*2 + sizeof(void*)*5); +} + +#define fillM(c) do { } while (0) +#define GHASH(c, result, buf) do_ghash (c->u_mode.gcm.u_ghash_key.key, result, buf) + +#endif /* !GCM_USE_TABLES */ + + +static unsigned int +ghash_internal (gcry_cipher_hd_t c, byte *result, const byte *buf, + size_t nblocks) +{ + const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; + unsigned int burn = 0; + + while (nblocks) + { + burn = GHASH (c, result, buf); + buf += blocksize; + nblocks--; + } + + return burn + (burn ? 
5*sizeof(void*) : 0); +} + + +static void +setupM (gcry_cipher_hd_t c) +{ +#if defined(GCM_USE_INTEL_PCLMUL) || defined(GCM_USE_ARM_PMULL) + unsigned int features = _gcry_get_hw_features (); +#endif + + if (0) + ; +#ifdef GCM_USE_INTEL_PCLMUL + else if (features & HWF_INTEL_PCLMUL) + { + c->u_mode.gcm.ghash_fn = _gcry_ghash_intel_pclmul; + _gcry_ghash_setup_intel_pclmul (c); + } +#endif +#ifdef GCM_USE_ARM_PMULL + else if (features & HWF_ARM_PMULL) + { + c->u_mode.gcm.ghash_fn = ghash_armv8_ce_pmull; + ghash_setup_armv8_ce_pmull (c); + } +#endif + else + { + c->u_mode.gcm.ghash_fn = ghash_internal; + fillM (c); + } +} + + +static inline void +gcm_bytecounter_add (u32 ctr[2], size_t add) +{ + if (sizeof(add) > sizeof(u32)) + { + u32 high_add = ((add >> 31) >> 1) & 0xffffffff; + ctr[1] += high_add; + } + + ctr[0] += add; + if (ctr[0] >= add) + return; + ++ctr[1]; +} + + +static inline u32 +gcm_add32_be128 (byte *ctr, unsigned int add) +{ + /* 'ctr' must be aligned to four bytes. */ + const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; + u32 *pval = (u32 *)(void *)(ctr + blocksize - sizeof(u32)); + u32 val; + + val = be_bswap32(*pval) + add; + *pval = be_bswap32(val); + + return val; /* return result as host-endian value */ +} + + +static inline int +gcm_check_datalen (u32 ctr[2]) +{ + /* len(plaintext) <= 2^39-256 bits == 2^36-32 bytes == 2^32-2 blocks */ + if (ctr[1] > 0xfU) + return 0; + if (ctr[1] < 0xfU) + return 1; + + if (ctr[0] <= 0xffffffe0U) + return 1; + + return 0; +} + + +static inline int +gcm_check_aadlen_or_ivlen (u32 ctr[2]) +{ + /* len(aad/iv) <= 2^64-1 bits ~= 2^61-1 bytes */ + if (ctr[1] > 0x1fffffffU) + return 0; + if (ctr[1] < 0x1fffffffU) + return 1; + + if (ctr[0] <= 0xffffffffU) + return 1; + + return 0; +} + + +static void +do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf, + size_t buflen, int do_padding) +{ + unsigned int blocksize = GCRY_GCM_BLOCK_LEN; + unsigned int unused = c->u_mode.gcm.mac_unused; + ghash_fn_t ghash_fn = c->u_mode.gcm.ghash_fn; + size_t nblocks, n; + unsigned int burn = 0; + + if (buflen == 0 && (unused == 0 || !do_padding)) + return; + + do + { + if (buflen > 0 && (buflen + unused < blocksize || unused > 0)) + { + n = blocksize - unused; + n = n < buflen ? n : buflen; + + buf_cpy (&c->u_mode.gcm.macbuf[unused], buf, n); + + unused += n; + buf += n; + buflen -= n; + } + if (!buflen) + { + if (!do_padding) + break; + + while (unused < blocksize) + c->u_mode.gcm.macbuf[unused++] = 0; + } + + if (unused > 0) + { + gcry_assert (unused == blocksize); + + /* Process one block from macbuf. */ + burn = ghash_fn (c, hash, c->u_mode.gcm.macbuf, 1); + unused = 0; + } + + nblocks = buflen / blocksize; + + if (nblocks) + { + burn = ghash_fn (c, hash, buf, nblocks); + buf += blocksize * nblocks; + buflen -= blocksize * nblocks; + } + } + while (buflen > 0); + + c->u_mode.gcm.mac_unused = unused; + + if (burn) + _gcry_burn_stack (burn); +} + + +static gcry_err_code_t +gcm_ctr_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t err = 0; + + while (inbuflen) + { + u32 nblocks_to_overflow; + u32 num_ctr_increments; + u32 curr_ctr_low; + size_t currlen = inbuflen; + byte ctr_copy[GCRY_GCM_BLOCK_LEN]; + int fix_ctr = 0; + + /* GCM CTR increments only least significant 32-bits, without carry + * to upper 96-bits of counter. Using generic CTR implementation + * directly would carry 32-bit overflow to upper 96-bit. 
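+       * A worked wrap case (illustrative numbers): with c->unused == 0
+       * and the low counter word at 0xfffffffe, an 80-byte input needs
+       * five increments and would wrap.  'currlen' is then capped at
+       * the two blocks that take the low word to zero, the upper 96
+       * bits are restored from 'ctr_copy', and the outer loop handles
+       * the remaining three blocks.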
Detect + * if input length is long enough to cause overflow, and limit + * input length so that CTR overflow happen but updated CTR value is + * not used to encrypt further input. After overflow, upper 96 bits + * of CTR are restored to cancel out modification done by generic CTR + * encryption. */ + + if (inbuflen > c->unused) + { + curr_ctr_low = gcm_add32_be128 (c->u_ctr.ctr, 0); + + /* Number of CTR increments this inbuflen would cause. */ + num_ctr_increments = (inbuflen - c->unused) / GCRY_GCM_BLOCK_LEN + + !!((inbuflen - c->unused) % GCRY_GCM_BLOCK_LEN); + + if ((u32)(num_ctr_increments + curr_ctr_low) < curr_ctr_low) + { + nblocks_to_overflow = 0xffffffffU - curr_ctr_low + 1; + currlen = nblocks_to_overflow * GCRY_GCM_BLOCK_LEN + c->unused; + if (currlen > inbuflen) + { + currlen = inbuflen; + } + + fix_ctr = 1; + buf_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN); + } + } + + err = _gcry_cipher_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen); + if (err != 0) + return err; + + if (fix_ctr) + { + /* Lower 32-bits of CTR should now be zero. */ + gcry_assert(gcm_add32_be128 (c->u_ctr.ctr, 0) == 0); + + /* Restore upper part of CTR. */ + buf_cpy(c->u_ctr.ctr, ctr_copy, GCRY_GCM_BLOCK_LEN - sizeof(u32)); + + wipememory(ctr_copy, sizeof(ctr_copy)); + } + + inbuflen -= currlen; + inbuf += currlen; + outbuflen -= currlen; + outbuf += currlen; + } + + return err; +} + + +gcry_err_code_t +_gcry_cipher_gcm_encrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + static const unsigned char zerobuf[MAX_BLOCKSIZE]; + gcry_err_code_t err; + + if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->u_mode.gcm.datalen_over_limits) + return GPG_ERR_INV_LENGTH; + if (c->marks.tag + || c->u_mode.gcm.ghash_data_finalized + || !c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN); + + if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode) + return GPG_ERR_INV_STATE; + + if (!c->u_mode.gcm.ghash_aad_finalized) + { + /* Start of encryption marks end of AAD stream. */ + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1); + c->u_mode.gcm.ghash_aad_finalized = 1; + } + + gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen); + if (!gcm_check_datalen(c->u_mode.gcm.datalen)) + { + c->u_mode.gcm.datalen_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + err = gcm_ctr_encrypt(c, outbuf, outbuflen, inbuf, inbuflen); + if (err != 0) + return err; + + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, outbuf, inbuflen, 0); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_gcm_decrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + static const unsigned char zerobuf[MAX_BLOCKSIZE]; + + if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->u_mode.gcm.datalen_over_limits) + return GPG_ERR_INV_LENGTH; + if (c->marks.tag + || c->u_mode.gcm.ghash_data_finalized + || !c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN); + + if (!c->u_mode.gcm.ghash_aad_finalized) + { + /* Start of decryption marks end of AAD stream. 
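+       * A caller is therefore expected to pass all AAD before any data,
+       * e.g. (sketch via the public API; not part of this file):
+       *
+       *   gcry_cipher_setiv (hd, iv, 12);
+       *   gcry_cipher_authenticate (hd, aad, aadlen);
+       *   gcry_cipher_decrypt (hd, buf, buflen, NULL, 0);
+       *   if (gcry_cipher_checktag (hd, tag, 16))
+       *     ...;  // reject output, authentication failed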
*/ + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1); + c->u_mode.gcm.ghash_aad_finalized = 1; + } + + gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen); + if (!gcm_check_datalen(c->u_mode.gcm.datalen)) + { + c->u_mode.gcm.datalen_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, inbuf, inbuflen, 0); + + return gcm_ctr_encrypt(c, outbuf, outbuflen, inbuf, inbuflen); +} + + +gcry_err_code_t +_gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c, + const byte * aadbuf, size_t aadbuflen) +{ + static const unsigned char zerobuf[MAX_BLOCKSIZE]; + + if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (c->u_mode.gcm.datalen_over_limits) + return GPG_ERR_INV_LENGTH; + if (c->marks.tag + || c->u_mode.gcm.ghash_aad_finalized + || c->u_mode.gcm.ghash_data_finalized + || !c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN); + + gcm_bytecounter_add(c->u_mode.gcm.aadlen, aadbuflen); + if (!gcm_check_aadlen_or_ivlen(c->u_mode.gcm.aadlen)) + { + c->u_mode.gcm.datalen_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, aadbuf, aadbuflen, 0); + + return 0; +} + + +void +_gcry_cipher_gcm_setkey (gcry_cipher_hd_t c) +{ + memset (c->u_mode.gcm.u_ghash_key.key, 0, GCRY_GCM_BLOCK_LEN); + + c->spec->encrypt (&c->context.c, c->u_mode.gcm.u_ghash_key.key, + c->u_mode.gcm.u_ghash_key.key); + setupM (c); +} + + +static gcry_err_code_t +_gcry_cipher_gcm_initiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) +{ + memset (c->u_mode.gcm.aadlen, 0, sizeof(c->u_mode.gcm.aadlen)); + memset (c->u_mode.gcm.datalen, 0, sizeof(c->u_mode.gcm.datalen)); + memset (c->u_mode.gcm.u_tag.tag, 0, GCRY_GCM_BLOCK_LEN); + c->u_mode.gcm.datalen_over_limits = 0; + c->u_mode.gcm.ghash_data_finalized = 0; + c->u_mode.gcm.ghash_aad_finalized = 0; + + if (ivlen == 0) + return GPG_ERR_INV_LENGTH; + + if (ivlen != GCRY_GCM_BLOCK_LEN - 4) + { + u32 iv_bytes[2] = {0, 0}; + u32 bitlengths[2][2]; + + if (!c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + memset(c->u_ctr.ctr, 0, GCRY_GCM_BLOCK_LEN); + + gcm_bytecounter_add(iv_bytes, ivlen); + if (!gcm_check_aadlen_or_ivlen(iv_bytes)) + { + c->u_mode.gcm.datalen_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + do_ghash_buf(c, c->u_ctr.ctr, iv, ivlen, 1); + + /* iv length, 64-bit */ + bitlengths[1][1] = be_bswap32(iv_bytes[0] << 3); + bitlengths[1][0] = be_bswap32((iv_bytes[0] >> 29) | + (iv_bytes[1] << 3)); + /* zeros, 64-bit */ + bitlengths[0][1] = 0; + bitlengths[0][0] = 0; + + do_ghash_buf(c, c->u_ctr.ctr, (byte*)bitlengths, GCRY_GCM_BLOCK_LEN, 1); + + wipememory (iv_bytes, sizeof iv_bytes); + wipememory (bitlengths, sizeof bitlengths); + } + else + { + /* 96-bit IV is handled differently. */ + memcpy (c->u_ctr.ctr, iv, ivlen); + c->u_ctr.ctr[12] = c->u_ctr.ctr[13] = c->u_ctr.ctr[14] = 0; + c->u_ctr.ctr[15] = 1; + } + + c->spec->encrypt (&c->context.c, c->u_mode.gcm.tagiv, c->u_ctr.ctr); + + gcm_add32_be128 (c->u_ctr.ctr, 1); + + c->unused = 0; + c->marks.iv = 1; + c->marks.tag = 0; + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_gcm_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) +{ + c->marks.iv = 0; + c->marks.tag = 0; + c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0; + + if (fips_mode ()) + { + /* Direct invocation of GCM setiv in FIPS mode disables encryption. 
*/ + c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 1; + } + + return _gcry_cipher_gcm_initiv (c, iv, ivlen); +} + + +#if 0 && TODO +void +_gcry_cipher_gcm_geniv (gcry_cipher_hd_t c, + byte *ivout, size_t ivoutlen, const byte *nonce, + size_t noncelen) +{ + /* nonce: user provided part (might be null) */ + /* noncelen: check if proper length (if nonce not null) */ + /* ivout: iv used to initialize gcm, output to user */ + /* ivoutlen: check correct size */ + byte iv[IVLEN]; + + if (!ivout) + return GPG_ERR_INV_ARG; + if (ivoutlen != IVLEN) + return GPG_ERR_INV_LENGTH; + if (nonce != NULL && !is_nonce_ok_len(noncelen)) + return GPG_ERR_INV_ARG; + + gcm_generate_iv(iv, nonce, noncelen); + + c->marks.iv = 0; + c->marks.tag = 0; + c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0; + + _gcry_cipher_gcm_initiv (c, iv, IVLEN); + + buf_cpy(ivout, iv, IVLEN); + wipememory(iv, sizeof(iv)); +} +#endif + + +static int +is_tag_length_valid(size_t taglen) +{ + switch (taglen) + { + /* Allowed tag lengths from NIST SP 800-38D. */ + case 128 / 8: /* GCRY_GCM_BLOCK_LEN */ + case 120 / 8: + case 112 / 8: + case 104 / 8: + case 96 / 8: + case 64 / 8: + case 32 / 8: + return 1; + + default: + return 0; + } +} + +static gcry_err_code_t +_gcry_cipher_gcm_tag (gcry_cipher_hd_t c, + byte * outbuf, size_t outbuflen, int check) +{ + if (!(is_tag_length_valid (outbuflen) || outbuflen >= GCRY_GCM_BLOCK_LEN)) + return GPG_ERR_INV_LENGTH; + if (c->u_mode.gcm.datalen_over_limits) + return GPG_ERR_INV_LENGTH; + + if (!c->marks.tag) + { + u32 bitlengths[2][2]; + + if (!c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + /* aad length */ + bitlengths[0][1] = be_bswap32(c->u_mode.gcm.aadlen[0] << 3); + bitlengths[0][0] = be_bswap32((c->u_mode.gcm.aadlen[0] >> 29) | + (c->u_mode.gcm.aadlen[1] << 3)); + /* data length */ + bitlengths[1][1] = be_bswap32(c->u_mode.gcm.datalen[0] << 3); + bitlengths[1][0] = be_bswap32((c->u_mode.gcm.datalen[0] >> 29) | + (c->u_mode.gcm.datalen[1] << 3)); + + /* Finalize data-stream. */ + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1); + c->u_mode.gcm.ghash_aad_finalized = 1; + c->u_mode.gcm.ghash_data_finalized = 1; + + /* Add bitlengths to tag. */ + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, (byte*)bitlengths, + GCRY_GCM_BLOCK_LEN, 1); + buf_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv, + c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN); + c->marks.tag = 1; + + wipememory (bitlengths, sizeof (bitlengths)); + wipememory (c->u_mode.gcm.macbuf, GCRY_GCM_BLOCK_LEN); + wipememory (c->u_mode.gcm.tagiv, GCRY_GCM_BLOCK_LEN); + wipememory (c->u_mode.gcm.aadlen, sizeof (c->u_mode.gcm.aadlen)); + wipememory (c->u_mode.gcm.datalen, sizeof (c->u_mode.gcm.datalen)); + } + + if (!check) + { + if (outbuflen > GCRY_GCM_BLOCK_LEN) + outbuflen = GCRY_GCM_BLOCK_LEN; + + /* NB: We already checked that OUTBUF is large enough to hold + * the result or has valid truncated length. */ + memcpy (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen); + } + else + { + /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF + * and thus we need to compare its length first. */ + if (!is_tag_length_valid (outbuflen) + || !buf_eq_const (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen)) + return GPG_ERR_CHECKSUM; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_gcm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen) +{ + /* Outputting authentication tag is part of encryption. 
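+   * (FIPS rules expect the GCM IV used for encryption to be generated
+   * inside the module, hence the guard below after a caller-supplied
+   * setiv; decryption and tag checking remain possible.)  OUTBUFLEN
+   * may also select a truncated tag, e.g. _gcry_cipher_gcm_get_tag
+   * (c, tag, 96 / 8) returns the first 12 bytes of the 16-byte tag.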
*/ + if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode) + return GPG_ERR_INV_STATE; + + return _gcry_cipher_gcm_tag (c, outtag, taglen, 0); +} + +gcry_err_code_t +_gcry_cipher_gcm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen) +{ + return _gcry_cipher_gcm_tag (c, (unsigned char *) intag, taglen, 1); +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-internal.h b/libotr/libgcrypt-1.8.7/cipher/cipher-internal.h new file mode 100644 index 0000000..b748125 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-internal.h @@ -0,0 +1,509 @@ +/* cipher-internal.h - Internal defs for cipher.c + * Copyright (C) 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef G10_CIPHER_INTERNAL_H +#define G10_CIPHER_INTERNAL_H + +#include "./poly1305-internal.h" + + +/* The maximum supported size of a block in bytes. */ +#define MAX_BLOCKSIZE 16 + +/* The length for an OCB block. Although OCB supports any block + length it does not make sense to use a 64 bit blocklen (and cipher) + because this reduces the security margin to an unacceptable state. + Thus we require a cipher with 128 bit blocklength. */ +#define OCB_BLOCK_LEN (128/8) + +/* The size of the pre-computed L table for OCB. This takes the same + size as the table used for GCM and thus we don't save anything by + not using such a table. */ +#define OCB_L_TABLE_SIZE 16 + + +/* Check the above constants. */ +#if OCB_BLOCK_LEN > MAX_BLOCKSIZE +# error OCB_BLOCKLEN > MAX_BLOCKSIZE +#endif + + + +/* Magic values for the context structure. */ +#define CTX_MAGIC_NORMAL 0x24091964 +#define CTX_MAGIC_SECURE 0x46919042 + +/* Try to use 16 byte aligned cipher context for better performance. + We use the aligned attribute, thus it is only possible to implement + this with gcc. */ +#undef NEED_16BYTE_ALIGNED_CONTEXT +#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED +# define NEED_16BYTE_ALIGNED_CONTEXT 1 +#endif + +/* Undef this symbol to trade GCM speed for 256 bytes of memory per context */ +#define GCM_USE_TABLES 1 + + +/* GCM_USE_INTEL_PCLMUL indicates whether to compile GCM with Intel PCLMUL + code. */ +#undef GCM_USE_INTEL_PCLMUL +#if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES) +# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) +# if __GNUC__ >= 4 +# define GCM_USE_INTEL_PCLMUL 1 +# endif +# endif +#endif /* GCM_USE_INTEL_PCLMUL */ + +/* GCM_USE_ARM_PMULL indicates whether to compile GCM with ARMv8 PMULL code. 
*/ +#undef GCM_USE_ARM_PMULL +#if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(GCM_USE_TABLES) +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) +# define GCM_USE_ARM_PMULL 1 +# elif defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) +# define GCM_USE_ARM_PMULL 1 +# endif +#endif /* GCM_USE_ARM_PMULL */ + + +typedef unsigned int (*ghash_fn_t) (gcry_cipher_hd_t c, byte *result, + const byte *buf, size_t nblocks); + + +/* A VIA processor with the Padlock engine as well as the Intel AES_NI + instructions require an alignment of most data on a 16 byte + boundary. Because we trick out the compiler while allocating the + context, the align attribute as used in rijndael.c does not work on + its own. Thus we need to make sure that the entire context + structure is a aligned on that boundary. We achieve this by + defining a new type and use that instead of our usual alignment + type. */ +typedef union +{ + PROPERLY_ALIGNED_TYPE foo; +#ifdef NEED_16BYTE_ALIGNED_CONTEXT + char bar[16] __attribute__ ((aligned (16))); +#endif + char c[1]; +} cipher_context_alignment_t; + + +/* The handle structure. */ +struct gcry_cipher_handle +{ + int magic; + size_t actual_handle_size; /* Allocated size of this handle. */ + size_t handle_offset; /* Offset to the malloced block. */ + gcry_cipher_spec_t *spec; + + /* The algorithm id. This is a hack required because the module + interface does not easily allow to retrieve this value. */ + int algo; + + /* A structure with function pointers for bulk operations. Due to + limitations of the module system (we don't want to change the + API) we need to keep these function pointers here. The cipher + open function initializes them and the actual encryption routines + use them if they are not NULL. */ + struct { + void (*cfb_enc)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); + void (*cfb_dec)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); + void (*cbc_enc)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac); + void (*cbc_dec)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); + void (*ctr_enc)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); + size_t (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt); + size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); + void (*xts_crypt)(gcry_cipher_hd_t c, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt); + } bulk; + + + int mode; + unsigned int flags; + + struct { + unsigned int key:1; /* Set to 1 if a key has been set. */ + unsigned int iv:1; /* Set to 1 if a IV has been set. */ + unsigned int tag:1; /* Set to 1 if a tag is finalized. */ + unsigned int finalize:1; /* Next encrypt/decrypt has the final data. */ + } marks; + + /* The initialization vector. For best performance we make sure + that it is properly aligned. In particular some implementations + of bulk operations expect an 16 byte aligned IV. IV is also used + to store CBC-MAC in CCM mode; counter IV is stored in U_CTR. For + OCB mode it is used for the offset value. 
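+     As a quick map: CBC, CFB and OFB keep their chaining value here,
+     CCM its CBC-MAC state and OCB its current Offset_i, while CTR and
+     GCM keep their counter in U_CTR instead.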
*/ + union { + cipher_context_alignment_t iv_align; + unsigned char iv[MAX_BLOCKSIZE]; + } u_iv; + + /* The counter for CTR mode. This field is also used by AESWRAP and + thus we can't use the U_IV union. For OCB mode it is used for + the checksum. */ + union { + cipher_context_alignment_t iv_align; + unsigned char ctr[MAX_BLOCKSIZE]; + } u_ctr; + + /* Space to save an IV or CTR for chaining operations. */ + unsigned char lastiv[MAX_BLOCKSIZE]; + int unused; /* Number of unused bytes in LASTIV. */ + + union { + /* Mode specific storage for CCM mode. */ + struct { + u64 encryptlen; + u64 aadlen; + unsigned int authlen; + + /* Space to save partial input lengths for MAC. */ + unsigned char macbuf[GCRY_CCM_BLOCK_LEN]; + int mac_unused; /* Number of unprocessed bytes in MACBUF. */ + + unsigned char s0[GCRY_CCM_BLOCK_LEN]; + + unsigned int nonce:1;/* Set to 1 if nonce has been set. */ + unsigned int lengths:1; /* Set to 1 if CCM length parameters has been + processed. */ + } ccm; + + /* Mode specific storage for Poly1305 mode. */ + struct { + /* byte counter for AAD. */ + u32 aadcount[2]; + + /* byte counter for data. */ + u32 datacount[2]; + + unsigned int aad_finalized:1; + unsigned int bytecount_over_limits:1; + + poly1305_context_t ctx; + } poly1305; + + /* Mode specific storage for CMAC mode. */ + struct { + unsigned int tag:1; /* Set to 1 if tag has been finalized. */ + + /* Subkeys for tag creation, not cleared by gcry_cipher_reset. */ + unsigned char subkeys[2][MAX_BLOCKSIZE]; + } cmac; + + /* Mode specific storage for GCM mode. */ + struct { + /* The interim tag for GCM mode. */ + union { + cipher_context_alignment_t iv_align; + unsigned char tag[MAX_BLOCKSIZE]; + } u_tag; + + /* Space to save partial input lengths for MAC. */ + unsigned char macbuf[GCRY_CCM_BLOCK_LEN]; + int mac_unused; /* Number of unprocessed bytes in MACBUF. */ + + + /* byte counters for GCM */ + u32 aadlen[2]; + u32 datalen[2]; + + /* encrypted tag counter */ + unsigned char tagiv[MAX_BLOCKSIZE]; + + unsigned int ghash_data_finalized:1; + unsigned int ghash_aad_finalized:1; + + unsigned int datalen_over_limits:1; + unsigned int disallow_encryption_because_of_setiv_in_fips_mode:1; + + /* --- Following members are not cleared in gcry_cipher_reset --- */ + + /* GHASH multiplier from key. */ + union { + cipher_context_alignment_t iv_align; + unsigned char key[MAX_BLOCKSIZE]; + } u_ghash_key; + + /* GHASH implementation in use. */ + ghash_fn_t ghash_fn; + + /* Pre-calculated table for GCM. */ +#ifdef GCM_USE_TABLES + #if (SIZEOF_UNSIGNED_LONG == 8 || defined(__x86_64__)) + #define GCM_TABLES_USE_U64 1 + u64 gcm_table[2 * 16]; + #else + #undef GCM_TABLES_USE_U64 + u32 gcm_table[4 * 16]; + #endif +#endif + } gcm; + + /* Mode specific storage for OCB mode. */ + struct { + /* Helper variables and pre-computed table of L values. */ + unsigned char L_star[OCB_BLOCK_LEN]; + unsigned char L_dollar[OCB_BLOCK_LEN]; + unsigned char L[OCB_BLOCK_LEN][OCB_L_TABLE_SIZE]; + + /* The tag is valid if marks.tag has been set. */ + unsigned char tag[OCB_BLOCK_LEN]; + + /* A buffer to hold the offset for the AAD processing. */ + unsigned char aad_offset[OCB_BLOCK_LEN]; + + /* A buffer to hold the current sum of AAD processing. We can't + use tag here because tag may already hold the preprocessed + checksum of the data. */ + unsigned char aad_sum[OCB_BLOCK_LEN]; + + /* A buffer to store AAD data not yet processed. */ + unsigned char aad_leftover[OCB_BLOCK_LEN]; + + /* Number of data/aad blocks processed so far. 
*/ + u64 data_nblocks; + u64 aad_nblocks; + + /* Number of valid bytes in AAD_LEFTOVER. */ + unsigned char aad_nleftover; + + /* Length of the tag. Fixed for now but may eventually be + specified using a set of gcry_cipher_flags. */ + unsigned char taglen; + + /* Flags indicating that the final data/aad block has been + processed. */ + unsigned int data_finalized:1; + unsigned int aad_finalized:1; + + } ocb; + + /* Mode specific storage for XTS mode. */ + struct { + /* Pointer to tweak cipher context, allocated after actual + * cipher context. */ + char *tweak_context; + } xts; + } u_mode; + + /* What follows are two contexts of the cipher in use. The first + one needs to be aligned well enough for the cipher operation + whereas the second one is a copy created by cipher_setkey and + used by cipher_reset. That second copy has no need for proper + aligment because it is only accessed by memcpy. */ + cipher_context_alignment_t context; +}; + + +/*-- cipher-cbc.c --*/ +gcry_err_code_t _gcry_cipher_cbc_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_cbc_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); + +/*-- cipher-cfb.c --*/ +gcry_err_code_t _gcry_cipher_cfb_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_cfb_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_cfb8_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_cfb8_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); + + +/*-- cipher-ofb.c --*/ +gcry_err_code_t _gcry_cipher_ofb_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); + +/*-- cipher-ctr.c --*/ +gcry_err_code_t _gcry_cipher_ctr_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); + + +/*-- cipher-aeswrap.c --*/ +gcry_err_code_t _gcry_cipher_aeswrap_encrypt +/* */ (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_aeswrap_decrypt +/* */ (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen); + + +/*-- cipher-ccm.c --*/ +gcry_err_code_t _gcry_cipher_ccm_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_ccm_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_ccm_set_nonce +/* */ (gcry_cipher_hd_t c, const unsigned char *nonce, + size_t noncelen); +gcry_err_code_t _gcry_cipher_ccm_authenticate +/* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen); +gcry_err_code_t _gcry_cipher_ccm_set_lengths +/* */ (gcry_cipher_hd_t c, u64 encryptedlen, u64 aadlen, u64 taglen); +gcry_err_code_t _gcry_cipher_ccm_get_tag +/* */ (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen); +gcry_err_code_t _gcry_cipher_ccm_check_tag +/* */ 
(gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen); + + +/*-- cipher-gcm.c --*/ +gcry_err_code_t _gcry_cipher_gcm_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_gcm_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_gcm_setiv +/* */ (gcry_cipher_hd_t c, + const unsigned char *iv, size_t ivlen); +gcry_err_code_t _gcry_cipher_gcm_authenticate +/* */ (gcry_cipher_hd_t c, + const unsigned char *aadbuf, size_t aadbuflen); +gcry_err_code_t _gcry_cipher_gcm_get_tag +/* */ (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen); +gcry_err_code_t _gcry_cipher_gcm_check_tag +/* */ (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen); +void _gcry_cipher_gcm_setkey +/* */ (gcry_cipher_hd_t c); + + +/*-- cipher-poly1305.c --*/ +gcry_err_code_t _gcry_cipher_poly1305_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_poly1305_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_poly1305_setiv +/* */ (gcry_cipher_hd_t c, + const unsigned char *iv, size_t ivlen); +gcry_err_code_t _gcry_cipher_poly1305_authenticate +/* */ (gcry_cipher_hd_t c, + const unsigned char *aadbuf, size_t aadbuflen); +gcry_err_code_t _gcry_cipher_poly1305_get_tag +/* */ (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen); +gcry_err_code_t _gcry_cipher_poly1305_check_tag +/* */ (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen); +void _gcry_cipher_poly1305_setkey +/* */ (gcry_cipher_hd_t c); + + +/*-- cipher-ocb.c --*/ +gcry_err_code_t _gcry_cipher_ocb_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_ocb_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_ocb_set_nonce +/* */ (gcry_cipher_hd_t c, const unsigned char *nonce, + size_t noncelen); +gcry_err_code_t _gcry_cipher_ocb_authenticate +/* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen); +gcry_err_code_t _gcry_cipher_ocb_get_tag +/* */ (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen); +gcry_err_code_t _gcry_cipher_ocb_check_tag +/* */ (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen); + + +/*-- cipher-xts.c --*/ +gcry_err_code_t _gcry_cipher_xts_crypt +/* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen, int encrypt); + + +/* Return the L-value for block N. Note: 'cipher_ocb.c' ensures that N + * will never be multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can + * be directly passed to _gcry_ctz() function and resulting index will + * never overflow the table. */ +static inline const unsigned char * +ocb_get_l (gcry_cipher_hd_t c, u64 n) +{ + unsigned long ntz; + +#if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4) + /* Assumes that N != 0. 
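+     'rep;bsf' encodes 'tzcnt' on CPUs that support it, which is well
+     defined for a zero input, while plain 'bsf' (executed by older
+     CPUs) is not; hence the requirement.  Example: N == 24, binary
+     11000, has ntz == 3, so L[3] is returned.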
*/ + asm ("rep;bsfl %k[low], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [low] "r" ((unsigned long)n) + : "cc"); +#else + ntz = _gcry_ctz (n); +#endif + + return c->u_mode.ocb.L[ntz]; +} + +#endif /*G10_CIPHER_INTERNAL_H*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-ocb.c b/libotr/libgcrypt-1.8.7/cipher/cipher-ocb.c new file mode 100644 index 0000000..db42aaf --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-ocb.c @@ -0,0 +1,723 @@ +/* cipher-ocb.c - OCB cipher mode + * Copyright (C) 2015, 2016 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * + * OCB is covered by several patents but may be used freely by most + * software. See http://web.cs.ucdavis.edu/~rogaway/ocb/license.htm . + * In particular license 1 is suitable for Libgcrypt: See + * http://web.cs.ucdavis.edu/~rogaway/ocb/license1.pdf for the full + * license document; it basically says: + * + * License 1 — License for Open-Source Software Implementations of OCB + * (Jan 9, 2013) + * + * Under this license, you are authorized to make, use, and + * distribute open-source software implementations of OCB. This + * license terminates for you if you sue someone over their + * open-source software implementation of OCB claiming that you have + * a patent covering their implementation. + */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +/* Double the OCB_BLOCK_LEN sized block B in-place. */ +static inline void +double_block (unsigned char *b) +{ +#if OCB_BLOCK_LEN != 16 + unsigned char b_0 = b[0]; + int i; + + for (i=0; i < OCB_BLOCK_LEN - 1; i++) + b[i] = (b[i] << 1) | (b[i+1] >> 7); + + b[OCB_BLOCK_LEN-1] = (b[OCB_BLOCK_LEN-1] << 1) ^ ((b_0 >> 7) * 135); +#else + /* This is the generic code for 16 byte blocks. However it is not + faster than the straight byte by byte implementation. */ + u64 l_0, l, r; + + l = buf_get_be64 (b); + r = buf_get_be64 (b + 8); + + l_0 = -(l >> 63); + l = (l + l) ^ (r >> 63); + r = (r + r) ^ (l_0 & 135); + + buf_put_be64 (b, l); + buf_put_be64 (b+8, r); +#endif +} + + +/* Double the OCB_BLOCK_LEN sized block S and store it at D. S and D + may point to the same memory location but they may not overlap. */ +static void +double_block_cpy (unsigned char *d, const unsigned char *s) +{ + if (d != s) + buf_cpy (d, s, OCB_BLOCK_LEN); + double_block (d); +} + + +/* Copy NBYTES from buffer S starting at bit offset BITOFF to buffer D. 
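+   Bit 0 is the most significant bit of S[0].  For example, with
+   BITOFF == 3 the first output byte is made of the low five bits of
+   S[0] followed by the top three bits of S[1].  For a non-multiple-of-8
+   BITOFF the loop reads one byte past the last input byte consumed;
+   the only caller accounts for that with its oversized 'stretch'
+   buffer.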
*/ +static void +bit_copy (unsigned char *d, const unsigned char *s, + unsigned int bitoff, unsigned int nbytes) +{ + unsigned int shift; + + s += bitoff / 8; + shift = bitoff % 8; + if (shift) + { + for (; nbytes; nbytes--, d++, s++) + *d = (s[0] << shift) | (s[1] >> (8 - shift)); + } + else + { + for (; nbytes; nbytes--, d++, s++) + *d = *s; + } +} + + +/* Get L_big value for block N, where N is multiple of 65536. */ +static void +ocb_get_L_big (gcry_cipher_hd_t c, u64 n, unsigned char *l_buf) +{ + int ntz = _gcry_ctz64 (n); + + gcry_assert(ntz >= OCB_L_TABLE_SIZE); + + double_block_cpy (l_buf, c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]); + for (ntz -= OCB_L_TABLE_SIZE; ntz; ntz--) + double_block (l_buf); +} + + +/* Set the nonce for OCB. This requires that the key has been set. + Using it again resets start a new encryption cycle using the same + key. */ +gcry_err_code_t +_gcry_cipher_ocb_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce, + size_t noncelen) +{ + unsigned char ktop[OCB_BLOCK_LEN]; + unsigned char stretch[OCB_BLOCK_LEN + 8]; + unsigned int bottom; + int i; + unsigned int burn = 0; + unsigned int nburn; + + /* Check args. */ + if (!c->marks.key) + return GPG_ERR_INV_STATE; /* Key must have been set first. */ + switch (c->u_mode.ocb.taglen) + { + case 8: + case 12: + case 16: + break; + default: + return GPG_ERR_BUG; /* Invalid tag length. */ + } + + if (c->spec->blocksize != OCB_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (!nonce) + return GPG_ERR_INV_ARG; + /* 120 bit is the allowed maximum. In addition we impose a minimum + of 64 bit. */ + if (noncelen > (120/8) || noncelen < (64/8) || noncelen >= OCB_BLOCK_LEN) + return GPG_ERR_INV_LENGTH; + + /* Set up the L table. */ + /* L_star = E(zero_128) */ + memset (ktop, 0, OCB_BLOCK_LEN); + nburn = c->spec->encrypt (&c->context.c, c->u_mode.ocb.L_star, ktop); + burn = nburn > burn ? nburn : burn; + /* L_dollar = double(L_star) */ + double_block_cpy (c->u_mode.ocb.L_dollar, c->u_mode.ocb.L_star); + /* L_0 = double(L_dollar), ... */ + double_block_cpy (c->u_mode.ocb.L[0], c->u_mode.ocb.L_dollar); + for (i = 1; i < OCB_L_TABLE_SIZE; i++) + double_block_cpy (c->u_mode.ocb.L[i], c->u_mode.ocb.L[i-1]); + + /* Prepare the nonce. */ + memset (ktop, 0, (OCB_BLOCK_LEN - noncelen)); + buf_cpy (ktop + (OCB_BLOCK_LEN - noncelen), nonce, noncelen); + ktop[0] = ((c->u_mode.ocb.taglen * 8) % 128) << 1; + ktop[OCB_BLOCK_LEN - noncelen - 1] |= 1; + bottom = ktop[OCB_BLOCK_LEN - 1] & 0x3f; + ktop[OCB_BLOCK_LEN - 1] &= 0xc0; /* Zero the bottom bits. */ + nburn = c->spec->encrypt (&c->context.c, ktop, ktop); + burn = nburn > burn ? nburn : burn; + /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */ + buf_cpy (stretch, ktop, OCB_BLOCK_LEN); + buf_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8); + /* Offset_0 = Stretch[1+bottom..128+bottom] + (We use the IV field to store the offset) */ + bit_copy (c->u_iv.iv, stretch, bottom, OCB_BLOCK_LEN); + c->marks.iv = 1; + + /* Checksum_0 = zeros(128) + (We use the CTR field to store the checksum) */ + memset (c->u_ctr.ctr, 0, OCB_BLOCK_LEN); + + /* Clear AAD buffer. */ + memset (c->u_mode.ocb.aad_offset, 0, OCB_BLOCK_LEN); + memset (c->u_mode.ocb.aad_sum, 0, OCB_BLOCK_LEN); + + /* Setup other values. 
*/ + memset (c->lastiv, 0, sizeof(c->lastiv)); + c->unused = 0; + c->marks.tag = 0; + c->marks.finalize = 0; + c->u_mode.ocb.data_nblocks = 0; + c->u_mode.ocb.aad_nblocks = 0; + c->u_mode.ocb.aad_nleftover = 0; + c->u_mode.ocb.data_finalized = 0; + c->u_mode.ocb.aad_finalized = 0; + + /* log_printhex ("L_* ", c->u_mode.ocb.L_star, OCB_BLOCK_LEN); */ + /* log_printhex ("L_$ ", c->u_mode.ocb.L_dollar, OCB_BLOCK_LEN); */ + /* log_printhex ("L_0 ", c->u_mode.ocb.L[0], OCB_BLOCK_LEN); */ + /* log_printhex ("L_1 ", c->u_mode.ocb.L[1], OCB_BLOCK_LEN); */ + /* log_debug ( "bottom : %u (decimal)\n", bottom); */ + /* log_printhex ("Ktop ", ktop, OCB_BLOCK_LEN); */ + /* log_printhex ("Stretch ", stretch, sizeof stretch); */ + /* log_printhex ("Offset_0 ", c->u_iv.iv, OCB_BLOCK_LEN); */ + + /* Cleanup */ + wipememory (ktop, sizeof ktop); + wipememory (stretch, sizeof stretch); + if (burn > 0) + _gcry_burn_stack (burn + 4*sizeof(void*)); + + return 0; +} + + +/* Process additional authentication data. This implementation allows + to add additional authentication data at any time before the final + gcry_cipher_gettag. */ +gcry_err_code_t +_gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, + size_t abuflen) +{ + const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE; + const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1); + unsigned char l_tmp[OCB_BLOCK_LEN]; + unsigned int burn = 0; + unsigned int nburn; + + /* Check that a nonce and thus a key has been set and that we have + not yet computed the tag. We also return an error if the aad has + been finalized (i.e. a short block has been processed). */ + if (!c->marks.iv || c->marks.tag || c->u_mode.ocb.aad_finalized) + return GPG_ERR_INV_STATE; + + /* Check correct usage and arguments. */ + if (c->spec->blocksize != OCB_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + + /* Process remaining data from the last call first. */ + if (c->u_mode.ocb.aad_nleftover) + { + for (; abuflen && c->u_mode.ocb.aad_nleftover < OCB_BLOCK_LEN; + abuf++, abuflen--) + c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover++] = *abuf; + + if (c->u_mode.ocb.aad_nleftover == OCB_BLOCK_LEN) + { + c->u_mode.ocb.aad_nblocks++; + + if ((c->u_mode.ocb.aad_nblocks % table_maxblks) == 0) + { + /* Table overflow, L needs to be generated. */ + ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks + 1, l_tmp); + } + else + { + buf_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks), + OCB_BLOCK_LEN); + } + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN); + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + buf_xor (l_tmp, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN); + nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp); + burn = nburn > burn ? nburn : burn; + buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); + + c->u_mode.ocb.aad_nleftover = 0; + } + } + + if (!abuflen) + { + if (burn > 0) + _gcry_burn_stack (burn + 4*sizeof(void*)); + + return 0; + } + + /* Full blocks handling. */ + while (abuflen >= OCB_BLOCK_LEN) + { + size_t nblks = abuflen / OCB_BLOCK_LEN; + size_t nmaxblks; + + /* Check how many blocks to process till table overflow. */ + nmaxblks = (c->u_mode.ocb.aad_nblocks + 1) % table_maxblks; + nmaxblks = (table_maxblks - nmaxblks) % table_maxblks; + + if (nmaxblks == 0) + { + /* Table overflow, generate L and process one block. 
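+             The precomputed table only covers L_0 .. L_15, i.e.
+             ntz(i) < 16; every 65536th block has ntz(i) >= 16 and its
+             L value is derived here by further doubling of L[15] in
+             ocb_get_L_big.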
*/
+          c->u_mode.ocb.aad_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry the loop.  Next overflow will
+           * happen after 65535 blocks.  */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_auth)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_auth (c, abuf, nblks);
+          ndone = nblks - nleft;
+
+          abuf += ndone * OCB_BLOCK_LEN;
+          abuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
+        }
+
+      /* Hash all full blocks.  */
+      while (nblks)
+        {
+          c->u_mode.ocb.aad_nblocks++;
+
+          gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_mode.ocb.aad_offset,
+                     ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                     OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+        }
+    }
+
+  /* Store away the remaining data.  */
+  for (; abuflen && c->u_mode.ocb.aad_nleftover < OCB_BLOCK_LEN;
+       abuf++, abuflen--)
+    c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover++] = *abuf;
+  gcry_assert (!abuflen);
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
+  return 0;
+}
+
+
+/* Hash final partial AAD block.  */
+static void
+ocb_aad_finalize (gcry_cipher_hd_t c)
+{
+  unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+
+  /* Check that a nonce and thus a key has been set and that we have
+     not yet computed the tag.  We also skip this if the aad has been
+     finalized.  */
+  if (!c->marks.iv || c->marks.tag || c->u_mode.ocb.aad_finalized)
+    return;
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return;  /* Ooops.  */
+
+  /* Hash final partial block if any.  */
+  if (c->u_mode.ocb.aad_nleftover)
+    {
+      /* Offset_* = Offset_m xor L_*  */
+      buf_xor_1 (c->u_mode.ocb.aad_offset,
+                 c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_*  */
+      buf_cpy (l_tmp, c->u_mode.ocb.aad_leftover, c->u_mode.ocb.aad_nleftover);
+      memset (l_tmp + c->u_mode.ocb.aad_nleftover, 0,
+              OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover);
+      l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
+      buf_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
+      /* Sum = Sum_m xor ENCIPHER(K, CipherInput)  */
+      nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+      burn = nburn > burn ? nburn : burn;
+      buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+      c->u_mode.ocb.aad_nleftover = 0;
+    }
+
+  /* Mark AAD as finalized so that gcry_cipher_ocb_authenticate can
+   * return an error when called again.  */
+  c->u_mode.ocb.aad_finalized = 1;
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+}
+
+
+
+/* Checksumming for encrypt and decrypt.  */
+static void
+ocb_checksum (unsigned char *chksum, const unsigned char *plainbuf,
+              size_t nblks)
+{
+  while (nblks > 0)
+    {
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      buf_xor_1(chksum, plainbuf, OCB_BLOCK_LEN);
+
+      plainbuf += OCB_BLOCK_LEN;
+      nblks--;
+    }
+}
+
+
+/* Common code for encrypt and decrypt.  */
+static gcry_err_code_t
+ocb_crypt (gcry_cipher_hd_t c, int encrypt,
+           unsigned char *outbuf, size_t outbuflen,
+           const unsigned char *inbuf, size_t inbuflen)
+{
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
+  unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+  gcry_cipher_encrypt_t crypt_fn =
+    encrypt ? c->spec->encrypt : c->spec->decrypt;
+
+  /* Check that a nonce and thus a key has been set and that we are
+     not yet in end of data state.  */
+  if (!c->marks.iv || c->u_mode.ocb.data_finalized)
+    return GPG_ERR_INV_STATE;
+
+  /* Check correct usage and arguments.  */
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.finalize)
+    ; /* Allow arbitrary length.  */
+  else if ((inbuflen % OCB_BLOCK_LEN))
+    return GPG_ERR_INV_LENGTH;  /* We support only full blocks for now.  */
+
+  /* Full blocks handling.  */
+  while (inbuflen >= OCB_BLOCK_LEN)
+    {
+      size_t nblks = inbuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
+
+      /* Check how many blocks to process till table overflow.  */
+      nmaxblks = (c->u_mode.ocb.data_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+      if (nmaxblks == 0)
+        {
+          /* Table overflow, generate L and process one block.  */
+          c->u_mode.ocb.data_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.data_nblocks, l_tmp);
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, 1);
+            }
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+          buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+          nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, outbuf, 1);
+            }
+
+          inbuf += OCB_BLOCK_LEN;
+          inbuflen -= OCB_BLOCK_LEN;
+          outbuf += OCB_BLOCK_LEN;
+          outbuflen -= OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry the loop.  Next overflow will
+           * happen after 65535 blocks.  */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_crypt)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+          ndone = nblks - nleft;
+
+          inbuf += ndone * OCB_BLOCK_LEN;
+          outbuf += ndone * OCB_BLOCK_LEN;
+          inbuflen -= ndone * OCB_BLOCK_LEN;
+          outbuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
+        }
+
+      if (nblks)
+        {
+          size_t nblks_chksum = nblks;
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, nblks_chksum);
+            }
+
+          /* Encrypt all full blocks.  */
+          while (nblks)
+            {
+              c->u_mode.ocb.data_nblocks++;
+
+              gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
+
+              /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+              buf_xor_1 (c->u_iv.iv,
+                         ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+                         OCB_BLOCK_LEN);
+              /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+              buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+              nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+              burn = nburn > burn ? nburn : burn;
+              buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+              inbuf += OCB_BLOCK_LEN;
+              inbuflen -= OCB_BLOCK_LEN;
+              outbuf += OCB_BLOCK_LEN;
+              outbuflen -= OCB_BLOCK_LEN;
+              nblks--;
+            }
+
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr,
+                            outbuf - nblks_chksum * OCB_BLOCK_LEN,
+                            nblks_chksum);
+            }
+        }
+    }
+
+  /* Encrypt final partial block.  Note that we expect INBUFLEN to be
+     shorter than OCB_BLOCK_LEN (see above).  */
+  if (inbuflen)
+    {
+      unsigned char pad[OCB_BLOCK_LEN];
+
+      /* Offset_* = Offset_m xor L_*  */
+      buf_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      /* Pad = ENCIPHER(K, Offset_*)  */
+      nburn = c->spec->encrypt (&c->context.c, pad, c->u_iv.iv);
+      burn = nburn > burn ? nburn : burn;
+
+      if (encrypt)
+        {
+          /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*)))  */
+          /* Note that INBUFLEN is less than OCB_BLOCK_LEN.  */
+          buf_cpy (l_tmp, inbuf, inbuflen);
+          memset (l_tmp + inbuflen, 0, OCB_BLOCK_LEN - inbuflen);
+          l_tmp[inbuflen] = 0x80;
+          buf_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+          /* C_* = P_* xor Pad[1..bitlen(P_*)]  */
+          buf_xor (outbuf, inbuf, pad, inbuflen);
+        }
+      else
+        {
+          /* P_* = C_* xor Pad[1..bitlen(C_*)]  */
+          /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*)))  */
+          buf_cpy (l_tmp, pad, OCB_BLOCK_LEN);
+          buf_cpy (l_tmp, inbuf, inbuflen);
+          buf_xor_1 (l_tmp, pad, OCB_BLOCK_LEN);
+          l_tmp[inbuflen] = 0x80;
+          buf_cpy (outbuf, l_tmp, inbuflen);
+
+          buf_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+        }
+    }
+
+  /* Compute the tag if the finalize flag has been set.  */
+  if (c->marks.finalize)
+    {
+      /* Tag = ENCIPHER(K, Checksum xor Offset xor L_$) xor HASH(K,A)  */
+      buf_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv, OCB_BLOCK_LEN);
+      buf_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar, OCB_BLOCK_LEN);
+      nburn = c->spec->encrypt (&c->context.c,
+                                c->u_mode.ocb.tag, c->u_mode.ocb.tag);
+      burn = nburn > burn ? nburn : burn;
+
+      c->u_mode.ocb.data_finalized = 1;
+      /* Note that the final part of the tag computation is done
+         by _gcry_cipher_ocb_get_tag.  */
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
+  return 0;
+}
+
+
+/* Encrypt (INBUF,INBUFLEN) in OCB mode to OUTBUF.  OUTBUFLEN gives
+   the allocated size of OUTBUF.  This function accepts only multiples
+   of a full block unless gcry_cipher_final has been called in which
+   case the next block may have any length.  */
+gcry_err_code_t
+_gcry_cipher_ocb_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  return ocb_crypt (c, 1, outbuf, outbuflen, inbuf, inbuflen);
+}
+
+
+/* Decrypt (INBUF,INBUFLEN) in OCB mode to OUTBUF.  OUTBUFLEN gives
+   the allocated size of OUTBUF.  This function accepts only multiples
+   of a full block unless gcry_cipher_final has been called in which
+   case the next block may have any length.
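+   (Sketch via the public API; not part of this file.  The final short
+   chunk is announced with
+
+      gcry_cipher_final (hd);
+      gcry_cipher_decrypt (hd, buf, taillen, NULL, 0);
+      gcry_cipher_checktag (hd, tag, taglen);
+
+   after which no further data may be fed in.)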
*/ +gcry_err_code_t +_gcry_cipher_ocb_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return ocb_crypt (c, 0, outbuf, outbuflen, inbuf, inbuflen); +} + + +/* Compute the tag. The last data operation has already done some + part of it. To allow adding AAD even after having done all data, + we finish the tag computation only here. */ +static void +compute_tag_if_needed (gcry_cipher_hd_t c) +{ + if (!c->marks.tag) + { + ocb_aad_finalize (c); + buf_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum, OCB_BLOCK_LEN); + c->marks.tag = 1; + } +} + + +/* Copy the already computed tag to OUTTAG. OUTTAGSIZE is the + allocated size of OUTTAG; the function returns an error if that is + too short to hold the tag. */ +gcry_err_code_t +_gcry_cipher_ocb_get_tag (gcry_cipher_hd_t c, + unsigned char *outtag, size_t outtagsize) +{ + if (c->u_mode.ocb.taglen > outtagsize) + return GPG_ERR_BUFFER_TOO_SHORT; + if (!c->u_mode.ocb.data_finalized) + return GPG_ERR_INV_STATE; /* Data has not yet been finalized. */ + + compute_tag_if_needed (c); + + memcpy (outtag, c->u_mode.ocb.tag, c->u_mode.ocb.taglen); + + return 0; +} + + +/* Check that the tag (INTAG,TAGLEN) matches the computed tag for the + handle C. */ +gcry_err_code_t +_gcry_cipher_ocb_check_tag (gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen) +{ + size_t n; + + if (!c->u_mode.ocb.data_finalized) + return GPG_ERR_INV_STATE; /* Data has not yet been finalized. */ + + compute_tag_if_needed (c); + + n = c->u_mode.ocb.taglen; + if (taglen < n) + n = taglen; + + if (!buf_eq_const (intag, c->u_mode.ocb.tag, n) + || c->u_mode.ocb.taglen != taglen) + return GPG_ERR_CHECKSUM; + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-ofb.c b/libotr/libgcrypt-1.8.7/cipher/cipher-ofb.c new file mode 100644 index 0000000..f821d1b --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-ofb.c @@ -0,0 +1,100 @@ +/* cipher-ofb.c - Generic OFB mode implementation + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +gcry_err_code_t +_gcry_cipher_ofb_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + unsigned char *ivp; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize = c->spec->blocksize; + unsigned int burn, nburn; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. 
*/ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return GPG_ERR_INV_LENGTH; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if ( inbuflen <= c->unused ) + { + /* Short enough to be encoded by the remaining XOR mask. */ + /* XOR the input with the IV */ + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor(outbuf, ivp, inbuf, inbuflen); + c->unused -= inbuflen; + return 0; + } + + burn = 0; + + if( c->unused ) + { + inbuflen -= c->unused; + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor(outbuf, ivp, inbuf, c->unused); + outbuf += c->unused; + inbuf += c->unused; + c->unused = 0; + } + + /* Now we can process complete blocks. */ + while ( inbuflen >= blocksize ) + { + /* Encrypt the IV (and save the current one). */ + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + buf_xor(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + if ( inbuflen ) + { /* process the remaining bytes */ + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + c->unused = blocksize; + c->unused -= inbuflen; + buf_xor(outbuf, c->u_iv.iv, inbuf, inbuflen); + outbuf += inbuflen; + inbuf += inbuflen; + inbuflen = 0; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-poly1305.c b/libotr/libgcrypt-1.8.7/cipher/cipher-poly1305.c new file mode 100644 index 0000000..a2a74e8 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-poly1305.c @@ -0,0 +1,334 @@ +/* cipher-poly1305.c - Poly1305 based AEAD cipher mode, RFC-7539 + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
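
Since cipher-ofb.c above never calls c->spec->decrypt, it is worth spelling
out the property it relies on: OFB decryption is the same keystream XOR as
encryption.  A small sketch against the public API (not code from this file;
AES-128, setup and error checking omitted):

  #include <gcrypt.h>

  /* Round-trip DATA through AES-128-OFB.  The decrypt call reuses the
     encrypt path internally, so resetting the IV is all it takes to
     undo the encryption.  */
  static void
  ofb_roundtrip_sketch (const unsigned char key[16],
                        const unsigned char iv[16],
                        unsigned char *data, size_t datalen)
  {
    gcry_cipher_hd_t hd;

    gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_OFB, 0);
    gcry_cipher_setkey (hd, key, 16);
    gcry_cipher_setiv (hd, iv, 16);
    gcry_cipher_encrypt (hd, data, datalen, NULL, 0);  /* any length works */

    gcry_cipher_setiv (hd, iv, 16);                    /* rewind keystream */
    gcry_cipher_decrypt (hd, data, datalen, NULL, 0);  /* back to plaintext */
    gcry_cipher_close (hd);
  }
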
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" +#include "./poly1305-internal.h" + + +static inline int +poly1305_bytecounter_add (u32 ctr[2], size_t add) +{ + int overflow = 0; + + if (sizeof(add) > sizeof(u32)) + { + u32 high_add = ((add >> 31) >> 1) & 0xffffffff; + ctr[1] += high_add; + if (ctr[1] < high_add) + overflow = 1; + } + + ctr[0] += add; + if (ctr[0] >= add) + return overflow; + + ctr[1] += 1; + return (ctr[1] < 1) || overflow; +} + + +static void +poly1305_fill_bytecounts (gcry_cipher_hd_t c) +{ + u32 lenbuf[4]; + + lenbuf[0] = le_bswap32(c->u_mode.poly1305.aadcount[0]); + lenbuf[1] = le_bswap32(c->u_mode.poly1305.aadcount[1]); + lenbuf[2] = le_bswap32(c->u_mode.poly1305.datacount[0]); + lenbuf[3] = le_bswap32(c->u_mode.poly1305.datacount[1]); + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, (byte*)lenbuf, + sizeof(lenbuf)); + + wipememory(lenbuf, sizeof(lenbuf)); +} + + +static void +poly1305_do_padding (gcry_cipher_hd_t c, u32 ctr[2]) +{ + static const byte zero_padding_buf[15] = {}; + u32 padding_count; + + /* Padding to 16 byte boundary. */ + if (ctr[0] % 16 > 0) + { + padding_count = 16 - ctr[0] % 16; + + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, zero_padding_buf, + padding_count); + } +} + + +static void +poly1305_aad_finish (gcry_cipher_hd_t c) +{ + /* After AAD, feed padding bytes so we get 16 byte alignment. */ + poly1305_do_padding (c, c->u_mode.poly1305.aadcount); + + /* Start of encryption marks end of AAD stream. */ + c->u_mode.poly1305.aad_finalized = 1; + + c->u_mode.poly1305.datacount[0] = 0; + c->u_mode.poly1305.datacount[1] = 0; +} + + +static gcry_err_code_t +poly1305_set_zeroiv (gcry_cipher_hd_t c) +{ + byte zero[8] = { 0, }; + + return _gcry_cipher_poly1305_setiv (c, zero, sizeof(zero)); +} + + +gcry_err_code_t +_gcry_cipher_poly1305_authenticate (gcry_cipher_hd_t c, + const byte * aadbuf, size_t aadbuflen) +{ + if (c->u_mode.poly1305.bytecount_over_limits) + return GPG_ERR_INV_LENGTH; + if (c->u_mode.poly1305.aad_finalized) + return GPG_ERR_INV_STATE; + if (c->marks.tag) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + poly1305_set_zeroiv(c); + + if (poly1305_bytecounter_add(c->u_mode.poly1305.aadcount, aadbuflen)) + { + c->u_mode.poly1305.bytecount_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, aadbuf, aadbuflen); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_poly1305_encrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t err; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->marks.tag) + return GPG_ERR_INV_STATE; + if (c->u_mode.poly1305.bytecount_over_limits) + return GPG_ERR_INV_LENGTH; + + if (!c->marks.iv) + { + err = poly1305_set_zeroiv(c); + if (err) + return err; + } + + if (!c->u_mode.poly1305.aad_finalized) + poly1305_aad_finish(c); + + if (poly1305_bytecounter_add(c->u_mode.poly1305.datacount, inbuflen)) + { + c->u_mode.poly1305.bytecount_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + c->spec->stencrypt(&c->context.c, outbuf, (byte*)inbuf, inbuflen); + + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, outbuf, inbuflen); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_poly1305_decrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t err; + + if 
(outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->marks.tag) + return GPG_ERR_INV_STATE; + if (c->u_mode.poly1305.bytecount_over_limits) + return GPG_ERR_INV_LENGTH; + + if (!c->marks.iv) + { + err = poly1305_set_zeroiv(c); + if (err) + return err; + } + + if (!c->u_mode.poly1305.aad_finalized) + poly1305_aad_finish(c); + + if (poly1305_bytecounter_add(c->u_mode.poly1305.datacount, inbuflen)) + { + c->u_mode.poly1305.bytecount_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, inbuf, inbuflen); + + c->spec->stdecrypt(&c->context.c, outbuf, (byte*)inbuf, inbuflen); + return 0; +} + + +static gcry_err_code_t +_gcry_cipher_poly1305_tag (gcry_cipher_hd_t c, + byte * outbuf, size_t outbuflen, int check) +{ + gcry_err_code_t err; + + if (outbuflen < POLY1305_TAGLEN) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->u_mode.poly1305.bytecount_over_limits) + return GPG_ERR_INV_LENGTH; + + if (!c->marks.iv) + { + err = poly1305_set_zeroiv(c); + if (err) + return err; + } + + if (!c->u_mode.poly1305.aad_finalized) + poly1305_aad_finish(c); + + if (!c->marks.tag) + { + /* After data, feed padding bytes so we get 16 byte alignment. */ + poly1305_do_padding (c, c->u_mode.poly1305.datacount); + + /* Write byte counts to poly1305. */ + poly1305_fill_bytecounts(c); + + _gcry_poly1305_finish(&c->u_mode.poly1305.ctx, c->u_iv.iv); + + c->marks.tag = 1; + } + + if (!check) + { + memcpy (outbuf, c->u_iv.iv, POLY1305_TAGLEN); + } + else + { + /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF + * and thus we need to compare its length first. */ + if (outbuflen != POLY1305_TAGLEN + || !buf_eq_const (outbuf, c->u_iv.iv, POLY1305_TAGLEN)) + return GPG_ERR_CHECKSUM; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_poly1305_get_tag (gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen) +{ + return _gcry_cipher_poly1305_tag (c, outtag, taglen, 0); +} + +gcry_err_code_t +_gcry_cipher_poly1305_check_tag (gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen) +{ + return _gcry_cipher_poly1305_tag (c, (unsigned char *) intag, taglen, 1); +} + + +void +_gcry_cipher_poly1305_setkey (gcry_cipher_hd_t c) +{ + c->u_mode.poly1305.aadcount[0] = 0; + c->u_mode.poly1305.aadcount[1] = 0; + + c->u_mode.poly1305.datacount[0] = 0; + c->u_mode.poly1305.datacount[1] = 0; + + c->u_mode.poly1305.bytecount_over_limits = 0; + c->u_mode.poly1305.aad_finalized = 0; + c->marks.tag = 0; + c->marks.iv = 0; +} + + +gcry_err_code_t +_gcry_cipher_poly1305_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) +{ + byte tmpbuf[64]; /* size of ChaCha20 block */ + gcry_err_code_t err; + + /* IV must be 96-bits */ + if (!iv && ivlen != (96 / 8)) + return GPG_ERR_INV_ARG; + + memset(&c->u_mode.poly1305.ctx, 0, sizeof(c->u_mode.poly1305.ctx)); + + c->u_mode.poly1305.aadcount[0] = 0; + c->u_mode.poly1305.aadcount[1] = 0; + + c->u_mode.poly1305.datacount[0] = 0; + c->u_mode.poly1305.datacount[1] = 0; + + c->u_mode.poly1305.bytecount_over_limits = 0; + c->u_mode.poly1305.aad_finalized = 0; + c->marks.tag = 0; + c->marks.iv = 0; + + /* Set up IV for stream cipher. */ + c->spec->setiv (&c->context.c, iv, ivlen); + + /* Get the first block from ChaCha20. */ + memset(tmpbuf, 0, sizeof(tmpbuf)); + c->spec->stencrypt(&c->context.c, tmpbuf, tmpbuf, sizeof(tmpbuf)); + + /* Use the first 32-bytes as Poly1305 key. 
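+     This is the RFC 7539 construction: the cipher was just run over an
+     all-zero 64-byte buffer with its block counter at zero, so TMPBUF
+     now holds the first ChaCha20 keystream block.  Bytes 0..31 become
+     the one-time Poly1305 key (r,s), bytes 32..63 are discarded, and
+     payload encryption then proceeds from block counter one.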
*/ + err = _gcry_poly1305_init (&c->u_mode.poly1305.ctx, tmpbuf, POLY1305_KEYLEN); + + wipememory(tmpbuf, sizeof(tmpbuf)); + + if (err) + return err; + + c->marks.iv = 1; + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-selftest.c b/libotr/libgcrypt-1.8.7/cipher/cipher-selftest.c new file mode 100644 index 0000000..cecbab7 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-selftest.c @@ -0,0 +1,511 @@ +/* cipher-selftest.c - Helper functions for bulk encryption selftests. + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#ifdef HAVE_SYSLOG +# include <syslog.h> +#endif /*HAVE_SYSLOG*/ + +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-selftest.h" + +#ifdef HAVE_STDINT_H +# include <stdint.h> /* uintptr_t */ +#elif defined(HAVE_INTTYPES_H) +# include <inttypes.h> +#else +/* In this case, uintptr_t is provided by config.h. */ +#endif + +/* Helper macro to force alignment to 16 bytes. */ +#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED +# define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) +#else +# define ATTR_ALIGNED_16 +#endif + + +/* Return an allocated buffers of size CONTEXT_SIZE with an alignment + of 16. The caller must free that buffer using the address returned + at R_MEM. Returns NULL and sets ERRNO on failure. */ +void * +_gcry_cipher_selftest_alloc_ctx (const int context_size, unsigned char **r_mem) +{ + int offs; + unsigned int ctx_aligned_size, memsize; + + ctx_aligned_size = context_size + 15; + ctx_aligned_size -= ctx_aligned_size & 0xf; + + memsize = ctx_aligned_size + 16; + + *r_mem = xtrycalloc (1, memsize); + if (!*r_mem) + return NULL; + + offs = (16 - ((uintptr_t)*r_mem & 15)) & 15; + return (void*)(*r_mem + offs); +} + + +/* Run the self-tests for <block cipher>-CBC-<block size>, tests bulk CBC + decryption. Returns NULL on success. */ +const char * +_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey_func, + gcry_cipher_encrypt_t encrypt_one, + gcry_cipher_bulk_cbc_dec_t bulk_cbc_dec, + const int nblocks, const int blocksize, + const int context_size) +{ + int i, offs; + unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem; + unsigned int ctx_aligned_size, memsize; + + static const unsigned char key[16] ATTR_ALIGNED_16 = { + 0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, + 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22 + }; + + /* Allocate buffers, align first two elements to 16 bytes and latter to + block size. 
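+     The arithmetic below is the usual power-of-two round-up: adding 15
+     and clearing the low four bits rounds CONTEXT_SIZE up to a multiple
+     of 16, while the 16 bytes of slack in MEMSIZE leave room for OFFS
+     to shift the context start to the next 16-byte boundary of the raw
+     allocation.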
*/ + ctx_aligned_size = context_size + 15; + ctx_aligned_size -= ctx_aligned_size & 0xf; + + memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 16; + + mem = xtrycalloc (1, memsize); + if (!mem) + return "failed to allocate memory"; + + offs = (16 - ((uintptr_t)mem & 15)) & 15; + ctx = (void*)(mem + offs); + iv = ctx + ctx_aligned_size; + iv2 = iv + blocksize; + plaintext = iv2 + blocksize; + plaintext2 = plaintext + nblocks * blocksize; + ciphertext = plaintext2 + nblocks * blocksize; + + /* Initialize ctx */ + if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR) + { + xfree(mem); + return "setkey failed"; + } + + /* Test single block code path */ + memset (iv, 0x4e, blocksize); + memset (iv2, 0x4e, blocksize); + for (i = 0; i < blocksize; i++) + plaintext[i] = i; + + /* CBC manually. */ + buf_xor (ciphertext, iv, plaintext, blocksize); + encrypt_one (ctx, ciphertext, ciphertext); + memcpy (iv, ciphertext, blocksize); + + /* CBC decrypt. */ + bulk_cbc_dec (ctx, iv2, plaintext2, ciphertext, 1); + if (memcmp (plaintext2, plaintext, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CBC-%d test failed (plaintext mismatch)", cipher, + blocksize * 8); +#else + (void)cipher; /* Not used. */ +#endif + return "selftest for CBC failed - see syslog for details"; + } + + if (memcmp (iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CBC-%d test failed (IV mismatch)", cipher, blocksize * 8); +#endif + return "selftest for CBC failed - see syslog for details"; + } + + /* Test parallelized code paths */ + memset (iv, 0x5f, blocksize); + memset (iv2, 0x5f, blocksize); + + for (i = 0; i < nblocks * blocksize; i++) + plaintext[i] = i; + + /* Create CBC ciphertext manually. */ + for (i = 0; i < nblocks * blocksize; i+=blocksize) + { + buf_xor (&ciphertext[i], iv, &plaintext[i], blocksize); + encrypt_one (ctx, &ciphertext[i], &ciphertext[i]); + memcpy (iv, &ciphertext[i], blocksize); + } + + /* Decrypt using bulk CBC and compare result. */ + bulk_cbc_dec (ctx, iv2, plaintext2, ciphertext, nblocks); + + if (memcmp (plaintext2, plaintext, nblocks * blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CBC-%d test failed (plaintext mismatch, parallel path)", + cipher, blocksize * 8); +#endif + return "selftest for CBC failed - see syslog for details"; + } + if (memcmp (iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CBC-%d test failed (IV mismatch, parallel path)", + cipher, blocksize * 8); +#endif + return "selftest for CBC failed - see syslog for details"; + } + + xfree (mem); + return NULL; +} + +/* Run the self-tests for <block cipher>-CFB-<block size>, tests bulk CFB + decryption. Returns NULL on success. */ +const char * +_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey_func, + gcry_cipher_encrypt_t encrypt_one, + gcry_cipher_bulk_cfb_dec_t bulk_cfb_dec, + const int nblocks, const int blocksize, + const int context_size) +{ + int i, offs; + unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem; + unsigned int ctx_aligned_size, memsize; + + static const unsigned char key[16] ATTR_ALIGNED_16 = { + 0x11,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, + 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x33 + }; + + /* Allocate buffers, align first two elements to 16 bytes and latter to + block size. 
*/ + ctx_aligned_size = context_size + 15; + ctx_aligned_size -= ctx_aligned_size & 0xf; + + memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 16; + + mem = xtrycalloc (1, memsize); + if (!mem) + return "failed to allocate memory"; + + offs = (16 - ((uintptr_t)mem & 15)) & 15; + ctx = (void*)(mem + offs); + iv = ctx + ctx_aligned_size; + iv2 = iv + blocksize; + plaintext = iv2 + blocksize; + plaintext2 = plaintext + nblocks * blocksize; + ciphertext = plaintext2 + nblocks * blocksize; + + /* Initialize ctx */ + if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR) + { + xfree(mem); + return "setkey failed"; + } + + /* Test single block code path */ + memset(iv, 0xd3, blocksize); + memset(iv2, 0xd3, blocksize); + for (i = 0; i < blocksize; i++) + plaintext[i] = i; + + /* CFB manually. */ + encrypt_one (ctx, ciphertext, iv); + buf_xor_2dst (iv, ciphertext, plaintext, blocksize); + + /* CFB decrypt. */ + bulk_cfb_dec (ctx, iv2, plaintext2, ciphertext, 1); + if (memcmp(plaintext2, plaintext, blocksize)) + { + xfree(mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CFB-%d test failed (plaintext mismatch)", cipher, + blocksize * 8); +#else + (void)cipher; /* Not used. */ +#endif + return "selftest for CFB failed - see syslog for details"; + } + + if (memcmp(iv2, iv, blocksize)) + { + xfree(mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CFB-%d test failed (IV mismatch)", cipher, blocksize * 8); +#endif + return "selftest for CFB failed - see syslog for details"; + } + + /* Test parallelized code paths */ + memset(iv, 0xe6, blocksize); + memset(iv2, 0xe6, blocksize); + + for (i = 0; i < nblocks * blocksize; i++) + plaintext[i] = i; + + /* Create CFB ciphertext manually. */ + for (i = 0; i < nblocks * blocksize; i+=blocksize) + { + encrypt_one (ctx, &ciphertext[i], iv); + buf_xor_2dst (iv, &ciphertext[i], &plaintext[i], blocksize); + } + + /* Decrypt using bulk CBC and compare result. */ + bulk_cfb_dec (ctx, iv2, plaintext2, ciphertext, nblocks); + + if (memcmp(plaintext2, plaintext, nblocks * blocksize)) + { + xfree(mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CFB-%d test failed (plaintext mismatch, parallel path)", + cipher, blocksize * 8); +#endif + return "selftest for CFB failed - see syslog for details"; + } + if (memcmp(iv2, iv, blocksize)) + { + xfree(mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CFB-%d test failed (IV mismatch, parallel path)", cipher, + blocksize * 8); +#endif + return "selftest for CFB failed - see syslog for details"; + } + + xfree(mem); + return NULL; +} + +/* Run the self-tests for <block cipher>-CTR-<block size>, tests IV increment + of bulk CTR encryption. Returns NULL on success. */ +const char * +_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey_func, + gcry_cipher_encrypt_t encrypt_one, + gcry_cipher_bulk_ctr_enc_t bulk_ctr_enc, + const int nblocks, const int blocksize, + const int context_size) +{ + int i, j, offs, diff; + unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *ciphertext2, + *iv, *iv2, *mem; + unsigned int ctx_aligned_size, memsize; + + static const unsigned char key[16] ATTR_ALIGNED_16 = { + 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, + 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 + }; + + /* Allocate buffers, align first two elements to 16 bytes and latter to + block size. 
*/ + ctx_aligned_size = context_size + 15; + ctx_aligned_size -= ctx_aligned_size & 0xf; + + memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 4) + 16; + + mem = xtrycalloc (1, memsize); + if (!mem) + return "failed to allocate memory"; + + offs = (16 - ((uintptr_t)mem & 15)) & 15; + ctx = (void*)(mem + offs); + iv = ctx + ctx_aligned_size; + iv2 = iv + blocksize; + plaintext = iv2 + blocksize; + plaintext2 = plaintext + nblocks * blocksize; + ciphertext = plaintext2 + nblocks * blocksize; + ciphertext2 = ciphertext + nblocks * blocksize; + + /* Initialize ctx */ + if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR) + { + xfree(mem); + return "setkey failed"; + } + + /* Test single block code path */ + memset (iv, 0xff, blocksize); + for (i = 0; i < blocksize; i++) + plaintext[i] = i; + + /* CTR manually. */ + encrypt_one (ctx, ciphertext, iv); + for (i = 0; i < blocksize; i++) + ciphertext[i] ^= plaintext[i]; + for (i = blocksize; i > 0; i--) + { + iv[i-1]++; + if (iv[i-1]) + break; + } + + memset (iv2, 0xff, blocksize); + bulk_ctr_enc (ctx, iv2, plaintext2, ciphertext, 1); + + if (memcmp (plaintext2, plaintext, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (plaintext mismatch)", cipher, + blocksize * 8); +#else + (void)cipher; /* Not used. */ +#endif + return "selftest for CTR failed - see syslog for details"; + } + + if (memcmp (iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (IV mismatch)", cipher, + blocksize * 8); +#endif + return "selftest for CTR failed - see syslog for details"; + } + + /* Test bulk encryption with typical IV. */ + memset(iv, 0x57, blocksize-4); + iv[blocksize-1] = 1; + iv[blocksize-2] = 0; + iv[blocksize-3] = 0; + iv[blocksize-4] = 0; + memset(iv2, 0x57, blocksize-4); + iv2[blocksize-1] = 1; + iv2[blocksize-2] = 0; + iv2[blocksize-3] = 0; + iv2[blocksize-4] = 0; + + for (i = 0; i < blocksize * nblocks; i++) + plaintext2[i] = plaintext[i] = i; + + /* Create CTR ciphertext manually. */ + for (i = 0; i < blocksize * nblocks; i+=blocksize) + { + encrypt_one (ctx, &ciphertext[i], iv); + for (j = 0; j < blocksize; j++) + ciphertext[i+j] ^= plaintext[i+j]; + for (j = blocksize; j > 0; j--) + { + iv[j-1]++; + if (iv[j-1]) + break; + } + } + + bulk_ctr_enc (ctx, iv2, ciphertext2, plaintext2, nblocks); + + if (memcmp (ciphertext2, ciphertext, blocksize * nblocks)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (ciphertext mismatch, bulk)", cipher, + blocksize * 8); +#endif + return "selftest for CTR failed - see syslog for details"; + } + if (memcmp(iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (IV mismatch, bulk)", cipher, + blocksize * 8); +#endif + return "selftest for CTR failed - see syslog for details"; + } + + /* Test parallelized code paths (check counter overflow handling) */ + for (diff = 0; diff < nblocks; diff++) { + memset(iv, 0xff, blocksize); + iv[blocksize-1] -= diff; + iv[0] = iv[1] = 0; + iv[2] = 0x07; + + for (i = 0; i < blocksize * nblocks; i++) + plaintext[i] = i; + + /* Create CTR ciphertext manually. 
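+       The inner loop below steps the counter as one big-endian integer:
+       the last byte is incremented first and any carry ripples towards
+       byte 0, which is exactly the overflow behaviour the bulk
+       implementation under test must reproduce.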
*/ + for (i = 0; i < blocksize * nblocks; i+=blocksize) + { + encrypt_one (ctx, &ciphertext[i], iv); + for (j = 0; j < blocksize; j++) + ciphertext[i+j] ^= plaintext[i+j]; + for (j = blocksize; j > 0; j--) + { + iv[j-1]++; + if (iv[j-1]) + break; + } + } + + /* Decrypt using bulk CTR and compare result. */ + memset(iv2, 0xff, blocksize); + iv2[blocksize-1] -= diff; + iv2[0] = iv2[1] = 0; + iv2[2] = 0x07; + + bulk_ctr_enc (ctx, iv2, plaintext2, ciphertext, nblocks); + + if (memcmp (plaintext2, plaintext, blocksize * nblocks)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (plaintext mismatch, diff: %d)", cipher, + blocksize * 8, diff); +#endif + return "selftest for CTR failed - see syslog for details"; + } + if (memcmp(iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (IV mismatch, diff: %d)", cipher, + blocksize * 8, diff); +#endif + return "selftest for CTR failed - see syslog for details"; + } + } + + xfree (mem); + return NULL; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-selftest.h b/libotr/libgcrypt-1.8.7/cipher/cipher-selftest.h new file mode 100644 index 0000000..a435080 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-selftest.h @@ -0,0 +1,72 @@ +/* cipher-selftest.h - Helper functions for bulk encryption selftests. + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef G10_SELFTEST_HELP_H +#define G10_SELFTEST_HELP_H + +#include <config.h> +#include "types.h" +#include "g10lib.h" +#include "cipher.h" + +typedef void (*gcry_cipher_bulk_cbc_dec_t)(void *context, unsigned char *iv, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks); + +typedef void (*gcry_cipher_bulk_cfb_dec_t)(void *context, unsigned char *iv, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks); + +typedef void (*gcry_cipher_bulk_ctr_enc_t)(void *context, unsigned char *iv, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks); + +/* Helper function to allocate an aligned context for selftests. 
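+   A typical caller in a cipher module's selftest would look like this
+   (MY_CONTEXT standing in for the module's real context type):
+
+     unsigned char *mem;
+     MY_CONTEXT *ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &mem);
+     if (!ctx)
+       return "failed to allocate memory";
+     ...run the test...
+     xfree (mem);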
*/ +void *_gcry_cipher_selftest_alloc_ctx (const int context_size, + unsigned char **r_mem); + + +/* Helper function for bulk CBC decryption selftest */ +const char * +_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey, + gcry_cipher_encrypt_t encrypt_one, + gcry_cipher_bulk_cbc_dec_t bulk_cbc_dec, + const int nblocks, const int blocksize, + const int context_size); + +/* Helper function for bulk CFB decryption selftest */ +const char * +_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey, + gcry_cipher_encrypt_t encrypt_one, + gcry_cipher_bulk_cfb_dec_t bulk_cfb_dec, + const int nblocks, const int blocksize, + const int context_size); + +/* Helper function for bulk CTR encryption selftest */ +const char * +_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey, + gcry_cipher_encrypt_t encrypt_one, + gcry_cipher_bulk_ctr_enc_t bulk_ctr_enc, + const int nblocks, const int blocksize, + const int context_size); + +#endif /*G10_SELFTEST_HELP_H*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher-xts.c b/libotr/libgcrypt-1.8.7/cipher/cipher-xts.c new file mode 100644 index 0000000..4da89e5 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher-xts.c @@ -0,0 +1,170 @@ +/* cipher-xts.c - XTS mode implementation + * Copyright (C) 2017 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +static inline void xts_gfmul_byA (unsigned char *out, const unsigned char *in) +{ + u64 hi = buf_get_le64 (in + 8); + u64 lo = buf_get_le64 (in + 0); + u64 carry = -(hi >> 63) & 0x87; + + hi = (hi << 1) + (lo >> 63); + lo = (lo << 1) ^ carry; + + buf_put_le64 (out + 8, hi); + buf_put_le64 (out + 0, lo); +} + + +static inline void xts_inc128 (unsigned char *seqno) +{ + u64 lo = buf_get_le64 (seqno + 0); + u64 hi = buf_get_le64 (seqno + 8); + + hi += !(++lo); + + buf_put_le64 (seqno + 0, lo); + buf_put_le64 (seqno + 8, hi); +} + + +gcry_err_code_t +_gcry_cipher_xts_crypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen, + int encrypt) +{ + gcry_cipher_encrypt_t tweak_fn = c->spec->encrypt; + gcry_cipher_encrypt_t crypt_fn = + encrypt ? 
c->spec->encrypt : c->spec->decrypt; + union + { + cipher_context_alignment_t xcx; + byte x1[GCRY_XTS_BLOCK_LEN]; + u64 x64[GCRY_XTS_BLOCK_LEN / sizeof(u64)]; + } tmp; + unsigned int burn, nburn; + size_t nblocks; + + if (c->spec->blocksize != GCRY_XTS_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (inbuflen < GCRY_XTS_BLOCK_LEN) + return GPG_ERR_BUFFER_TOO_SHORT; + + /* Data-unit max length: 2^20 blocks. */ + if (inbuflen > GCRY_XTS_BLOCK_LEN << 20) + return GPG_ERR_INV_LENGTH; + + nblocks = inbuflen / GCRY_XTS_BLOCK_LEN; + nblocks -= !encrypt && (inbuflen % GCRY_XTS_BLOCK_LEN) != 0; + + /* Generate first tweak value. */ + burn = tweak_fn (c->u_mode.xts.tweak_context, c->u_ctr.ctr, c->u_iv.iv); + + /* Use a bulk method if available. */ + if (nblocks && c->bulk.xts_crypt) + { + c->bulk.xts_crypt (c, c->u_ctr.ctr, outbuf, inbuf, nblocks, encrypt); + inbuf += nblocks * GCRY_XTS_BLOCK_LEN; + outbuf += nblocks * GCRY_XTS_BLOCK_LEN; + inbuflen -= nblocks * GCRY_XTS_BLOCK_LEN; + nblocks = 0; + } + + /* If we don't have a bulk method use the standard method. We also + use this method for the a remaining partial block. */ + + while (nblocks) + { + /* Xor-Encrypt/Decrypt-Xor block. */ + buf_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1); + burn = nburn > burn ? nburn : burn; + buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + + outbuf += GCRY_XTS_BLOCK_LEN; + inbuf += GCRY_XTS_BLOCK_LEN; + inbuflen -= GCRY_XTS_BLOCK_LEN; + nblocks--; + + /* Generate next tweak. */ + xts_gfmul_byA (c->u_ctr.ctr, c->u_ctr.ctr); + } + + /* Handle remaining data with ciphertext stealing. */ + if (inbuflen) + { + if (!encrypt) + { + gcry_assert (inbuflen > GCRY_XTS_BLOCK_LEN); + gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN * 2); + + /* Generate last tweak. */ + xts_gfmul_byA (tmp.x1, c->u_ctr.ctr); + + /* Decrypt last block first. */ + buf_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); + nburn = crypt_fn (&c->context.c, outbuf, outbuf); + burn = nburn > burn ? nburn : burn; + buf_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); + + inbuflen -= GCRY_XTS_BLOCK_LEN; + inbuf += GCRY_XTS_BLOCK_LEN; + outbuf += GCRY_XTS_BLOCK_LEN; + } + + gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN); + outbuf -= GCRY_XTS_BLOCK_LEN; + + /* Steal ciphertext from previous block. */ + buf_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN); + buf_cpy (tmp.x64, inbuf, inbuflen); + buf_cpy (outbuf + GCRY_XTS_BLOCK_LEN, outbuf, inbuflen); + + /* Decrypt/Encrypt last block. */ + buf_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1); + burn = nburn > burn ? nburn : burn; + buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + } + + /* Auto-increment data-unit sequence number */ + xts_inc128 (c->u_iv.iv); + + wipememory (&tmp, sizeof(tmp)); + wipememory (c->u_ctr.ctr, sizeof(c->u_ctr.ctr)); + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/cipher.c b/libotr/libgcrypt-1.8.7/cipher/cipher.c new file mode 100644 index 0000000..9812738 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/cipher.c @@ -0,0 +1,1680 @@ +/* cipher.c - cipher dispatcher + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. 
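
Looking back at the XTS helper above: xts_gfmul_byA doubles the tweak in
GF(2^128) using the x^128 + x^7 + x^2 + x + 1 reduction polynomial, which is
where the 0x87 feedback constant comes from.  A byte-wise sketch of the same
operation (illustrative name, not code from this file):

  #include <stdint.h>

  /* Multiply the 16-byte little-endian value T by x in GF(2^128),
     reducing by x^128 + x^7 + x^2 + x + 1.  */
  static void
  xts_double_tweak (uint8_t t[16])
  {
    uint8_t carry = t[15] >> 7;          /* bit 127 before the shift */
    int i;

    for (i = 15; i > 0; i--)
      t[i] = (uint8_t)((t[i] << 1) | (t[i - 1] >> 7));
    t[0] = (uint8_t)(t[0] << 1);
    if (carry)
      t[0] ^= 0x87;                      /* fold x^128 back into byte 0 */
  }
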
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "../src/gcrypt-testapi.h" +#include "cipher.h" +#include "./cipher-internal.h" + + +/* This is the list of the default ciphers, which are included in + libgcrypt. */ +static gcry_cipher_spec_t *cipher_list[] = + { +#if USE_BLOWFISH + &_gcry_cipher_spec_blowfish, +#endif +#if USE_DES + &_gcry_cipher_spec_des, + &_gcry_cipher_spec_tripledes, +#endif +#if USE_ARCFOUR + &_gcry_cipher_spec_arcfour, +#endif +#if USE_CAST5 + &_gcry_cipher_spec_cast5, +#endif +#if USE_AES + &_gcry_cipher_spec_aes, + &_gcry_cipher_spec_aes192, + &_gcry_cipher_spec_aes256, +#endif +#if USE_TWOFISH + &_gcry_cipher_spec_twofish, + &_gcry_cipher_spec_twofish128, +#endif +#if USE_SERPENT + &_gcry_cipher_spec_serpent128, + &_gcry_cipher_spec_serpent192, + &_gcry_cipher_spec_serpent256, +#endif +#if USE_RFC2268 + &_gcry_cipher_spec_rfc2268_40, + &_gcry_cipher_spec_rfc2268_128, +#endif +#if USE_SEED + &_gcry_cipher_spec_seed, +#endif +#if USE_CAMELLIA + &_gcry_cipher_spec_camellia128, + &_gcry_cipher_spec_camellia192, + &_gcry_cipher_spec_camellia256, +#endif +#ifdef USE_IDEA + &_gcry_cipher_spec_idea, +#endif +#if USE_SALSA20 + &_gcry_cipher_spec_salsa20, + &_gcry_cipher_spec_salsa20r12, +#endif +#if USE_GOST28147 + &_gcry_cipher_spec_gost28147, +#endif +#if USE_CHACHA20 + &_gcry_cipher_spec_chacha20, +#endif + NULL + }; + + + + +static int +map_algo (int algo) +{ + return algo; +} + + +/* Return the spec structure for the cipher algorithm ALGO. For + an unknown algorithm NULL is returned. */ +static gcry_cipher_spec_t * +spec_from_algo (int algo) +{ + int idx; + gcry_cipher_spec_t *spec; + + algo = map_algo (algo); + + for (idx = 0; (spec = cipher_list[idx]); idx++) + if (algo == spec->algo) + return spec; + return NULL; +} + + +/* Lookup a cipher's spec by its name. */ +static gcry_cipher_spec_t * +spec_from_name (const char *name) +{ + gcry_cipher_spec_t *spec; + int idx; + const char **aliases; + + for (idx=0; (spec = cipher_list[idx]); idx++) + { + if (!stricmp (name, spec->name)) + return spec; + if (spec->aliases) + { + for (aliases = spec->aliases; *aliases; aliases++) + if (!stricmp (name, *aliases)) + return spec; + } + } + + return NULL; +} + + +/* Lookup a cipher's spec by its OID. */ +static gcry_cipher_spec_t * +spec_from_oid (const char *oid) +{ + gcry_cipher_spec_t *spec; + gcry_cipher_oid_spec_t *oid_specs; + int idx, j; + + for (idx=0; (spec = cipher_list[idx]); idx++) + { + oid_specs = spec->oids; + if (oid_specs) + { + for (j = 0; oid_specs[j].oid; j++) + if (!stricmp (oid, oid_specs[j].oid)) + return spec; + } + } + + return NULL; +} + + +/* Locate the OID in the oid table and return the spec or NULL if not + found. An optional "oid." or "OID." 
prefix in OID is ignored, the + OID is expected to be in standard IETF dotted notation. A pointer + to the OID specification of the module implementing this algorithm + is return in OID_SPEC unless passed as NULL.*/ +static gcry_cipher_spec_t * +search_oid (const char *oid, gcry_cipher_oid_spec_t *oid_spec) +{ + gcry_cipher_spec_t *spec; + int i; + + if (!oid) + return NULL; + + if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4)) + oid += 4; + + spec = spec_from_oid (oid); + if (spec && spec->oids) + { + for (i = 0; spec->oids[i].oid; i++) + if (!stricmp (oid, spec->oids[i].oid)) + { + if (oid_spec) + *oid_spec = spec->oids[i]; + return spec; + } + } + + return NULL; +} + + +/* Map STRING to the cipher algorithm identifier. Returns the + algorithm ID of the cipher for the given name or 0 if the name is + not known. It is valid to pass NULL for STRING which results in a + return value of 0. */ +int +_gcry_cipher_map_name (const char *string) +{ + gcry_cipher_spec_t *spec; + + if (!string) + return 0; + + /* If the string starts with a digit (optionally prefixed with + either "OID." or "oid."), we first look into our table of ASN.1 + object identifiers to figure out the algorithm */ + + spec = search_oid (string, NULL); + if (spec) + return spec->algo; + + spec = spec_from_name (string); + if (spec) + return spec->algo; + + return 0; +} + + +/* Given a STRING with an OID in dotted decimal notation, this + function returns the cipher mode (GCRY_CIPHER_MODE_*) associated + with that OID or 0 if no mode is known. Passing NULL for string + yields a return value of 0. */ +int +_gcry_cipher_mode_from_oid (const char *string) +{ + gcry_cipher_spec_t *spec; + gcry_cipher_oid_spec_t oid_spec; + + if (!string) + return 0; + + spec = search_oid (string, &oid_spec); + if (spec) + return oid_spec.mode; + + return 0; +} + + +/* Map the cipher algorithm identifier ALGORITHM to a string + representing this algorithm. This string is the default name as + used by Libgcrypt. A "?" is returned for an unknown algorithm. + NULL is never returned. */ +const char * +_gcry_cipher_algo_name (int algorithm) +{ + gcry_cipher_spec_t *spec; + + spec = spec_from_algo (algorithm); + return spec? spec->name : "?"; +} + + +/* Flag the cipher algorithm with the identifier ALGORITHM as + disabled. There is no error return, the function does nothing for + unknown algorithms. Disabled algorithms are virtually not + available in Libgcrypt. This is not thread safe and should thus be + called early. */ +static void +disable_cipher_algo (int algo) +{ + gcry_cipher_spec_t *spec = spec_from_algo (algo); + + if (spec) + spec->flags.disabled = 1; +} + + +/* Return 0 if the cipher algorithm with identifier ALGORITHM is + available. Returns a basic error code value if it is not + available. */ +static gcry_err_code_t +check_cipher_algo (int algorithm) +{ + gcry_cipher_spec_t *spec; + + spec = spec_from_algo (algorithm); + if (spec && !spec->flags.disabled) + return 0; + + return GPG_ERR_CIPHER_ALGO; +} + + +/* Return the standard length in bits of the key for the cipher + algorithm with the identifier ALGORITHM. */ +static unsigned int +cipher_get_keylen (int algorithm) +{ + gcry_cipher_spec_t *spec; + unsigned len = 0; + + spec = spec_from_algo (algorithm); + if (spec) + { + len = spec->keylen; + if (!len) + log_bug ("cipher %d w/o key length\n", algorithm); + } + + return len; +} + + +/* Return the block length of the cipher algorithm with the identifier + ALGORITHM. This function return 0 for an invalid algorithm. 
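+   Applications normally reach it through the public wrappers, e.g.
+
+     gcry_cipher_get_algo_blklen (GCRY_CIPHER_AES128)  => 16
+     gcry_cipher_get_algo_keylen (GCRY_CIPHER_AES128)  => 16
+
+   both of which map onto these internal lookups.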
*/ +static unsigned int +cipher_get_blocksize (int algorithm) +{ + gcry_cipher_spec_t *spec; + unsigned len = 0; + + spec = spec_from_algo (algorithm); + if (spec) + { + len = spec->blocksize; + if (!len) + log_bug ("cipher %d w/o blocksize\n", algorithm); + } + + return len; +} + + +/* + Open a cipher handle for use with cipher algorithm ALGORITHM, using + the cipher mode MODE (one of the GCRY_CIPHER_MODE_*) and return a + handle in HANDLE. Put NULL into HANDLE and return an error code if + something goes wrong. FLAGS may be used to modify the + operation. The defined flags are: + + GCRY_CIPHER_SECURE: allocate all internal buffers in secure memory. + GCRY_CIPHER_ENABLE_SYNC: Enable the sync operation as used in OpenPGP. + GCRY_CIPHER_CBC_CTS: Enable CTS mode. + GCRY_CIPHER_CBC_MAC: Enable MAC mode. + + Values for these flags may be combined using OR. + */ +gcry_err_code_t +_gcry_cipher_open (gcry_cipher_hd_t *handle, + int algo, int mode, unsigned int flags) +{ + gcry_err_code_t rc; + gcry_cipher_hd_t h = NULL; + + if (mode >= GCRY_CIPHER_MODE_INTERNAL) + rc = GPG_ERR_INV_CIPHER_MODE; + else + rc = _gcry_cipher_open_internal (&h, algo, mode, flags); + + *handle = rc ? NULL : h; + + return rc; +} + + +gcry_err_code_t +_gcry_cipher_open_internal (gcry_cipher_hd_t *handle, + int algo, int mode, unsigned int flags) +{ + int secure = (flags & GCRY_CIPHER_SECURE); + gcry_cipher_spec_t *spec; + gcry_cipher_hd_t h = NULL; + gcry_err_code_t err; + + /* If the application missed to call the random poll function, we do + it here to ensure that it is used once in a while. */ + _gcry_fast_random_poll (); + + spec = spec_from_algo (algo); + if (!spec) + err = GPG_ERR_CIPHER_ALGO; + else if (spec->flags.disabled) + err = GPG_ERR_CIPHER_ALGO; + else + err = 0; + + /* check flags */ + if ((! err) + && ((flags & ~(0 + | GCRY_CIPHER_SECURE + | GCRY_CIPHER_ENABLE_SYNC + | GCRY_CIPHER_CBC_CTS + | GCRY_CIPHER_CBC_MAC)) + || (flags & GCRY_CIPHER_CBC_CTS & GCRY_CIPHER_CBC_MAC))) + err = GPG_ERR_CIPHER_ALGO; + + /* check that a valid mode has been requested */ + if (! err) + switch (mode) + { + case GCRY_CIPHER_MODE_CCM: + if (spec->blocksize != GCRY_CCM_BLOCK_LEN) + err = GPG_ERR_INV_CIPHER_MODE; + if (!spec->encrypt || !spec->decrypt) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_XTS: + if (spec->blocksize != GCRY_XTS_BLOCK_LEN) + err = GPG_ERR_INV_CIPHER_MODE; + if (!spec->encrypt || !spec->decrypt) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_ECB: + case GCRY_CIPHER_MODE_CBC: + case GCRY_CIPHER_MODE_CFB: + case GCRY_CIPHER_MODE_CFB8: + case GCRY_CIPHER_MODE_OFB: + case GCRY_CIPHER_MODE_CTR: + case GCRY_CIPHER_MODE_AESWRAP: + case GCRY_CIPHER_MODE_CMAC: + case GCRY_CIPHER_MODE_GCM: + if (!spec->encrypt || !spec->decrypt) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_POLY1305: + if (!spec->stencrypt || !spec->stdecrypt || !spec->setiv) + err = GPG_ERR_INV_CIPHER_MODE; + else if (spec->algo != GCRY_CIPHER_CHACHA20) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_OCB: + /* Note that our implementation allows only for 128 bit block + length algorithms. Lower block lengths would be possible + but we do not implement them because they limit the + security too much. 
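+             (RFC 7253 defines OCB for 128-bit block ciphers only; with
+             a 64-bit block the birthday bound on the offsets would be
+             reached after a few tens of gigabytes, so algorithms such
+             as 3DES or Blowfish are rejected for this mode.)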
*/ + if (!spec->encrypt || !spec->decrypt) + err = GPG_ERR_INV_CIPHER_MODE; + else if (spec->blocksize != (128/8)) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_STREAM: + if (!spec->stencrypt || !spec->stdecrypt) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_NONE: + /* This mode may be used for debugging. It copies the main + text verbatim to the ciphertext. We do not allow this in + fips mode or if no debug flag has been set. */ + if (fips_mode () || !_gcry_get_debug_flag (0)) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + default: + err = GPG_ERR_INV_CIPHER_MODE; + } + + /* Perform selftest here and mark this with a flag in cipher_table? + No, we should not do this as it takes too long. Further it does + not make sense to exclude algorithms with failing selftests at + runtime: If a selftest fails there is something seriously wrong + with the system and thus we better die immediately. */ + + if (! err) + { + size_t size = (sizeof (*h) + + 2 * spec->contextsize + - sizeof (cipher_context_alignment_t) +#ifdef NEED_16BYTE_ALIGNED_CONTEXT + + 15 /* Space for leading alignment gap. */ +#endif /*NEED_16BYTE_ALIGNED_CONTEXT*/ + ); + + /* Space needed per mode. */ + switch (mode) + { + case GCRY_CIPHER_MODE_XTS: + /* Additional cipher context for tweak. */ + size += 2 * spec->contextsize + 15; + break; + + default: + break; + } + + if (secure) + h = xtrycalloc_secure (1, size); + else + h = xtrycalloc (1, size); + + if (! h) + err = gpg_err_code_from_syserror (); + else + { + size_t off = 0; + char *tc; + +#ifdef NEED_16BYTE_ALIGNED_CONTEXT + if ( ((uintptr_t)h & 0x0f) ) + { + /* The malloced block is not aligned on a 16 byte + boundary. Correct for this. */ + off = 16 - ((uintptr_t)h & 0x0f); + h = (void*)((char*)h + off); + } +#endif /*NEED_16BYTE_ALIGNED_CONTEXT*/ + + h->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL; + h->actual_handle_size = size - off; + h->handle_offset = off; + h->spec = spec; + h->algo = algo; + h->mode = mode; + h->flags = flags; + + /* Setup bulk encryption routines. 
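+             Each non-NULL entry points to an algorithm-specific routine
+             that processes many blocks per call; the mode code probes
+             these pointers (as cipher-ocb.c does with c->bulk.ocb_crypt
+             above) and falls back to its generic one-block loop when an
+             entry is NULL.  */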
+          switch (algo)
+            {
+#ifdef USE_AES
+            case GCRY_CIPHER_AES128:
+            case GCRY_CIPHER_AES192:
+            case GCRY_CIPHER_AES256:
+              h->bulk.cfb_enc = _gcry_aes_cfb_enc;
+              h->bulk.cfb_dec = _gcry_aes_cfb_dec;
+              h->bulk.cbc_enc = _gcry_aes_cbc_enc;
+              h->bulk.cbc_dec = _gcry_aes_cbc_dec;
+              h->bulk.ctr_enc = _gcry_aes_ctr_enc;
+              h->bulk.ocb_crypt = _gcry_aes_ocb_crypt;
+              h->bulk.ocb_auth = _gcry_aes_ocb_auth;
+              break;
+#endif /*USE_AES*/
+#ifdef USE_BLOWFISH
+            case GCRY_CIPHER_BLOWFISH:
+              h->bulk.cfb_dec = _gcry_blowfish_cfb_dec;
+              h->bulk.cbc_dec = _gcry_blowfish_cbc_dec;
+              h->bulk.ctr_enc = _gcry_blowfish_ctr_enc;
+              break;
+#endif /*USE_BLOWFISH*/
+#ifdef USE_CAST5
+            case GCRY_CIPHER_CAST5:
+              h->bulk.cfb_dec = _gcry_cast5_cfb_dec;
+              h->bulk.cbc_dec = _gcry_cast5_cbc_dec;
+              h->bulk.ctr_enc = _gcry_cast5_ctr_enc;
+              break;
+#endif /*USE_CAST5*/
+#ifdef USE_CAMELLIA
+            case GCRY_CIPHER_CAMELLIA128:
+            case GCRY_CIPHER_CAMELLIA192:
+            case GCRY_CIPHER_CAMELLIA256:
+              h->bulk.cbc_dec = _gcry_camellia_cbc_dec;
+              h->bulk.cfb_dec = _gcry_camellia_cfb_dec;
+              h->bulk.ctr_enc = _gcry_camellia_ctr_enc;
+              h->bulk.ocb_crypt = _gcry_camellia_ocb_crypt;
+              h->bulk.ocb_auth = _gcry_camellia_ocb_auth;
+              break;
+#endif /*USE_CAMELLIA*/
+#ifdef USE_DES
+            case GCRY_CIPHER_3DES:
+              h->bulk.cbc_dec = _gcry_3des_cbc_dec;
+              h->bulk.cfb_dec = _gcry_3des_cfb_dec;
+              h->bulk.ctr_enc = _gcry_3des_ctr_enc;
+              break;
+#endif /*USE_DES*/
+#ifdef USE_SERPENT
+            case GCRY_CIPHER_SERPENT128:
+            case GCRY_CIPHER_SERPENT192:
+            case GCRY_CIPHER_SERPENT256:
+              h->bulk.cbc_dec = _gcry_serpent_cbc_dec;
+              h->bulk.cfb_dec = _gcry_serpent_cfb_dec;
+              h->bulk.ctr_enc = _gcry_serpent_ctr_enc;
+              h->bulk.ocb_crypt = _gcry_serpent_ocb_crypt;
+              h->bulk.ocb_auth = _gcry_serpent_ocb_auth;
+              break;
+#endif /*USE_SERPENT*/
+#ifdef USE_TWOFISH
+            case GCRY_CIPHER_TWOFISH:
+            case GCRY_CIPHER_TWOFISH128:
+              h->bulk.cbc_dec = _gcry_twofish_cbc_dec;
+              h->bulk.cfb_dec = _gcry_twofish_cfb_dec;
+              h->bulk.ctr_enc = _gcry_twofish_ctr_enc;
+              h->bulk.ocb_crypt = _gcry_twofish_ocb_crypt;
+              h->bulk.ocb_auth = _gcry_twofish_ocb_auth;
+              break;
+#endif /*USE_TWOFISH*/
+
+            default:
+              break;
+            }
+
+          /* Setup defaults depending on the mode.  */
+          switch (mode)
+            {
+            case GCRY_CIPHER_MODE_OCB:
+              h->u_mode.ocb.taglen = 16; /* Bytes.  */
+              break;
+
+            case GCRY_CIPHER_MODE_XTS:
+              tc = h->context.c + spec->contextsize * 2;
+              tc += (16 - (uintptr_t)tc % 16) % 16;
+              h->u_mode.xts.tweak_context = tc;
+
+              break;
+
+            default:
+              break;
+            }
+
+        }
+    }
+
+  /* Done.  */
+
+  *handle = err ? NULL : h;
+
+  return err;
+}
+
+
+/* Release all resources associated with the cipher handle H.  H may be
+   NULL in which case this is a no-operation. */
+void
+_gcry_cipher_close (gcry_cipher_hd_t h)
+{
+  size_t off;
+
+  if (!h)
+    return;
+
+  if ((h->magic != CTX_MAGIC_SECURE)
+      && (h->magic != CTX_MAGIC_NORMAL))
+    _gcry_fatal_error(GPG_ERR_INTERNAL,
+                      "gcry_cipher_close: already closed/invalid handle");
+  else
+    h->magic = 0;
+
+  /* We always want to wipe out the memory even when the context has
+     been allocated in secure memory.  The user might have disabled
+     secure memory or is using his own implementation which does not
+     do the wiping.  To accomplish this we need to keep track of the
+     actual size of this structure because we have no way to know
+     how large the allocated area was when using a standard malloc.  */
+  off = h->handle_offset;
+  wipememory (h, h->actual_handle_size);
+
+  xfree ((char*)h - off);
+}
+
+
+/* Set the key to be used for the encryption context C to KEY with
+   length KEYLEN.
The length should match the required length. */ +static gcry_err_code_t +cipher_setkey (gcry_cipher_hd_t c, byte *key, size_t keylen) +{ + gcry_err_code_t rc; + + if (c->mode == GCRY_CIPHER_MODE_XTS) + { + /* XTS uses two keys. */ + if (keylen % 2) + return GPG_ERR_INV_KEYLEN; + keylen /= 2; + + if (fips_mode ()) + { + /* Reject key if subkeys Key_1 and Key_2 are equal. + See "Implementation Guidance for FIPS 140-2, A.9 XTS-AES + Key Generation Requirements" for details. */ + if (buf_eq_const (key, key + keylen, keylen)) + return GPG_ERR_WEAK_KEY; + } + } + + rc = c->spec->setkey (&c->context.c, key, keylen); + if (!rc) + { + /* Duplicate initial context. */ + memcpy ((void *) ((char *) &c->context.c + c->spec->contextsize), + (void *) &c->context.c, + c->spec->contextsize); + c->marks.key = 1; + + switch (c->mode) + { + case GCRY_CIPHER_MODE_CMAC: + _gcry_cipher_cmac_set_subkeys (c); + break; + + case GCRY_CIPHER_MODE_GCM: + _gcry_cipher_gcm_setkey (c); + break; + + case GCRY_CIPHER_MODE_POLY1305: + _gcry_cipher_poly1305_setkey (c); + break; + + case GCRY_CIPHER_MODE_XTS: + /* Setup tweak cipher with second part of XTS key. */ + rc = c->spec->setkey (c->u_mode.xts.tweak_context, key + keylen, + keylen); + if (!rc) + { + /* Duplicate initial tweak context. */ + memcpy (c->u_mode.xts.tweak_context + c->spec->contextsize, + c->u_mode.xts.tweak_context, c->spec->contextsize); + } + else + c->marks.key = 0; + break; + + default: + break; + }; + } + else + c->marks.key = 0; + + return rc; +} + + +/* Set the IV to be used for the encryption context C to IV with + length IVLEN. The length should match the required length. */ +static gcry_err_code_t +cipher_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) +{ + /* If the cipher has its own IV handler, we use only this one. This + is currently used for stream ciphers requiring a nonce. */ + if (c->spec->setiv) + { + c->spec->setiv (&c->context.c, iv, ivlen); + return 0; + } + + memset (c->u_iv.iv, 0, c->spec->blocksize); + if (iv) + { + if (ivlen != c->spec->blocksize) + { + log_info ("WARNING: cipher_setiv: ivlen=%u blklen=%u\n", + (unsigned int)ivlen, (unsigned int)c->spec->blocksize); + fips_signal_error ("IV length does not match blocklength"); + } + if (ivlen > c->spec->blocksize) + ivlen = c->spec->blocksize; + memcpy (c->u_iv.iv, iv, ivlen); + c->marks.iv = 1; + } + else + c->marks.iv = 0; + c->unused = 0; + + return 0; +} + + +/* Reset the cipher context to the initial context. This is basically + the same as an release followed by a new. */ +static void +cipher_reset (gcry_cipher_hd_t c) +{ + unsigned int marks_key; + + marks_key = c->marks.key; + + memcpy (&c->context.c, + (char *) &c->context.c + c->spec->contextsize, + c->spec->contextsize); + memset (&c->marks, 0, sizeof c->marks); + memset (c->u_iv.iv, 0, c->spec->blocksize); + memset (c->lastiv, 0, c->spec->blocksize); + memset (c->u_ctr.ctr, 0, c->spec->blocksize); + c->unused = 0; + + c->marks.key = marks_key; + + switch (c->mode) + { + case GCRY_CIPHER_MODE_CMAC: + /* Only clear 'tag' for cmac, keep subkeys. */ + c->u_mode.cmac.tag = 0; + break; + + case GCRY_CIPHER_MODE_GCM: + /* Only clear head of u_mode, keep ghash_key and gcm_table. 
*/ + { + byte *u_mode_pos = (void *)&c->u_mode; + byte *ghash_key_pos = c->u_mode.gcm.u_ghash_key.key; + size_t u_mode_head_length = ghash_key_pos - u_mode_pos; + + memset (&c->u_mode, 0, u_mode_head_length); + } + break; + + case GCRY_CIPHER_MODE_POLY1305: + memset (&c->u_mode.poly1305, 0, sizeof c->u_mode.poly1305); + break; + + case GCRY_CIPHER_MODE_CCM: + memset (&c->u_mode.ccm, 0, sizeof c->u_mode.ccm); + break; + + case GCRY_CIPHER_MODE_OCB: + memset (&c->u_mode.ocb, 0, sizeof c->u_mode.ocb); + /* Setup default taglen. */ + c->u_mode.ocb.taglen = 16; + break; + + case GCRY_CIPHER_MODE_XTS: + memcpy (c->u_mode.xts.tweak_context, + c->u_mode.xts.tweak_context + c->spec->contextsize, + c->spec->contextsize); + break; + + default: + break; /* u_mode unused by other modes. */ + } +} + + + +static gcry_err_code_t +do_ecb_crypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen, + gcry_cipher_encrypt_t crypt_fn) +{ + unsigned int blocksize = c->spec->blocksize; + size_t n, nblocks; + unsigned int burn, nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if ((inbuflen % blocksize)) + return GPG_ERR_INV_LENGTH; + + nblocks = inbuflen / blocksize; + burn = 0; + + for (n=0; n < nblocks; n++ ) + { + nburn = crypt_fn (&c->context.c, outbuf, inbuf); + burn = nburn > burn ? nburn : burn; + inbuf += blocksize; + outbuf += blocksize; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + +static gcry_err_code_t +do_ecb_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, c->spec->encrypt); +} + +static gcry_err_code_t +do_ecb_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, c->spec->decrypt); +} + + +/**************** + * Encrypt INBUF to OUTBUF with the mode selected at open. + * inbuf and outbuf may overlap or be the same. + * Depending on the mode some constraints apply to INBUFLEN. 
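+ * For example, ECB and CBC accept only multiples of the block size
+ * (unless the CTS flag is set for CBC), the stream-like modes CTR, CFB
+ * and OFB accept any length, and OCB accepts a trailing partial block
+ * only after gcry_cipher_final.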
+ */
+static gcry_err_code_t
+cipher_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t rc;
+
+  if (c->mode != GCRY_CIPHER_MODE_NONE && !c->marks.key)
+    {
+      log_error ("cipher_encrypt: key not set\n");
+      return GPG_ERR_MISSING_KEY;
+    }
+
+  switch (c->mode)
+    {
+    case GCRY_CIPHER_MODE_ECB:
+      rc = do_ecb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_CBC:
+      rc = _gcry_cipher_cbc_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_CFB:
+      rc = _gcry_cipher_cfb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_CFB8:
+      rc = _gcry_cipher_cfb8_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_OFB:
+      rc = _gcry_cipher_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_CTR:
+      rc = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_AESWRAP:
+      rc = _gcry_cipher_aeswrap_encrypt (c, outbuf, outbuflen,
+                                         inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_CCM:
+      rc = _gcry_cipher_ccm_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_CMAC:
+      rc = GPG_ERR_INV_CIPHER_MODE;
+      break;
+
+    case GCRY_CIPHER_MODE_GCM:
+      rc = _gcry_cipher_gcm_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_POLY1305:
+      rc = _gcry_cipher_poly1305_encrypt (c, outbuf, outbuflen,
+                                          inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_OCB:
+      rc = _gcry_cipher_ocb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      break;
+
+    case GCRY_CIPHER_MODE_XTS:
+      rc = _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1);
+      break;
+
+    case GCRY_CIPHER_MODE_STREAM:
+      c->spec->stencrypt (&c->context.c,
+                          outbuf, (byte*)/*arggg*/inbuf, inbuflen);
+      rc = 0;
+      break;
+
+    case GCRY_CIPHER_MODE_NONE:
+      if (fips_mode () || !_gcry_get_debug_flag (0))
+        {
+          fips_signal_error ("cipher mode NONE used");
+          rc = GPG_ERR_INV_CIPHER_MODE;
+        }
+      else
+        {
+          if (inbuf != outbuf)
+            memmove (outbuf, inbuf, inbuflen);
+          rc = 0;
+        }
+      break;
+
+    default:
+      log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode);
+      rc = GPG_ERR_INV_CIPHER_MODE;
+      break;
+    }
+
+  return rc;
+}
+
+
+/****************
+ * Encrypt IN and write it to OUT.  If IN is NULL, in-place encryption has
+ * been requested.
+ */
+gcry_err_code_t
+_gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize,
+                      const void *in, size_t inlen)
+{
+  gcry_err_code_t rc;
+
+  if (!in)  /* Caller requested in-place encryption.  */
+    {
+      in = out;
+      inlen = outsize;
+    }
+
+  rc = cipher_encrypt (h, out, outsize, in, inlen);
+
+  /* Failsafe: Make sure that the plaintext will never make it into
+     OUT if the encryption returned an error.  */
+  if (rc && out)
+    memset (out, 0x42, outsize);
+
+  return rc;
+}
+
+
+
+/****************
+ * Decrypt INBUF to OUTBUF with the mode selected at open.
+ * inbuf and outbuf may overlap or be the same.
+ * Depending on the mode some constraints apply to INBUFLEN.
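+ * Note that the OFB and CTR cases below dispatch to the corresponding
+ * _encrypt functions: both modes XOR the data with a keystream that
+ * depends only on the key and the IV/counter, so encryption and
+ * decryption are the same operation (C = P xor K, hence P = C xor K).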
+ */ +static gcry_err_code_t +cipher_decrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t rc; + + if (c->mode != GCRY_CIPHER_MODE_NONE && !c->marks.key) + { + log_error ("cipher_decrypt: key not set\n"); + return GPG_ERR_MISSING_KEY; + } + + switch (c->mode) + { + case GCRY_CIPHER_MODE_ECB: + rc = do_ecb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_CBC: + rc = _gcry_cipher_cbc_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_CFB: + rc = _gcry_cipher_cfb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_CFB8: + rc = _gcry_cipher_cfb8_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_OFB: + rc = _gcry_cipher_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_CTR: + rc = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_AESWRAP: + rc = _gcry_cipher_aeswrap_decrypt (c, outbuf, outbuflen, + inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_CCM: + rc = _gcry_cipher_ccm_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_CMAC: + rc = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_GCM: + rc = _gcry_cipher_gcm_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_POLY1305: + rc = _gcry_cipher_poly1305_decrypt (c, outbuf, outbuflen, + inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_OCB: + rc = _gcry_cipher_ocb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); + break; + + case GCRY_CIPHER_MODE_XTS: + rc = _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0); + break; + + case GCRY_CIPHER_MODE_STREAM: + c->spec->stdecrypt (&c->context.c, + outbuf, (byte*)/*arggg*/inbuf, inbuflen); + rc = 0; + break; + + case GCRY_CIPHER_MODE_NONE: + if (fips_mode () || !_gcry_get_debug_flag (0)) + { + fips_signal_error ("cipher mode NONE used"); + rc = GPG_ERR_INV_CIPHER_MODE; + } + else + { + if (inbuf != outbuf) + memmove (outbuf, inbuf, inbuflen); + rc = 0; + } + break; + + default: + log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode ); + rc = GPG_ERR_INV_CIPHER_MODE; + break; + } + + return rc; +} + + +gcry_err_code_t +_gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize, + const void *in, size_t inlen) +{ + if (!in) /* Caller requested in-place encryption. */ + { + in = out; + inlen = outsize; + } + + return cipher_decrypt (h, out, outsize, in, inlen); +} + + + +/**************** + * Used for PGP's somewhat strange CFB mode. Only works if + * the corresponding flag is set. 
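+ * OpenPGP resynchronizes CFB after the encrypted session prefix, which
+ * realigns the shift register to the current ciphertext position rather
+ * than to a block boundary.  Callers reach this through the
+ * gcry_cipher_sync macro from gcrypt.h, a thin wrapper around
+ * gcry_cipher_ctl (hd, GCRYCTL_CFB_SYNC, NULL, 0); it only has an
+ * effect when the handle was opened with the GCRY_CIPHER_ENABLE_SYNC
+ * flag.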
+ */ +static void +cipher_sync (gcry_cipher_hd_t c) +{ + if ((c->flags & GCRY_CIPHER_ENABLE_SYNC) && c->unused) + { + memmove (c->u_iv.iv + c->unused, + c->u_iv.iv, c->spec->blocksize - c->unused); + memcpy (c->u_iv.iv, + c->lastiv + c->spec->blocksize - c->unused, c->unused); + c->unused = 0; + } +} + + +gcry_err_code_t +_gcry_cipher_setkey (gcry_cipher_hd_t hd, const void *key, size_t keylen) +{ + return cipher_setkey (hd, (void*)key, keylen); +} + + +gcry_err_code_t +_gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, size_t ivlen) +{ + gcry_err_code_t rc = 0; + + switch (hd->mode) + { + case GCRY_CIPHER_MODE_CCM: + rc = _gcry_cipher_ccm_set_nonce (hd, iv, ivlen); + break; + + case GCRY_CIPHER_MODE_GCM: + rc = _gcry_cipher_gcm_setiv (hd, iv, ivlen); + break; + + case GCRY_CIPHER_MODE_POLY1305: + rc = _gcry_cipher_poly1305_setiv (hd, iv, ivlen); + break; + + case GCRY_CIPHER_MODE_OCB: + rc = _gcry_cipher_ocb_set_nonce (hd, iv, ivlen); + break; + + default: + rc = cipher_setiv (hd, iv, ivlen); + break; + } + return rc; +} + +/* Set counter for CTR mode. (CTR,CTRLEN) must denote a buffer of + block size length, or (NULL,0) to set the CTR to the all-zero + block. */ +gpg_err_code_t +_gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, size_t ctrlen) +{ + if (ctr && ctrlen == hd->spec->blocksize) + { + memcpy (hd->u_ctr.ctr, ctr, hd->spec->blocksize); + hd->unused = 0; + } + else if (!ctr || !ctrlen) + { + memset (hd->u_ctr.ctr, 0, hd->spec->blocksize); + hd->unused = 0; + } + else + return GPG_ERR_INV_ARG; + + return 0; +} + +gpg_err_code_t +_gcry_cipher_getctr (gcry_cipher_hd_t hd, void *ctr, size_t ctrlen) +{ + if (ctr && ctrlen == hd->spec->blocksize) + memcpy (ctr, hd->u_ctr.ctr, hd->spec->blocksize); + else + return GPG_ERR_INV_ARG; + + return 0; +} + +gcry_err_code_t +_gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf, + size_t abuflen) +{ + gcry_err_code_t rc; + + switch (hd->mode) + { + case GCRY_CIPHER_MODE_CCM: + rc = _gcry_cipher_ccm_authenticate (hd, abuf, abuflen); + break; + + case GCRY_CIPHER_MODE_CMAC: + rc = _gcry_cipher_cmac_authenticate (hd, abuf, abuflen); + break; + + case GCRY_CIPHER_MODE_GCM: + rc = _gcry_cipher_gcm_authenticate (hd, abuf, abuflen); + break; + + case GCRY_CIPHER_MODE_POLY1305: + rc = _gcry_cipher_poly1305_authenticate (hd, abuf, abuflen); + break; + + case GCRY_CIPHER_MODE_OCB: + rc = _gcry_cipher_ocb_authenticate (hd, abuf, abuflen); + break; + + default: + log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode); + rc = GPG_ERR_INV_CIPHER_MODE; + break; + } + + return rc; +} + + +gcry_err_code_t +_gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen) +{ + gcry_err_code_t rc; + + switch (hd->mode) + { + case GCRY_CIPHER_MODE_CCM: + rc = _gcry_cipher_ccm_get_tag (hd, outtag, taglen); + break; + + case GCRY_CIPHER_MODE_CMAC: + rc = _gcry_cipher_cmac_get_tag (hd, outtag, taglen); + break; + + case GCRY_CIPHER_MODE_GCM: + rc = _gcry_cipher_gcm_get_tag (hd, outtag, taglen); + break; + + case GCRY_CIPHER_MODE_POLY1305: + rc = _gcry_cipher_poly1305_get_tag (hd, outtag, taglen); + break; + + case GCRY_CIPHER_MODE_OCB: + rc = _gcry_cipher_ocb_get_tag (hd, outtag, taglen); + break; + + default: + log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode); + rc = GPG_ERR_INV_CIPHER_MODE; + break; + } + + return rc; +} + + +gcry_err_code_t +_gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen) +{ + gcry_err_code_t rc; + + switch (hd->mode) + { + case GCRY_CIPHER_MODE_CCM: + rc = 
_gcry_cipher_ccm_check_tag (hd, intag, taglen);
+      break;
+
+    case GCRY_CIPHER_MODE_CMAC:
+      rc = _gcry_cipher_cmac_check_tag (hd, intag, taglen);
+      break;
+
+    case GCRY_CIPHER_MODE_GCM:
+      rc = _gcry_cipher_gcm_check_tag (hd, intag, taglen);
+      break;
+
+    case GCRY_CIPHER_MODE_POLY1305:
+      rc = _gcry_cipher_poly1305_check_tag (hd, intag, taglen);
+      break;
+
+    case GCRY_CIPHER_MODE_OCB:
+      rc = _gcry_cipher_ocb_check_tag (hd, intag, taglen);
+      break;
+
+    default:
+      log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode);
+      rc = GPG_ERR_INV_CIPHER_MODE;
+      break;
+    }
+
+  return rc;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ctl (gcry_cipher_hd_t h, int cmd, void *buffer, size_t buflen)
+{
+  gcry_err_code_t rc = 0;
+
+  switch (cmd)
+    {
+    case GCRYCTL_RESET:
+      cipher_reset (h);
+      break;
+
+    case GCRYCTL_FINALIZE:
+      if (!h || buffer || buflen)
+        return GPG_ERR_INV_ARG;
+      h->marks.finalize = 1;
+      break;
+
+    case GCRYCTL_CFB_SYNC:
+      cipher_sync (h);
+      break;
+
+    case GCRYCTL_SET_CBC_CTS:
+      if (buflen)
+        {
+          if (h->flags & GCRY_CIPHER_CBC_MAC)
+            rc = GPG_ERR_INV_FLAG;
+          else
+            h->flags |= GCRY_CIPHER_CBC_CTS;
+        }
+      else
+        h->flags &= ~GCRY_CIPHER_CBC_CTS;
+      break;
+
+    case GCRYCTL_SET_CBC_MAC:
+      if (buflen)
+        {
+          if (h->flags & GCRY_CIPHER_CBC_CTS)
+            rc = GPG_ERR_INV_FLAG;
+          else
+            h->flags |= GCRY_CIPHER_CBC_MAC;
+        }
+      else
+        h->flags &= ~GCRY_CIPHER_CBC_MAC;
+      break;
+
+    case GCRYCTL_SET_CCM_LENGTHS:
+      {
+        u64 params[3];
+        size_t encryptedlen;
+        size_t aadlen;
+        size_t authtaglen;
+
+        if (h->mode != GCRY_CIPHER_MODE_CCM)
+          return GPG_ERR_INV_CIPHER_MODE;
+
+        if (!buffer || buflen != 3 * sizeof(u64))
+          return GPG_ERR_INV_ARG;
+
+        /* This command is used to pass additional length parameters needed
+           by CCM mode to initialize CBC-MAC.  */
+        memcpy (params, buffer, sizeof(params));
+        encryptedlen = params[0];
+        aadlen = params[1];
+        authtaglen = params[2];
+
+        rc = _gcry_cipher_ccm_set_lengths (h, encryptedlen, aadlen, authtaglen);
+      }
+      break;
+
+    case GCRYCTL_SET_TAGLEN:
+      if (!h || !buffer || buflen != sizeof (int))
+        return GPG_ERR_INV_ARG;
+      switch (h->mode)
+        {
+        case GCRY_CIPHER_MODE_OCB:
+          switch (*(int*)buffer)
+            {
+            case 8: case 12: case 16:
+              h->u_mode.ocb.taglen = *(int*)buffer;
+              break;
+            default:
+              rc = GPG_ERR_INV_LENGTH; /* Invalid tag length. */
+              break;
+            }
+          break;
+
+        default:
+          rc = GPG_ERR_INV_CIPHER_MODE;
+          break;
+        }
+      break;
+
+    case GCRYCTL_DISABLE_ALGO:
+      /* This command expects NULL for H and BUFFER to point to an
+         integer with the algo number.  */
+      if (h || !buffer || buflen != sizeof (int))
+        return GPG_ERR_CIPHER_ALGO;
+      disable_cipher_algo (*(int*)buffer);
+      break;
+
+    case PRIV_CIPHERCTL_DISABLE_WEAK_KEY:  /* (private)  */
+      if (h->spec->set_extra_info)
+        rc = h->spec->set_extra_info
+          (&h->context.c, CIPHER_INFO_NO_WEAK_KEY, NULL, 0);
+      else
+        rc = GPG_ERR_NOT_SUPPORTED;
+      break;
+
+    case PRIV_CIPHERCTL_GET_INPUT_VECTOR: /* (private)  */
+      /* This is the input block as used in CFB and OFB mode which has
+         initially been set as IV.  The returned format is:
+           1 byte  Actual length of the block in bytes.
+           n byte  The block.
+         If the provided buffer is too short, an error is returned.
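+         A hypothetical in-library caller (sketch only; the buffer and
+         variable names are illustrative) would read it back as:
+
+           unsigned char buf[1 + MAX_BLOCKSIZE];
+           if (!_gcry_cipher_ctl (hd, PRIV_CIPHERCTL_GET_INPUT_VECTOR,
+                                  buf, sizeof buf))
+             {
+               size_t n = buf[0];   (the block is buf[1] ... buf[n])
+             }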
*/
+      if (buflen < (1 + h->spec->blocksize))
+        rc = GPG_ERR_TOO_SHORT;
+      else
+        {
+          unsigned char *ivp;
+          unsigned char *dst = buffer;
+          int n = h->unused;
+
+          if (!n)
+            n = h->spec->blocksize;
+          gcry_assert (n <= h->spec->blocksize);
+          *dst++ = n;
+          ivp = h->u_iv.iv + h->spec->blocksize - n;
+          while (n--)
+            *dst++ = *ivp++;
+        }
+      break;
+
+    case GCRYCTL_SET_SBOX:
+      if (h->spec->set_extra_info)
+        rc = h->spec->set_extra_info
+          (&h->context.c, GCRYCTL_SET_SBOX, buffer, buflen);
+      else
+        rc = GPG_ERR_NOT_SUPPORTED;
+      break;
+
+    default:
+      rc = GPG_ERR_INV_OP;
+    }
+
+  return rc;
+}
+
+
+/* Return information about the cipher handle H.  CMD is the kind of
+ * information requested.
+ *
+ * CMD may be one of:
+ *
+ *  GCRYCTL_GET_TAGLEN:
+ *      Return the length of the tag for an AE algorithm mode.  An
+ *      error is returned for modes which do not support a tag.
+ *      BUFFER must be given as NULL.  On success the result is stored
+ *      at NBYTES.  The taglen is returned in bytes.
+ *
+ * The function returns 0 on success or an error code.
+ */
+gcry_err_code_t
+_gcry_cipher_info (gcry_cipher_hd_t h, int cmd, void *buffer, size_t *nbytes)
+{
+  gcry_err_code_t rc = 0;
+
+  switch (cmd)
+    {
+    case GCRYCTL_GET_TAGLEN:
+      if (!h || buffer || !nbytes)
+        rc = GPG_ERR_INV_ARG;
+      else
+        {
+          switch (h->mode)
+            {
+            case GCRY_CIPHER_MODE_OCB:
+              *nbytes = h->u_mode.ocb.taglen;
+              break;
+
+            case GCRY_CIPHER_MODE_CCM:
+              *nbytes = h->u_mode.ccm.authlen;
+              break;
+
+            case GCRY_CIPHER_MODE_GCM:
+              *nbytes = GCRY_GCM_BLOCK_LEN;
+              break;
+
+            case GCRY_CIPHER_MODE_POLY1305:
+              *nbytes = POLY1305_TAGLEN;
+              break;
+
+            default:
+              rc = GPG_ERR_INV_CIPHER_MODE;
+              break;
+            }
+        }
+      break;
+
+    default:
+      rc = GPG_ERR_INV_OP;
+    }
+
+  return rc;
+}
+
+
+/* Return information about the given cipher algorithm ALGO.
+
+   WHAT selects the kind of information returned:
+
+    GCRYCTL_GET_KEYLEN:
+        Return the length of the key.  If the algorithm ALGO
+        supports multiple key lengths, the maximum supported key length
+        is returned.  The key length is returned as number of octets.
+        BUFFER and NBYTES must be zero.
+
+    GCRYCTL_GET_BLKLEN:
+        Return the blocklength of the algorithm ALGO counted in octets.
+        BUFFER and NBYTES must be zero.
+
+    GCRYCTL_TEST_ALGO:
+        Returns 0 if the specified algorithm ALGO is available for use.
+        BUFFER and NBYTES must be zero.
+
+   Note: Because this function is in most cases used to return an
+   integer value, we can make it easier for the caller to just look at
+   the return value.  The caller will in all cases consult the value
+   and thereby detect whether an error occurred or not (e.g. while
+   checking the block size)
+ */
+gcry_err_code_t
+_gcry_cipher_algo_info (int algo, int what, void *buffer, size_t *nbytes)
+{
+  gcry_err_code_t rc = 0;
+  unsigned int ui;
+
+  switch (what)
+    {
+    case GCRYCTL_GET_KEYLEN:
+      if (buffer || (! nbytes))
+        rc = GPG_ERR_CIPHER_ALGO;
+      else
+        {
+          ui = cipher_get_keylen (algo);
+          if ((ui > 0) && (ui <= 512))
+            *nbytes = (size_t) ui / 8;
+          else
+            /* The only reason for an error is an invalid algo.  */
+            rc = GPG_ERR_CIPHER_ALGO;
+        }
+      break;
+
+    case GCRYCTL_GET_BLKLEN:
+      if (buffer || (! nbytes))
+        rc = GPG_ERR_CIPHER_ALGO;
+      else
+        {
+          ui = cipher_get_blocksize (algo);
+          if ((ui > 0) && (ui < 10000))
+            *nbytes = ui;
+          else
+            {
+              /* The only reason is an invalid algo or a strange
+                 blocksize.
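+                 (cipher_get_blocksize returns zero for an unknown
+                 algorithm, and no supported cipher comes anywhere near
+                 the 10000 octet sanity bound.)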
*/
+              rc = GPG_ERR_CIPHER_ALGO;
+            }
+        }
+      break;
+
+    case GCRYCTL_TEST_ALGO:
+      if (buffer || nbytes)
+        rc = GPG_ERR_INV_ARG;
+      else
+        rc = check_cipher_algo (algo);
+      break;
+
+    default:
+      rc = GPG_ERR_INV_OP;
+    }
+
+  return rc;
+}
+
+
+/* This function returns length of the key for algorithm ALGO.  If the
+   algorithm supports multiple key lengths, the maximum supported key
+   length is returned.  On error 0 is returned.  The key length is
+   returned as number of octets.
+
+   This is a convenience function which should be preferred over
+   gcry_cipher_algo_info because it allows for proper type
+   checking.  */
+size_t
+_gcry_cipher_get_algo_keylen (int algo)
+{
+  size_t n;
+
+  if (_gcry_cipher_algo_info (algo, GCRYCTL_GET_KEYLEN, NULL, &n))
+    n = 0;
+  return n;
+}
+
+
+/* This function returns the blocklength of the algorithm ALGO
+   counted in octets.  On error 0 is returned.
+
+   This is a convenience function which should be preferred over
+   gcry_cipher_algo_info because it allows for proper type
+   checking.  */
+size_t
+_gcry_cipher_get_algo_blklen (int algo)
+{
+  size_t n;
+
+  if (_gcry_cipher_algo_info (algo, GCRYCTL_GET_BLKLEN, NULL, &n))
+    n = 0;
+  return n;
+}
+
+
+/* Explicitly initialize this module.  */
+gcry_err_code_t
+_gcry_cipher_init (void)
+{
+  if (fips_mode())
+    {
+      /* Disable algorithms that are disallowed in FIPS mode.  */
+      int idx;
+      gcry_cipher_spec_t *spec;
+
+      for (idx = 0; (spec = cipher_list[idx]); idx++)
+        if (!spec->flags.fips)
+          spec->flags.disabled = 1;
+    }
+
+  return 0;
+}
+
+
+/* Run the selftests for cipher algorithm ALGO with optional reporting
+   function REPORT.  */
+gpg_error_t
+_gcry_cipher_selftest (int algo, int extended, selftest_report_func_t report)
+{
+  gcry_err_code_t ec = 0;
+  gcry_cipher_spec_t *spec;
+
+  spec = spec_from_algo (algo);
+  if (spec && !spec->flags.disabled && spec->selftest)
+    ec = spec->selftest (algo, extended, report);
+  else
+    {
+      ec = GPG_ERR_CIPHER_ALGO;
+      if (report)
+        report ("cipher", algo, "module",
+                (spec && !spec->flags.disabled)?
+                "no selftest available" :
+                spec? "algorithm disabled" : "algorithm not found");
+    }
+
+  return gpg_error (ec);
+}
diff --git a/libotr/libgcrypt-1.8.7/cipher/crc-intel-pclmul.c b/libotr/libgcrypt-1.8.7/cipher/crc-intel-pclmul.c
new file mode 100644
index 0000000..8ff08ec
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/crc-intel-pclmul.c
@@ -0,0 +1,925 @@
+/* crc-intel-pclmul.c - Intel PCLMUL accelerated CRC implementation
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" + +#include "bithelp.h" +#include "bufhelp.h" + + +#if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT) && \ + __GNUC__ >= 4 && \ + ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) + + +#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ +/* Prevent compiler from issuing SSE instructions between asm blocks. */ +# pragma GCC target("no-sse") +#endif + + +#define ALIGNED_16 __attribute__ ((aligned (16))) + + +struct u16_unaligned_s +{ + u16 a; +} __attribute__((packed, aligned (1), may_alias)); + + +/* Constants structure for generic reflected/non-reflected CRC32 CLMUL + * functions. */ +struct crc32_consts_s +{ + /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */ + u64 k[6]; + /* my_p: { floor(x^64 / P(x)), P(x) } */ + u64 my_p[2]; +}; + + +/* CLMUL constants for CRC32 and CRC32RFC1510. */ +static const struct crc32_consts_s crc32_consts ALIGNED_16 = +{ + { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */ + U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */ + U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */ + U64_C(0x163cd6124), 0 /* y = 2 */ + }, + { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */ + U64_C(0x1f7011641), U64_C(0x1db710641) + } +}; + +/* CLMUL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */ +static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 = +{ + { /* k[6] = x^(32*y) mod P(x) << 32*/ + U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */ + U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */ + U64_C(0xd9fe8c00) << 32, 0 /* y = 2 */ + }, + { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */ + U64_C(0x1f845fe24), U64_C(0x1864cfb00) + } +}; + +/* Common constants for CRC32 algorithms. 
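+ * The crc32_refl_shuf_shift table below is indexed by the number of
+ * valid bytes: loading 16 bytes starting at offset N yields a PSHUFB
+ * control mask that moves a partial block into place while zeroing the
+ * rest (0xff lanes select zero).  For example, at N = 4 the mask is
+ * twelve 0xff bytes followed by 00 01 02 03, so PSHUFB places source
+ * bytes 0..3 into destination bytes 12..15 and clears bytes 0..11, the
+ * same effect as shifting the four valid bytes up by 12 byte positions.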
*/ +static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_16 = + { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; +static const byte crc32_shuf_shift[3 * 16] ALIGNED_16 = + { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; +static const byte *crc32_bswap_shuf = &crc32_shuf_shift[16]; +static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_16 = + { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; +static const u64 crc32_merge9to15_shuf[15 - 9 + 1][2] ALIGNED_16 = + { + { U64_C(0x0706050403020100), U64_C(0xffffffffffffff0f) }, /* 9 */ + { U64_C(0x0706050403020100), U64_C(0xffffffffffff0f0e) }, + { U64_C(0x0706050403020100), U64_C(0xffffffffff0f0e0d) }, + { U64_C(0x0706050403020100), U64_C(0xffffffff0f0e0d0c) }, + { U64_C(0x0706050403020100), U64_C(0xffffff0f0e0d0c0b) }, + { U64_C(0x0706050403020100), U64_C(0xffff0f0e0d0c0b0a) }, + { U64_C(0x0706050403020100), U64_C(0xff0f0e0d0c0b0a09) }, /* 15 */ + }; +static const u64 crc32_merge5to7_shuf[7 - 5 + 1][2] ALIGNED_16 = + { + { U64_C(0xffffff0703020100), U64_C(0xffffffffffffffff) }, /* 5 */ + { U64_C(0xffff070603020100), U64_C(0xffffffffffffffff) }, + { U64_C(0xff07060503020100), U64_C(0xffffffffffffffff) }, /* 7 */ + }; + +/* PCLMUL functions for reflected CRC32. */ +static inline void +crc32_reflected_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + if (inlen >= 8 * 16) + { + asm volatile ("movd %[crc], %%xmm4\n\t" + "movdqu %[inbuf_0], %%xmm0\n\t" + "movdqu %[inbuf_1], %%xmm1\n\t" + "movdqu %[inbuf_2], %%xmm2\n\t" + "movdqu %[inbuf_3], %%xmm3\n\t" + "pxor %%xmm4, %%xmm0\n\t" + : + : [inbuf_0] "m" (inbuf[0 * 16]), + [inbuf_1] "m" (inbuf[1 * 16]), + [inbuf_2] "m" (inbuf[2 * 16]), + [inbuf_3] "m" (inbuf[3 * 16]), + [crc] "m" (*pcrc) + ); + + inbuf += 4 * 16; + inlen -= 4 * 16; + + asm volatile ("movdqa %[k1k2], %%xmm4\n\t" + : + : [k1k2] "m" (consts->k[1 - 1]) + ); + + /* Fold by 4. 
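+         Each of the four 16-byte accumulators A is advanced across the
+         next 64 bytes with the carry-less folding identity described in
+         Intel's white paper "Fast CRC Computation for Generic
+         Polynomials Using PCLMULQDQ Instruction", which this layout
+         appears to follow:
+
+           A' = (A_low64 clmul k1) xor (A_high64 clmul k2) xor next_block
+
+         with k1 = x^(32*17) mod P(x) and k2 = x^(32*15) mod P(x) taken
+         from crc32_consts; the pclmulqdq immediates 0x00 and 0x11
+         select the low and high 64-bit halves.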
*/ + while (inlen >= 4 * 16) + { + asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t" + "movdqa %%xmm0, %%xmm6\n\t" + "pclmulqdq $0x00, %%xmm4, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + + "movdqu %[inbuf_1], %%xmm5\n\t" + "movdqa %%xmm1, %%xmm6\n\t" + "pclmulqdq $0x00, %%xmm4, %%xmm1\n\t" + "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "pxor %%xmm6, %%xmm1\n\t" + + "movdqu %[inbuf_2], %%xmm5\n\t" + "movdqa %%xmm2, %%xmm6\n\t" + "pclmulqdq $0x00, %%xmm4, %%xmm2\n\t" + "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm6, %%xmm2\n\t" + + "movdqu %[inbuf_3], %%xmm5\n\t" + "movdqa %%xmm3, %%xmm6\n\t" + "pclmulqdq $0x00, %%xmm4, %%xmm3\n\t" + "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm6, %%xmm3\n\t" + : + : [inbuf_0] "m" (inbuf[0 * 16]), + [inbuf_1] "m" (inbuf[1 * 16]), + [inbuf_2] "m" (inbuf[2 * 16]), + [inbuf_3] "m" (inbuf[3 * 16]) + ); + + inbuf += 4 * 16; + inlen -= 4 * 16; + } + + asm volatile ("movdqa %[k3k4], %%xmm6\n\t" + "movdqa %[my_p], %%xmm5\n\t" + : + : [k3k4] "m" (consts->k[3 - 1]), + [my_p] "m" (consts->my_p[0]) + ); + + /* Fold 4 to 1. */ + + asm volatile ("movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + + "movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + + "movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" + "pxor %%xmm3, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + : + : + ); + } + else + { + asm volatile ("movd %[crc], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "movdqa %[k3k4], %%xmm6\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "movdqa %[my_p], %%xmm5\n\t" + : + : [inbuf] "m" (*inbuf), + [crc] "m" (*pcrc), + [k3k4] "m" (consts->k[3 - 1]), + [my_p] "m" (consts->my_p[0]) + ); + + inbuf += 16; + inlen -= 16; + } + + /* Fold by 1. */ + if (inlen >= 16) + { + while (inlen >= 16) + { + /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */ + asm volatile ("movdqu %[inbuf], %%xmm2\n\t" + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf) + ); + + inbuf += 16; + inlen -= 16; + } + } + + /* Partial fold. */ + if (inlen) + { + /* Load last input and add padding zeros. */ + asm volatile ("movdqu %[shr_shuf], %%xmm3\n\t" + "movdqu %[shl_shuf], %%xmm4\n\t" + "movdqu %[mask], %%xmm2\n\t" + + "movdqa %%xmm0, %%xmm1\n\t" + "pshufb %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf], %%xmm4\n\t" + "pshufb %%xmm3, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "por %%xmm1, %%xmm2\n\t" + + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + : + : [inbuf] "m" (*(inbuf - 16 + inlen)), + [mask] "m" (crc32_partial_fold_input_mask[inlen]), + [shl_shuf] "m" (crc32_refl_shuf_shift[inlen]), + [shr_shuf] "m" (crc32_refl_shuf_shift[inlen + 16]) + ); + + inbuf += inlen; + inlen -= inlen; + } + + /* Final fold. 
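+     The folded 128-bit remainder T is now reduced to the 32-bit CRC,
+     mirroring the step comments in the asm below: fold 128 -> 96 bits,
+     multiply by k5 = x^(32*2) mod P(x) for 96 -> 64 bits, then finish
+     with a Barrett reduction using my_p = { floor(x^64 / P(x)), P(x) },
+     roughly:
+
+       T1  = floor (T / x^32)  clmul  floor (x^64 / P(x))
+       T2  = floor (T1 / x^32) clmul  P(x)
+       CRC = (T xor T2) mod x^32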
*/ + asm volatile (/* reduce 128-bits to 96-bits */ + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t" + "psrldq $8, %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" + + /* reduce 96-bits to 64-bits */ + "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */ + "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */ + "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */ + "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */ + + /* barrett reduction */ + "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */ + "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */ + "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC */ + "pextrd $2, %%xmm0, %[out]\n\t" + : [out] "=m" (*pcrc) + : [k5] "m" (consts->k[5 - 1]) + ); +} + +static inline void +crc32_reflected_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + if (inlen < 4) + { + u32 crc = *pcrc; + u32 data; + + asm volatile ("movdqa %[my_p], %%xmm5\n\t" + : + : [my_p] "m" (consts->my_p[0]) + ); + + if (inlen == 1) + { + data = inbuf[0]; + data ^= crc; + data <<= 24; + crc >>= 8; + } + else if (inlen == 2) + { + data = ((const struct u16_unaligned_s *)inbuf)->a; + data ^= crc; + data <<= 16; + crc >>= 16; + } + else + { + data = ((const struct u16_unaligned_s *)inbuf)->a; + data |= inbuf[2] << 16; + data ^= crc; + data <<= 8; + crc >>= 24; + } + + /* Barrett reduction */ + asm volatile ("movd %[in], %%xmm0\n\t" + "movd %[crc], %%xmm1\n\t" + + "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + "psllq $32, %%xmm1\n\t" + "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */ + "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pxor %%xmm1, %%xmm0\n\t" + + "pextrd $1, %%xmm0, %[out]\n\t" + : [out] "=m" (*pcrc) + : [in] "rm" (data), + [crc] "rm" (crc) + ); + } + else if (inlen == 4) + { + /* Barrett reduction */ + asm volatile ("movd %[crc], %%xmm1\n\t" + "movd %[in], %%xmm0\n\t" + "movdqa %[my_p], %%xmm5\n\t" + "pxor %%xmm1, %%xmm0\n\t" + + "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */ + "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + + "pextrd $1, %%xmm0, %[out]\n\t" + : [out] "=m" (*pcrc) + : [in] "m" (*inbuf), + [crc] "m" (*pcrc), + [my_p] "m" (consts->my_p[0]) + ); + } + else + { + asm volatile ("movdqu %[shuf], %%xmm4\n\t" + "movd %[crc], %%xmm1\n\t" + "movdqa %[my_p], %%xmm5\n\t" + "movdqa %[k3k4], %%xmm6\n\t" + : + : [shuf] "m" (crc32_refl_shuf_shift[inlen]), + [crc] "m" (*pcrc), + [my_p] "m" (consts->my_p[0]), + [k3k4] "m" (consts->k[3 - 1]) + ); + + if (inlen >= 8) + { + asm volatile ("movq %[inbuf], %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf) + ); + if (inlen > 8) + { + asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/ + "movq %[inbuf_tail], %%xmm2\n\t" + "punpcklqdq %%xmm2, %%xmm0\n\t" + "pshufb %[merge_shuf], %%xmm0\n\t" + : + : [inbuf_tail] "m" (inbuf[inlen - 8]), + [merge_shuf] "m" + (*crc32_merge9to15_shuf[inlen - 9]) + ); + } + } + else + { + asm volatile ("movd %[inbuf], %%xmm0\n\t" + "pinsrd $1, %[inbuf_tail], %%xmm0\n\t" + "pshufb %[merge_shuf], %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf), + [inbuf_tail] "m" (inbuf[inlen - 4]), + [merge_shuf] "m" + (*crc32_merge5to7_shuf[inlen - 5]) + ); + } + + /* Final fold. 
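+         (This is the same 128 -> 96 -> 64 bit reduction and Barrett
+         step as at the end of crc32_reflected_bulk above, applied to
+         the single block assembled from the short input.)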
*/ + asm volatile ("pxor %%xmm1, %%xmm0\n\t" + "pshufb %%xmm4, %%xmm0\n\t" + + /* reduce 128-bits to 96-bits */ + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t" + "psrldq $8, %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" /* top 32-bit are zero */ + + /* reduce 96-bits to 64-bits */ + "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */ + "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */ + "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */ + "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */ + + /* barrett reduction */ + "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */ + "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */ + "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC */ + "pextrd $2, %%xmm0, %[out]\n\t" + : [out] "=m" (*pcrc) + : [k5] "m" (consts->k[5 - 1]) + ); + } +} + +/* PCLMUL functions for non-reflected CRC32. */ +static inline void +crc32_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + asm volatile ("movdqa %[bswap], %%xmm7\n\t" + : + : [bswap] "m" (*crc32_bswap_shuf) + ); + + if (inlen >= 8 * 16) + { + asm volatile ("movd %[crc], %%xmm4\n\t" + "movdqu %[inbuf_0], %%xmm0\n\t" + "movdqu %[inbuf_1], %%xmm1\n\t" + "movdqu %[inbuf_2], %%xmm2\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf_3], %%xmm3\n\t" + "pshufb %%xmm7, %%xmm0\n\t" + "pshufb %%xmm7, %%xmm1\n\t" + "pshufb %%xmm7, %%xmm2\n\t" + "pshufb %%xmm7, %%xmm3\n\t" + : + : [inbuf_0] "m" (inbuf[0 * 16]), + [inbuf_1] "m" (inbuf[1 * 16]), + [inbuf_2] "m" (inbuf[2 * 16]), + [inbuf_3] "m" (inbuf[3 * 16]), + [crc] "m" (*pcrc) + ); + + inbuf += 4 * 16; + inlen -= 4 * 16; + + asm volatile ("movdqa %[k1k2], %%xmm4\n\t" + : + : [k1k2] "m" (consts->k[1 - 1]) + ); + + /* Fold by 4. */ + while (inlen >= 4 * 16) + { + asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t" + "movdqa %%xmm0, %%xmm6\n\t" + "pshufb %%xmm7, %%xmm5\n\t" + "pclmulqdq $0x01, %%xmm4, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + + "movdqu %[inbuf_1], %%xmm5\n\t" + "movdqa %%xmm1, %%xmm6\n\t" + "pshufb %%xmm7, %%xmm5\n\t" + "pclmulqdq $0x01, %%xmm4, %%xmm1\n\t" + "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "pxor %%xmm6, %%xmm1\n\t" + + "movdqu %[inbuf_2], %%xmm5\n\t" + "movdqa %%xmm2, %%xmm6\n\t" + "pshufb %%xmm7, %%xmm5\n\t" + "pclmulqdq $0x01, %%xmm4, %%xmm2\n\t" + "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm6, %%xmm2\n\t" + + "movdqu %[inbuf_3], %%xmm5\n\t" + "movdqa %%xmm3, %%xmm6\n\t" + "pshufb %%xmm7, %%xmm5\n\t" + "pclmulqdq $0x01, %%xmm4, %%xmm3\n\t" + "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm6, %%xmm3\n\t" + : + : [inbuf_0] "m" (inbuf[0 * 16]), + [inbuf_1] "m" (inbuf[1 * 16]), + [inbuf_2] "m" (inbuf[2 * 16]), + [inbuf_3] "m" (inbuf[3 * 16]) + ); + + inbuf += 4 * 16; + inlen -= 4 * 16; + } + + asm volatile ("movdqa %[k3k4], %%xmm6\n\t" + "movdqa %[my_p], %%xmm5\n\t" + : + : [k3k4] "m" (consts->k[3 - 1]), + [my_p] "m" (consts->my_p[0]) + ); + + /* Fold 4 to 1. 
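+         (The chaining is identical to the reflected variant above;
+         only the pclmulqdq immediates differ, 0x01/0x10 instead of
+         0x00/0x11, to account for the byte-swapped lane order.)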
*/ + + asm volatile ("movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + + "movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + + "movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" + "pxor %%xmm3, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + : + : + ); + } + else + { + asm volatile ("movd %[crc], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "movdqa %[k3k4], %%xmm6\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "movdqa %[my_p], %%xmm5\n\t" + "pshufb %%xmm7, %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf), + [crc] "m" (*pcrc), + [k3k4] "m" (consts->k[3 - 1]), + [my_p] "m" (consts->my_p[0]) + ); + + inbuf += 16; + inlen -= 16; + } + + /* Fold by 1. */ + if (inlen >= 16) + { + while (inlen >= 16) + { + /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */ + asm volatile ("movdqu %[inbuf], %%xmm2\n\t" + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pshufb %%xmm7, %%xmm2\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf) + ); + + inbuf += 16; + inlen -= 16; + } + } + + /* Partial fold. */ + if (inlen) + { + /* Load last input and add padding zeros. */ + asm volatile ("movdqu %[shl_shuf], %%xmm4\n\t" + "movdqu %[shr_shuf], %%xmm3\n\t" + "movdqu %[mask], %%xmm2\n\t" + + "movdqa %%xmm0, %%xmm1\n\t" + "pshufb %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf], %%xmm4\n\t" + "pshufb %%xmm3, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "por %%xmm1, %%xmm2\n\t" + + "pshufb %%xmm7, %%xmm2\n\t" + + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + : + : [inbuf] "m" (*(inbuf - 16 + inlen)), + [mask] "m" (crc32_partial_fold_input_mask[inlen]), + [shl_shuf] "m" (crc32_refl_shuf_shift[32 - inlen]), + [shr_shuf] "m" (crc32_shuf_shift[inlen + 16]) + ); + + inbuf += inlen; + inlen -= inlen; + } + + /* Final fold. 
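+     (As in the reflected case: reduce 128 -> 96 -> 64 bits and finish
+     with a Barrett reduction; the result is additionally byte-swapped
+     with bswapl so that *pcrc keeps the input-endian convention noted
+     in _gcry_crc24rfc2440_intel_pclmul.)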
*/ + asm volatile (/* reduce 128-bits to 96-bits */ + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t" + "pslldq $8, %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */ + + /* reduce 96-bits to 64-bits */ + "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */ + "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */ + "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */ + + /* barrett reduction */ + "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */ + "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */ + "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC in input endian */ + "movd %%xmm0, %%eax\n\t" + "bswapl %%eax\n\t" + "movl %%eax, %[out]\n\t" + : [out] "=m" (*pcrc) + : [k5] "m" (consts->k[5 - 1]) + : "eax" ); +} + +static inline void +crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + if (inlen < 4) + { + u32 crc = *pcrc; + u32 data; + + asm volatile ("movdqa %[my_p], %%xmm5\n\t" + : + : [my_p] "m" (consts->my_p[0]) + ); + + if (inlen == 1) + { + data = inbuf[0]; + data ^= crc; + data = _gcry_bswap32(data << 24); + crc = _gcry_bswap32(crc >> 8); + } + else if (inlen == 2) + { + data = ((const struct u16_unaligned_s *)inbuf)->a; + data ^= crc; + data = _gcry_bswap32(data << 16); + crc = _gcry_bswap32(crc >> 16); + } + else + { + data = ((const struct u16_unaligned_s *)inbuf)->a; + data |= inbuf[2] << 16; + data ^= crc; + data = _gcry_bswap32(data << 8); + crc = _gcry_bswap32(crc >> 24); + } + + /* Barrett reduction */ + asm volatile ("movd %[in], %%xmm0\n\t" + "psllq $32, %%xmm0\n\t" /* [00][00][xx][00] */ + "movd %[crc], %%xmm1\n\t" + + "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC in input endian */ + "movd %%xmm0, %%eax\n\t" + "bswapl %%eax\n\t" + "movl %%eax, %[out]\n\t" + : [out] "=m" (*pcrc) + : [in] "r" (data), + [crc] "r" (crc) + : "eax" ); + } + else if (inlen == 4) + { + /* Barrett reduction */ + asm volatile ("movd %[crc], %%xmm0\n\t" + "movd %[in], %%xmm1\n\t" + "movdqa %[my_p], %%xmm5\n\t" + : + : [in] "m" (*inbuf), + [crc] "m" (*pcrc), + [my_p] "m" (consts->my_p[0]) + : "cc" ); + + asm volatile ("pxor %%xmm1, %%xmm0\n\t" + "pshufb %[bswap], %%xmm0\n\t" /* [xx][00][00][00] */ + + "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + : + : [bswap] "m" (*crc32_bswap_shuf) + : "cc" ); + + asm volatile (/* store CRC in input endian */ + "movd %%xmm0, %%eax\n\t" + "bswapl %%eax\n\t" + "movl %%eax, %[out]\n\t" + : [out] "=m" (*pcrc) + : + : "eax", "cc" ); + } + else + { + asm volatile ("movdqu %[shuf], %%xmm7\n\t" + "movd %[crc], %%xmm1\n\t" + "movdqa %[my_p], %%xmm5\n\t" + "movdqa %[k3k4], %%xmm6\n\t" + : + : [shuf] "m" (crc32_shuf_shift[32 - inlen]), + [crc] "m" (*pcrc), + [my_p] "m" (consts->my_p[0]), + [k3k4] "m" (consts->k[3 - 1]) + ); + + if (inlen >= 8) + { + asm volatile ("movq %[inbuf], %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf) + ); + if (inlen > 8) + { + asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/ + "movq %[inbuf_tail], %%xmm2\n\t" + "punpcklqdq %%xmm2, %%xmm0\n\t" + "pshufb %[merge_shuf], %%xmm0\n\t" + : + : [inbuf_tail] "m" (inbuf[inlen - 8]), + [merge_shuf] "m" + (*crc32_merge9to15_shuf[inlen - 
9]) + ); + } + } + else + { + asm volatile ("movd %[inbuf], %%xmm0\n\t" + "pinsrd $1, %[inbuf_tail], %%xmm0\n\t" + "pshufb %[merge_shuf], %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf), + [inbuf_tail] "m" (inbuf[inlen - 4]), + [merge_shuf] "m" + (*crc32_merge5to7_shuf[inlen - 5]) + ); + } + + /* Final fold. */ + asm volatile ("pxor %%xmm1, %%xmm0\n\t" + "pshufb %%xmm7, %%xmm0\n\t" + + /* reduce 128-bits to 96-bits */ + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t" + "pslldq $8, %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */ + + /* reduce 96-bits to 64-bits */ + "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */ + "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */ + "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */ + + /* barrett reduction */ + "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */ + "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */ + "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC in input endian */ + "movd %%xmm0, %%eax\n\t" + "bswapl %%eax\n\t" + "movl %%eax, %[out]\n\t" + : [out] "=m" (*pcrc) + : [k5] "m" (consts->k[5 - 1]) + : "eax" ); + } +} + +void +_gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) +{ + const struct crc32_consts_s *consts = &crc32_consts; +#if defined(__x86_64__) && defined(__WIN64__) + char win64tmp[2 * 16]; + + /* XMM6-XMM7 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif + + if (!inlen) + return; + + if (inlen >= 16) + crc32_reflected_bulk(pcrc, inbuf, inlen, consts); + else + crc32_reflected_less_than_16(pcrc, inbuf, inlen, consts); + +#if defined(__x86_64__) && defined(__WIN64__) + /* Restore used registers. */ + asm volatile("movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif +} + +void +_gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) +{ + const struct crc32_consts_s *consts = &crc24rfc2440_consts; +#if defined(__x86_64__) && defined(__WIN64__) + char win64tmp[2 * 16]; + + /* XMM6-XMM7 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif + + if (!inlen) + return; + + /* Note: *pcrc in input endian. */ + + if (inlen >= 16) + crc32_bulk(pcrc, inbuf, inlen, consts); + else + crc32_less_than_16(pcrc, inbuf, inlen, consts); + +#if defined(__x86_64__) && defined(__WIN64__) + /* Restore used registers. */ + asm volatile("movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif +} + +#endif /* USE_INTEL_PCLMUL */ diff --git a/libotr/libgcrypt-1.8.7/cipher/crc.c b/libotr/libgcrypt-1.8.7/cipher/crc.c new file mode 100644 index 0000000..a1ce50b --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/crc.c @@ -0,0 +1,861 @@ +/* crc.c - Cyclic redundancy checks. + * Copyright (C) 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "cipher.h" + +#include "bithelp.h" +#include "bufhelp.h" + + +/* USE_INTEL_PCLMUL indicates whether to compile CRC with Intel PCLMUL/SSE4.1 + * code. */ +#undef USE_INTEL_PCLMUL +#if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT) +# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) +# if __GNUC__ >= 4 +# define USE_INTEL_PCLMUL 1 +# endif +# endif +#endif /* USE_INTEL_PCLMUL */ + + +typedef struct +{ + u32 CRC; +#ifdef USE_INTEL_PCLMUL + unsigned int use_pclmul:1; /* Intel PCLMUL shall be used. */ +#endif + byte buf[4]; +} +CRC_CONTEXT; + + +#ifdef USE_INTEL_PCLMUL +/*-- crc-intel-pclmul.c --*/ +void _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen); +void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, + size_t inlen); +#endif + + +/* + * Code generated by universal_crc by Danjel McGougan + * + * CRC parameters used: + * bits: 32 + * poly: 0x04c11db7 + * init: 0xffffffff + * xor: 0xffffffff + * reverse: true + * non-direct: false + * + * CRC of the string "123456789" is 0xcbf43926 + */ + +static const u32 crc32_table[1024] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 
0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, + 0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, + 0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7, + 0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb, + 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf, + 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, + 0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a, + 0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e, + 0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761, + 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, + 0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d, + 0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530, + 0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034, + 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, + 0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6, + 0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2, + 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce, + 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, + 0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93, + 0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f, + 0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b, + 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, + 0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c, + 0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768, + 0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35, + 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, + 0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539, + 0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88, + 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c, + 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, + 0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9, + 0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd, + 0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1, + 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, + 0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, + 0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522, + 0x02b2f3e5, 
0x1ba9c2a4, 0x30849167, 0x299fa026, + 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, + 0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773, + 0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277, + 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d, + 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, + 0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81, + 0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc, + 0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8, + 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, + 0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f, + 0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b, + 0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27, + 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, + 0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a, + 0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876, + 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72, + 0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, + 0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685, + 0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1, + 0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d, + 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, + 0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91, + 0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d, + 0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9, + 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, + 0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd, + 0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9, + 0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315, + 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, + 0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399, + 0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45, + 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221, + 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, + 0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835, + 0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151, + 0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d, + 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, + 0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1, + 0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d, + 0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609, + 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, + 0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, + 0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9, + 0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05, + 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, + 0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, + 0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75, + 0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711, + 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, + 0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5, + 0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281, + 0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d, + 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, + 0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, + 0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d, + 0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819, + 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 
0x9e7eadcf, 0x9c381396, 0x9dfa79a1, + 0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d, + 0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69, + 0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5, + 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, + 0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9, + 0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625, + 0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41, + 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, + 0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555, + 0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31, + 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed, + 0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, + 0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9, + 0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701, + 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056, + 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, + 0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e, + 0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9, + 0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0, + 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, + 0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68, + 0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f, + 0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018, + 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, + 0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3, + 0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084, + 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c, + 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, + 0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b, + 0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3, + 0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4, + 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, + 0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002, + 0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755, + 0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72, + 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, + 0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca, + 0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5, + 0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82, + 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, + 0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a, + 0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d, + 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5, + 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, + 0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc, + 0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04, + 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953, + 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, + 0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b, + 0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc, + 0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8, + 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, + 0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50, + 0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677, + 0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120, + 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, + 0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, + 0x591dd66f, 
0xe1a1b10a, 0xf3141ee4, 0x4ba87981, + 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639, + 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, + 0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, + 0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6, + 0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1 +}; + +/* CRC32 */ + +static inline u32 +crc32_next (u32 crc, byte data) +{ + return (crc >> 8) ^ crc32_table[(crc & 0xff) ^ data]; +} + +/* + * Process 4 bytes in one go + */ +static inline u32 +crc32_next4 (u32 crc, u32 data) +{ + crc ^= data; + crc = crc32_table[(crc & 0xff) + 0x300] ^ + crc32_table[((crc >> 8) & 0xff) + 0x200] ^ + crc32_table[((crc >> 16) & 0xff) + 0x100] ^ + crc32_table[(crc >> 24) & 0xff]; + return crc; +} + +static void +crc32_init (void *context, unsigned int flags) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; +#ifdef USE_INTEL_PCLMUL + u32 hwf = _gcry_get_hw_features (); + + ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL); +#endif + + (void)flags; + + ctx->CRC = 0 ^ 0xffffffffL; +} + +static void +crc32_write (void *context, const void *inbuf_arg, size_t inlen) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + const byte *inbuf = inbuf_arg; + u32 crc; + +#ifdef USE_INTEL_PCLMUL + if (ctx->use_pclmul) + { + _gcry_crc32_intel_pclmul(&ctx->CRC, inbuf, inlen); + return; + } +#endif + + if (!inbuf || !inlen) + return; + + crc = ctx->CRC; + + while (inlen >= 16) + { + inlen -= 16; + crc = crc32_next4(crc, buf_get_le32(&inbuf[0])); + crc = crc32_next4(crc, buf_get_le32(&inbuf[4])); + crc = crc32_next4(crc, buf_get_le32(&inbuf[8])); + crc = crc32_next4(crc, buf_get_le32(&inbuf[12])); + inbuf += 16; + } + + while (inlen >= 4) + { + inlen -= 4; + crc = crc32_next4(crc, buf_get_le32(inbuf)); + inbuf += 4; + } + + while (inlen--) + { + crc = crc32_next(crc, *inbuf++); + } + + ctx->CRC = crc; +} + +static byte * +crc32_read (void *context) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + return ctx->buf; +} + +static void +crc32_final (void *context) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + ctx->CRC ^= 0xffffffffL; + buf_put_be32 (ctx->buf, ctx->CRC); +} + +/* CRC32 a'la RFC 1510 */ +/* CRC of the string "123456789" is 0x2dfd2d88 */ + +static void +crc32rfc1510_init (void *context, unsigned int flags) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; +#ifdef USE_INTEL_PCLMUL + u32 hwf = _gcry_get_hw_features (); + + ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL); +#endif + + (void)flags; + + ctx->CRC = 0; +} + +static void +crc32rfc1510_final (void *context) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + buf_put_be32(ctx->buf, ctx->CRC); +} + +/* CRC24 a'la RFC 2440 */ +/* + * Code generated by universal_crc by Danjel McGougan + * + * CRC parameters used: + * bits: 24 + * poly: 0x864cfb + * init: 0xb704ce + * xor: 0x000000 + * reverse: false + * non-direct: false + * + * CRC of the string "123456789" is 0x21cf02 + */ + +static const u32 crc24_table[1024] = +{ + 0x00000000, 0x00fb4c86, 0x000dd58a, 0x00f6990c, + 0x00e1e693, 0x001aaa15, 0x00ec3319, 0x00177f9f, + 0x003981a1, 0x00c2cd27, 0x0034542b, 0x00cf18ad, + 0x00d86732, 0x00232bb4, 0x00d5b2b8, 0x002efe3e, + 0x00894ec5, 0x00720243, 0x00849b4f, 0x007fd7c9, + 0x0068a856, 0x0093e4d0, 0x00657ddc, 0x009e315a, + 0x00b0cf64, 0x004b83e2, 0x00bd1aee, 0x00465668, + 0x005129f7, 0x00aa6571, 0x005cfc7d, 0x00a7b0fb, + 0x00e9d10c, 0x00129d8a, 0x00e40486, 0x001f4800, + 0x0008379f, 0x00f37b19, 0x0005e215, 0x00feae93, + 0x00d050ad, 0x002b1c2b, 
0x00dd8527, 0x0026c9a1, + 0x0031b63e, 0x00cafab8, 0x003c63b4, 0x00c72f32, + 0x00609fc9, 0x009bd34f, 0x006d4a43, 0x009606c5, + 0x0081795a, 0x007a35dc, 0x008cacd0, 0x0077e056, + 0x00591e68, 0x00a252ee, 0x0054cbe2, 0x00af8764, + 0x00b8f8fb, 0x0043b47d, 0x00b52d71, 0x004e61f7, + 0x00d2a319, 0x0029ef9f, 0x00df7693, 0x00243a15, + 0x0033458a, 0x00c8090c, 0x003e9000, 0x00c5dc86, + 0x00eb22b8, 0x00106e3e, 0x00e6f732, 0x001dbbb4, + 0x000ac42b, 0x00f188ad, 0x000711a1, 0x00fc5d27, + 0x005beddc, 0x00a0a15a, 0x00563856, 0x00ad74d0, + 0x00ba0b4f, 0x004147c9, 0x00b7dec5, 0x004c9243, + 0x00626c7d, 0x009920fb, 0x006fb9f7, 0x0094f571, + 0x00838aee, 0x0078c668, 0x008e5f64, 0x007513e2, + 0x003b7215, 0x00c03e93, 0x0036a79f, 0x00cdeb19, + 0x00da9486, 0x0021d800, 0x00d7410c, 0x002c0d8a, + 0x0002f3b4, 0x00f9bf32, 0x000f263e, 0x00f46ab8, + 0x00e31527, 0x001859a1, 0x00eec0ad, 0x00158c2b, + 0x00b23cd0, 0x00497056, 0x00bfe95a, 0x0044a5dc, + 0x0053da43, 0x00a896c5, 0x005e0fc9, 0x00a5434f, + 0x008bbd71, 0x0070f1f7, 0x008668fb, 0x007d247d, + 0x006a5be2, 0x00911764, 0x00678e68, 0x009cc2ee, + 0x00a44733, 0x005f0bb5, 0x00a992b9, 0x0052de3f, + 0x0045a1a0, 0x00beed26, 0x0048742a, 0x00b338ac, + 0x009dc692, 0x00668a14, 0x00901318, 0x006b5f9e, + 0x007c2001, 0x00876c87, 0x0071f58b, 0x008ab90d, + 0x002d09f6, 0x00d64570, 0x0020dc7c, 0x00db90fa, + 0x00ccef65, 0x0037a3e3, 0x00c13aef, 0x003a7669, + 0x00148857, 0x00efc4d1, 0x00195ddd, 0x00e2115b, + 0x00f56ec4, 0x000e2242, 0x00f8bb4e, 0x0003f7c8, + 0x004d963f, 0x00b6dab9, 0x004043b5, 0x00bb0f33, + 0x00ac70ac, 0x00573c2a, 0x00a1a526, 0x005ae9a0, + 0x0074179e, 0x008f5b18, 0x0079c214, 0x00828e92, + 0x0095f10d, 0x006ebd8b, 0x00982487, 0x00636801, + 0x00c4d8fa, 0x003f947c, 0x00c90d70, 0x003241f6, + 0x00253e69, 0x00de72ef, 0x0028ebe3, 0x00d3a765, + 0x00fd595b, 0x000615dd, 0x00f08cd1, 0x000bc057, + 0x001cbfc8, 0x00e7f34e, 0x00116a42, 0x00ea26c4, + 0x0076e42a, 0x008da8ac, 0x007b31a0, 0x00807d26, + 0x009702b9, 0x006c4e3f, 0x009ad733, 0x00619bb5, + 0x004f658b, 0x00b4290d, 0x0042b001, 0x00b9fc87, + 0x00ae8318, 0x0055cf9e, 0x00a35692, 0x00581a14, + 0x00ffaaef, 0x0004e669, 0x00f27f65, 0x000933e3, + 0x001e4c7c, 0x00e500fa, 0x001399f6, 0x00e8d570, + 0x00c62b4e, 0x003d67c8, 0x00cbfec4, 0x0030b242, + 0x0027cddd, 0x00dc815b, 0x002a1857, 0x00d154d1, + 0x009f3526, 0x006479a0, 0x0092e0ac, 0x0069ac2a, + 0x007ed3b5, 0x00859f33, 0x0073063f, 0x00884ab9, + 0x00a6b487, 0x005df801, 0x00ab610d, 0x00502d8b, + 0x00475214, 0x00bc1e92, 0x004a879e, 0x00b1cb18, + 0x00167be3, 0x00ed3765, 0x001bae69, 0x00e0e2ef, + 0x00f79d70, 0x000cd1f6, 0x00fa48fa, 0x0001047c, + 0x002ffa42, 0x00d4b6c4, 0x00222fc8, 0x00d9634e, + 0x00ce1cd1, 0x00355057, 0x00c3c95b, 0x003885dd, + 0x00000000, 0x00488f66, 0x00901ecd, 0x00d891ab, + 0x00db711c, 0x0093fe7a, 0x004b6fd1, 0x0003e0b7, + 0x00b6e338, 0x00fe6c5e, 0x0026fdf5, 0x006e7293, + 0x006d9224, 0x00251d42, 0x00fd8ce9, 0x00b5038f, + 0x006cc771, 0x00244817, 0x00fcd9bc, 0x00b456da, + 0x00b7b66d, 0x00ff390b, 0x0027a8a0, 0x006f27c6, + 0x00da2449, 0x0092ab2f, 0x004a3a84, 0x0002b5e2, + 0x00015555, 0x0049da33, 0x00914b98, 0x00d9c4fe, + 0x00d88ee3, 0x00900185, 0x0048902e, 0x00001f48, + 0x0003ffff, 0x004b7099, 0x0093e132, 0x00db6e54, + 0x006e6ddb, 0x0026e2bd, 0x00fe7316, 0x00b6fc70, + 0x00b51cc7, 0x00fd93a1, 0x0025020a, 0x006d8d6c, + 0x00b44992, 0x00fcc6f4, 0x0024575f, 0x006cd839, + 0x006f388e, 0x0027b7e8, 0x00ff2643, 0x00b7a925, + 0x0002aaaa, 0x004a25cc, 0x0092b467, 0x00da3b01, + 0x00d9dbb6, 0x009154d0, 0x0049c57b, 0x00014a1d, + 0x004b5141, 0x0003de27, 0x00db4f8c, 0x0093c0ea, + 0x0090205d, 0x00d8af3b, 
0x00003e90, 0x0048b1f6, + 0x00fdb279, 0x00b53d1f, 0x006dacb4, 0x002523d2, + 0x0026c365, 0x006e4c03, 0x00b6dda8, 0x00fe52ce, + 0x00279630, 0x006f1956, 0x00b788fd, 0x00ff079b, + 0x00fce72c, 0x00b4684a, 0x006cf9e1, 0x00247687, + 0x00917508, 0x00d9fa6e, 0x00016bc5, 0x0049e4a3, + 0x004a0414, 0x00028b72, 0x00da1ad9, 0x009295bf, + 0x0093dfa2, 0x00db50c4, 0x0003c16f, 0x004b4e09, + 0x0048aebe, 0x000021d8, 0x00d8b073, 0x00903f15, + 0x00253c9a, 0x006db3fc, 0x00b52257, 0x00fdad31, + 0x00fe4d86, 0x00b6c2e0, 0x006e534b, 0x0026dc2d, + 0x00ff18d3, 0x00b797b5, 0x006f061e, 0x00278978, + 0x002469cf, 0x006ce6a9, 0x00b47702, 0x00fcf864, + 0x0049fbeb, 0x0001748d, 0x00d9e526, 0x00916a40, + 0x00928af7, 0x00da0591, 0x0002943a, 0x004a1b5c, + 0x0096a282, 0x00de2de4, 0x0006bc4f, 0x004e3329, + 0x004dd39e, 0x00055cf8, 0x00ddcd53, 0x00954235, + 0x002041ba, 0x0068cedc, 0x00b05f77, 0x00f8d011, + 0x00fb30a6, 0x00b3bfc0, 0x006b2e6b, 0x0023a10d, + 0x00fa65f3, 0x00b2ea95, 0x006a7b3e, 0x0022f458, + 0x002114ef, 0x00699b89, 0x00b10a22, 0x00f98544, + 0x004c86cb, 0x000409ad, 0x00dc9806, 0x00941760, + 0x0097f7d7, 0x00df78b1, 0x0007e91a, 0x004f667c, + 0x004e2c61, 0x0006a307, 0x00de32ac, 0x0096bdca, + 0x00955d7d, 0x00ddd21b, 0x000543b0, 0x004dccd6, + 0x00f8cf59, 0x00b0403f, 0x0068d194, 0x00205ef2, + 0x0023be45, 0x006b3123, 0x00b3a088, 0x00fb2fee, + 0x0022eb10, 0x006a6476, 0x00b2f5dd, 0x00fa7abb, + 0x00f99a0c, 0x00b1156a, 0x006984c1, 0x00210ba7, + 0x00940828, 0x00dc874e, 0x000416e5, 0x004c9983, + 0x004f7934, 0x0007f652, 0x00df67f9, 0x0097e89f, + 0x00ddf3c3, 0x00957ca5, 0x004ded0e, 0x00056268, + 0x000682df, 0x004e0db9, 0x00969c12, 0x00de1374, + 0x006b10fb, 0x00239f9d, 0x00fb0e36, 0x00b38150, + 0x00b061e7, 0x00f8ee81, 0x00207f2a, 0x0068f04c, + 0x00b134b2, 0x00f9bbd4, 0x00212a7f, 0x0069a519, + 0x006a45ae, 0x0022cac8, 0x00fa5b63, 0x00b2d405, + 0x0007d78a, 0x004f58ec, 0x0097c947, 0x00df4621, + 0x00dca696, 0x009429f0, 0x004cb85b, 0x0004373d, + 0x00057d20, 0x004df246, 0x009563ed, 0x00ddec8b, + 0x00de0c3c, 0x0096835a, 0x004e12f1, 0x00069d97, + 0x00b39e18, 0x00fb117e, 0x002380d5, 0x006b0fb3, + 0x0068ef04, 0x00206062, 0x00f8f1c9, 0x00b07eaf, + 0x0069ba51, 0x00213537, 0x00f9a49c, 0x00b12bfa, + 0x00b2cb4d, 0x00fa442b, 0x0022d580, 0x006a5ae6, + 0x00df5969, 0x0097d60f, 0x004f47a4, 0x0007c8c2, + 0x00042875, 0x004ca713, 0x009436b8, 0x00dcb9de, + 0x00000000, 0x00d70983, 0x00555f80, 0x00825603, + 0x0051f286, 0x0086fb05, 0x0004ad06, 0x00d3a485, + 0x0059a88b, 0x008ea108, 0x000cf70b, 0x00dbfe88, + 0x00085a0d, 0x00df538e, 0x005d058d, 0x008a0c0e, + 0x00491c91, 0x009e1512, 0x001c4311, 0x00cb4a92, + 0x0018ee17, 0x00cfe794, 0x004db197, 0x009ab814, + 0x0010b41a, 0x00c7bd99, 0x0045eb9a, 0x0092e219, + 0x0041469c, 0x00964f1f, 0x0014191c, 0x00c3109f, + 0x006974a4, 0x00be7d27, 0x003c2b24, 0x00eb22a7, + 0x00388622, 0x00ef8fa1, 0x006dd9a2, 0x00bad021, + 0x0030dc2f, 0x00e7d5ac, 0x006583af, 0x00b28a2c, + 0x00612ea9, 0x00b6272a, 0x00347129, 0x00e378aa, + 0x00206835, 0x00f761b6, 0x007537b5, 0x00a23e36, + 0x00719ab3, 0x00a69330, 0x0024c533, 0x00f3ccb0, + 0x0079c0be, 0x00aec93d, 0x002c9f3e, 0x00fb96bd, + 0x00283238, 0x00ff3bbb, 0x007d6db8, 0x00aa643b, + 0x0029a4ce, 0x00fead4d, 0x007cfb4e, 0x00abf2cd, + 0x00785648, 0x00af5fcb, 0x002d09c8, 0x00fa004b, + 0x00700c45, 0x00a705c6, 0x002553c5, 0x00f25a46, + 0x0021fec3, 0x00f6f740, 0x0074a143, 0x00a3a8c0, + 0x0060b85f, 0x00b7b1dc, 0x0035e7df, 0x00e2ee5c, + 0x00314ad9, 0x00e6435a, 0x00641559, 0x00b31cda, + 0x003910d4, 0x00ee1957, 0x006c4f54, 0x00bb46d7, + 0x0068e252, 0x00bfebd1, 0x003dbdd2, 0x00eab451, + 0x0040d06a, 0x0097d9e9, 
0x00158fea, 0x00c28669, + 0x001122ec, 0x00c62b6f, 0x00447d6c, 0x009374ef, + 0x001978e1, 0x00ce7162, 0x004c2761, 0x009b2ee2, + 0x00488a67, 0x009f83e4, 0x001dd5e7, 0x00cadc64, + 0x0009ccfb, 0x00dec578, 0x005c937b, 0x008b9af8, + 0x00583e7d, 0x008f37fe, 0x000d61fd, 0x00da687e, + 0x00506470, 0x00876df3, 0x00053bf0, 0x00d23273, + 0x000196f6, 0x00d69f75, 0x0054c976, 0x0083c0f5, + 0x00a9041b, 0x007e0d98, 0x00fc5b9b, 0x002b5218, + 0x00f8f69d, 0x002fff1e, 0x00ada91d, 0x007aa09e, + 0x00f0ac90, 0x0027a513, 0x00a5f310, 0x0072fa93, + 0x00a15e16, 0x00765795, 0x00f40196, 0x00230815, + 0x00e0188a, 0x00371109, 0x00b5470a, 0x00624e89, + 0x00b1ea0c, 0x0066e38f, 0x00e4b58c, 0x0033bc0f, + 0x00b9b001, 0x006eb982, 0x00ecef81, 0x003be602, + 0x00e84287, 0x003f4b04, 0x00bd1d07, 0x006a1484, + 0x00c070bf, 0x0017793c, 0x00952f3f, 0x004226bc, + 0x00918239, 0x00468bba, 0x00c4ddb9, 0x0013d43a, + 0x0099d834, 0x004ed1b7, 0x00cc87b4, 0x001b8e37, + 0x00c82ab2, 0x001f2331, 0x009d7532, 0x004a7cb1, + 0x00896c2e, 0x005e65ad, 0x00dc33ae, 0x000b3a2d, + 0x00d89ea8, 0x000f972b, 0x008dc128, 0x005ac8ab, + 0x00d0c4a5, 0x0007cd26, 0x00859b25, 0x005292a6, + 0x00813623, 0x00563fa0, 0x00d469a3, 0x00036020, + 0x0080a0d5, 0x0057a956, 0x00d5ff55, 0x0002f6d6, + 0x00d15253, 0x00065bd0, 0x00840dd3, 0x00530450, + 0x00d9085e, 0x000e01dd, 0x008c57de, 0x005b5e5d, + 0x0088fad8, 0x005ff35b, 0x00dda558, 0x000aacdb, + 0x00c9bc44, 0x001eb5c7, 0x009ce3c4, 0x004bea47, + 0x00984ec2, 0x004f4741, 0x00cd1142, 0x001a18c1, + 0x009014cf, 0x00471d4c, 0x00c54b4f, 0x001242cc, + 0x00c1e649, 0x0016efca, 0x0094b9c9, 0x0043b04a, + 0x00e9d471, 0x003eddf2, 0x00bc8bf1, 0x006b8272, + 0x00b826f7, 0x006f2f74, 0x00ed7977, 0x003a70f4, + 0x00b07cfa, 0x00677579, 0x00e5237a, 0x00322af9, + 0x00e18e7c, 0x003687ff, 0x00b4d1fc, 0x0063d87f, + 0x00a0c8e0, 0x0077c163, 0x00f59760, 0x00229ee3, + 0x00f13a66, 0x002633e5, 0x00a465e6, 0x00736c65, + 0x00f9606b, 0x002e69e8, 0x00ac3feb, 0x007b3668, + 0x00a892ed, 0x007f9b6e, 0x00fdcd6d, 0x002ac4ee, + 0x00000000, 0x00520936, 0x00a4126c, 0x00f61b5a, + 0x004825d8, 0x001a2cee, 0x00ec37b4, 0x00be3e82, + 0x006b0636, 0x00390f00, 0x00cf145a, 0x009d1d6c, + 0x002323ee, 0x00712ad8, 0x00873182, 0x00d538b4, + 0x00d60c6c, 0x0084055a, 0x00721e00, 0x00201736, + 0x009e29b4, 0x00cc2082, 0x003a3bd8, 0x006832ee, + 0x00bd0a5a, 0x00ef036c, 0x00191836, 0x004b1100, + 0x00f52f82, 0x00a726b4, 0x00513dee, 0x000334d8, + 0x00ac19d8, 0x00fe10ee, 0x00080bb4, 0x005a0282, + 0x00e43c00, 0x00b63536, 0x00402e6c, 0x0012275a, + 0x00c71fee, 0x009516d8, 0x00630d82, 0x003104b4, + 0x008f3a36, 0x00dd3300, 0x002b285a, 0x0079216c, + 0x007a15b4, 0x00281c82, 0x00de07d8, 0x008c0eee, + 0x0032306c, 0x0060395a, 0x00962200, 0x00c42b36, + 0x00111382, 0x00431ab4, 0x00b501ee, 0x00e708d8, + 0x0059365a, 0x000b3f6c, 0x00fd2436, 0x00af2d00, + 0x00a37f36, 0x00f17600, 0x00076d5a, 0x0055646c, + 0x00eb5aee, 0x00b953d8, 0x004f4882, 0x001d41b4, + 0x00c87900, 0x009a7036, 0x006c6b6c, 0x003e625a, + 0x00805cd8, 0x00d255ee, 0x00244eb4, 0x00764782, + 0x0075735a, 0x00277a6c, 0x00d16136, 0x00836800, + 0x003d5682, 0x006f5fb4, 0x009944ee, 0x00cb4dd8, + 0x001e756c, 0x004c7c5a, 0x00ba6700, 0x00e86e36, + 0x005650b4, 0x00045982, 0x00f242d8, 0x00a04bee, + 0x000f66ee, 0x005d6fd8, 0x00ab7482, 0x00f97db4, + 0x00474336, 0x00154a00, 0x00e3515a, 0x00b1586c, + 0x006460d8, 0x003669ee, 0x00c072b4, 0x00927b82, + 0x002c4500, 0x007e4c36, 0x0088576c, 0x00da5e5a, + 0x00d96a82, 0x008b63b4, 0x007d78ee, 0x002f71d8, + 0x00914f5a, 0x00c3466c, 0x00355d36, 0x00675400, + 0x00b26cb4, 0x00e06582, 0x00167ed8, 0x004477ee, + 0x00fa496c, 0x00a8405a, 
0x005e5b00, 0x000c5236,
+  0x0046ff6c, 0x0014f65a, 0x00e2ed00, 0x00b0e436,
+  0x000edab4, 0x005cd382, 0x00aac8d8, 0x00f8c1ee,
+  0x002df95a, 0x007ff06c, 0x0089eb36, 0x00dbe200,
+  0x0065dc82, 0x0037d5b4, 0x00c1ceee, 0x0093c7d8,
+  0x0090f300, 0x00c2fa36, 0x0034e16c, 0x0066e85a,
+  0x00d8d6d8, 0x008adfee, 0x007cc4b4, 0x002ecd82,
+  0x00fbf536, 0x00a9fc00, 0x005fe75a, 0x000dee6c,
+  0x00b3d0ee, 0x00e1d9d8, 0x0017c282, 0x0045cbb4,
+  0x00eae6b4, 0x00b8ef82, 0x004ef4d8, 0x001cfdee,
+  0x00a2c36c, 0x00f0ca5a, 0x0006d100, 0x0054d836,
+  0x0081e082, 0x00d3e9b4, 0x0025f2ee, 0x0077fbd8,
+  0x00c9c55a, 0x009bcc6c, 0x006dd736, 0x003fde00,
+  0x003cead8, 0x006ee3ee, 0x0098f8b4, 0x00caf182,
+  0x0074cf00, 0x0026c636, 0x00d0dd6c, 0x0082d45a,
+  0x0057ecee, 0x0005e5d8, 0x00f3fe82, 0x00a1f7b4,
+  0x001fc936, 0x004dc000, 0x00bbdb5a, 0x00e9d26c,
+  0x00e5805a, 0x00b7896c, 0x00419236, 0x00139b00,
+  0x00ada582, 0x00ffacb4, 0x0009b7ee, 0x005bbed8,
+  0x008e866c, 0x00dc8f5a, 0x002a9400, 0x00789d36,
+  0x00c6a3b4, 0x0094aa82, 0x0062b1d8, 0x0030b8ee,
+  0x00338c36, 0x00618500, 0x00979e5a, 0x00c5976c,
+  0x007ba9ee, 0x0029a0d8, 0x00dfbb82, 0x008db2b4,
+  0x00588a00, 0x000a8336, 0x00fc986c, 0x00ae915a,
+  0x0010afd8, 0x0042a6ee, 0x00b4bdb4, 0x00e6b482,
+  0x00499982, 0x001b90b4, 0x00ed8bee, 0x00bf82d8,
+  0x0001bc5a, 0x0053b56c, 0x00a5ae36, 0x00f7a700,
+  0x00229fb4, 0x00709682, 0x00868dd8, 0x00d484ee,
+  0x006aba6c, 0x0038b35a, 0x00cea800, 0x009ca136,
+  0x009f95ee, 0x00cd9cd8, 0x003b8782, 0x00698eb4,
+  0x00d7b036, 0x0085b900, 0x0073a25a, 0x0021ab6c,
+  0x00f493d8, 0x00a69aee, 0x005081b4, 0x00028882,
+  0x00bcb600, 0x00eebf36, 0x0018a46c, 0x004aad5a
+};
+
+static inline
+u32 crc24_init (void)
+{
+  /* Transformed to a 32-bit CRC by multiplying it by x⁸ and then byte
+     swapping the result. */
+  return 0xce04b7; /* _gcry_bswap(0xb704ce << 8) */
+}
+
+static inline
+u32 crc24_next (u32 crc, byte data)
+{
+  return (crc >> 8) ^ crc24_table[(crc & 0xff) ^ data];
+}
+
+/*
+ * Process 4 bytes in one go.  Note that the top byte of the
+ * transformed CRC value is always zero, so after the XOR below the
+ * top byte of CRC equals the top byte of DATA; the last table lookup
+ * may therefore index with DATA directly.
+ */
+static inline
+u32 crc24_next4 (u32 crc, u32 data)
+{
+  crc ^= data;
+  crc = crc24_table[(crc & 0xff) + 0x300] ^
+        crc24_table[((crc >> 8) & 0xff) + 0x200] ^
+        crc24_table[((crc >> 16) & 0xff) + 0x100] ^
+        crc24_table[(data >> 24) & 0xff];
+  return crc;
+}
+
+static inline
+u32 crc24_final (u32 crc)
+{
+  return crc & 0xffffff;
+}
+
+static void
+crc24rfc2440_init (void *context, unsigned int flags)
+{
+  CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+#ifdef USE_INTEL_PCLMUL
+  u32 hwf = _gcry_get_hw_features ();
+
+  ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
+
+  (void)flags;
+
+  ctx->CRC = crc24_init();
+}
+
+static void
+crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen)
+{
+  const unsigned char *inbuf = inbuf_arg;
+  CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+  u32 crc;
+
+#ifdef USE_INTEL_PCLMUL
+  if (ctx->use_pclmul)
+    {
+      _gcry_crc24rfc2440_intel_pclmul(&ctx->CRC, inbuf, inlen);
+      return;
+    }
+#endif
+
+  if (!inbuf || !inlen)
+    return;
+
+  crc = ctx->CRC;
+
+  while (inlen >= 16)
+    {
+      inlen -= 16;
+      crc = crc24_next4(crc, buf_get_le32(&inbuf[0]));
+      crc = crc24_next4(crc, buf_get_le32(&inbuf[4]));
+      crc = crc24_next4(crc, buf_get_le32(&inbuf[8]));
+      crc = crc24_next4(crc, buf_get_le32(&inbuf[12]));
+      inbuf += 16;
+    }
+
+  while (inlen >= 4)
+    {
+      inlen -= 4;
+      crc = crc24_next4(crc, buf_get_le32(inbuf));
+      inbuf += 4;
+    }
+
+  while (inlen--)
+    {
+      crc = crc24_next(crc, *inbuf++);
+    }
+
+  ctx->CRC = crc;
+}
+
+static void
+crc24rfc2440_final (void *context)
+{
+  CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+
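  /* The CRC was accumulated in the byte-swapped representation (see
+     crc24_init), so the little-endian store below emits the most
+     significant CRC octet first.  Only the first 3 bytes of the
+     buffer are used, since the digest length of CRC24 is 3. */
+ 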
ctx->CRC = crc24_final(ctx->CRC);
+  buf_put_le32 (ctx->buf, ctx->CRC);
+}
+
+/* We allow the CRC algorithms even in FIPS mode because they are
+   not actually cryptographic primitives. */
+
+gcry_md_spec_t _gcry_digest_spec_crc32 =
+  {
+    GCRY_MD_CRC32, {0, 1},
+    "CRC32", NULL, 0, NULL, 4,
+    crc32_init, crc32_write, crc32_final, crc32_read, NULL,
+    sizeof (CRC_CONTEXT)
+  };
+
+gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510 =
+  {
+    GCRY_MD_CRC32_RFC1510, {0, 1},
+    "CRC32RFC1510", NULL, 0, NULL, 4,
+    crc32rfc1510_init, crc32_write, crc32rfc1510_final, crc32_read, NULL,
+    sizeof (CRC_CONTEXT)
+  };
+
+gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440 =
+  {
+    GCRY_MD_CRC24_RFC2440, {0, 1},
+    "CRC24RFC2440", NULL, 0, NULL, 3,
+    crc24rfc2440_init, crc24rfc2440_write, crc24rfc2440_final, crc32_read, NULL,
+    sizeof (CRC_CONTEXT)
+  };
diff --git a/libotr/libgcrypt-1.8.7/cipher/des-amd64.S b/libotr/libgcrypt-1.8.7/cipher/des-amd64.S
new file mode 100644
index 0000000..1b7cfba
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/des-amd64.S
@@ -0,0 +1,1036 @@
+/* des-amd64.S  -  AMD64 assembly implementation of 3DES cipher
+ *
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(USE_DES) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef __PIC__
+#  define RIP (%rip)
+#else
+#  define RIP
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...)
/*_*/ +#endif + +.text + +#define s1 0 +#define s2 ((s1) + (64*8)) +#define s3 ((s2) + (64*8)) +#define s4 ((s3) + (64*8)) +#define s5 ((s4) + (64*8)) +#define s6 ((s5) + (64*8)) +#define s7 ((s6) + (64*8)) +#define s8 ((s7) + (64*8)) + +/* register macros */ +#define CTX %rdi +#define SBOXES %rbp + +#define RL0 %r8 +#define RL1 %r9 +#define RL2 %r10 + +#define RL0d %r8d +#define RL1d %r9d +#define RL2d %r10d + +#define RR0 %r11 +#define RR1 %r12 +#define RR2 %r13 + +#define RR0d %r11d +#define RR1d %r12d +#define RR2d %r13d + +#define RW0 %rax +#define RW1 %rbx +#define RW2 %rcx + +#define RW0d %eax +#define RW1d %ebx +#define RW2d %ecx + +#define RW0bl %al +#define RW1bl %bl +#define RW2bl %cl + +#define RW0bh %ah +#define RW1bh %bh +#define RW2bh %ch + +#define RT0 %r15 +#define RT1 %rsi +#define RT2 %r14 +#define RT3 %rdx + +#define RT0d %r15d +#define RT1d %esi +#define RT2d %r14d +#define RT3d %edx + +/*********************************************************************** + * 1-way 3DES + ***********************************************************************/ +#define do_permutation(a, b, offset, mask) \ + movl a, RT0d; \ + shrl $(offset), RT0d; \ + xorl b, RT0d; \ + andl $(mask), RT0d; \ + xorl RT0d, b; \ + shll $(offset), RT0d; \ + xorl RT0d, a; + +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation(left, right) \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + movl left##d, RW0d; \ + roll $1, right##d; \ + xorl right##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##d; \ + xorl RW0d, right##d; \ + roll $1, left##d; \ + expand_to_64bits(right, RT3); \ + expand_to_64bits(left, RT3); + +#define final_permutation(left, right) \ + compress_to_64bits(right); \ + compress_to_64bits(left); \ + movl right##d, RW0d; \ + rorl $1, left##d; \ + xorl left##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##d; \ + xorl RW0d, left##d; \ + rorl $1, right##d; \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); + +#define round1(n, from, to, load_next_key) \ + xorq from, RW0; \ + \ + movzbl RW0bl, RT0d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + shrq $16, RW0; \ + movq s8(SBOXES, RT0, 8), RT0; \ + xorq s6(SBOXES, RT1, 8), to; \ + movzbl RW0bl, RL1d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s4(SBOXES, RT2, 8), RT0; \ + xorq s2(SBOXES, RT3, 8), to; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + xorq s7(SBOXES, RL1, 8), RT0; \ + xorq s5(SBOXES, RT1, 8), to; \ + xorq s3(SBOXES, RT2, 8), RT0; \ + load_next_key(n, RW0); \ + xorq RT0, to; \ + xorq s1(SBOXES, RT3, 8), to; \ + +#define load_next_key(n, RWx) \ + movq (((n) + 1) * 8)(CTX), RWx; + +#define dummy2(a, b) /*_*/ + +#define read_block(io, left, right) \ + movl (io), left##d; \ + movl 4(io), right##d; \ + bswapl left##d; \ + bswapl right##d; + +#define write_block(io, left, right) \ + bswapl left##d; \ + bswapl right##d; \ + movl left##d, (io); \ + movl right##d, 4(io); + 
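The do_permutation macro above is the classic masked bit-group swap (a "delta swap"): it exchanges the bits of b selected by mask with the bits of a that sit offset positions higher, using only shifts, XORs and an AND. A minimal C sketch of the same operation follows, for illustration only; the function name is ours, not from this file, and 32-bit halves are assumed:

#include <stdint.h>

/* Exchange the bits of *b selected by MASK with the bits of *a that
   lie OFFSET positions higher; the same shift/xor/and sequence used
   by the do_permutation assembly macro above. */
static void
delta_swap (uint32_t *a, uint32_t *b, int offset, uint32_t mask)
{
  uint32_t t = ((*a >> offset) ^ *b) & mask;

  *b ^= t;
  *a ^= t << offset;
}

Applied with the (offset, mask) pairs visible in initial_permutation, first (4, 0x0f0f0f0f) and (16, 0x0000ffff) on (left, right), then (2, 0x33333333) and (8, 0x00ff00ff) with the operands reversed, this composes the DES initial permutation on the two 32-bit halves of a block.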
+.align 8 +.globl _gcry_3des_amd64_crypt_block +ELF(.type _gcry_3des_amd64_crypt_block,@function;) + +_gcry_3des_amd64_crypt_block: + /* input: + * %rdi: round keys, CTX + * %rsi: dst + * %rdx: src + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + pushq %r14; + pushq %r15; + pushq %rsi; /*dst*/ + + leaq .L_s1 RIP, SBOXES; + + read_block(%rdx, RL0, RR0); + initial_permutation(RL0, RR0); + + movq (CTX), RW0; + + round1(0, RR0, RL0, load_next_key); + round1(1, RL0, RR0, load_next_key); + round1(2, RR0, RL0, load_next_key); + round1(3, RL0, RR0, load_next_key); + round1(4, RR0, RL0, load_next_key); + round1(5, RL0, RR0, load_next_key); + round1(6, RR0, RL0, load_next_key); + round1(7, RL0, RR0, load_next_key); + round1(8, RR0, RL0, load_next_key); + round1(9, RL0, RR0, load_next_key); + round1(10, RR0, RL0, load_next_key); + round1(11, RL0, RR0, load_next_key); + round1(12, RR0, RL0, load_next_key); + round1(13, RL0, RR0, load_next_key); + round1(14, RR0, RL0, load_next_key); + round1(15, RL0, RR0, load_next_key); + + round1(16+0, RL0, RR0, load_next_key); + round1(16+1, RR0, RL0, load_next_key); + round1(16+2, RL0, RR0, load_next_key); + round1(16+3, RR0, RL0, load_next_key); + round1(16+4, RL0, RR0, load_next_key); + round1(16+5, RR0, RL0, load_next_key); + round1(16+6, RL0, RR0, load_next_key); + round1(16+7, RR0, RL0, load_next_key); + round1(16+8, RL0, RR0, load_next_key); + round1(16+9, RR0, RL0, load_next_key); + round1(16+10, RL0, RR0, load_next_key); + round1(16+11, RR0, RL0, load_next_key); + round1(16+12, RL0, RR0, load_next_key); + round1(16+13, RR0, RL0, load_next_key); + round1(16+14, RL0, RR0, load_next_key); + round1(16+15, RR0, RL0, load_next_key); + + round1(32+0, RR0, RL0, load_next_key); + round1(32+1, RL0, RR0, load_next_key); + round1(32+2, RR0, RL0, load_next_key); + round1(32+3, RL0, RR0, load_next_key); + round1(32+4, RR0, RL0, load_next_key); + round1(32+5, RL0, RR0, load_next_key); + round1(32+6, RR0, RL0, load_next_key); + round1(32+7, RL0, RR0, load_next_key); + round1(32+8, RR0, RL0, load_next_key); + round1(32+9, RL0, RR0, load_next_key); + round1(32+10, RR0, RL0, load_next_key); + round1(32+11, RL0, RR0, load_next_key); + round1(32+12, RR0, RL0, load_next_key); + round1(32+13, RL0, RR0, load_next_key); + round1(32+14, RR0, RL0, load_next_key); + round1(32+15, RL0, RR0, dummy2); + + popq RW2; /*dst*/ + final_permutation(RR0, RL0); + write_block(RW2, RR0, RL0); + + popq %r15; + popq %r14; + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + + ret; +ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;) + +/*********************************************************************** + * 3-way 3DES + ***********************************************************************/ +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation3(left, right) \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##1d, 
left##1d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + \ + movl left##0d, RW0d; \ + roll $1, right##0d; \ + xorl right##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##0d; \ + xorl RW0d, right##0d; \ + roll $1, left##0d; \ + expand_to_64bits(right##0, RT3); \ + expand_to_64bits(left##0, RT3); \ + movl left##1d, RW1d; \ + roll $1, right##1d; \ + xorl right##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, left##1d; \ + xorl RW1d, right##1d; \ + roll $1, left##1d; \ + expand_to_64bits(right##1, RT3); \ + expand_to_64bits(left##1, RT3); \ + movl left##2d, RW2d; \ + roll $1, right##2d; \ + xorl right##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, left##2d; \ + xorl RW2d, right##2d; \ + roll $1, left##2d; \ + expand_to_64bits(right##2, RT3); \ + expand_to_64bits(left##2, RT3); + +#define final_permutation3(left, right) \ + compress_to_64bits(right##0); \ + compress_to_64bits(left##0); \ + movl right##0d, RW0d; \ + rorl $1, left##0d; \ + xorl left##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##0d; \ + xorl RW0d, left##0d; \ + rorl $1, right##0d; \ + compress_to_64bits(right##1); \ + compress_to_64bits(left##1); \ + movl right##1d, RW1d; \ + rorl $1, left##1d; \ + xorl left##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, right##1d; \ + xorl RW1d, left##1d; \ + rorl $1, right##1d; \ + compress_to_64bits(right##2); \ + compress_to_64bits(left##2); \ + movl right##2d, RW2d; \ + rorl $1, left##2d; \ + xorl left##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, right##2d; \ + xorl RW2d, left##2d; \ + rorl $1, right##2d; \ + \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); + +#define round3(n, from, to, load_next_key, do_movq) \ + xorq from##0, RW0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s8(SBOXES, RT3, 8), to##0; \ + xorq s6(SBOXES, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s4(SBOXES, RT3, 8), to##0; \ + xorq s2(SBOXES, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s7(SBOXES, RT3, 8), to##0; \ + xorq s5(SBOXES, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + load_next_key(n, RW0); \ + xorq s3(SBOXES, RT3, 8), to##0; \ + xorq s1(SBOXES, RT1, 8), to##0; \ + xorq from##1, RW1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s8(SBOXES, RT3, 8), to##1; \ + xorq s6(SBOXES, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s4(SBOXES, RT3, 8), to##1; \ + xorq s2(SBOXES, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrl $16, RW1d; \ + xorq s7(SBOXES, RT3, 8), to##1; \ + xorq s5(SBOXES, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; 
\ + movzbl RW1bh, RT1d; \ + do_movq(RW0, RW1); \ + xorq s3(SBOXES, RT3, 8), to##1; \ + xorq s1(SBOXES, RT1, 8), to##1; \ + xorq from##2, RW2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s8(SBOXES, RT3, 8), to##2; \ + xorq s6(SBOXES, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s4(SBOXES, RT3, 8), to##2; \ + xorq s2(SBOXES, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrl $16, RW2d; \ + xorq s7(SBOXES, RT3, 8), to##2; \ + xorq s5(SBOXES, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + do_movq(RW0, RW2); \ + xorq s3(SBOXES, RT3, 8), to##2; \ + xorq s1(SBOXES, RT1, 8), to##2; + +#define __movq(src, dst) \ + movq src, dst; + +#define read_block(io, left, right) \ + movl (io), left##d; \ + movl 4(io), right##d; \ + bswapl left##d; \ + bswapl right##d; + +#define write_block(io, left, right) \ + bswapl left##d; \ + bswapl right##d; \ + movl left##d, (io); \ + movl right##d, 4(io); + +.align 8 +ELF(.type _gcry_3des_amd64_crypt_blk3,@function;) +_gcry_3des_amd64_crypt_blk3: + /* input: + * %rdi: round keys, CTX + * RL0d, RR0d, RL1d, RR1d, RL2d, RR2d: 3 input blocks + * RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks + */ + + leaq .L_s1 RIP, SBOXES; + + initial_permutation3(RL, RR); + + movq 0(CTX), RW0; + movq RW0, RW1; + movq RW0, RW2; + + round3(0, RR, RL, load_next_key, __movq); + round3(1, RL, RR, load_next_key, __movq); + round3(2, RR, RL, load_next_key, __movq); + round3(3, RL, RR, load_next_key, __movq); + round3(4, RR, RL, load_next_key, __movq); + round3(5, RL, RR, load_next_key, __movq); + round3(6, RR, RL, load_next_key, __movq); + round3(7, RL, RR, load_next_key, __movq); + round3(8, RR, RL, load_next_key, __movq); + round3(9, RL, RR, load_next_key, __movq); + round3(10, RR, RL, load_next_key, __movq); + round3(11, RL, RR, load_next_key, __movq); + round3(12, RR, RL, load_next_key, __movq); + round3(13, RL, RR, load_next_key, __movq); + round3(14, RR, RL, load_next_key, __movq); + round3(15, RL, RR, load_next_key, __movq); + + round3(16+0, RL, RR, load_next_key, __movq); + round3(16+1, RR, RL, load_next_key, __movq); + round3(16+2, RL, RR, load_next_key, __movq); + round3(16+3, RR, RL, load_next_key, __movq); + round3(16+4, RL, RR, load_next_key, __movq); + round3(16+5, RR, RL, load_next_key, __movq); + round3(16+6, RL, RR, load_next_key, __movq); + round3(16+7, RR, RL, load_next_key, __movq); + round3(16+8, RL, RR, load_next_key, __movq); + round3(16+9, RR, RL, load_next_key, __movq); + round3(16+10, RL, RR, load_next_key, __movq); + round3(16+11, RR, RL, load_next_key, __movq); + round3(16+12, RL, RR, load_next_key, __movq); + round3(16+13, RR, RL, load_next_key, __movq); + round3(16+14, RL, RR, load_next_key, __movq); + round3(16+15, RR, RL, load_next_key, __movq); + + round3(32+0, RR, RL, load_next_key, __movq); + round3(32+1, RL, RR, load_next_key, __movq); + round3(32+2, RR, RL, load_next_key, __movq); + round3(32+3, RL, RR, load_next_key, __movq); + round3(32+4, RR, RL, load_next_key, __movq); + round3(32+5, RL, RR, load_next_key, __movq); + round3(32+6, RR, RL, load_next_key, __movq); + round3(32+7, RL, RR, load_next_key, __movq); + round3(32+8, RR, RL, load_next_key, __movq); + round3(32+9, RL, RR, load_next_key, __movq); + round3(32+10, RR, RL, load_next_key, __movq); + round3(32+11, RL, RR, load_next_key, __movq); + round3(32+12, RR, RL, load_next_key, __movq); + round3(32+13, RL, RR, load_next_key, __movq); + round3(32+14, RR, RL, load_next_key, 
__movq);
+	round3(32+15, RL, RR, dummy2, dummy2);
+
+	final_permutation3(RR, RL);
+
+	ret;
+ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
+
+.align 8
+.globl _gcry_3des_amd64_cbc_dec
+ELF(.type _gcry_3des_amd64_cbc_dec,@function;)
+_gcry_3des_amd64_cbc_dec:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (3 blocks)
+	 *	%rdx: src (3 blocks)
+	 *	%rcx: iv (64bit)
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+	pushq %r15;
+
+	pushq %rsi; /*dst*/
+	pushq %rdx; /*src*/
+	pushq %rcx; /*iv*/
+
+	/* load input */
+	movl 0 * 4(%rdx), RL0d;
+	movl 1 * 4(%rdx), RR0d;
+	movl 2 * 4(%rdx), RL1d;
+	movl 3 * 4(%rdx), RR1d;
+	movl 4 * 4(%rdx), RL2d;
+	movl 5 * 4(%rdx), RR2d;
+
+	bswapl RL0d;
+	bswapl RR0d;
+	bswapl RL1d;
+	bswapl RR1d;
+	bswapl RL2d;
+	bswapl RR2d;
+
+	call _gcry_3des_amd64_crypt_blk3;
+
+	popq %rcx; /*iv*/
+	popq %rdx; /*src*/
+	popq %rsi; /*dst*/
+
+	bswapl RR0d;
+	bswapl RL0d;
+	bswapl RR1d;
+	bswapl RL1d;
+	bswapl RR2d;
+	bswapl RL2d;
+
+	movq 2 * 8(%rdx), RT0;
+	xorl 0 * 4(%rcx), RR0d;
+	xorl 1 * 4(%rcx), RL0d;
+	xorl 0 * 4(%rdx), RR1d;
+	xorl 1 * 4(%rdx), RL1d;
+	xorl 2 * 4(%rdx), RR2d;
+	xorl 3 * 4(%rdx), RL2d;
+	movq RT0, (%rcx); /* store new IV */
+
+	movl RR0d, 0 * 4(%rsi);
+	movl RL0d, 1 * 4(%rsi);
+	movl RR1d, 2 * 4(%rsi);
+	movl RL1d, 3 * 4(%rsi);
+	movl RR2d, 4 * 4(%rsi);
+	movl RL2d, 5 * 4(%rsi);
+
+	popq %r15;
+	popq %r14;
+	popq %r13;
+	popq %r12;
+	popq %rbx;
+	popq %rbp;
+
+	ret;
+ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_3des_amd64_ctr_enc
+ELF(.type _gcry_3des_amd64_ctr_enc,@function;)
+_gcry_3des_amd64_ctr_enc:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (3 blocks)
+	 *	%rdx: src (3 blocks)
+	 *	%rcx: iv (64bit)
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+	pushq %r15;
+
+	pushq %rsi; /*dst*/
+	pushq %rdx; /*src*/
+	movq %rcx, RW2;
+
+	/* load IV and byteswap */
+	movq (RW2), RT0;
+	bswapq RT0;
+	movq RT0, RR0;
+
+	/* construct IVs */
+	leaq 1(RT0), RR1;
+	leaq 2(RT0), RR2;
+	leaq 3(RT0), RT0;
+	movq RR0, RL0;
+	movq RR1, RL1;
+	movq RR2, RL2;
+	bswapq RT0;
+	shrq $32, RL0;
+	shrq $32, RL1;
+	shrq $32, RL2;
+
+	/* store new IV */
+	movq RT0, (RW2);
+
+	call _gcry_3des_amd64_crypt_blk3;
+
+	popq %rdx; /*src*/
+	popq %rsi; /*dst*/
+
+	bswapl RR0d;
+	bswapl RL0d;
+	bswapl RR1d;
+	bswapl RL1d;
+	bswapl RR2d;
+	bswapl RL2d;
+
+	xorl 0 * 4(%rdx), RR0d;
+	xorl 1 * 4(%rdx), RL0d;
+	xorl 2 * 4(%rdx), RR1d;
+	xorl 3 * 4(%rdx), RL1d;
+	xorl 4 * 4(%rdx), RR2d;
+	xorl 5 * 4(%rdx), RL2d;
+
+	movl RR0d, 0 * 4(%rsi);
+	movl RL0d, 1 * 4(%rsi);
+	movl RR1d, 2 * 4(%rsi);
+	movl RL1d, 3 * 4(%rsi);
+	movl RR2d, 4 * 4(%rsi);
+	movl RL2d, 5 * 4(%rsi);
+
+	popq %r15;
+	popq %r14;
+	popq %r13;
+	popq %r12;
+	popq %rbx;
+	popq %rbp;
+
+	ret;
+ELF(.size _gcry_3des_amd64_ctr_enc,.-_gcry_3des_amd64_ctr_enc;)
+
+.align 8
+.globl _gcry_3des_amd64_cfb_dec
+ELF(.type _gcry_3des_amd64_cfb_dec,@function;)
+_gcry_3des_amd64_cfb_dec:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (3 blocks)
+	 *	%rdx: src (3 blocks)
+	 *	%rcx: iv (64bit)
+	 */
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+	pushq %r15;
+
+	pushq %rsi; /*dst*/
+	pushq %rdx; /*src*/
+	movq %rcx, RW2;
+
+	/* Load input */
+	movl 0 * 4(RW2), RL0d;
+	movl 1 * 4(RW2), RR0d;
+	movl 0 * 4(%rdx), RL1d;
+	movl 1 * 4(%rdx), RR1d;
+	movl 2 * 4(%rdx), RL2d;
+	movl 3 * 4(%rdx), RR2d;
+
+	bswapl RL0d;
+	bswapl RR0d;
+	bswapl RL1d;
+	bswapl RR1d;
+	bswapl RL2d;
+	bswapl RR2d;
+
+	/* Update IV */
+	movq 4
* 4(%rdx), RW0; + movq RW0, (RW2); + + call _gcry_3des_amd64_crypt_blk3; + + popq %rdx; /*src*/ + popq %rsi; /*dst*/ + + bswapl RR0d; + bswapl RL0d; + bswapl RR1d; + bswapl RL1d; + bswapl RR2d; + bswapl RL2d; + + xorl 0 * 4(%rdx), RR0d; + xorl 1 * 4(%rdx), RL0d; + xorl 2 * 4(%rdx), RR1d; + xorl 3 * 4(%rdx), RL1d; + xorl 4 * 4(%rdx), RR2d; + xorl 5 * 4(%rdx), RL2d; + + movl RR0d, 0 * 4(%rsi); + movl RL0d, 1 * 4(%rsi); + movl RR1d, 2 * 4(%rsi); + movl RL1d, 3 * 4(%rsi); + movl RR2d, 4 * 4(%rsi); + movl RL2d, 5 * 4(%rsi); + + popq %r15; + popq %r14; + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + ret; +ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;) + +.align 16 +.L_s1: + .quad 0x0010100001010400, 0x0000000000000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0010100001010004, 0x0000100000010404 + .quad 0x0000000000000004, 0x0000100000010000 + .quad 0x0000000000000400, 0x0010100001010400 + .quad 0x0010100001010404, 0x0000000000000400 + .quad 0x0010000001000404, 0x0010100001010004 + .quad 0x0010000001000000, 0x0000000000000004 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000100000010400 + .quad 0x0000100000010400, 0x0010100001010000 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0000100000010004, 0x0010000001000004 + .quad 0x0010000001000004, 0x0000100000010004 + .quad 0x0000000000000000, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010000001000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0000000000000004, 0x0010100001010000 + .quad 0x0010100001010400, 0x0010000001000000 + .quad 0x0010000001000000, 0x0000000000000400 + .quad 0x0010100001010004, 0x0000100000010000 + .quad 0x0000100000010400, 0x0010000001000004 + .quad 0x0000000000000400, 0x0000000000000004 + .quad 0x0010000001000404, 0x0000100000010404 + .quad 0x0010100001010404, 0x0000100000010004 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0010000001000004, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010100001010400 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000000000000000 + .quad 0x0000100000010004, 0x0000100000010400 + .quad 0x0000000000000000, 0x0010100001010004 +.L_s2: + .quad 0x0801080200100020, 0x0800080000000000 + .quad 0x0000080000000000, 0x0001080200100020 + .quad 0x0001000000100000, 0x0000000200000020 + .quad 0x0801000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0801080200100020 + .quad 0x0801080000100000, 0x0800000000000000 + .quad 0x0800080000000000, 0x0001000000100000 + .quad 0x0000000200000020, 0x0801000200100020 + .quad 0x0001080000100000, 0x0001000200100020 + .quad 0x0800080200000020, 0x0000000000000000 + .quad 0x0800000000000000, 0x0000080000000000 + .quad 0x0001080200100020, 0x0801000000100000 + .quad 0x0001000200100020, 0x0800000200000020 + .quad 0x0000000000000000, 0x0001080000100000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0801000000100000, 0x0000080200000020 + .quad 0x0000000000000000, 0x0001080200100020 + .quad 0x0801000200100020, 0x0001000000100000 + .quad 0x0800080200000020, 0x0801000000100000 + .quad 0x0801080000100000, 0x0000080000000000 + .quad 0x0801000000100000, 0x0800080000000000 + .quad 0x0000000200000020, 0x0801080200100020 + .quad 0x0001080200100020, 0x0000000200000020 + .quad 0x0000080000000000, 0x0800000000000000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0001000000100000, 0x0800000200000020 + .quad 0x0001000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0001000200100020 + .quad 0x0001080000100000, 
0x0000000000000000 + .quad 0x0800080000000000, 0x0000080200000020 + .quad 0x0800000000000000, 0x0801000200100020 + .quad 0x0801080200100020, 0x0001080000100000 +.L_s3: + .quad 0x0000002000000208, 0x0000202008020200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000202000020208, 0x0000002008000200 + .quad 0x0000200000020008, 0x0000000008000008 + .quad 0x0000000008000008, 0x0000200000020000 + .quad 0x0000202008020208, 0x0000200000020008 + .quad 0x0000200008020000, 0x0000002000000208 + .quad 0x0000000008000000, 0x0000000000000008 + .quad 0x0000202008020200, 0x0000002000000200 + .quad 0x0000202000020200, 0x0000200008020000 + .quad 0x0000200008020008, 0x0000202000020208 + .quad 0x0000002008000208, 0x0000202000020200 + .quad 0x0000200000020000, 0x0000002008000208 + .quad 0x0000000000000008, 0x0000202008020208 + .quad 0x0000002000000200, 0x0000000008000000 + .quad 0x0000202008020200, 0x0000000008000000 + .quad 0x0000200000020008, 0x0000002000000208 + .quad 0x0000200000020000, 0x0000202008020200 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000002000000200, 0x0000200000020008 + .quad 0x0000202008020208, 0x0000002008000200 + .quad 0x0000000008000008, 0x0000002000000200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000208, 0x0000200000020000 + .quad 0x0000000008000000, 0x0000202008020208 + .quad 0x0000000000000008, 0x0000202000020208 + .quad 0x0000202000020200, 0x0000000008000008 + .quad 0x0000200008020000, 0x0000002008000208 + .quad 0x0000002000000208, 0x0000200008020000 + .quad 0x0000202000020208, 0x0000000000000008 + .quad 0x0000200008020008, 0x0000202000020200 +.L_s4: + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x0000020000002000, 0x0008020800002000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x0000020000002000, 0x0008020800002000 +.L_s5: + .quad 0x0000001000000100, 0x0020001002080100 + .quad 0x0020000002080000, 0x0420001002000100 + .quad 0x0000000000080000, 0x0000001000000100 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0400001000080100, 0x0000000000080000 + .quad 0x0020001002000100, 0x0400001000080100 + .quad 0x0420001002000100, 0x0420000002080000 + .quad 0x0000001000080100, 
0x0400000000000000 + .quad 0x0020000002000000, 0x0400000000080000 + .quad 0x0400000000080000, 0x0000000000000000 + .quad 0x0400001000000100, 0x0420001002080100 + .quad 0x0420001002080100, 0x0020001002000100 + .quad 0x0420000002080000, 0x0400001000000100 + .quad 0x0000000000000000, 0x0420000002000000 + .quad 0x0020001002080100, 0x0020000002000000 + .quad 0x0420000002000000, 0x0000001000080100 + .quad 0x0000000000080000, 0x0420001002000100 + .quad 0x0000001000000100, 0x0020000002000000 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0420001002000100, 0x0400001000080100 + .quad 0x0020001002000100, 0x0400000000000000 + .quad 0x0420000002080000, 0x0020001002080100 + .quad 0x0400001000080100, 0x0000001000000100 + .quad 0x0020000002000000, 0x0420000002080000 + .quad 0x0420001002080100, 0x0000001000080100 + .quad 0x0420000002000000, 0x0420001002080100 + .quad 0x0020000002080000, 0x0000000000000000 + .quad 0x0400000000080000, 0x0420000002000000 + .quad 0x0000001000080100, 0x0020001002000100 + .quad 0x0400001000000100, 0x0000000000080000 + .quad 0x0000000000000000, 0x0400000000080000 + .quad 0x0020001002080100, 0x0400001000000100 +.L_s6: + .quad 0x0200000120000010, 0x0204000020000000 + .quad 0x0000040000000000, 0x0204040120000010 + .quad 0x0204000020000000, 0x0000000100000010 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0200040020000000, 0x0004040100000010 + .quad 0x0004000000000000, 0x0200000120000010 + .quad 0x0004000100000010, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0000000000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000040000000000 + .quad 0x0004040000000000, 0x0200040120000010 + .quad 0x0000000100000010, 0x0204000120000010 + .quad 0x0204000120000010, 0x0000000000000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000040100000010, 0x0004040000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0200040020000000, 0x0000000100000010 + .quad 0x0204000120000010, 0x0004040000000000 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0000040100000010, 0x0200000120000010 + .quad 0x0004000000000000, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0200000120000010, 0x0204040120000010 + .quad 0x0004040000000000, 0x0204000020000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000000000000000, 0x0204000120000010 + .quad 0x0000000100000010, 0x0000040000000000 + .quad 0x0204000020000000, 0x0004040100000010 + .quad 0x0000040000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000000000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0004000100000010, 0x0200040120000010 +.L_s7: + .quad 0x0002000000200000, 0x2002000004200002 + .quad 0x2000000004000802, 0x0000000000000000 + .quad 0x0000000000000800, 0x2000000004000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2002000004200802, 0x0002000000200000 + .quad 0x0000000000000000, 0x2000000004000002 + .quad 0x2000000000000002, 0x0000000004000000 + .quad 0x2002000004200002, 0x2000000000000802 + .quad 0x0000000004000800, 0x2002000000200802 + .quad 0x2002000000200002, 0x0000000004000800 + .quad 0x2000000004000002, 0x0002000004200000 + .quad 0x0002000004200800, 0x2002000000200002 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000000000802, 0x2002000004200802 + .quad 0x0002000000200800, 0x2000000000000002 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0002000000200000, 0x2000000004000802 + .quad 0x2000000004000802, 
0x2002000004200002 + .quad 0x2002000004200002, 0x2000000000000002 + .quad 0x2002000000200002, 0x0000000004000000 + .quad 0x0000000004000800, 0x0002000000200000 + .quad 0x0002000004200800, 0x2000000000000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2000000000000802, 0x2000000004000002 + .quad 0x2002000004200802, 0x0002000004200000 + .quad 0x0002000000200800, 0x0000000000000000 + .quad 0x2000000000000002, 0x2002000004200802 + .quad 0x0000000000000000, 0x2002000000200802 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000004000002, 0x0000000004000800 + .quad 0x0000000000000800, 0x2002000000200002 +.L_s8: + .quad 0x0100010410001000, 0x0000010000001000 + .quad 0x0000000000040000, 0x0100010410041000 + .quad 0x0100000010000000, 0x0100010410001000 + .quad 0x0000000400000000, 0x0100000010000000 + .quad 0x0000000400040000, 0x0100000010040000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0100010010041000, 0x0000010400041000 + .quad 0x0000010000001000, 0x0000000400000000 + .quad 0x0100000010040000, 0x0100000410000000 + .quad 0x0100010010001000, 0x0000010400001000 + .quad 0x0000010000041000, 0x0000000400040000 + .quad 0x0100000410040000, 0x0100010010041000 + .quad 0x0000010400001000, 0x0000000000000000 + .quad 0x0000000000000000, 0x0100000410040000 + .quad 0x0100000410000000, 0x0100010010001000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0100010010041000, 0x0000010000001000 + .quad 0x0000000400000000, 0x0100000410040000 + .quad 0x0000010000001000, 0x0000010400041000 + .quad 0x0100010010001000, 0x0000000400000000 + .quad 0x0100000410000000, 0x0100000010040000 + .quad 0x0100000410040000, 0x0100000010000000 + .quad 0x0000000000040000, 0x0100010410001000 + .quad 0x0000000000000000, 0x0100010410041000 + .quad 0x0000000400040000, 0x0100000410000000 + .quad 0x0100000010040000, 0x0100010010001000 + .quad 0x0100010410001000, 0x0000000000000000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0000010000041000, 0x0000010400001000 + .quad 0x0000010400001000, 0x0000000400040000 + .quad 0x0100000010000000, 0x0100010010041000 + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/des.c b/libotr/libgcrypt-1.8.7/cipher/des.c new file mode 100644 index 0000000..5c99f50 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/des.c @@ -0,0 +1,1536 @@ +/* des.c - DES and Triple-DES encryption/decryption Algorithm + * Copyright (C) 1998, 1999, 2001, 2002, 2003, + * 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * For a description of triple encryption, see: + * Bruce Schneier: Applied Cryptography. Second Edition. + * John Wiley & Sons, 1996. ISBN 0-471-12845-7. Pages 358 ff. 
+ * This implementation is according to the definition of DES in FIPS
+ * PUB 46-2 from December 1993.
+ */
+
+
+/*
+ * Written by Michael Roth <mroth@nessie.de>, September 1998
+ */
+
+
+/*
+ * U S A G E
+ * ===========
+ *
+ * For DES or Triple-DES encryption/decryption you must initialize a proper
+ * encryption context with a key.
+ *
+ * A DES key is 64 bits wide but only 56 bits of the key are used. The
+ * remaining bits are parity bits and they will _not_ be checked in this
+ * implementation, but simply ignored.
+ *
+ * For Triple-DES you can use either two or three 64bit keys.
+ * The parity bits will _not_ be checked either.
+ *
+ * After initializing a context with a key you can use this context to
+ * encrypt or decrypt data in 64bit blocks in Electronic Codebook Mode.
+ *
+ * (In the examples below the slashes at the beginning and ending of comments
+ * are omitted.)
+ *
+ * DES Example
+ * -----------
+ *     unsigned char key[8];
+ *     unsigned char plaintext[8];
+ *     unsigned char ciphertext[8];
+ *     unsigned char recovered[8];
+ *     des_ctx context;
+ *
+ *     * Fill 'key' and 'plaintext' with some data *
+ *     ....
+ *
+ *     * Set up the DES encryption context *
+ *     des_setkey(context, key);
+ *
+ *     * Encrypt the plaintext *
+ *     des_ecb_encrypt(context, plaintext, ciphertext);
+ *
+ *     * To recover the original plaintext from ciphertext use: *
+ *     des_ecb_decrypt(context, ciphertext, recovered);
+ *
+ *
+ * Triple-DES Example
+ * ------------------
+ *     unsigned char key1[8];
+ *     unsigned char key2[8];
+ *     unsigned char key3[8];
+ *     unsigned char plaintext[8];
+ *     unsigned char ciphertext[8];
+ *     unsigned char recovered[8];
+ *     tripledes_ctx context;
+ *
+ *     * If you would like to use two 64bit keys, fill 'key1' and 'key2'
+ *       and then set up the encryption context: *
+ *     tripledes_set2keys(context, key1, key2);
+ *
+ *     * To use three 64bit keys with Triple-DES use: *
+ *     tripledes_set3keys(context, key1, key2, key3);
+ *
+ *     * Encrypting plaintext with Triple-DES *
+ *     tripledes_ecb_encrypt(context, plaintext, ciphertext);
+ *
+ *     * Decrypting ciphertext to recover the plaintext with Triple-DES *
+ *     tripledes_ecb_decrypt(context, ciphertext, recovered);
+ *
+ *
+ * Selftest
+ * --------
+ *     char *error_msg;
+ *
+ *     * To perform a selftest of this DES/Triple-DES implementation use the
+ *       function selftest(). It will return an error string if there is a
+ *       problem with this library. *
+ *
+ *     if ( (error_msg = selftest()) )
+ *     {
+ *         fprintf(stderr, "An error in the DES/Triple-DES implementation occurred: %s\n", error_msg);
+ *         abort();
+ *     }
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <string.h>	/* memcpy, memcmp */
+#include "types.h"	/* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+
+
+#define DES_BLOCKSIZE 8
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* Helper macro to force alignment to 16 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
+
+#if defined(__GNUC__) && defined(__GNU_LIBRARY__)
+# define working_memcmp memcmp
+#else
+/*
+ * According to the SunOS man page, memcmp returns indeterminate sign
+ * depending on whether characters are signed or not.
+ */ +static int +working_memcmp( const void *_a, const void *_b, size_t n ) +{ + const char *a = _a; + const char *b = _b; + for( ; n; n--, a++, b++ ) + if( *a != *b ) + return (int)(*(byte*)a) - (int)(*(byte*)b); + return 0; +} +#endif + +/* + * Encryption/Decryption context of DES + */ +typedef struct _des_ctx + { + u32 encrypt_subkeys[32]; + u32 decrypt_subkeys[32]; + } +des_ctx[1]; + +/* + * Encryption/Decryption context of Triple-DES + */ +typedef struct _tripledes_ctx + { + u32 encrypt_subkeys[96]; + u32 decrypt_subkeys[96]; + struct { + int no_weak_key; + } flags; + } +tripledes_ctx[1]; + +static void des_key_schedule (const byte *, u32 *); +static int des_setkey (struct _des_ctx *, const byte *); +static int des_ecb_crypt (struct _des_ctx *, const byte *, byte *, int); +static int tripledes_set2keys (struct _tripledes_ctx *, + const byte *, const byte *); +static int tripledes_set3keys (struct _tripledes_ctx *, + const byte *, const byte *, const byte *); +static int tripledes_ecb_crypt (struct _tripledes_ctx *, + const byte *, byte *, int); +static int is_weak_key ( const byte *key ); +static const char *selftest (void); +static unsigned int do_tripledes_encrypt(void *context, byte *outbuf, + const byte *inbuf ); +static unsigned int do_tripledes_decrypt(void *context, byte *outbuf, + const byte *inbuf ); +static gcry_err_code_t do_tripledes_setkey(void *context, const byte *key, + unsigned keylen); + +static int initialized; + + + + +/* + * The s-box values are permuted according to the 'primitive function P' + * and are rotated one bit to the left. + */ +static u32 sbox1[64] = +{ + 0x01010400, 0x00000000, 0x00010000, 0x01010404, 0x01010004, 0x00010404, 0x00000004, 0x00010000, + 0x00000400, 0x01010400, 0x01010404, 0x00000400, 0x01000404, 0x01010004, 0x01000000, 0x00000004, + 0x00000404, 0x01000400, 0x01000400, 0x00010400, 0x00010400, 0x01010000, 0x01010000, 0x01000404, + 0x00010004, 0x01000004, 0x01000004, 0x00010004, 0x00000000, 0x00000404, 0x00010404, 0x01000000, + 0x00010000, 0x01010404, 0x00000004, 0x01010000, 0x01010400, 0x01000000, 0x01000000, 0x00000400, + 0x01010004, 0x00010000, 0x00010400, 0x01000004, 0x00000400, 0x00000004, 0x01000404, 0x00010404, + 0x01010404, 0x00010004, 0x01010000, 0x01000404, 0x01000004, 0x00000404, 0x00010404, 0x01010400, + 0x00000404, 0x01000400, 0x01000400, 0x00000000, 0x00010004, 0x00010400, 0x00000000, 0x01010004 +}; + +static u32 sbox2[64] = +{ + 0x80108020, 0x80008000, 0x00008000, 0x00108020, 0x00100000, 0x00000020, 0x80100020, 0x80008020, + 0x80000020, 0x80108020, 0x80108000, 0x80000000, 0x80008000, 0x00100000, 0x00000020, 0x80100020, + 0x00108000, 0x00100020, 0x80008020, 0x00000000, 0x80000000, 0x00008000, 0x00108020, 0x80100000, + 0x00100020, 0x80000020, 0x00000000, 0x00108000, 0x00008020, 0x80108000, 0x80100000, 0x00008020, + 0x00000000, 0x00108020, 0x80100020, 0x00100000, 0x80008020, 0x80100000, 0x80108000, 0x00008000, + 0x80100000, 0x80008000, 0x00000020, 0x80108020, 0x00108020, 0x00000020, 0x00008000, 0x80000000, + 0x00008020, 0x80108000, 0x00100000, 0x80000020, 0x00100020, 0x80008020, 0x80000020, 0x00100020, + 0x00108000, 0x00000000, 0x80008000, 0x00008020, 0x80000000, 0x80100020, 0x80108020, 0x00108000 +}; + +static u32 sbox3[64] = +{ + 0x00000208, 0x08020200, 0x00000000, 0x08020008, 0x08000200, 0x00000000, 0x00020208, 0x08000200, + 0x00020008, 0x08000008, 0x08000008, 0x00020000, 0x08020208, 0x00020008, 0x08020000, 0x00000208, + 0x08000000, 0x00000008, 0x08020200, 0x00000200, 0x00020200, 0x08020000, 0x08020008, 0x00020208, + 
0x08000208, 0x00020200, 0x00020000, 0x08000208, 0x00000008, 0x08020208, 0x00000200, 0x08000000, + 0x08020200, 0x08000000, 0x00020008, 0x00000208, 0x00020000, 0x08020200, 0x08000200, 0x00000000, + 0x00000200, 0x00020008, 0x08020208, 0x08000200, 0x08000008, 0x00000200, 0x00000000, 0x08020008, + 0x08000208, 0x00020000, 0x08000000, 0x08020208, 0x00000008, 0x00020208, 0x00020200, 0x08000008, + 0x08020000, 0x08000208, 0x00000208, 0x08020000, 0x00020208, 0x00000008, 0x08020008, 0x00020200 +}; + +static u32 sbox4[64] = +{ + 0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802080, 0x00800081, 0x00800001, 0x00002001, + 0x00000000, 0x00802000, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00800080, 0x00800001, + 0x00000001, 0x00002000, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002001, 0x00002080, + 0x00800081, 0x00000001, 0x00002080, 0x00800080, 0x00002000, 0x00802080, 0x00802081, 0x00000081, + 0x00800080, 0x00800001, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00000000, 0x00802000, + 0x00002080, 0x00800080, 0x00800081, 0x00000001, 0x00802001, 0x00002081, 0x00002081, 0x00000080, + 0x00802081, 0x00000081, 0x00000001, 0x00002000, 0x00800001, 0x00002001, 0x00802080, 0x00800081, + 0x00002001, 0x00002080, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002000, 0x00802080 +}; + +static u32 sbox5[64] = +{ + 0x00000100, 0x02080100, 0x02080000, 0x42000100, 0x00080000, 0x00000100, 0x40000000, 0x02080000, + 0x40080100, 0x00080000, 0x02000100, 0x40080100, 0x42000100, 0x42080000, 0x00080100, 0x40000000, + 0x02000000, 0x40080000, 0x40080000, 0x00000000, 0x40000100, 0x42080100, 0x42080100, 0x02000100, + 0x42080000, 0x40000100, 0x00000000, 0x42000000, 0x02080100, 0x02000000, 0x42000000, 0x00080100, + 0x00080000, 0x42000100, 0x00000100, 0x02000000, 0x40000000, 0x02080000, 0x42000100, 0x40080100, + 0x02000100, 0x40000000, 0x42080000, 0x02080100, 0x40080100, 0x00000100, 0x02000000, 0x42080000, + 0x42080100, 0x00080100, 0x42000000, 0x42080100, 0x02080000, 0x00000000, 0x40080000, 0x42000000, + 0x00080100, 0x02000100, 0x40000100, 0x00080000, 0x00000000, 0x40080000, 0x02080100, 0x40000100 +}; + +static u32 sbox6[64] = +{ + 0x20000010, 0x20400000, 0x00004000, 0x20404010, 0x20400000, 0x00000010, 0x20404010, 0x00400000, + 0x20004000, 0x00404010, 0x00400000, 0x20000010, 0x00400010, 0x20004000, 0x20000000, 0x00004010, + 0x00000000, 0x00400010, 0x20004010, 0x00004000, 0x00404000, 0x20004010, 0x00000010, 0x20400010, + 0x20400010, 0x00000000, 0x00404010, 0x20404000, 0x00004010, 0x00404000, 0x20404000, 0x20000000, + 0x20004000, 0x00000010, 0x20400010, 0x00404000, 0x20404010, 0x00400000, 0x00004010, 0x20000010, + 0x00400000, 0x20004000, 0x20000000, 0x00004010, 0x20000010, 0x20404010, 0x00404000, 0x20400000, + 0x00404010, 0x20404000, 0x00000000, 0x20400010, 0x00000010, 0x00004000, 0x20400000, 0x00404010, + 0x00004000, 0x00400010, 0x20004010, 0x00000000, 0x20404000, 0x20000000, 0x00400010, 0x20004010 +}; + +static u32 sbox7[64] = +{ + 0x00200000, 0x04200002, 0x04000802, 0x00000000, 0x00000800, 0x04000802, 0x00200802, 0x04200800, + 0x04200802, 0x00200000, 0x00000000, 0x04000002, 0x00000002, 0x04000000, 0x04200002, 0x00000802, + 0x04000800, 0x00200802, 0x00200002, 0x04000800, 0x04000002, 0x04200000, 0x04200800, 0x00200002, + 0x04200000, 0x00000800, 0x00000802, 0x04200802, 0x00200800, 0x00000002, 0x04000000, 0x00200800, + 0x04000000, 0x00200800, 0x00200000, 0x04000802, 0x04000802, 0x04200002, 0x04200002, 0x00000002, + 0x00200002, 0x04000000, 0x04000800, 0x00200000, 0x04200800, 0x00000802, 0x00200802, 
0x04200800,
+  0x00000802, 0x04000002, 0x04200802, 0x04200000, 0x00200800, 0x00000000, 0x00000002, 0x04200802,
+  0x00000000, 0x00200802, 0x04200000, 0x00000800, 0x04000002, 0x04000800, 0x00000800, 0x00200002
+};
+
+static u32 sbox8[64] =
+{
+  0x10001040, 0x00001000, 0x00040000, 0x10041040, 0x10000000, 0x10001040, 0x00000040, 0x10000000,
+  0x00040040, 0x10040000, 0x10041040, 0x00041000, 0x10041000, 0x00041040, 0x00001000, 0x00000040,
+  0x10040000, 0x10000040, 0x10001000, 0x00001040, 0x00041000, 0x00040040, 0x10040040, 0x10041000,
+  0x00001040, 0x00000000, 0x00000000, 0x10040040, 0x10000040, 0x10001000, 0x00041040, 0x00040000,
+  0x00041040, 0x00040000, 0x10041000, 0x00001000, 0x00000040, 0x10040040, 0x00001000, 0x00041040,
+  0x10001000, 0x00000040, 0x10000040, 0x10040000, 0x10040040, 0x10000000, 0x00040000, 0x10001040,
+  0x00000000, 0x10041040, 0x00040040, 0x10000040, 0x10040000, 0x10001000, 0x10001040, 0x00000000,
+  0x10041040, 0x00041000, 0x00041000, 0x00001040, 0x00001040, 0x00040040, 0x10000000, 0x10041000
+};
+
+
+/*
+ * These two tables are part of the 'permuted choice 1' function.
+ * In this implementation several speed improvements have been made.
+ */
+static u32 leftkey_swap[16] =
+{
+  0x00000000, 0x00000001, 0x00000100, 0x00000101,
+  0x00010000, 0x00010001, 0x00010100, 0x00010101,
+  0x01000000, 0x01000001, 0x01000100, 0x01000101,
+  0x01010000, 0x01010001, 0x01010100, 0x01010101
+};
+
+static u32 rightkey_swap[16] =
+{
+  0x00000000, 0x01000000, 0x00010000, 0x01010000,
+  0x00000100, 0x01000100, 0x00010100, 0x01010100,
+  0x00000001, 0x01000001, 0x00010001, 0x01010001,
+  0x00000101, 0x01000101, 0x00010101, 0x01010101,
+};
+
+
+
+/*
+ * Numbers of left shifts per round for encryption subkeys.
+ * To calculate the decryption subkeys we just reverse the
+ * ordering of the calculated encryption subkeys, so there
+ * is no need for a decryption rotate table.
+ */
+static byte encrypt_rotate_tab[16] =
+{
+  1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
+};
+
+
+
+/*
+ * Table with weak DES keys sorted in ascending order.
+ * In DES there are 64 known keys which are weak.  They are weak
+ * because they produce only one, two or four different
+ * subkeys in the subkey scheduling process.
+ * The keys in this table have all their parity bits cleared.
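+ * A candidate key therefore has to be masked with 0xfe per byte before
+ * it can be compared against this table; is_weak_key() below does
+ * exactly that.  For example, the classic weak key
+ * 01 01 01 01 01 01 01 01 (all parity bits set) maps to the all-zero
+ * first entry after masking.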
+ */ +static byte weak_keys[64][8] = +{ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /*w*/ + { 0x00, 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e }, + { 0x00, 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0 }, + { 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe }, + { 0x00, 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e }, /*sw*/ + { 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00 }, + { 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe }, + { 0x00, 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0 }, + { 0x00, 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0 }, /*sw*/ + { 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe }, + { 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00 }, + { 0x00, 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e }, + { 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe }, /*sw*/ + { 0x00, 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0 }, + { 0x00, 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e }, + { 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00 }, + { 0x1e, 0x00, 0x00, 0x1e, 0x0e, 0x00, 0x00, 0x0e }, + { 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e, 0x00 }, /*sw*/ + { 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0, 0xfe }, + { 0x1e, 0x00, 0xfe, 0xe0, 0x0e, 0x00, 0xfe, 0xf0 }, + { 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00, 0x00 }, + { 0x1e, 0x1e, 0x1e, 0x1e, 0x0e, 0x0e, 0x0e, 0x0e }, /*w*/ + { 0x1e, 0x1e, 0xe0, 0xe0, 0x0e, 0x0e, 0xf0, 0xf0 }, + { 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe, 0xfe }, + { 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00, 0xfe }, + { 0x1e, 0xe0, 0x1e, 0xe0, 0x0e, 0xf0, 0x0e, 0xf0 }, /*sw*/ + { 0x1e, 0xe0, 0xe0, 0x1e, 0x0e, 0xf0, 0xf0, 0x0e }, + { 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe, 0x00 }, + { 0x1e, 0xfe, 0x00, 0xe0, 0x0e, 0xfe, 0x00, 0xf0 }, + { 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e, 0xfe }, /*sw*/ + { 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0, 0x00 }, + { 0x1e, 0xfe, 0xfe, 0x1e, 0x0e, 0xfe, 0xfe, 0x0e }, + { 0xe0, 0x00, 0x00, 0xe0, 0xf0, 0x00, 0x00, 0xf0 }, + { 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e, 0xfe }, + { 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0, 0x00 }, /*sw*/ + { 0xe0, 0x00, 0xfe, 0x1e, 0xf0, 0x00, 0xfe, 0x0e }, + { 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00, 0xfe }, + { 0xe0, 0x1e, 0x1e, 0xe0, 0xf0, 0x0e, 0x0e, 0xf0 }, + { 0xe0, 0x1e, 0xe0, 0x1e, 0xf0, 0x0e, 0xf0, 0x0e }, /*sw*/ + { 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe, 0x00 }, + { 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00, 0x00 }, + { 0xe0, 0xe0, 0x1e, 0x1e, 0xf0, 0xf0, 0x0e, 0x0e }, + { 0xe0, 0xe0, 0xe0, 0xe0, 0xf0, 0xf0, 0xf0, 0xf0 }, /*w*/ + { 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe, 0xfe }, + { 0xe0, 0xfe, 0x00, 0x1e, 0xf0, 0xfe, 0x00, 0x0e }, + { 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e, 0x00 }, + { 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0, 0xfe }, /*sw*/ + { 0xe0, 0xfe, 0xfe, 0xe0, 0xf0, 0xfe, 0xfe, 0xf0 }, + { 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe }, + { 0xfe, 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0 }, + { 0xfe, 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e }, + { 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00 }, /*sw*/ + { 0xfe, 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0 }, + { 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe }, + { 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00 }, + { 0xfe, 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e }, /*sw*/ + { 0xfe, 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e }, + { 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00 }, + { 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe }, + { 0xfe, 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0 }, /*sw*/ + { 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00 }, + { 0xfe, 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e }, + { 0xfe, 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0 }, + { 0xfe, 
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe } /*w*/
+};
+static unsigned char weak_keys_chksum[20] = {
+  0xD0, 0xCF, 0x07, 0x38, 0x93, 0x70, 0x8A, 0x83, 0x7D, 0xD7,
+  0x8A, 0x36, 0x65, 0x29, 0x6C, 0x1F, 0x7C, 0x3F, 0xD3, 0x41
+};
+
+
+
+/*
+ * Macro to swap bits across two words.
+ */
+#define DO_PERMUTATION(a, temp, b, offset, mask)	\
+    temp = ((a>>offset) ^ b) & mask;			\
+    b ^= temp;						\
+    a ^= temp<<offset;
+
+
+/*
+ * This performs the 'initial permutation' of the data to be encrypted
+ * or decrypted.  Additionally the resulting two words are rotated one bit
+ * to the left.
+ */
+#define INITIAL_PERMUTATION(left, temp, right)		\
+    DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f)	\
+    DO_PERMUTATION(left, temp, right, 16, 0x0000ffff)	\
+    DO_PERMUTATION(right, temp, left, 2, 0x33333333)	\
+    DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff)	\
+    right = (right << 1) | (right >> 31);		\
+    temp = (left ^ right) & 0xaaaaaaaa;			\
+    right ^= temp;					\
+    left ^= temp;					\
+    left = (left << 1) | (left >> 31);
+
+/*
+ * The 'inverse initial permutation'.
+ */
+#define FINAL_PERMUTATION(left, temp, right)		\
+    left = (left << 31) | (left >> 1);			\
+    temp = (left ^ right) & 0xaaaaaaaa;			\
+    left ^= temp;					\
+    right ^= temp;					\
+    right = (right << 31) | (right >> 1);		\
+    DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff)	\
+    DO_PERMUTATION(right, temp, left, 2, 0x33333333)	\
+    DO_PERMUTATION(left, temp, right, 16, 0x0000ffff)	\
+    DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f)
+
+
+/*
+ * A full DES round including 'expansion function', 'sbox substitution'
+ * and 'primitive function P' but without swapping the left and right word.
+ * Please note: The data in 'from' and 'to' is already rotated one bit to
+ * the left, done in the initial permutation.
+ */
+#define DES_ROUND(from, to, work, subkey)		\
+    work = from ^ *subkey++;				\
+    to ^= sbox8[  work      & 0x3f ];			\
+    to ^= sbox6[ (work>>8)  & 0x3f ];			\
+    to ^= sbox4[ (work>>16) & 0x3f ];			\
+    to ^= sbox2[ (work>>24) & 0x3f ];			\
+    work = ((from << 28) | (from >> 4)) ^ *subkey++;	\
+    to ^= sbox7[  work      & 0x3f ];			\
+    to ^= sbox5[ (work>>8)  & 0x3f ];			\
+    to ^= sbox3[ (work>>16) & 0x3f ];			\
+    to ^= sbox1[ (work>>24) & 0x3f ];
+
+/*
+ * Macros to convert 8 bytes from/to 32bit words.
+ */
+#define READ_64BIT_DATA(data, left, right)		\
+    left = buf_get_be32(data + 0);			\
+    right = buf_get_be32(data + 4);
+
+#define WRITE_64BIT_DATA(data, left, right)		\
+    buf_put_be32(data + 0, left);			\
+    buf_put_be32(data + 4, right);
+
+/*
+ * Handy macros for encryption and decryption of data
+ */
+#define des_ecb_encrypt(ctx, from, to)       des_ecb_crypt(ctx, from, to, 0)
+#define des_ecb_decrypt(ctx, from, to)       des_ecb_crypt(ctx, from, to, 1)
+#define tripledes_ecb_encrypt(ctx, from, to) tripledes_ecb_crypt(ctx,from,to,0)
+#define tripledes_ecb_decrypt(ctx, from, to) tripledes_ecb_crypt(ctx,from,to,1)
+
+
+
+
+
+
+/*
+ * des_key_schedule():  Calculate 16 subkey pairs (even/odd) for
+ *                      16 encryption rounds.
+ *                      To calculate subkeys for decryption the caller
+ *                      has to reorder the generated subkeys.
+ *
+ *    rawkey:     8 bytes of key data
+ *    subkey:     Array of at least 32 u32s.  Will be filled
+ *                with the calculated subkeys.
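+ *
+ *    A minimal sketch of that reordering, as done by des_setkey()
+ *    below (the variable names here are illustrative only):
+ *
+ *        u32 ek[32], dk[32];
+ *        int i;
+ *        des_key_schedule(rawkey, ek);
+ *        for (i = 0; i < 32; i += 2)
+ *          {
+ *            dk[i]   = ek[30-i];
+ *            dk[i+1] = ek[31-i];
+ *          }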
+ * + */ +static void +des_key_schedule (const byte * rawkey, u32 * subkey) +{ + u32 left, right, work; + int round; + + READ_64BIT_DATA (rawkey, left, right) + + DO_PERMUTATION (right, work, left, 4, 0x0f0f0f0f) + DO_PERMUTATION (right, work, left, 0, 0x10101010) + + left = ((leftkey_swap[(left >> 0) & 0xf] << 3) + | (leftkey_swap[(left >> 8) & 0xf] << 2) + | (leftkey_swap[(left >> 16) & 0xf] << 1) + | (leftkey_swap[(left >> 24) & 0xf]) + | (leftkey_swap[(left >> 5) & 0xf] << 7) + | (leftkey_swap[(left >> 13) & 0xf] << 6) + | (leftkey_swap[(left >> 21) & 0xf] << 5) + | (leftkey_swap[(left >> 29) & 0xf] << 4)); + + left &= 0x0fffffff; + + right = ((rightkey_swap[(right >> 1) & 0xf] << 3) + | (rightkey_swap[(right >> 9) & 0xf] << 2) + | (rightkey_swap[(right >> 17) & 0xf] << 1) + | (rightkey_swap[(right >> 25) & 0xf]) + | (rightkey_swap[(right >> 4) & 0xf] << 7) + | (rightkey_swap[(right >> 12) & 0xf] << 6) + | (rightkey_swap[(right >> 20) & 0xf] << 5) + | (rightkey_swap[(right >> 28) & 0xf] << 4)); + + right &= 0x0fffffff; + + for (round = 0; round < 16; ++round) + { + left = ((left << encrypt_rotate_tab[round]) + | (left >> (28 - encrypt_rotate_tab[round]))) & 0x0fffffff; + right = ((right << encrypt_rotate_tab[round]) + | (right >> (28 - encrypt_rotate_tab[round]))) & 0x0fffffff; + + *subkey++ = (((left << 4) & 0x24000000) + | ((left << 28) & 0x10000000) + | ((left << 14) & 0x08000000) + | ((left << 18) & 0x02080000) + | ((left << 6) & 0x01000000) + | ((left << 9) & 0x00200000) + | ((left >> 1) & 0x00100000) + | ((left << 10) & 0x00040000) + | ((left << 2) & 0x00020000) + | ((left >> 10) & 0x00010000) + | ((right >> 13) & 0x00002000) + | ((right >> 4) & 0x00001000) + | ((right << 6) & 0x00000800) + | ((right >> 1) & 0x00000400) + | ((right >> 14) & 0x00000200) + | (right & 0x00000100) + | ((right >> 5) & 0x00000020) + | ((right >> 10) & 0x00000010) + | ((right >> 3) & 0x00000008) + | ((right >> 18) & 0x00000004) + | ((right >> 26) & 0x00000002) + | ((right >> 24) & 0x00000001)); + + *subkey++ = (((left << 15) & 0x20000000) + | ((left << 17) & 0x10000000) + | ((left << 10) & 0x08000000) + | ((left << 22) & 0x04000000) + | ((left >> 2) & 0x02000000) + | ((left << 1) & 0x01000000) + | ((left << 16) & 0x00200000) + | ((left << 11) & 0x00100000) + | ((left << 3) & 0x00080000) + | ((left >> 6) & 0x00040000) + | ((left << 15) & 0x00020000) + | ((left >> 4) & 0x00010000) + | ((right >> 2) & 0x00002000) + | ((right << 8) & 0x00001000) + | ((right >> 14) & 0x00000808) + | ((right >> 9) & 0x00000400) + | ((right) & 0x00000200) + | ((right << 7) & 0x00000100) + | ((right >> 7) & 0x00000020) + | ((right >> 3) & 0x00000011) + | ((right << 2) & 0x00000004) + | ((right >> 21) & 0x00000002)); + } +} + + +/* + * Fill a DES context with subkeys calculated from a 64bit key. + * Does not check parity bits, but simply ignore them. + * Does not check for weak keys. 
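+ * Callers that want weak keys rejected have to run is_weak_key() on
+ * the raw key themselves, as do_des_setkey() at the end of this file
+ * does.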
+ */
+static int
+des_setkey (struct _des_ctx *ctx, const byte * key)
+{
+  static const char *selftest_failed;
+  int i;
+
+  if (!fips_mode () && !initialized)
+    {
+      initialized = 1;
+      selftest_failed = selftest ();
+
+      if (selftest_failed)
+        log_error ("%s\n", selftest_failed);
+    }
+  if (selftest_failed)
+    return GPG_ERR_SELFTEST_FAILED;
+
+  des_key_schedule (key, ctx->encrypt_subkeys);
+  _gcry_burn_stack (32);
+
+  for(i=0; i<32; i+=2)
+    {
+      ctx->decrypt_subkeys[i]   = ctx->encrypt_subkeys[30-i];
+      ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[31-i];
+    }
+
+  return 0;
+}
+
+
+
+/*
+ * Electronic Codebook Mode DES encryption/decryption of data according
+ * to 'mode'.
+ */
+static int
+des_ecb_crypt (struct _des_ctx *ctx, const byte * from, byte * to, int mode)
+{
+  u32 left, right, work;
+  u32 *keys;
+
+  keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
+
+  READ_64BIT_DATA (from, left, right)
+  INITIAL_PERMUTATION (left, work, right)
+
+  DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+  DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+  DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+  DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+  DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+  DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+  DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+  DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+
+  FINAL_PERMUTATION (right, work, left)
+  WRITE_64BIT_DATA (to, right, left)
+
+  return 0;
+}
+
+
+
+/*
+ * Fill a Triple-DES context with subkeys calculated from two 64bit keys.
+ * Does not check the parity bits of the keys, but simply ignores them.
+ * Does not check for weak keys.
+ */
+static int
+tripledes_set2keys (struct _tripledes_ctx *ctx,
+                    const byte * key1,
+                    const byte * key2)
+{
+  int i;
+
+  des_key_schedule (key1, ctx->encrypt_subkeys);
+  des_key_schedule (key2, &(ctx->decrypt_subkeys[32]));
+  _gcry_burn_stack (32);
+
+  for(i=0; i<32; i+=2)
+    {
+      ctx->decrypt_subkeys[i]    = ctx->encrypt_subkeys[30-i];
+      ctx->decrypt_subkeys[i+1]  = ctx->encrypt_subkeys[31-i];
+
+      ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i];
+      ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i];
+
+      ctx->encrypt_subkeys[i+64] = ctx->encrypt_subkeys[i];
+      ctx->encrypt_subkeys[i+65] = ctx->encrypt_subkeys[i+1];
+
+      ctx->decrypt_subkeys[i+64] = ctx->decrypt_subkeys[i];
+      ctx->decrypt_subkeys[i+65] = ctx->decrypt_subkeys[i+1];
+    }
+
+  return 0;
+}
+
+
+
+/*
+ * Fill a Triple-DES context with subkeys calculated from three 64bit keys.
+ * Does not check the parity bits of the keys, but simply ignores them.
+ * Does not check for weak keys.
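+ *
+ * Note that two-key Triple-DES is the special case key3 == key1, i.e.
+ * tripledes_set2keys(ctx, k1, k2) above fills the context with the
+ * same subkeys as:
+ *
+ *     tripledes_set3keys(ctx, k1, k2, k1);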
+ */ +static int +tripledes_set3keys (struct _tripledes_ctx *ctx, + const byte * key1, + const byte * key2, + const byte * key3) +{ + static const char *selftest_failed; + int i; + + if (!fips_mode () && !initialized) + { + initialized = 1; + selftest_failed = selftest (); + + if (selftest_failed) + log_error ("%s\n", selftest_failed); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + des_key_schedule (key1, ctx->encrypt_subkeys); + des_key_schedule (key2, &(ctx->decrypt_subkeys[32])); + des_key_schedule (key3, &(ctx->encrypt_subkeys[64])); + _gcry_burn_stack (32); + + for(i=0; i<32; i+=2) + { + ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[94-i]; + ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[95-i]; + + ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i]; + ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i]; + + ctx->decrypt_subkeys[i+64] = ctx->encrypt_subkeys[30-i]; + ctx->decrypt_subkeys[i+65] = ctx->encrypt_subkeys[31-i]; + } + + return 0; +} + + + +#ifdef USE_AMD64_ASM + +/* Assembly implementation of triple-DES. */ +extern void _gcry_3des_amd64_crypt_block(const void *keys, byte *out, + const byte *in); + +/* These assembly implementations process three blocks in parallel. */ +extern void _gcry_3des_amd64_ctr_enc(const void *keys, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_3des_amd64_cbc_dec(const void *keys, byte *out, + const byte *in, byte *iv); + +extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out, + const byte *in, byte *iv); + +#define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *)) + +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +static inline void +call_sysv_fn (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. */ + asm volatile ("callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : + : "cc", "memory", "r8", "r9", "r10", "r11"); +} +#endif + +/* + * Electronic Codebook Mode Triple-DES encryption/decryption of data + * according to 'mode'. Sometimes this mode is named 'EDE' mode + * (Encryption-Decryption-Encryption). + */ +static inline int +tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from, + byte * to, int mode) +{ + u32 *keys; + + keys = mode ? 
ctx->decrypt_subkeys : ctx->encrypt_subkeys; + +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_3des_amd64_crypt_block, keys, to, from, NULL); +#else + _gcry_3des_amd64_crypt_block(keys, to, from); +#endif + + return 0; +} + +static inline void +tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_3des_amd64_ctr_enc, keys, out, in, ctr); +#else + _gcry_3des_amd64_ctr_enc(keys, out, in, ctr); +#endif +} + +static inline void +tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_3des_amd64_cbc_dec, keys, out, in, iv); +#else + _gcry_3des_amd64_cbc_dec(keys, out, in, iv); +#endif +} + +static inline void +tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_3des_amd64_cfb_dec, keys, out, in, iv); +#else + _gcry_3des_amd64_cfb_dec(keys, out, in, iv); +#endif +} + +#else /*USE_AMD64_ASM*/ + +#define TRIPLEDES_ECB_BURN_STACK 32 + +/* + * Electronic Codebook Mode Triple-DES encryption/decryption of data + * according to 'mode'. Sometimes this mode is named 'EDE' mode + * (Encryption-Decryption-Encryption). + */ +static int +tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from, + byte * to, int mode) +{ + u32 left, right, work; + u32 *keys; + + keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys; + + READ_64BIT_DATA (from, left, right) + INITIAL_PERMUTATION (left, work, right) + + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + + FINAL_PERMUTATION (right, work, left) + WRITE_64BIT_DATA (to, right, left) + + return 0; +} + +#endif /*!USE_AMD64_ASM*/ + + + +/* Bulk encryption 
of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size DES_BLOCKSIZE. */ +void +_gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + struct _tripledes_ctx *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[DES_BLOCKSIZE]; + int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK; + int i; + +#ifdef USE_AMD64_ASM + { + int asm_burn_depth = 9 * sizeof(void *); + + if (nblocks >= 3 && burn_stack_depth < asm_burn_depth) + burn_stack_depth = asm_burn_depth; + + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr); + + nblocks -= 3; + outbuf += 3 * DES_BLOCKSIZE; + inbuf += 3 * DES_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + tripledes_ecb_encrypt (ctx, ctr, tmpbuf); + /* XOR the input with the encrypted counter and store in output. */ + buf_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE); + outbuf += DES_BLOCKSIZE; + inbuf += DES_BLOCKSIZE; + /* Increment the counter. */ + for (i = DES_BLOCKSIZE; i > 0; i--) + { + ctr[i-1]++; + if (ctr[i-1]) + break; + } + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CBC mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + struct _tripledes_ctx *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char savebuf[DES_BLOCKSIZE]; + int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK; + +#ifdef USE_AMD64_ASM + { + int asm_burn_depth = 10 * sizeof(void *); + + if (nblocks >= 3 && burn_stack_depth < asm_burn_depth) + burn_stack_depth = asm_burn_depth; + + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + tripledes_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv); + + nblocks -= 3; + outbuf += 3 * DES_BLOCKSIZE; + inbuf += 3 * DES_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + tripledes_ecb_decrypt (ctx, inbuf, savebuf); + + buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE); + inbuf += DES_BLOCKSIZE; + outbuf += DES_BLOCKSIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + struct _tripledes_ctx *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK; + +#ifdef USE_AMD64_ASM + { + int asm_burn_depth = 9 * sizeof(void *); + + if (nblocks >= 3 && burn_stack_depth < asm_burn_depth) + burn_stack_depth = asm_burn_depth; + + /* Process data in 3 block chunks. 
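+
+         Note that CFB decryption keeps using the *encryption* subkeys:
+         full-block CFB only ever encrypts the previous ciphertext
+         block (or the IV for the first block), i.e. per block
+
+             P[i] = E(K, C[i-1]) ^ C[i]       (with C[0] = IV)
+
+         which is also why the generic fallback loop below calls
+         tripledes_ecb_encrypt().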
*/
+      while (nblocks >= 3)
+        {
+          tripledes_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
+
+          nblocks -= 3;
+          outbuf += 3 * DES_BLOCKSIZE;
+          inbuf  += 3 * DES_BLOCKSIZE;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      tripledes_ecb_encrypt (ctx, iv, iv);
+      buf_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
+      outbuf += DES_BLOCKSIZE;
+      inbuf  += DES_BLOCKSIZE;
+    }
+
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/*
+ * Check whether the 8 byte key is weak.
+ * Does not check the parity bits of the key but simply ignores them.
+ */
+static int
+is_weak_key ( const byte *key )
+{
+  byte work[8];
+  int i, left, right, middle, cmp_result;
+
+  /* clear parity bits */
+  for(i=0; i<8; ++i)
+     work[i] = key[i] & 0xfe;
+
+  /* binary search in the weak key table */
+  left = 0;
+  right = 63;
+  while(left <= right)
+    {
+      middle = (left + right) / 2;
+
+      if ( !(cmp_result=working_memcmp(work, weak_keys[middle], 8)) )
+          return -1;
+
+      if ( cmp_result > 0 )
+          left = middle + 1;
+      else
+          right = middle - 1;
+    }
+
+  return 0;
+}
+
+
+/* Alternative setkey for selftests; needs a larger key than the default. */
+static gcry_err_code_t
+bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen)
+{
+  static const unsigned char key[24] ATTR_ALIGNED_16 = {
+      0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22,
+      0x18,0x2A,0x39,0x47,0x5E,0x6F,0x75,0x82
+    };
+
+  (void)__key;
+  (void)__keylen;
+
+  return do_tripledes_setkey(context, key, sizeof(key));
+}
+
+
+/* Run the self-tests for DES-CTR; tests the IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 3+1;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_ctr("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, &_gcry_3des_ctr_enc, nblocks, blocksize,
+           context_size);
+}
+
+
+/* Run the self-tests for DES-CBC; tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 3+2;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_cbc("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, &_gcry_3des_cbc_dec, nblocks, blocksize,
+           context_size);
+}
+
+
+/* Run the self-tests for DES-CFB; tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+  const int nblocks = 3+2;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_cfb("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, &_gcry_3des_cfb_dec, nblocks, blocksize,
+           context_size);
+}
+
+
+/*
+ * Performs a selftest of this DES/Triple-DES implementation.
+ * Returns a string with the error text on failure.
+ * Returns NULL if all is ok.
+ */
+static const char *
+selftest (void)
+{
+  const char *r;
+
+  /*
+   * Check if 'u32' is really 32 bits wide.  This DES / 3DES implementation
+   * needs this.
+ */ + if (sizeof (u32) != 4) + return "Wrong word size for DES configured."; + + /* + * DES Maintenance Test + */ + { + int i; + byte key[8] = + {0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55}; + byte input[8] = + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + byte result[8] = + {0x24, 0x6e, 0x9d, 0xb9, 0xc5, 0x50, 0x38, 0x1a}; + byte temp1[8], temp2[8], temp3[8]; + des_ctx des; + + for (i = 0; i < 64; ++i) + { + des_setkey (des, key); + des_ecb_encrypt (des, input, temp1); + des_ecb_encrypt (des, temp1, temp2); + des_setkey (des, temp2); + des_ecb_decrypt (des, temp1, temp3); + memcpy (key, temp3, 8); + memcpy (input, temp1, 8); + } + if (memcmp (temp3, result, 8)) + return "DES maintenance test failed."; + } + + + /* + * Self made Triple-DES test (Does somebody know an official test?) + */ + { + int i; + byte input[8] = + {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}; + byte key1[8] = + {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0}; + byte key2[8] = + {0x11, 0x22, 0x33, 0x44, 0xff, 0xaa, 0xcc, 0xdd}; + byte result[8] = + {0x7b, 0x38, 0x3b, 0x23, 0xa2, 0x7d, 0x26, 0xd3}; + + tripledes_ctx des3; + + for (i = 0; i < 16; ++i) + { + tripledes_set2keys (des3, key1, key2); + tripledes_ecb_encrypt (des3, input, key1); + tripledes_ecb_decrypt (des3, input, key2); + tripledes_set3keys (des3, key1, input, key2); + tripledes_ecb_encrypt (des3, input, input); + } + if (memcmp (input, result, 8)) + return "Triple-DES test failed."; + } + + /* + * More Triple-DES test. These are testvectors as used by SSLeay, + * thanks to Jeroen C. van Gelderen. + */ + { + static const struct { byte key[24]; byte plain[8]; byte cipher[8]; } + testdata[] = { + { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 }, + { 0x95,0xF8,0xA5,0xE5,0xDD,0x31,0xD9,0x00 }, + { 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00 } + }, + + { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 }, + { 0x9D,0x64,0x55,0x5A,0x9A,0x10,0xB8,0x52, }, + { 0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00 } + }, + { { 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E, + 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E, + 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E }, + { 0x51,0x45,0x4B,0x58,0x2D,0xDF,0x44,0x0A }, + { 0x71,0x78,0x87,0x6E,0x01,0xF1,0x9B,0x2A } + }, + { { 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6, + 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6, + 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6 }, + { 0x42,0xFD,0x44,0x30,0x59,0x57,0x7F,0xA2 }, + { 0xAF,0x37,0xFB,0x42,0x1F,0x8C,0x40,0x95 } + }, + { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF, + 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF, + 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF }, + { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 }, + { 0x3D,0x12,0x4F,0xE2,0x19,0x8B,0xA3,0x18 } + }, + { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF, + 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55, + 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF }, + { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 }, + { 0xFB,0xAB,0xA1,0xFF,0x9D,0x05,0xE9,0xB1 } + }, + { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF, + 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55, + 0xFE,0xDC,0xBA,0x98,0x76,0x54,0x32,0x10 }, + { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 }, + { 0x18,0xd7,0x48,0xe5,0x63,0x62,0x05,0x72 } + }, + { { 0x03,0x52,0x02,0x07,0x67,0x20,0x82,0x17, + 0x86,0x02,0x87,0x66,0x59,0x08,0x21,0x98, + 0x64,0x05,0x6A,0xBD,0xFE,0xA9,0x34,0x57 }, + { 0x73,0x71,0x75,0x69,0x67,0x67,0x6C,0x65 }, + { 0xc0,0x7d,0x2a,0x0f,0xa5,0x66,0xfa,0x30 } + }, + { { 
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+          0x80,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+          0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02 },
+        { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
+        { 0xe6,0xe6,0xdd,0x5b,0x7e,0x72,0x29,0x74 }
+      },
+      { { 0x10,0x46,0x10,0x34,0x89,0x98,0x80,0x20,
+          0x91,0x07,0xD0,0x15,0x89,0x19,0x01,0x01,
+          0x19,0x07,0x92,0x10,0x98,0x1A,0x01,0x01 },
+        { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
+        { 0xe1,0xef,0x62,0xc3,0x32,0xfe,0x82,0x5b }
+      }
+    };
+
+    byte result[8];
+    int i;
+    tripledes_ctx des3;
+
+    for (i=0; i<sizeof(testdata)/sizeof(*testdata); ++i)
+      {
+        tripledes_set3keys (des3, testdata[i].key,
+                            testdata[i].key + 8, testdata[i].key + 16);
+
+        tripledes_ecb_encrypt (des3, testdata[i].plain, result);
+        if (memcmp (testdata[i].cipher, result, 8))
+          return "Triple-DES SSLeay test failed on encryption.";
+
+        tripledes_ecb_decrypt (des3, testdata[i].cipher, result);
+        if (memcmp (testdata[i].plain, result, 8))
+          return "Triple-DES SSLeay test failed on decryption.";
+      }
+  }
+
+  /*
+   * Check the weak key detection.  We simply assume that the table
+   * with weak keys is ok and check that every key in the table is
+   * detected... (This test is a little bit stupid.)
+   */
+  {
+    int i;
+    unsigned char *p;
+    gcry_md_hd_t h;
+
+    if (_gcry_md_open (&h, GCRY_MD_SHA1, 0))
+      return "SHA1 not available";
+
+    for (i = 0; i < 64; ++i)
+      _gcry_md_write (h, weak_keys[i], 8);
+    p = _gcry_md_read (h, GCRY_MD_SHA1);
+    i = memcmp (p, weak_keys_chksum, 20);
+    _gcry_md_close (h);
+    if (i)
+      return "weak key table defect";
+
+    for (i = 0; i < 64; ++i)
+      if (!is_weak_key(weak_keys[i]))
+        return "DES weak key detection failed";
+  }
+
+  if ( (r = selftest_cbc ()) )
+    return r;
+
+  if ( (r = selftest_cfb ()) )
+    return r;
+
+  if ( (r = selftest_ctr ()) )
+    return r;
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+do_tripledes_setkey ( void *context, const byte *key, unsigned keylen )
+{
+  struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
+
+  if( keylen != 24 )
+    return GPG_ERR_INV_KEYLEN;
+
+  tripledes_set3keys ( ctx, key, key+8, key+16);
+
+  if (ctx->flags.no_weak_key)
+    ; /* Detection has been disabled.
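+         The flag is set through the set_extra_info hook when the user
+         asked for CIPHER_INFO_NO_WEAK_KEY; see
+         do_tripledes_set_extra_info() below.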
*/ + else if (is_weak_key (key) || is_weak_key (key+8) || is_weak_key (key+16)) + { + _gcry_burn_stack (64); + return GPG_ERR_WEAK_KEY; + } + _gcry_burn_stack (64); + + return GPG_ERR_NO_ERROR; +} + + +static gcry_err_code_t +do_tripledes_set_extra_info (void *context, int what, + const void *buffer, size_t buflen) +{ + struct _tripledes_ctx *ctx = (struct _tripledes_ctx *)context; + gpg_err_code_t ec = 0; + + (void)buffer; + (void)buflen; + + switch (what) + { + case CIPHER_INFO_NO_WEAK_KEY: + ctx->flags.no_weak_key = 1; + break; + + default: + ec = GPG_ERR_INV_OP; + break; + } + return ec; +} + + +static unsigned int +do_tripledes_encrypt( void *context, byte *outbuf, const byte *inbuf ) +{ + struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context; + + tripledes_ecb_encrypt ( ctx, inbuf, outbuf ); + return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK; +} + +static unsigned int +do_tripledes_decrypt( void *context, byte *outbuf, const byte *inbuf ) +{ + struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context; + tripledes_ecb_decrypt ( ctx, inbuf, outbuf ); + return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK; +} + +static gcry_err_code_t +do_des_setkey (void *context, const byte *key, unsigned keylen) +{ + struct _des_ctx *ctx = (struct _des_ctx *) context; + + if (keylen != 8) + return GPG_ERR_INV_KEYLEN; + + des_setkey (ctx, key); + + if (is_weak_key (key)) { + _gcry_burn_stack (64); + return GPG_ERR_WEAK_KEY; + } + _gcry_burn_stack (64); + + return GPG_ERR_NO_ERROR; +} + + +static unsigned int +do_des_encrypt( void *context, byte *outbuf, const byte *inbuf ) +{ + struct _des_ctx *ctx = (struct _des_ctx *) context; + + des_ecb_encrypt ( ctx, inbuf, outbuf ); + return /*burn_stack*/ (32); +} + +static unsigned int +do_des_decrypt( void *context, byte *outbuf, const byte *inbuf ) +{ + struct _des_ctx *ctx = (struct _des_ctx *) context; + + des_ecb_decrypt ( ctx, inbuf, outbuf ); + return /*burn_stack*/ (32); +} + + + + +/* + Self-test section. + */ + + +/* Selftest for TripleDES. */ +static gpg_err_code_t +selftest_fips (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + (void)extended; /* No extended tests available. */ + + what = "low-level"; + errtxt = selftest (); + if (errtxt) + goto failed; + + /* The low-level self-tests are quite extensive and thus we can do + without high level tests. This is also justified because we have + no custom block code implementation for 3des but always use the + standard high level block code. */ + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cipher", GCRY_CIPHER_3DES, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_CIPHER_3DES: + ec = selftest_fips (extended, report); + break; + default: + ec = GPG_ERR_CIPHER_ALGO; + break; + + } + return ec; +} + + + +gcry_cipher_spec_t _gcry_cipher_spec_des = + { + GCRY_CIPHER_DES, {0, 0}, + "DES", NULL, NULL, 8, 64, sizeof (struct _des_ctx), + do_des_setkey, do_des_encrypt, do_des_decrypt + }; + +static gcry_cipher_oid_spec_t oids_tripledes[] = + { + { "1.2.840.113549.3.7", GCRY_CIPHER_MODE_CBC }, + /* Teletrust specific OID for 3DES. 
*/
+    { "1.3.36.3.1.3.2.1",        GCRY_CIPHER_MODE_CBC },
+    /* pbeWithSHAAnd3_KeyTripleDES_CBC */
+    { "1.2.840.113549.1.12.1.3", GCRY_CIPHER_MODE_CBC },
+    { NULL }
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_tripledes =
+  {
+    GCRY_CIPHER_3DES, {0, 1},
+    "3DES", NULL, oids_tripledes, 8, 192, sizeof (struct _tripledes_ctx),
+    do_tripledes_setkey, do_tripledes_encrypt, do_tripledes_decrypt,
+    NULL, NULL,
+    run_selftests,
+    do_tripledes_set_extra_info
+  };
diff --git a/libotr/libgcrypt-1.8.7/cipher/dsa-common.c b/libotr/libgcrypt-1.8.7/cipher/dsa-common.c
new file mode 100644
index 0000000..fe49248
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/dsa-common.c
@@ -0,0 +1,418 @@
+/* dsa-common.c - Common code for DSA
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/*
+ * Modify K, so that the computation time difference can be kept small,
+ * by making K large enough.
+ *
+ * Originally, (EC)DSA computation requires k where 0 < k < q.  Here,
+ * we add q (the order), to keep k in the range q < k < 2*q (or, by
+ * adding q once more, to keep k in the range 2*q < k < 3*q), so that
+ * the timing difference of the EC multiply (or exponentiation)
+ * operation can be small.  The result of the (EC)DSA computation is
+ * the same.
+ */
+void
+_gcry_dsa_modify_k (gcry_mpi_t k, gcry_mpi_t q, int qbits)
+{
+  gcry_mpi_t k1 = mpi_new (qbits+2);
+
+  mpi_resize (k, (qbits+2+BITS_PER_MPI_LIMB-1) / BITS_PER_MPI_LIMB);
+  k->nlimbs = k->alloced;
+  mpi_add (k, k, q);
+  mpi_add (k1, k, q);
+  mpi_set_cond (k, k1, !mpi_test_bit (k, qbits));
+
+  mpi_free (k1);
+}
+
+/*
+ * Generate a random secret exponent K less than Q.
+ * Note that ECDSA uses this code also to generate D.
+ */
+gcry_mpi_t
+_gcry_dsa_gen_k (gcry_mpi_t q, int security_level)
+{
+  gcry_mpi_t k = mpi_alloc_secure (mpi_get_nlimbs (q));
+  unsigned int nbits = mpi_get_nbits (q);
+  unsigned int nbytes = (nbits+7)/8;
+  char *rndbuf = NULL;
+
+  /* To learn why we don't use mpi_mod to get the requested bit size,
+     read the paper: "The Insecurity of the Digital Signature
+     Algorithm with Partially Known Nonces" by Nguyen and Shparlinski.
+     Journal of Cryptology, New York. Vol 15, nr 3 (2003)  */
+
+  if (DBG_CIPHER)
+    log_debug ("choosing a random k of %u bits at seclevel %d\n",
+               nbits, security_level);
+  for (;;)
+    {
+      if ( !rndbuf || nbits < 32 )
+        {
+          xfree (rndbuf);
+          rndbuf = _gcry_random_bytes_secure (nbytes, security_level);
+        }
+      else
+        { /* Change only some of the higher bits.  We could improve
+             this by directly requesting more memory at the first call
+             to get_random_bytes() and use these extra bytes here.
+             However the required management code is more complex and
+             thus it is better to use this simple method.  */
+          char *pp = _gcry_random_bytes_secure (4, security_level);
+          memcpy (rndbuf, pp, 4);
+          xfree (pp);
+        }
+      _gcry_mpi_set_buffer (k, rndbuf, nbytes, 0);
+
+      /* Make sure we have the requested number of bits.  This code
+         looks a bit funny but it is easy to understand if you
+         consider that mpi_set_highbit clears all higher bits.  We
+         don't have a clear_highbit, thus we first set the high bit
+         and then clear it again.  */
+      if (mpi_test_bit (k, nbits-1))
+        mpi_set_highbit (k, nbits-1);
+      else
+        {
+          mpi_set_highbit (k, nbits-1);
+          mpi_clear_bit (k, nbits-1);
+        }
+
+      if (!(mpi_cmp (k, q) < 0))    /* check: k < q */
+        {
+          if (DBG_CIPHER)
+            log_debug ("\tk too large - again\n");
+          continue; /* no */
+        }
+      if (!(mpi_cmp_ui (k, 0) > 0)) /* check: k > 0 */
+        {
+          if (DBG_CIPHER)
+            log_debug ("\tk is zero - again\n");
+          continue; /* no */
+        }
+      break;    /* okay */
+    }
+  xfree (rndbuf);
+
+  return k;
+}
+
+
+/* Turn VALUE into an octet string and store it in an allocated buffer
+   at R_FRAME.  If the resulting octet string is shorter than NBYTES
+   the result will be left padded with zeroes.  If VALUE does not fit
+   into NBYTES an error code is returned.  */
+static gpg_err_code_t
+int2octets (unsigned char **r_frame, gcry_mpi_t value, size_t nbytes)
+{
+  gpg_err_code_t rc;
+  size_t nframe, noff, n;
+  unsigned char *frame;
+
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, NULL, 0, &nframe, value);
+  if (rc)
+    return rc;
+  if (nframe > nbytes)
+    return GPG_ERR_TOO_LARGE; /* Value too long to fit into NBYTES.  */
+
+  noff = (nframe < nbytes)? nbytes - nframe : 0;
+  n = nframe + noff;
+  frame = mpi_is_secure (value)? xtrymalloc_secure (n) : xtrymalloc (n);
+  if (!frame)
+    return gpg_err_code_from_syserror ();
+  if (noff)
+    memset (frame, 0, noff);
+  nframe += noff;
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, frame+noff, nframe-noff, NULL, value);
+  if (rc)
+    {
+      xfree (frame);
+      return rc;
+    }
+
+  *r_frame = frame;
+  return 0;
+}
+
+
+/* Convert the bit string BITS of length NBITS into an octet string
+   with a length of (QBITS+7)/8 bytes.  On success store the result at
+   R_FRAME.  */
+static gpg_err_code_t
+bits2octets (unsigned char **r_frame,
+             const void *bits, unsigned int nbits,
+             gcry_mpi_t q, unsigned int qbits)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t z1;
+
+  /* z1 = bits2int (b) */
+  rc = _gcry_mpi_scan (&z1, GCRYMPI_FMT_USG, bits, (nbits+7)/8, NULL);
+  if (rc)
+    return rc;
+  if (nbits > qbits)
+    mpi_rshift (z1, z1, nbits - qbits);
+
+  /* z2 = z1 mod q */
+  if (mpi_cmp (z1, q) >= 0)
+    mpi_sub (z1, z1, q);
+
+  /* Convert to an octet string.  */
+  rc = int2octets (r_frame, z1, (qbits+7)/8);
+
+  mpi_free (z1);
+  return rc;
+}
+
+
+/*
+ * Generate a deterministic secret exponent K less than DSA_Q.  H1 is
+ * the digest to be signed, with a length of HLEN bytes.  HALGO is the
+ * algorithm used to create the hash.  On success the value for K is
+ * stored at R_K.
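+ *
+ * A quick size example (the numbers are illustrative, not from the
+ * code): for HALGO = SHA-256 a caller passes HLEN = 32, and with a
+ * 256-bit Q the loop in step h below appends one 32-byte V per
+ * iteration, so a single iteration already provides the qbits bits
+ * required for T.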
+ */ +gpg_err_code_t +_gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k, + gcry_mpi_t dsa_q, gcry_mpi_t dsa_x, + const unsigned char *h1, unsigned int hlen, + int halgo, unsigned int extraloops) +{ + gpg_err_code_t rc; + unsigned char *V = NULL; + unsigned char *K = NULL; + unsigned char *x_buf = NULL; + unsigned char *h1_buf = NULL; + gcry_md_hd_t hd = NULL; + unsigned char *t = NULL; + gcry_mpi_t k = NULL; + unsigned int tbits, qbits; + int i; + + qbits = mpi_get_nbits (dsa_q); + + if (!qbits || !h1 || !hlen) + return GPG_ERR_EINVAL; + + if (_gcry_md_get_algo_dlen (halgo) != hlen) + return GPG_ERR_DIGEST_ALGO; + + /* Step b: V = 0x01 0x01 0x01 ... 0x01 */ + V = xtrymalloc (hlen); + if (!V) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + for (i=0; i < hlen; i++) + V[i] = 1; + + /* Step c: K = 0x00 0x00 0x00 ... 0x00 */ + K = xtrycalloc (1, hlen); + if (!K) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + rc = int2octets (&x_buf, dsa_x, (qbits+7)/8); + if (rc) + goto leave; + + rc = bits2octets (&h1_buf, h1, hlen*8, dsa_q, qbits); + if (rc) + goto leave; + + /* Create a handle to compute the HMACs. */ + rc = _gcry_md_open (&hd, halgo, (GCRY_MD_FLAG_SECURE | GCRY_MD_FLAG_HMAC)); + if (rc) + goto leave; + + /* Step d: K = HMAC_K(V || 0x00 || int2octets(x) || bits2octets(h1) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + _gcry_md_write (hd, "", 1); + _gcry_md_write (hd, x_buf, (qbits+7)/8); + _gcry_md_write (hd, h1_buf, (qbits+7)/8); + memcpy (K, _gcry_md_read (hd, 0), hlen); + + /* Step e: V = HMAC_K(V) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + memcpy (V, _gcry_md_read (hd, 0), hlen); + + /* Step f: K = HMAC_K(V || 0x01 || int2octets(x) || bits2octets(h1) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + _gcry_md_write (hd, "\x01", 1); + _gcry_md_write (hd, x_buf, (qbits+7)/8); + _gcry_md_write (hd, h1_buf, (qbits+7)/8); + memcpy (K, _gcry_md_read (hd, 0), hlen); + + /* Step g: V = HMAC_K(V) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + memcpy (V, _gcry_md_read (hd, 0), hlen); + + /* Step h. */ + t = xtrymalloc_secure ((qbits+7)/8+hlen); + if (!t) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + again: + for (tbits = 0; tbits < qbits;) + { + /* V = HMAC_K(V) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + memcpy (V, _gcry_md_read (hd, 0), hlen); + + /* T = T || V */ + memcpy (t+(tbits+7)/8, V, hlen); + tbits += 8*hlen; + } + + /* k = bits2int (T) */ + mpi_free (k); + k = NULL; + rc = _gcry_mpi_scan (&k, GCRYMPI_FMT_USG, t, (tbits+7)/8, NULL); + if (rc) + goto leave; + if (tbits > qbits) + mpi_rshift (k, k, tbits - qbits); + + /* Check: k < q and k > 1 */ + if (!(mpi_cmp (k, dsa_q) < 0 && mpi_cmp_ui (k, 0) > 0)) + { + /* K = HMAC_K(V || 0x00) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + _gcry_md_write (hd, "", 1); + memcpy (K, _gcry_md_read (hd, 0), hlen); + + /* V = HMAC_K(V) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + memcpy (V, _gcry_md_read (hd, 0), hlen); + + goto again; + } + + /* The caller may have requested that we introduce some extra loops. + This is for example useful if the caller wants another value for + K because the last returned one yielded an R of 0. 
Because this
+     is very unlikely we implement it in a straightforward way.  */
+  if (extraloops)
+    {
+      extraloops--;
+
+      /* K = HMAC_K(V || 0x00) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      _gcry_md_write (hd, "", 1);
+      memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+      /* V = HMAC_K(V) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+      goto again;
+    }
+
+  /* log_mpidump ("  k", k); */
+
+ leave:
+  xfree (t);
+  _gcry_md_close (hd);
+  xfree (h1_buf);
+  xfree (x_buf);
+  xfree (K);
+  xfree (V);
+
+  if (rc)
+    mpi_free (k);
+  else
+    *r_k = k;
+  return rc;
+}
+
+/*
+ * Truncate an opaque hash value to QBITS for DSA.
+ * Non-opaque input is not truncated, in the hope that the caller
+ * knows what is being passed.  It is not possible to correctly
+ * truncate non-opaque inputs.
+ */
+gpg_err_code_t
+_gcry_dsa_normalize_hash (gcry_mpi_t input,
+                          gcry_mpi_t *out,
+                          unsigned int qbits)
+{
+  gpg_err_code_t rc = 0;
+  const void *abuf;
+  unsigned int abits;
+  gcry_mpi_t hash;
+
+  if (mpi_is_opaque (input))
+    {
+      abuf = mpi_get_opaque (input, &abits);
+      rc = _gcry_mpi_scan (&hash, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL);
+      if (rc)
+        return rc;
+      if (abits > qbits)
+        mpi_rshift (hash, hash, abits - qbits);
+    }
+  else
+    hash = input;
+
+  *out = hash;
+
+  return rc;
+}
diff --git a/libotr/libgcrypt-1.8.7/cipher/dsa.c b/libotr/libgcrypt-1.8.7/cipher/dsa.c
new file mode 100644
index 0000000..b93e385
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/dsa.c
@@ -0,0 +1,1392 @@
+/* dsa.c - DSA signature algorithm
+ * Copyright (C) 1998, 2000, 2001, 2002, 2003,
+ *               2006, 2008  Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+typedef struct
+{
+  gcry_mpi_t p;     /* prime */
+  gcry_mpi_t q;     /* group order */
+  gcry_mpi_t g;     /* group generator */
+  gcry_mpi_t y;     /* g^x mod p */
+} DSA_public_key;
+
+
+typedef struct
+{
+  gcry_mpi_t p;     /* prime */
+  gcry_mpi_t q;     /* group order */
+  gcry_mpi_t g;     /* group generator */
+  gcry_mpi_t y;     /* g^x mod p */
+  gcry_mpi_t x;     /* secret exponent */
+} DSA_secret_key;
+
+
+/* A structure used to hold domain parameters.  */
+typedef struct
+{
+  gcry_mpi_t p;     /* prime */
+  gcry_mpi_t q;     /* group order */
+  gcry_mpi_t g;     /* group generator */
+} dsa_domain_t;
+
+
+static const char *dsa_names[] =
+  {
+    "dsa",
+    "openpgp-dsa",
+    NULL,
+  };
+
+
+/* A sample 1024 bit DSA key used for the selftests.  No longer
+ * used; kept only for reference.
*/ +#if 0 +static const char sample_secret_key_1024[] = +"(private-key" +" (dsa" +" (p #00AD7C0025BA1A15F775F3F2D673718391D00456978D347B33D7B49E7F32EDAB" +" 96273899DD8B2BB46CD6ECA263FAF04A28903503D59062A8865D2AE8ADFB5191" +" CF36FFB562D0E2F5809801A1F675DAE59698A9E01EFE8D7DCFCA084F4C6F5A44" +" 44D499A06FFAEA5E8EF5E01F2FD20A7B7EF3F6968AFBA1FB8D91F1559D52D8777B#)" +" (q #00EB7B5751D25EBBB7BD59D920315FD840E19AEBF9#)" +" (g #1574363387FDFD1DDF38F4FBE135BB20C7EE4772FB94C337AF86EA8E49666503" +" AE04B6BE81A2F8DD095311E0217ACA698A11E6C5D33CCDAE71498ED35D13991E" +" B02F09AB40BD8F4C5ED8C75DA779D0AE104BC34C960B002377068AB4B5A1F984" +" 3FBA91F537F1B7CAC4D8DD6D89B0D863AF7025D549F9C765D2FC07EE208F8D15#)" +" (y #64B11EF8871BE4AB572AA810D5D3CA11A6CDBC637A8014602C72960DB135BF46" +" A1816A724C34F87330FC9E187C5D66897A04535CC2AC9164A7150ABFA8179827" +" 6E45831AB811EEE848EBB24D9F5F2883B6E5DDC4C659DEF944DCFD80BF4D0A20" +" 42CAA7DC289F0C5A9D155F02D3D551DB741A81695B74D4C8F477F9C7838EB0FB#)" +" (x #11D54E4ADBD3034160F2CED4B7CD292A4EBF3EC0#)))"; +/* A sample 1024 bit DSA key used for the selftests (public only). */ +static const char sample_public_key_1024[] = +"(public-key" +" (dsa" +" (p #00AD7C0025BA1A15F775F3F2D673718391D00456978D347B33D7B49E7F32EDAB" +" 96273899DD8B2BB46CD6ECA263FAF04A28903503D59062A8865D2AE8ADFB5191" +" CF36FFB562D0E2F5809801A1F675DAE59698A9E01EFE8D7DCFCA084F4C6F5A44" +" 44D499A06FFAEA5E8EF5E01F2FD20A7B7EF3F6968AFBA1FB8D91F1559D52D8777B#)" +" (q #00EB7B5751D25EBBB7BD59D920315FD840E19AEBF9#)" +" (g #1574363387FDFD1DDF38F4FBE135BB20C7EE4772FB94C337AF86EA8E49666503" +" AE04B6BE81A2F8DD095311E0217ACA698A11E6C5D33CCDAE71498ED35D13991E" +" B02F09AB40BD8F4C5ED8C75DA779D0AE104BC34C960B002377068AB4B5A1F984" +" 3FBA91F537F1B7CAC4D8DD6D89B0D863AF7025D549F9C765D2FC07EE208F8D15#)" +" (y #64B11EF8871BE4AB572AA810D5D3CA11A6CDBC637A8014602C72960DB135BF46" +" A1816A724C34F87330FC9E187C5D66897A04535CC2AC9164A7150ABFA8179827" +" 6E45831AB811EEE848EBB24D9F5F2883B6E5DDC4C659DEF944DCFD80BF4D0A20" +" 42CAA7DC289F0C5A9D155F02D3D551DB741A81695B74D4C8F477F9C7838EB0FB#)))"; +#endif /*0*/ + +/* 2048 DSA key from RFC 6979 A.2.2 */ +static const char sample_public_key_2048[] = +"(public-key" +" (dsa" +" (p #9DB6FB5951B66BB6FE1E140F1D2CE5502374161FD6538DF1648218642F0B5C48C8F7A41AADFA187324B87674FA1822B00F1ECF8136943D7C55757264E5A1A44FFE012E9936E00C1D3E9310B01C7D179805D3058B2A9F4BB6F9716BFE6117C6B5B3CC4D9BE341104AD4A80AD6C94E005F4B993E14F091EB51743BF33050C38DE235567E1B34C3D6A5C0CEAA1A0F368213C3D19843D0B4B09DCB9FC72D39C8DE41F1BF14D4BB4563CA28371621CAD3324B6A2D392145BEBFAC748805236F5CA2FE92B871CD8F9C36D3292B5509CA8CAA77A2ADFC7BFD77DDA6F71125A7456FEA153E433256A2261C6A06ED3693797E7995FAD5AABBCFBE3EDA2741E375404AE25B#)" +" (q #F2C3119374CE76C9356990B465374A17F23F9ED35089BD969F61C6DDE9998C1F#)" +" (g #5C7FF6B06F8F143FE8288433493E4769C4D988ACE5BE25A0E24809670716C613D7B0CEE6932F8FAA7C44D2CB24523DA53FBE4F6EC3595892D1AA58C4328A06C46A15662E7EAA703A1DECF8BBB2D05DBE2EB956C142A338661D10461C0D135472085057F3494309FFA73C611F78B32ADBB5740C361C9F35BE90997DB2014E2EF5AA61782F52ABEB8BD6432C4DD097BC5423B285DAFB60DC364E8161F4A2A35ACA3A10B1C4D203CC76A470A33AFDCBDD92959859ABD8B56E1725252D78EAC66E71BA9AE3F1DD2487199874393CD4D832186800654760E1E34C09E4D155179F9EC0DC4473F996BDCE6EED1CABED8B6F116F7AD9CF505DF0F998E34AB27514B0FFE7#)" +" (y 
#667098C654426C78D7F8201EAC6C203EF030D43605032C2F1FA937E5237DBD949F34A0A2564FE126DC8B715C5141802CE0979C8246463C40E6B6BDAA2513FA611728716C2E4FD53BC95B89E69949D96512E873B9C8F8DFD499CC312882561ADECB31F658E934C0C197F2C4D96B05CBAD67381E7B768891E4DA3843D24D94CDFB5126E9B8BF21E8358EE0E0A30EF13FD6A664C0DCE3731F7FB49A4845A4FD8254687972A2D382599C9BAC4E0ED7998193078913032558134976410B89D2C171D123AC35FD977219597AA7D15C1A9A428E59194F75C721EBCBCFAE44696A499AFA74E04299F132026601638CB87AB79190D4A0986315DA8EEC6561C938996BEADF#)))"; + +static const char sample_secret_key_2048[] = +"(private-key" +" (dsa" +" (p #9DB6FB5951B66BB6FE1E140F1D2CE5502374161FD6538DF1648218642F0B5C48C8F7A41AADFA187324B87674FA1822B00F1ECF8136943D7C55757264E5A1A44FFE012E9936E00C1D3E9310B01C7D179805D3058B2A9F4BB6F9716BFE6117C6B5B3CC4D9BE341104AD4A80AD6C94E005F4B993E14F091EB51743BF33050C38DE235567E1B34C3D6A5C0CEAA1A0F368213C3D19843D0B4B09DCB9FC72D39C8DE41F1BF14D4BB4563CA28371621CAD3324B6A2D392145BEBFAC748805236F5CA2FE92B871CD8F9C36D3292B5509CA8CAA77A2ADFC7BFD77DDA6F71125A7456FEA153E433256A2261C6A06ED3693797E7995FAD5AABBCFBE3EDA2741E375404AE25B#)" +" (q #F2C3119374CE76C9356990B465374A17F23F9ED35089BD969F61C6DDE9998C1F#)" +" (g #5C7FF6B06F8F143FE8288433493E4769C4D988ACE5BE25A0E24809670716C613D7B0CEE6932F8FAA7C44D2CB24523DA53FBE4F6EC3595892D1AA58C4328A06C46A15662E7EAA703A1DECF8BBB2D05DBE2EB956C142A338661D10461C0D135472085057F3494309FFA73C611F78B32ADBB5740C361C9F35BE90997DB2014E2EF5AA61782F52ABEB8BD6432C4DD097BC5423B285DAFB60DC364E8161F4A2A35ACA3A10B1C4D203CC76A470A33AFDCBDD92959859ABD8B56E1725252D78EAC66E71BA9AE3F1DD2487199874393CD4D832186800654760E1E34C09E4D155179F9EC0DC4473F996BDCE6EED1CABED8B6F116F7AD9CF505DF0F998E34AB27514B0FFE7#)" +" (y #667098C654426C78D7F8201EAC6C203EF030D43605032C2F1FA937E5237DBD949F34A0A2564FE126DC8B715C5141802CE0979C8246463C40E6B6BDAA2513FA611728716C2E4FD53BC95B89E69949D96512E873B9C8F8DFD499CC312882561ADECB31F658E934C0C197F2C4D96B05CBAD67381E7B768891E4DA3843D24D94CDFB5126E9B8BF21E8358EE0E0A30EF13FD6A664C0DCE3731F7FB49A4845A4FD8254687972A2D382599C9BAC4E0ED7998193078913032558134976410B89D2C171D123AC35FD977219597AA7D15C1A9A428E59194F75C721EBCBCFAE44696A499AFA74E04299F132026601638CB87AB79190D4A0986315DA8EEC6561C938996BEADF#)" +" (x #69C7548C21D0DFEA6B9A51C9EAD4E27C33D3B3F180316E5BCAB92C933F0E4DBC#)))"; + + + +static int test_keys (DSA_secret_key *sk, unsigned int qbits); +static int check_secret_key (DSA_secret_key *sk); +static gpg_err_code_t generate (DSA_secret_key *sk, + unsigned int nbits, + unsigned int qbits, + int transient_key, + dsa_domain_t *domain, + gcry_mpi_t **ret_factors); +static gpg_err_code_t sign (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, + DSA_secret_key *skey, int flags, int hashalgo); +static gpg_err_code_t verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, + DSA_public_key *pkey); +static unsigned int dsa_get_nbits (gcry_sexp_t parms); + + +static void (*progress_cb) (void *,const char *, int, int, int ); +static void *progress_cb_data; + + +void +_gcry_register_pk_dsa_progress (void (*cb) (void *, const char *, + int, int, int), + void *cb_data) +{ + progress_cb = cb; + progress_cb_data = cb_data; +} + + +static void +progress (int c) +{ + if (progress_cb) + progress_cb (progress_cb_data, "pk_dsa", c, 0, 0); +} + + +/* Check that a freshly generated key actually works. Returns 0 on success. */ +static int +test_keys (DSA_secret_key *sk, unsigned int qbits) +{ + int result = -1; /* Default to failure. 
*/ + DSA_public_key pk; + gcry_mpi_t data = mpi_new (qbits); + gcry_mpi_t sig_a = mpi_new (qbits); + gcry_mpi_t sig_b = mpi_new (qbits); + + /* Put the relevant parameters into a public key structure. */ + pk.p = sk->p; + pk.q = sk->q; + pk.g = sk->g; + pk.y = sk->y; + + /* Create a random plaintext. */ + _gcry_mpi_randomize (data, qbits, GCRY_WEAK_RANDOM); + + /* Sign DATA using the secret key. */ + sign (sig_a, sig_b, data, sk, 0, 0); + + /* Verify the signature using the public key. */ + if ( verify (sig_a, sig_b, data, &pk) ) + goto leave; /* Signature does not match. */ + + /* Modify the data and check that the signing fails. */ + mpi_add_ui (data, data, 1); + if ( !verify (sig_a, sig_b, data, &pk) ) + goto leave; /* Signature matches but should not. */ + + result = 0; /* The test succeeded. */ + + leave: + _gcry_mpi_release (sig_b); + _gcry_mpi_release (sig_a); + _gcry_mpi_release (data); + return result; +} + + + +/* + Generate a DSA key pair with a key of size NBITS. If transient_key + is true the key is generated using the standard RNG and not the + very secure one. + + Returns: 2 structures filled with all needed values + and an array with the n-1 factors of (p-1) + */ +static gpg_err_code_t +generate (DSA_secret_key *sk, unsigned int nbits, unsigned int qbits, + int transient_key, dsa_domain_t *domain, gcry_mpi_t **ret_factors ) +{ + gpg_err_code_t rc; + gcry_mpi_t p; /* the prime */ + gcry_mpi_t q; /* the 160 bit prime factor */ + gcry_mpi_t g; /* the generator */ + gcry_mpi_t y; /* g^x mod p */ + gcry_mpi_t x; /* the secret exponent */ + gcry_mpi_t h, e; /* helper */ + unsigned char *rndbuf; + gcry_random_level_t random_level; + + if (qbits) + ; /* Caller supplied qbits. Use this value. */ + else if ( nbits >= 512 && nbits <= 1024 ) + qbits = 160; + else if ( nbits == 2048 ) + qbits = 224; + else if ( nbits == 3072 ) + qbits = 256; + else if ( nbits == 7680 ) + qbits = 384; + else if ( nbits == 15360 ) + qbits = 512; + else + return GPG_ERR_INV_VALUE; + + if (qbits < 160 || qbits > 512 || (qbits%8) ) + return GPG_ERR_INV_VALUE; + if (nbits < 2*qbits || nbits > 15360) + return GPG_ERR_INV_VALUE; + + if (fips_mode ()) + { + if (nbits < 1024) + return GPG_ERR_INV_VALUE; + if (transient_key) + return GPG_ERR_INV_VALUE; + } + + if (domain->p && domain->q && domain->g) + { + /* Domain parameters are given; use them. */ + p = mpi_copy (domain->p); + q = mpi_copy (domain->q); + g = mpi_copy (domain->g); + gcry_assert (mpi_get_nbits (p) == nbits); + gcry_assert (mpi_get_nbits (q) == qbits); + h = mpi_alloc (0); + e = NULL; + } + else + { + /* Generate new domain parameters. */ + rc = _gcry_generate_elg_prime (1, nbits, qbits, NULL, &p, ret_factors); + if (rc) + return rc; + + /* Get q out of factors. */ + q = mpi_copy ((*ret_factors)[0]); + gcry_assert (mpi_get_nbits (q) == qbits); + + /* Find a generator g (h and e are helpers). + e = (p-1)/q */ + e = mpi_alloc (mpi_get_nlimbs (p)); + mpi_sub_ui (e, p, 1); + mpi_fdiv_q (e, e, q); + g = mpi_alloc (mpi_get_nlimbs (p)); + h = mpi_alloc_set_ui (1); /* (We start with 2.) */ + do + { + mpi_add_ui (h, h, 1); + /* g = h^e mod p */ + mpi_powm (g, h, e, p); + } + while (!mpi_cmp_ui (g, 1)); /* Continue until g != 1. */ + } + + /* Select a random number X with the property: + * 0 < x < q-1 + * + * FIXME: Why do we use the requirement x < q-1 ? It should be + * sufficient to test for x < q. 
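The generator search shown above relies on a standard fact: with e = (p-1)/q and q prime, g = h^e mod p is either 1 (when the order of h divides e) or an element of order exactly q, because g^q = h^(p-1) = 1 (mod p). A toy-sized, self-contained run of the same loop, assuming p=23 and q=11 (so e=2), which stops at h=2 with g=4:

#include <stdio.h>

/* Modular exponentiation by repeated multiplication; fine for toy sizes.  */
static unsigned
powm (unsigned b, unsigned e, unsigned m)
{
  unsigned r = 1;
  while (e--)
    r = (r * b) % m;
  return r;
}

int main (void)
{
  unsigned p = 23, q = 11, e = (p - 1) / q, h, g;

  /* Same search as above: first h > 1 whose e-th power is not 1.  */
  for (h = 2, g = powm (h, e, p); g == 1; h++, g = powm (h, e, p))
    ;
  printf ("g = %u generates the order-%u subgroup of GF(%u)*\n", g, q, p);
  return 0;  /* prints g = 4 */
}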
FIPS-186-3 check x < q-1 but it + * does not check for 0 < x because it makes sure that Q is unsigned + * and finally adds one to the result so that 0 will never be + * returned. We should replace the code below with _gcry_dsa_gen_k. + * + * This must be a very good random number because this is the secret + * part. The random quality depends on the transient_key flag. */ + random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM; + if (DBG_CIPHER) + log_debug("choosing a random x%s\n", transient_key? " (transient-key)":""); + gcry_assert( qbits >= 160 ); + x = mpi_alloc_secure( mpi_get_nlimbs(q) ); + mpi_sub_ui( h, q, 1 ); /* put q-1 into h */ + rndbuf = NULL; + do + { + if( DBG_CIPHER ) + progress('.'); + if( !rndbuf ) + rndbuf = _gcry_random_bytes_secure ((qbits+7)/8, random_level); + else + { /* Change only some of the higher bits (= 2 bytes)*/ + char *r = _gcry_random_bytes_secure (2, random_level); + memcpy(rndbuf, r, 2 ); + xfree(r); + } + + _gcry_mpi_set_buffer( x, rndbuf, (qbits+7)/8, 0 ); + mpi_clear_highbit( x, qbits+1 ); + } + while ( !( mpi_cmp_ui( x, 0 )>0 && mpi_cmp( x, h )<0 ) ); + xfree(rndbuf); + mpi_free( e ); + mpi_free( h ); + + /* y = g^x mod p */ + y = mpi_alloc( mpi_get_nlimbs(p) ); + mpi_powm (y, g, x, p); + + if( DBG_CIPHER ) + { + progress('\n'); + log_mpidump("dsa p", p ); + log_mpidump("dsa q", q ); + log_mpidump("dsa g", g ); + log_mpidump("dsa y", y ); + log_mpidump("dsa x", x ); + } + + /* Copy the stuff to the key structures. */ + sk->p = p; + sk->q = q; + sk->g = g; + sk->y = y; + sk->x = x; + + /* Now we can test our keys (this should never fail!). */ + if ( test_keys (sk, qbits) ) + { + _gcry_mpi_release (sk->p); sk->p = NULL; + _gcry_mpi_release (sk->q); sk->q = NULL; + _gcry_mpi_release (sk->g); sk->g = NULL; + _gcry_mpi_release (sk->y); sk->y = NULL; + _gcry_mpi_release (sk->x); sk->x = NULL; + fips_signal_error ("self-test after key generation failed"); + return GPG_ERR_SELFTEST_FAILED; + } + return 0; +} + + +/* Generate a DSA key pair with a key of size NBITS using the + algorithm given in FIPS-186-3. If USE_FIPS186_2 is true, + FIPS-186-2 is used and thus the length is restricted to 1024/160. + If DERIVEPARMS is not NULL it may contain a seed value. If domain + parameters are specified in DOMAIN, DERIVEPARMS may not be given + and NBITS and QBITS must match the specified domain parameters. */ +static gpg_err_code_t +generate_fips186 (DSA_secret_key *sk, unsigned int nbits, unsigned int qbits, + gcry_sexp_t deriveparms, int use_fips186_2, + dsa_domain_t *domain, + int *r_counter, void **r_seed, size_t *r_seedlen, + gcry_mpi_t *r_h) +{ + gpg_err_code_t ec; + struct { + gcry_sexp_t sexp; + const void *seed; + size_t seedlen; + } initial_seed = { NULL, NULL, 0 }; + gcry_mpi_t prime_q = NULL; + gcry_mpi_t prime_p = NULL; + gcry_mpi_t value_g = NULL; /* The generator. */ + gcry_mpi_t value_y = NULL; /* g^x mod p */ + gcry_mpi_t value_x = NULL; /* The secret exponent. */ + gcry_mpi_t value_h = NULL; /* Helper. */ + gcry_mpi_t value_e = NULL; /* Helper. */ + gcry_mpi_t value_c = NULL; /* helper for x */ + gcry_mpi_t value_qm2 = NULL; /* q - 2 */ + + /* Preset return values. */ + *r_counter = 0; + *r_seed = NULL; + *r_seedlen = 0; + *r_h = NULL; + + /* Derive QBITS from NBITS if requested */ + if (!qbits) + { + if (nbits == 1024) + qbits = 160; + else if (nbits == 2048) + qbits = 224; + else if (nbits == 3072) + qbits = 256; + } + + /* Check that QBITS and NBITS match the standard. Note that FIPS + 186-3 uses N for QBITS and L for NBITS. 
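The do/while above is plain rejection sampling: draw enough random bits, clear the excess high bits, and retry until the candidate lies strictly between 0 and q-1. A toy model of the same pattern, with rand() standing in for the secure RNG purely for illustration (it is not a CSPRNG):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main (void)
{
  unsigned q = 11, x;

  srand ((unsigned)time (NULL));
  do
    x = (unsigned)rand () & 0xF;   /* draw a few bits, like qbits above */
  while (!(x > 0 && x < q - 1));   /* reject until 0 < x < q-1 holds */
  printf ("x = %u\n", x);
  return 0;
}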
*/ + if (nbits == 1024 && qbits == 160 && use_fips186_2) + ; /* Allowed in FIPS 186-2 mode. */ + else if (nbits == 2048 && qbits == 224) + ; + else if (nbits == 2048 && qbits == 256) + ; + else if (nbits == 3072 && qbits == 256) + ; + else + return GPG_ERR_INV_VALUE; + + if (domain->p && domain->q && domain->g) + { + /* Domain parameters are given; use them. */ + prime_p = mpi_copy (domain->p); + prime_q = mpi_copy (domain->q); + value_g = mpi_copy (domain->g); + gcry_assert (mpi_get_nbits (prime_p) == nbits); + gcry_assert (mpi_get_nbits (prime_q) == qbits); + gcry_assert (!deriveparms); + ec = 0; + } + else + { + /* Generate new domain parameters. */ + + /* Get an initial seed value. */ + if (deriveparms) + { + initial_seed.sexp = sexp_find_token (deriveparms, "seed", 0); + if (initial_seed.sexp) + initial_seed.seed = sexp_nth_data (initial_seed.sexp, 1, + &initial_seed.seedlen); + } + + if (use_fips186_2) + ec = _gcry_generate_fips186_2_prime (nbits, qbits, + initial_seed.seed, + initial_seed.seedlen, + &prime_q, &prime_p, + r_counter, + r_seed, r_seedlen); + else + ec = _gcry_generate_fips186_3_prime (nbits, qbits, NULL, 0, + &prime_q, &prime_p, + r_counter, + r_seed, r_seedlen, NULL); + sexp_release (initial_seed.sexp); + if (ec) + goto leave; + + /* Find a generator g (h and e are helpers). + * e = (p-1)/q + */ + value_e = mpi_alloc_like (prime_p); + mpi_sub_ui (value_e, prime_p, 1); + mpi_fdiv_q (value_e, value_e, prime_q ); + value_g = mpi_alloc_like (prime_p); + value_h = mpi_alloc_set_ui (1); + do + { + mpi_add_ui (value_h, value_h, 1); + /* g = h^e mod p */ + mpi_powm (value_g, value_h, value_e, prime_p); + } + while (!mpi_cmp_ui (value_g, 1)); /* Continue until g != 1. */ + } + + value_c = mpi_snew (qbits); + value_x = mpi_snew (qbits); + value_qm2 = mpi_snew (qbits); + mpi_sub_ui (value_qm2, prime_q, 2); + + /* FIPS 186-4 B.1.2 steps 4-6 */ + do + { + if( DBG_CIPHER ) + progress('.'); + _gcry_mpi_randomize (value_c, qbits, GCRY_VERY_STRONG_RANDOM); + mpi_clear_highbit (value_c, qbits+1); + } + while (!(mpi_cmp_ui (value_c, 0) > 0 && mpi_cmp (value_c, value_qm2) < 0)); + /* while (mpi_cmp (value_c, value_qm2) > 0); */ + + /* x = c + 1 */ + mpi_add_ui(value_x, value_c, 1); + + /* y = g^x mod p */ + value_y = mpi_alloc_like (prime_p); + mpi_powm (value_y, value_g, value_x, prime_p); + + if (DBG_CIPHER) + { + progress('\n'); + log_mpidump("dsa p", prime_p ); + log_mpidump("dsa q", prime_q ); + log_mpidump("dsa g", value_g ); + log_mpidump("dsa y", value_y ); + log_mpidump("dsa x", value_x ); + log_mpidump("dsa h", value_h ); + } + + /* Copy the stuff to the key structures. */ + sk->p = prime_p; prime_p = NULL; + sk->q = prime_q; prime_q = NULL; + sk->g = value_g; value_g = NULL; + sk->y = value_y; value_y = NULL; + sk->x = value_x; value_x = NULL; + *r_h = value_h; value_h = NULL; + + leave: + _gcry_mpi_release (prime_p); + _gcry_mpi_release (prime_q); + _gcry_mpi_release (value_g); + _gcry_mpi_release (value_y); + _gcry_mpi_release (value_x); + _gcry_mpi_release (value_h); + _gcry_mpi_release (value_e); + _gcry_mpi_release (value_c); + _gcry_mpi_release (value_qm2); + + /* As a last step test this keys (this should never fail of course). 
 */
+  if (!ec && test_keys (sk, qbits) )
+    {
+      _gcry_mpi_release (sk->p); sk->p = NULL;
+      _gcry_mpi_release (sk->q); sk->q = NULL;
+      _gcry_mpi_release (sk->g); sk->g = NULL;
+      _gcry_mpi_release (sk->y); sk->y = NULL;
+      _gcry_mpi_release (sk->x); sk->x = NULL;
+      fips_signal_error ("self-test after key generation failed");
+      ec = GPG_ERR_SELFTEST_FAILED;
+    }
+
+  if (ec)
+    {
+      *r_counter = 0;
+      xfree (*r_seed); *r_seed = NULL;
+      *r_seedlen = 0;
+      _gcry_mpi_release (*r_h); *r_h = NULL;
+    }
+
+  return ec;
+}
+
+
+
+/*
+   Test whether the secret key is valid.
+   Returns true if this is a valid key.
+ */
+static int
+check_secret_key( DSA_secret_key *sk )
+{
+  int rc;
+  gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs(sk->y) );
+
+  mpi_powm( y, sk->g, sk->x, sk->p );
+  rc = !mpi_cmp( y, sk->y );
+  mpi_free( y );
+  return rc;
+}
+
+
+
+/*
+   Make a DSA signature from INPUT and put it into r and s.
+
+   INPUT may either be a plain MPI or an opaque MPI which is then
+   internally converted to a plain MPI.  FLAGS and HASHALGO may both
+   be 0 for standard operation mode.
+
+   The return value is 0 on success or an error code.  Note that for
+   backward compatibility the function will not return any error if
+   FLAGS and HASHALGO are both 0 and INPUT is a plain MPI.
+ */
+static gpg_err_code_t
+sign (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, DSA_secret_key *skey,
+      int flags, int hashalgo)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t hash;
+  gcry_mpi_t k;
+  gcry_mpi_t kinv;
+  gcry_mpi_t tmp;
+  const void *abuf;
+  unsigned int abits, qbits;
+  int extraloops = 0;
+
+  qbits = mpi_get_nbits (skey->q);
+
+  /* Convert the INPUT into an MPI.  */
+  rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+  if (rc)
+    return rc;
+
+ again:
+  /* Create the K value.  */
+  if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo)
+    {
+      /* Use Pornin's method for deterministic DSA.  If this flag is
+         set, it is expected that HASH is an opaque MPI with the to be
+         signed hash.  That hash is also used as h1 from 3.2.a.  */
+      if (!mpi_is_opaque (input))
+        {
+          rc = GPG_ERR_CONFLICT;
+          goto leave;
+        }
+
+      abuf = mpi_get_opaque (input, &abits);
+      rc = _gcry_dsa_gen_rfc6979_k (&k, skey->q, skey->x,
+                                    abuf, (abits+7)/8, hashalgo, extraloops);
+      if (rc)
+        goto leave;
+    }
+  else
+    {
+      /* Select a random k with 0 < k < q */
+      k = _gcry_dsa_gen_k (skey->q, GCRY_STRONG_RANDOM);
+    }
+
+  /* kinv = k^(-1) mod q */
+  kinv = mpi_alloc( mpi_get_nlimbs(k) );
+  mpi_invm(kinv, k, skey->q );
+
+  _gcry_dsa_modify_k (k, skey->q, qbits);
+
+  /* r = (g^k mod p) mod q */
+  mpi_powm( r, skey->g, k, skey->p );
+  mpi_fdiv_r( r, r, skey->q );
+
+  /* s = (kinv * ( hash + x * r)) mod q */
+  tmp = mpi_alloc( mpi_get_nlimbs(skey->p) );
+  mpi_mul( tmp, skey->x, r );
+  mpi_add( tmp, tmp, hash );
+  mpi_mulm( s , kinv, tmp, skey->q );
+
+  mpi_free(k);
+  mpi_free(kinv);
+  mpi_free(tmp);
+
+  if (!mpi_cmp_ui (r, 0))
+    {
+      /* This is a highly unlikely code path.  */
+      extraloops++;
+      goto again;
+    }
+
+  rc = 0;
+
+ leave:
+  if (hash != input)
+    mpi_free (hash);
+
+  return rc;
+}
+
+
+/*
+   Returns true if the signature composed from R and S is valid.
+ */
+static gpg_err_code_t
+verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, DSA_public_key *pkey )
+{
+  gpg_err_code_t rc = 0;
+  gcry_mpi_t w, u1, u2, v;
+  gcry_mpi_t base[3];
+  gcry_mpi_t ex[3];
+  gcry_mpi_t hash;
+  unsigned int nbits;
+
+  if( !(mpi_cmp_ui( r, 0 ) > 0 && mpi_cmp( r, pkey->q ) < 0) )
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < q failed.
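The r/s arithmetic of sign() and the w/u1/u2/v recomputation that verify() performs next can be traced end to end with toy numbers (nothing secret here; p=23, q=11, g=4 generates the order-11 subgroup, x=7, k=5, hash=3). A self-contained worked example:

#include <stdio.h>

static unsigned
powm (unsigned b, unsigned e, unsigned m)
{
  unsigned r = 1;
  while (e--)
    r = (r * b) % m;
  return r;
}

static unsigned
invm (unsigned a, unsigned m)   /* brute-force a^-1 mod m; toy sizes only */
{
  unsigned i;
  for (i = 1; i < m; i++)
    if ((a * i) % m == 1)
      return i;
  return 0;
}

int main (void)
{
  unsigned p = 23, q = 11, g = 4, x = 7, k = 5, hash = 3;
  unsigned y = powm (g, x, p);                      /* public key: 8 */
  /* sign(): */
  unsigned r = powm (g, k, p) % q;                  /* (4^5 mod 23) mod 11 = 1 */
  unsigned s = (invm (k, q) * (hash + x * r)) % q;  /* 9 * 10 mod 11 = 2 */
  /* verify(): */
  unsigned w  = invm (s, q);                        /* s^-1 mod q = 6 */
  unsigned u1 = (hash * w) % q;                     /* 7 */
  unsigned u2 = (r * w) % q;                        /* 6 */
  unsigned v  = (powm (g, u1, p) * powm (y, u2, p)) % p % q;

  printf ("r=%u s=%u v=%u -> %s\n", r, s, v, v == r ? "good" : "bad");
  return 0;  /* prints r=1 s=2 v=1 -> good */
}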
 */
+  if( !(mpi_cmp_ui( s, 0 ) > 0 && mpi_cmp( s, pkey->q ) < 0) )
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < q failed.  */
+
+  nbits = mpi_get_nbits (pkey->q);
+  rc = _gcry_dsa_normalize_hash (input, &hash, nbits);
+  if (rc)
+    return rc;
+
+  w  = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+  u1 = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+  u2 = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+  v  = mpi_alloc( mpi_get_nlimbs(pkey->p) );
+
+  /* w = s^(-1) mod q */
+  mpi_invm( w, s, pkey->q );
+
+  /* u1 = (hash * w) mod q */
+  mpi_mulm( u1, hash, w, pkey->q );
+
+  /* u2 = r * w mod q */
+  mpi_mulm( u2, r, w, pkey->q );
+
+  /* v = g^u1 * y^u2 mod p mod q */
+  base[0] = pkey->g; ex[0] = u1;
+  base[1] = pkey->y; ex[1] = u2;
+  base[2] = NULL;    ex[2] = NULL;
+  mpi_mulpowm( v, base, ex, pkey->p );
+  mpi_fdiv_r( v, v, pkey->q );
+
+  if (mpi_cmp( v, r ))
+    {
+      if (DBG_CIPHER)
+        {
+          log_mpidump ("    i", input);
+          log_mpidump ("    h", hash);
+          log_mpidump ("    v", v);
+          log_mpidump ("    r", r);
+          log_mpidump ("    s", s);
+        }
+      rc = GPG_ERR_BAD_SIGNATURE;
+    }
+
+  mpi_free(w);
+  mpi_free(u1);
+  mpi_free(u2);
+  mpi_free(v);
+  if (hash != input)
+    mpi_free (hash);
+
+  return rc;
+}
+
+
+/*********************************************
+ **************  interface  ******************
+ *********************************************/
+
+static gcry_err_code_t
+dsa_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
+{
+  gpg_err_code_t rc;
+  unsigned int nbits;
+  gcry_sexp_t domainsexp;
+  DSA_secret_key sk;
+  gcry_sexp_t l1;
+  unsigned int qbits = 0;
+  gcry_sexp_t deriveparms = NULL;
+  gcry_sexp_t seedinfo = NULL;
+  gcry_sexp_t misc_info = NULL;
+  int flags = 0;
+  dsa_domain_t domain;
+  gcry_mpi_t *factors = NULL;
+
+  memset (&sk, 0, sizeof sk);
+  memset (&domain, 0, sizeof domain);
+
+  rc = _gcry_pk_util_get_nbits (genparms, &nbits);
+  if (rc)
+    return rc;
+
+  /* Parse the optional flags list.  */
+  l1 = sexp_find_token (genparms, "flags", 0);
+  if (l1)
+    {
+      rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+      sexp_release (l1);
+      if (rc)
+        return rc;
+    }
+
+  /* Parse the optional qbits element.  */
+  l1 = sexp_find_token (genparms, "qbits", 0);
+  if (l1)
+    {
+      char buf[50];
+      const char *s;
+      size_t n;
+
+      s = sexp_nth_data (l1, 1, &n);
+      if (!s || n >= DIM (buf) - 1 )
+        {
+          sexp_release (l1);
+          return GPG_ERR_INV_OBJ; /* No value or value too large.  */
+        }
+      memcpy (buf, s, n);
+      buf[n] = 0;
+      qbits = (unsigned int)strtoul (buf, NULL, 0);
+      sexp_release (l1);
+    }
+
+  /* Parse the optional transient-key flag.  */
+  if (!(flags & PUBKEY_FLAG_TRANSIENT_KEY))
+    {
+      l1 = sexp_find_token (genparms, "transient-key", 0);
+      if (l1)
+        {
+          flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+          sexp_release (l1);
+        }
+    }
+
+  /* Get the optional derive parameters.  */
+  deriveparms = sexp_find_token (genparms, "derive-parms", 0);
+
+  /* Parse the optional "use-fips186" flags.  */
+  if (!(flags & PUBKEY_FLAG_USE_FIPS186))
+    {
+      l1 = sexp_find_token (genparms, "use-fips186", 0);
+      if (l1)
+        {
+          flags |= PUBKEY_FLAG_USE_FIPS186;
+          sexp_release (l1);
+        }
+    }
+  if (!(flags & PUBKEY_FLAG_USE_FIPS186_2))
+    {
+      l1 = sexp_find_token (genparms, "use-fips186-2", 0);
+      if (l1)
+        {
+          flags |= PUBKEY_FLAG_USE_FIPS186_2;
+          sexp_release (l1);
+        }
+    }
+
+  /* Check whether domain parameters are given.  */
+  domainsexp = sexp_find_token (genparms, "domain", 0);
+  if (domainsexp)
+    {
+      /* DERIVEPARMS can't be used together with domain parameters.
+         NBITS and QBITS may not be specified because their values
+         are derived from the domain parameters.
*/ + if (deriveparms || qbits || nbits) + { + sexp_release (domainsexp); + sexp_release (deriveparms); + return GPG_ERR_INV_VALUE; + } + + /* Put all domain parameters into the domain object. */ + l1 = sexp_find_token (domainsexp, "p", 0); + domain.p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + l1 = sexp_find_token (domainsexp, "q", 0); + domain.q = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + l1 = sexp_find_token (domainsexp, "g", 0); + domain.g = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + sexp_release (domainsexp); + + /* Check that all domain parameters are available. */ + if (!domain.p || !domain.q || !domain.g) + { + _gcry_mpi_release (domain.p); + _gcry_mpi_release (domain.q); + _gcry_mpi_release (domain.g); + sexp_release (deriveparms); + return GPG_ERR_MISSING_VALUE; + } + + /* Get NBITS and QBITS from the domain parameters. */ + nbits = mpi_get_nbits (domain.p); + qbits = mpi_get_nbits (domain.q); + } + + if (deriveparms + || (flags & PUBKEY_FLAG_USE_FIPS186) + || (flags & PUBKEY_FLAG_USE_FIPS186_2) + || fips_mode ()) + { + int counter; + void *seed; + size_t seedlen; + gcry_mpi_t h_value; + + rc = generate_fips186 (&sk, nbits, qbits, deriveparms, + !!(flags & PUBKEY_FLAG_USE_FIPS186_2), + &domain, + &counter, &seed, &seedlen, &h_value); + if (!rc && h_value) + { + /* Format the seed-values unless domain parameters are used + for which a H_VALUE of NULL is an indication. */ + rc = sexp_build (&seedinfo, NULL, + "(seed-values(counter %d)(seed %b)(h %m))", + counter, (int)seedlen, seed, h_value); + xfree (seed); + _gcry_mpi_release (h_value); + } + } + else + { + rc = generate (&sk, nbits, qbits, + !!(flags & PUBKEY_FLAG_TRANSIENT_KEY), + &domain, &factors); + } + + if (!rc) + { + /* Put the factors into MISC_INFO. Note that the factors are + not confidential thus we can store them in standard memory. */ + int nfactors, i, j; + char *p; + char *format = NULL; + void **arg_list = NULL; + + for (nfactors=0; factors && factors[nfactors]; nfactors++) + ; + /* Allocate space for the format string: + "(misc-key-info%S(pm1-factors%m))" + with one "%m" for each factor and construct it. */ + format = xtrymalloc (50 + 2*nfactors); + if (!format) + rc = gpg_err_code_from_syserror (); + else + { + p = stpcpy (format, "(misc-key-info"); + if (seedinfo) + p = stpcpy (p, "%S"); + if (nfactors) + { + p = stpcpy (p, "(pm1-factors"); + for (i=0; i < nfactors; i++) + p = stpcpy (p, "%m"); + p = stpcpy (p, ")"); + } + p = stpcpy (p, ")"); + + /* Allocate space for the list of factors plus one for the + seedinfo s-exp plus an extra NULL entry for safety and + fill it with the factors. 
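The tokens parsed above (nbits, qbits, transient-key, use-fips186, domain, derive-parms) arrive from the caller's genkey S-expression. A minimal sketch of driving this entry point through the public API, with error handling abbreviated; the "4:2048" length-prefixed token form is the usual idiom for numeric values:

#include <stdio.h>
#include <gcrypt.h>

int main (void)
{
  gcry_sexp_t parms = NULL, keypair = NULL, skey = NULL;
  gcry_error_t err;

  if (!gcry_check_version (NULL))
    return 1;
  err = gcry_sexp_build (&parms, NULL,
                         "(genkey (dsa (nbits 4:2048) (qbits 3:256)))");
  if (!err)
    err = gcry_pk_genkey (&keypair, parms);   /* ends up in dsa_generate */
  if (!err)
    {
      /* KEYPAIR is the (key-data ...) object built above; pull out the
         private part and run the consistency check.  */
      skey = gcry_sexp_find_token (keypair, "private-key", 0);
      err = gcry_pk_testkey (skey);           /* -> dsa_check_secret_key */
    }
  fprintf (stderr, "result: %s\n", gcry_strerror (err));
  gcry_sexp_release (skey);
  gcry_sexp_release (keypair);
  gcry_sexp_release (parms);
  return !!err;
}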
*/ + arg_list = xtrycalloc (nfactors+1+1, sizeof *arg_list); + if (!arg_list) + rc = gpg_err_code_from_syserror (); + else + { + i = 0; + if (seedinfo) + arg_list[i++] = &seedinfo; + for (j=0; j < nfactors; j++) + arg_list[i++] = factors + j; + arg_list[i] = NULL; + + rc = sexp_build_array (&misc_info, NULL, format, arg_list); + } + } + + xfree (arg_list); + xfree (format); + } + + if (!rc) + rc = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (dsa(p%m)(q%m)(g%m)(y%m)))" + " (private-key" + " (dsa(p%m)(q%m)(g%m)(y%m)(x%m)))" + " %S)", + sk.p, sk.q, sk.g, sk.y, + sk.p, sk.q, sk.g, sk.y, sk.x, + misc_info); + + + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + + _gcry_mpi_release (domain.p); + _gcry_mpi_release (domain.q); + _gcry_mpi_release (domain.g); + + sexp_release (seedinfo); + sexp_release (misc_info); + sexp_release (deriveparms); + if (factors) + { + gcry_mpi_t *mp; + for (mp = factors; *mp; mp++) + mpi_free (*mp); + xfree (factors); + } + return rc; +} + + + +static gcry_err_code_t +dsa_check_secret_key (gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + DSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL}; + + rc = _gcry_sexp_extract_param (keyparms, NULL, "pqgyx", + &sk.p, &sk.q, &sk.g, &sk.y, &sk.x, + NULL); + if (rc) + goto leave; + + if (!check_secret_key (&sk)) + rc = GPG_ERR_BAD_SECKEY; + + leave: + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + if (DBG_CIPHER) + log_debug ("dsa_testkey => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +dsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t data = NULL; + DSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL}; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, + dsa_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("dsa_sign data", data); + + /* Extract the key. 
*/ + rc = _gcry_sexp_extract_param (keyparms, NULL, "pqgyx", + &sk.p, &sk.q, &sk.g, &sk.y, &sk.x, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("dsa_sign p", sk.p); + log_mpidump ("dsa_sign q", sk.q); + log_mpidump ("dsa_sign g", sk.g); + log_mpidump ("dsa_sign y", sk.y); + if (!fips_mode ()) + log_mpidump ("dsa_sign x", sk.x); + } + + sig_r = mpi_new (0); + sig_s = mpi_new (0); + rc = sign (sig_r, sig_s, data, &sk, ctx.flags, ctx.hash_algo); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("dsa_sign sig_r", sig_r); + log_mpidump ("dsa_sign sig_s", sig_s); + } + rc = sexp_build (r_sig, NULL, "(sig-val(dsa(r%M)(s%M)))", sig_r, sig_s); + + leave: + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + _gcry_mpi_release (data); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("dsa_sign => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +dsa_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + gcry_mpi_t data = NULL; + DSA_public_key pk = { NULL, NULL, NULL, NULL }; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, + dsa_get_nbits (s_keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("dsa_verify data", data); + + /* Extract the signature value. */ + rc = _gcry_pk_util_preparse_sigval (s_sig, dsa_names, &l1, NULL); + if (rc) + goto leave; + rc = _gcry_sexp_extract_param (l1, NULL, "rs", &sig_r, &sig_s, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("dsa_verify s_r", sig_r); + log_mpidump ("dsa_verify s_s", sig_s); + } + + /* Extract the key. */ + rc = _gcry_sexp_extract_param (s_keyparms, NULL, "pqgy", + &pk.p, &pk.q, &pk.g, &pk.y, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("dsa_verify p", pk.p); + log_mpidump ("dsa_verify q", pk.q); + log_mpidump ("dsa_verify g", pk.g); + log_mpidump ("dsa_verify y", pk.y); + } + + /* Verify the signature. */ + rc = verify (sig_r, sig_s, data, &pk); + + leave: + _gcry_mpi_release (pk.p); + _gcry_mpi_release (pk.q); + _gcry_mpi_release (pk.g); + _gcry_mpi_release (pk.y); + _gcry_mpi_release (data); + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("dsa_verify => %s\n", rc?gpg_strerror (rc):"Good"); + return rc; +} + + +/* Return the number of bits for the key described by PARMS. On error + * 0 is returned. The format of PARMS starts with the algorithm name; + * for example: + * + * (dsa + * (p <mpi>) + * (q <mpi>) + * (g <mpi>) + * (y <mpi>)) + * + * More parameters may be given but we only need P here. + */ +static unsigned int +dsa_get_nbits (gcry_sexp_t parms) +{ + gcry_sexp_t l1; + gcry_mpi_t p; + unsigned int nbits; + + l1 = sexp_find_token (parms, "p", 1); + if (!l1) + return 0; /* Parameter P not found. */ + + p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + nbits = p? mpi_get_nbits (p) : 0; + _gcry_mpi_release (p); + return nbits; +} + + + +/* + Self-test section. 
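The dsa_sign/dsa_verify pair above is reached through gcry_pk_sign and gcry_pk_verify. A minimal sketch, assuming SKEY and PKEY are DSA key S-expressions as produced by gcry_pk_genkey and that DIGEST holds a precomputed 32-byte SHA-256 value; the data format with the rfc6979 flag matches what the self-test below feeds in:

#include <gcrypt.h>

/* Hypothetical helper: sign DIGEST deterministically, then verify.  */
static gcry_error_t
sign_and_check (gcry_sexp_t skey, gcry_sexp_t pkey,
                const unsigned char digest[32])
{
  gcry_sexp_t data = NULL, sig = NULL;
  gcry_error_t err;

  err = gcry_sexp_build (&data, NULL,
                         "(data (flags rfc6979) (hash sha256 %b))",
                         32, digest);
  if (!err)
    err = gcry_pk_sign (&sig, data, skey);  /* (sig-val (dsa (r ...) (s ...))) */
  if (!err)
    err = gcry_pk_verify (sig, data, pkey);
  gcry_sexp_release (sig);
  gcry_sexp_release (data);
  return err;
}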
+ */ + +static const char * +selftest_sign (gcry_sexp_t pkey, gcry_sexp_t skey) +{ + /* Sample data from RFC 6979 section A.2.2, hash is of message "sample" */ + static const char sample_data[] = + "(data (flags rfc6979)" + " (hash sha256 #af2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e9891562113d8a62add1bf#))"; + static const char sample_data_bad[] = + "(data (flags rfc6979)" + " (hash sha256 #bf2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e9891562113d8a62add1bf#))"; + static const char signature_r[] = + "eace8bdbbe353c432a795d9ec556c6d021f7a03f42c36e9bc87e4ac7932cc809"; + static const char signature_s[] = + "7081e175455f9247b812b74583e9e94f9ea79bd640dc962533b0680793a38d53"; + + const char *errtxt = NULL; + gcry_error_t err; + gcry_sexp_t data = NULL; + gcry_sexp_t data_bad = NULL; + gcry_sexp_t sig = NULL; + gcry_sexp_t l1 = NULL; + gcry_sexp_t l2 = NULL; + gcry_mpi_t r = NULL; + gcry_mpi_t s = NULL; + gcry_mpi_t calculated_r = NULL; + gcry_mpi_t calculated_s = NULL; + int cmp; + + err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data)); + if (!err) + err = sexp_sscan (&data_bad, NULL, + sample_data_bad, strlen (sample_data_bad)); + if (!err) + err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL); + if (!err) + err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL); + + if (err) + { + errtxt = "converting data failed"; + goto leave; + } + + err = _gcry_pk_sign (&sig, data, skey); + if (err) + { + errtxt = "signing failed"; + goto leave; + } + + /* check against known signature */ + errtxt = "signature validity failed"; + l1 = _gcry_sexp_find_token (sig, "sig-val", 0); + if (!l1) + goto leave; + l2 = _gcry_sexp_find_token (l1, "dsa", 0); + if (!l2) + goto leave; + + sexp_release (l1); + l1 = l2; + + l2 = _gcry_sexp_find_token (l1, "r", 0); + if (!l2) + goto leave; + calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG); + if (!calculated_r) + goto leave; + + sexp_release (l2); + l2 = _gcry_sexp_find_token (l1, "s", 0); + if (!l2) + goto leave; + calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG); + if (!calculated_s) + goto leave; + + errtxt = "known sig check failed"; + + cmp = _gcry_mpi_cmp (r, calculated_r); + if (cmp) + goto leave; + cmp = _gcry_mpi_cmp (s, calculated_s); + if (cmp) + goto leave; + + errtxt = NULL; + + + err = _gcry_pk_verify (sig, data, pkey); + if (err) + { + errtxt = "verify failed"; + goto leave; + } + err = _gcry_pk_verify (sig, data_bad, pkey); + if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE) + { + errtxt = "bad signature not detected"; + goto leave; + } + + + leave: + _gcry_mpi_release (calculated_s); + _gcry_mpi_release (calculated_r); + _gcry_mpi_release (s); + _gcry_mpi_release (r); + sexp_release (l2); + sexp_release (l1); + sexp_release (sig); + sexp_release (data_bad); + sexp_release (data); + return errtxt; +} + + +static gpg_err_code_t +selftests_dsa_2048 (selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + gcry_error_t err; + gcry_sexp_t skey = NULL; + gcry_sexp_t pkey = NULL; + + /* Convert the S-expressions into the internal representation. 
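For completeness: these known-answer tests are normally reached through the run_selftests hook further below; an application can also request the library-wide self-tests explicitly. A hedged fragment, assuming the standard GCRYCTL_SELFTEST control code (coverage differs between FIPS and non-FIPS builds):

  /* Run all implemented self-tests, including the DSA one below.  */
  gcry_error_t err = gcry_control (GCRYCTL_SELFTEST, 0);
  if (err)
    fprintf (stderr, "selftest: %s\n", gcry_strerror (err));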
 */
+  what = "convert";
+  err = sexp_sscan (&skey, NULL, sample_secret_key_2048, strlen (sample_secret_key_2048));
+  if (!err)
+    err = sexp_sscan (&pkey, NULL,
+                      sample_public_key_2048, strlen (sample_public_key_2048));
+  if (err)
+    {
+      errtxt = _gcry_strerror (err);
+      goto failed;
+    }
+
+  what = "key consistency";
+  err = _gcry_pk_testkey (skey);
+  if (err)
+    {
+      errtxt = _gcry_strerror (err);
+      goto failed;
+    }
+
+  what = "sign";
+  errtxt = selftest_sign (pkey, skey);
+  if (errtxt)
+    goto failed;
+
+  sexp_release (pkey);
+  sexp_release (skey);
+  return 0; /* Succeeded. */
+
+ failed:
+  sexp_release (pkey);
+  sexp_release (skey);
+  if (report)
+    report ("pubkey", GCRY_PK_DSA, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+  gpg_err_code_t ec;
+
+  (void)extended;
+
+  switch (algo)
+    {
+    case GCRY_PK_DSA:
+      ec = selftests_dsa_2048 (report);
+      break;
+    default:
+      ec = GPG_ERR_PUBKEY_ALGO;
+      break;
+
+    }
+  return ec;
+}
+
+
+
+gcry_pk_spec_t _gcry_pubkey_spec_dsa =
+  {
+    GCRY_PK_DSA, { 0, 1 },
+    GCRY_PK_USAGE_SIGN,
+    "DSA", dsa_names,
+    "pqgy", "pqgyx", "", "rs", "pqgy",
+    dsa_generate,
+    dsa_check_secret_key,
+    NULL,
+    NULL,
+    dsa_sign,
+    dsa_verify,
+    dsa_get_nbits,
+    run_selftests
+  };
diff --git a/libotr/libgcrypt-1.8.7/cipher/ecc-common.h b/libotr/libgcrypt-1.8.7/cipher/ecc-common.h
new file mode 100644
index 0000000..748e6db
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/ecc-common.h
@@ -0,0 +1,141 @@
+/* ecc-common.h - Declarations of common ECC code
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ECC_COMMON_H
+#define GCRY_ECC_COMMON_H
+
+
+/* Definition of a curve.  */
+typedef struct
+{
+  enum gcry_mpi_ec_models model;/* The model describing this curve.  */
+  enum ecc_dialects dialect;    /* The dialect used with the curve.  */
+  gcry_mpi_t p;         /* Prime specifying the field GF(p).  */
+  gcry_mpi_t a;         /* First coefficient of the Weierstrass equation.  */
+  gcry_mpi_t b;         /* Second coefficient of the Weierstrass equation,
+                           or d as used by Twisted Edwards curves.  */
+  mpi_point_struct G;   /* Base point (generator).  */
+  gcry_mpi_t n;         /* Order of G.  */
+  gcry_mpi_t h;         /* Cofactor.  */
+  const char *name;     /* Name of the curve or NULL.  */
+} elliptic_curve_t;
+
+
+typedef struct
+{
+  elliptic_curve_t E;
+  mpi_point_struct Q; /* Q = [d]G */
+} ECC_public_key;
+
+
+typedef struct
+{
+  elliptic_curve_t E;
+  mpi_point_struct Q;
+  gcry_mpi_t d;
+} ECC_secret_key;
+
+
+
+/* Set the value from S into D.
*/ +static inline void +point_set (mpi_point_t d, mpi_point_t s) +{ + mpi_set (d->x, s->x); + mpi_set (d->y, s->y); + mpi_set (d->z, s->z); +} + +#define point_init(a) _gcry_mpi_point_init ((a)) +#define point_free(a) _gcry_mpi_point_free_parts ((a)) + + +/*-- ecc-curves.c --*/ +gpg_err_code_t _gcry_ecc_fill_in_curve (unsigned int nbits, + const char *name, + elliptic_curve_t *curve, + unsigned int *r_nbits); +gpg_err_code_t _gcry_ecc_update_curve_param (const char *name, + enum gcry_mpi_ec_models *model, + enum ecc_dialects *dialect, + gcry_mpi_t *p, gcry_mpi_t *a, + gcry_mpi_t *b, gcry_mpi_t *g, + gcry_mpi_t *n, gcry_mpi_t *h); + +const char *_gcry_ecc_get_curve (gcry_sexp_t keyparms, + int iterator, + unsigned int *r_nbits); +gcry_sexp_t _gcry_ecc_get_param_sexp (const char *name); + +/*-- ecc-misc.c --*/ +void _gcry_ecc_curve_free (elliptic_curve_t *E); +elliptic_curve_t _gcry_ecc_curve_copy (elliptic_curve_t E); +const char *_gcry_ecc_model2str (enum gcry_mpi_ec_models model); +const char *_gcry_ecc_dialect2str (enum ecc_dialects dialect); +gcry_mpi_t _gcry_ecc_ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p); + +mpi_point_t _gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec, + mpi_point_t G, gcry_mpi_t d); + + +/*-- ecc.c --*/ + +/*-- ecc-ecdsa.c --*/ +gpg_err_code_t _gcry_ecc_ecdsa_sign (gcry_mpi_t input, ECC_secret_key *skey, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo); +gpg_err_code_t _gcry_ecc_ecdsa_verify (gcry_mpi_t input, ECC_public_key *pkey, + gcry_mpi_t r, gcry_mpi_t s); + +/*-- ecc-eddsa.c --*/ +gpg_err_code_t _gcry_ecc_eddsa_recover_x (gcry_mpi_t x, gcry_mpi_t y, int sign, + mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_eddsa_encodepoint (mpi_point_t point, mpi_ec_t ctx, + gcry_mpi_t x, gcry_mpi_t y, + int with_prefix, + unsigned char **r_buffer, + unsigned int *r_buflen); +gpg_err_code_t _gcry_ecc_eddsa_ensure_compact (gcry_mpi_t value, + unsigned int nbits); + + +gpg_err_code_t _gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest, + gcry_mpi_t d, mpi_ec_t ec); + +gpg_err_code_t _gcry_ecc_eddsa_genkey (ECC_secret_key *sk, + elliptic_curve_t *E, + mpi_ec_t ctx, + int flags); +gpg_err_code_t _gcry_ecc_eddsa_sign (gcry_mpi_t input, + ECC_secret_key *sk, + gcry_mpi_t r_r, gcry_mpi_t s, + int hashalgo, gcry_mpi_t pk); +gpg_err_code_t _gcry_ecc_eddsa_verify (gcry_mpi_t input, + ECC_public_key *pk, + gcry_mpi_t r, gcry_mpi_t s, + int hashalgo, gcry_mpi_t pkmpi); + +/*-- ecc-gost.c --*/ +gpg_err_code_t _gcry_ecc_gost_sign (gcry_mpi_t input, ECC_secret_key *skey, + gcry_mpi_t r, gcry_mpi_t s); +gpg_err_code_t _gcry_ecc_gost_verify (gcry_mpi_t input, ECC_public_key *pkey, + gcry_mpi_t r, gcry_mpi_t s); + + +#endif /*GCRY_ECC_COMMON_H*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/ecc-curves.c b/libotr/libgcrypt-1.8.7/cipher/ecc-curves.c new file mode 100644 index 0000000..3150d3c --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/ecc-curves.c @@ -0,0 +1,1347 @@ +/* ecc-curves.c - Elliptic Curve parameter mangement + * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "pubkey-internal.h" +#include "ecc-common.h" + + +/* This tables defines aliases for curve names. */ +static const struct +{ + const char *name; /* Our name. */ + const char *other; /* Other name. */ +} curve_aliases[] = + { + { "Curve25519", "1.3.6.1.4.1.3029.1.5.1" }, /* OpenPGP */ + { "Curve25519", "1.3.101.110" }, /* rfc8410 */ + { "Curve25519", "X25519" }, /* rfc8410 */ + + { "Ed25519", "1.3.6.1.4.1.11591.15.1" },/* OpenPGP */ + { "Ed25519", "1.3.101.112" }, /* rfc8410 */ + + { "NIST P-192", "1.2.840.10045.3.1.1" }, /* X9.62 OID */ + { "NIST P-192", "prime192v1" }, /* X9.62 name. */ + { "NIST P-192", "secp192r1" }, /* SECP name. */ + { "NIST P-192", "nistp192" }, /* rfc5656. */ + + { "NIST P-224", "secp224r1" }, + { "NIST P-224", "1.3.132.0.33" }, /* SECP OID. */ + { "NIST P-224", "nistp224" }, /* rfc5656. */ + + { "NIST P-256", "1.2.840.10045.3.1.7" }, /* From NIST SP 800-78-1. */ + { "NIST P-256", "prime256v1" }, + { "NIST P-256", "secp256r1" }, + { "NIST P-256", "nistp256" }, /* rfc5656. */ + + { "NIST P-384", "secp384r1" }, + { "NIST P-384", "1.3.132.0.34" }, + { "NIST P-384", "nistp384" }, /* rfc5656. */ + + { "NIST P-521", "secp521r1" }, + { "NIST P-521", "1.3.132.0.35" }, + { "NIST P-521", "nistp521" }, /* rfc5656. */ + + { "brainpoolP160r1", "1.3.36.3.3.2.8.1.1.1" }, + { "brainpoolP192r1", "1.3.36.3.3.2.8.1.1.3" }, + { "brainpoolP224r1", "1.3.36.3.3.2.8.1.1.5" }, + { "brainpoolP256r1", "1.3.36.3.3.2.8.1.1.7" }, + { "brainpoolP320r1", "1.3.36.3.3.2.8.1.1.9" }, + { "brainpoolP384r1", "1.3.36.3.3.2.8.1.1.11"}, + { "brainpoolP512r1", "1.3.36.3.3.2.8.1.1.13"}, + + { "GOST2001-test", "1.2.643.2.2.35.0" }, + { "GOST2001-CryptoPro-A", "1.2.643.2.2.35.1" }, + { "GOST2001-CryptoPro-B", "1.2.643.2.2.35.2" }, + { "GOST2001-CryptoPro-C", "1.2.643.2.2.35.3" }, + { "GOST2001-CryptoPro-A", "GOST2001-CryptoPro-XchA" }, + { "GOST2001-CryptoPro-C", "GOST2001-CryptoPro-XchB" }, + { "GOST2001-CryptoPro-A", "1.2.643.2.2.36.0" }, + { "GOST2001-CryptoPro-C", "1.2.643.2.2.36.1" }, + + { "GOST2012-tc26-A", "1.2.643.7.1.2.1.2.1" }, + { "GOST2012-tc26-B", "1.2.643.7.1.2.1.2.2" }, + + { "secp256k1", "1.3.132.0.10" }, + + { NULL, NULL} + }; + + +typedef struct +{ + const char *desc; /* Description of the curve. */ + unsigned int nbits; /* Number of bits. */ + unsigned int fips:1; /* True if this is a FIPS140-2 approved curve. */ + + /* The model describing this curve. This is mainly used to select + the group equation. */ + enum gcry_mpi_ec_models model; + + /* The actual ECC dialect used. This is used for curve specific + optimizations and to select encodings etc. */ + enum ecc_dialects dialect; + + const char *p; /* The prime defining the field. */ + const char *a, *b; /* The coefficients. For Twisted Edwards + Curves b is used for d. For Montgomery + Curves (a,b) has ((A-2)/4,B^-1). */ + const char *n; /* The order of the base point. */ + const char *g_x, *g_y; /* Base point. 
*/ + const char *h; /* Cofactor. */ +} ecc_domain_parms_t; + + +/* This static table defines all available curves. */ +static const ecc_domain_parms_t domain_parms[] = + { + { + /* (-x^2 + y^2 = 1 + dx^2y^2) */ + "Ed25519", 256, 0, + MPI_EC_EDWARDS, ECC_DIALECT_ED25519, + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + "-0x01", + "-0x2DFC9311D490018C7338BF8688861767FF8FF5B2BEBE27548A14B235ECA6874A", + "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED", + "0x216936D3CD6E53FEC0A4E231FDD6DC5C692CC7609525A7B2C9562D608F25D51A", + "0x6666666666666666666666666666666666666666666666666666666666666658", + "0x08" + }, + { + /* (y^2 = x^3 + 486662*x^2 + x) */ + "Curve25519", 256, 0, + MPI_EC_MONTGOMERY, ECC_DIALECT_STANDARD, + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + "0x01DB41", + "0x01", + "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED", + "0x0000000000000000000000000000000000000000000000000000000000000009", + "0x20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9", + "0x08" + }, +#if 0 /* No real specs yet found. */ + { + /* x^2 + y^2 = 1 + 3617x^2y^2 mod 2^414 - 17 */ + "Curve3617", + "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEF", + MPI_EC_EDWARDS, 0, + "0x01", + "0x0e21", + "0x07FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEB3CC92414CF" + "706022B36F1C0338AD63CF181B0E71A5E106AF79", + "0x1A334905141443300218C0631C326E5FCD46369F44C03EC7F57FF35498A4AB4D" + "6D6BA111301A73FAA8537C64C4FD3812F3CBC595", + "0x22", + "0x08" + }, +#endif /*0*/ + { + "NIST P-192", 192, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffffffffffffffffffffffffffeffffffffffffffff", + "0xfffffffffffffffffffffffffffffffefffffffffffffffc", + "0x64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1", + "0xffffffffffffffffffffffff99def836146bc9b1b4d22831", + + "0x188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012", + "0x07192b95ffc8da78631011ed6b24cdd573f977a11e794811", + "0x01" + }, + { + "NIST P-224", 224, 1, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xffffffffffffffffffffffffffffffff000000000000000000000001", + "0xfffffffffffffffffffffffffffffffefffffffffffffffffffffffe", + "0xb4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", + "0xffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3d" , + + "0xb70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", + "0xbd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", + "0x01" + }, + { + "NIST P-256", 256, 1, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff", + "0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc", + "0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", + "0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551", + + "0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", + "0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", + "0x01" + }, + { + "NIST P-384", 384, 1, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe" + "ffffffff0000000000000000ffffffff", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe" + "ffffffff0000000000000000fffffffc", + "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875a" + "c656398d8a2ed19d2a85c8edd3ec2aef", + "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf" + "581a0db248b0a77aecec196accc52973", 
+ + "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a38" + "5502f25dbf55296c3a545e3872760ab7", + "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c0" + "0a60b1ce1d7e819d7a431d7c90ea0e5f", + "0x01" + }, + { + "NIST P-521", 521, 1, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc", + "0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef10" + "9e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", + "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "ffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409", + + "0x00c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d" + "3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", + "0x011839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e" + "662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", + "0x01" + }, + + { "brainpoolP160r1", 160, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xe95e4a5f737059dc60dfc7ad95b3d8139515620f", + "0x340e7be2a280eb74e2be61bada745d97e8f7c300", + "0x1e589a8595423412134faa2dbdec95c8d8675e58", + "0xe95e4a5f737059dc60df5991d45029409e60fc09", + "0xbed5af16ea3f6a4f62938c4631eb5af7bdbcdbc3", + "0x1667cb477a1a8ec338f94741669c976316da6321", + "0x01" + }, + + { "brainpoolP192r1", 192, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xc302f41d932a36cda7a3463093d18db78fce476de1a86297", + "0x6a91174076b1e0e19c39c031fe8685c1cae040e5c69a28ef", + "0x469a28ef7c28cca3dc721d044f4496bcca7ef4146fbf25c9", + "0xc302f41d932a36cda7a3462f9e9e916b5be8f1029ac4acc1", + "0xc0a0647eaab6a48753b033c56cb0f0900a2f5c4853375fd6", + "0x14b690866abd5bb88b5f4828c1490002e6773fa2fa299b8f", + "0x01" + }, + + { "brainpoolP224r1", 224, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xd7c134aa264366862a18302575d1d787b09f075797da89f57ec8c0ff", + "0x68a5e62ca9ce6c1c299803a6c1530b514e182ad8b0042a59cad29f43", + "0x2580f63ccfe44138870713b1a92369e33e2135d266dbb372386c400b", + "0xd7c134aa264366862a18302575d0fb98d116bc4b6ddebca3a5a7939f", + "0x0d9029ad2c7e5cf4340823b2a87dc68c9e4ce3174c1e6efdee12c07d", + "0x58aa56f772c0726f24c6b89e4ecdac24354b9e99caa3f6d3761402cd", + "0x01" + }, + + { "brainpoolP256r1", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xa9fb57dba1eea9bc3e660a909d838d726e3bf623d52620282013481d1f6e5377", + "0x7d5a0975fc2c3057eef67530417affe7fb8055c126dc5c6ce94a4b44f330b5d9", + "0x26dc5c6ce94a4b44f330b5d9bbd77cbf958416295cf7e1ce6bccdc18ff8c07b6", + "0xa9fb57dba1eea9bc3e660a909d838d718c397aa3b561a6f7901e0e82974856a7", + "0x8bd2aeb9cb7e57cb2c4b482ffc81b7afb9de27e1e3bd23c23a4453bd9ace3262", + "0x547ef835c3dac4fd97f8461a14611dc9c27745132ded8e545c1d54c72f046997", + "0x01" + }, + + { "brainpoolP320r1", 320, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xd35e472036bc4fb7e13c785ed201e065f98fcfa6f6f40def4f92b9ec7893ec28" + "fcd412b1f1b32e27", + "0x3ee30b568fbab0f883ccebd46d3f3bb8a2a73513f5eb79da66190eb085ffa9f4" + "92f375a97d860eb4", + "0x520883949dfdbc42d3ad198640688a6fe13f41349554b49acc31dccd88453981" + "6f5eb4ac8fb1f1a6", + "0xd35e472036bc4fb7e13c785ed201e065f98fcfa5b68f12a32d482ec7ee8658e9" + "8691555b44c59311", + "0x43bd7e9afb53d8b85289bcc48ee5bfe6f20137d10a087eb6e7871e2a10a599c7" + "10af8d0d39e20611", + 
"0x14fdd05545ec1cc8ab4093247f77275e0743ffed117182eaa9c77877aaac6ac7" + "d35245d1692e8ee1", + "0x01" + }, + + { "brainpoolP384r1", 384, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b412b1da197fb71123" + "acd3a729901d1a71874700133107ec53", + "0x7bc382c63d8c150c3c72080ace05afa0c2bea28e4fb22787139165efba91f90f" + "8aa5814a503ad4eb04a8c7dd22ce2826", + "0x04a8c7dd22ce28268b39b55416f0447c2fb77de107dcd2a62e880ea53eeb62d5" + "7cb4390295dbc9943ab78696fa504c11", + "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b31f166e6cac0425a7" + "cf3ab6af6b7fc3103b883202e9046565", + "0x1d1c64f068cf45ffa2a63a81b7c13f6b8847a3e77ef14fe3db7fcafe0cbd10e8" + "e826e03436d646aaef87b2e247d4af1e", + "0x8abe1d7520f9c2a45cb1eb8e95cfd55262b70b29feec5864e19c054ff9912928" + "0e4646217791811142820341263c5315", + "0x01" + }, + + { "brainpoolP512r1", 512, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330871" + "7d4d9b009bc66842aecda12ae6a380e62881ff2f2d82c68528aa6056583a48f3", + "0x7830a3318b603b89e2327145ac234cc594cbdd8d3df91610a83441caea9863bc" + "2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a72bf2c7b9e7c1ac4d77fc94ca", + "0x3df91610a83441caea9863bc2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a7" + "2bf2c7b9e7c1ac4d77fc94cadc083e67984050b75ebae5dd2809bd638016f723", + "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330870" + "553e5c414ca92619418661197fac10471db1d381085ddaddb58796829ca90069", + "0x81aee4bdd82ed9645a21322e9c4c6a9385ed9f70b5d916c1b43b62eef4d0098e" + "ff3b1f78e2d0d48d50d1687b93b97d5f7c6d5047406a5e688b352209bcb9f822", + "0x7dde385d566332ecc0eabfa9cf7822fdf209f70024a57b1aa000c55b881f8111" + "b2dcde494a5f485e5bca4bd88a2763aed1ca2b2fa8f0540678cd1e0f3ad80892", + "0x01" + }, + { + "GOST2001-test", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x8000000000000000000000000000000000000000000000000000000000000431", + "0x0000000000000000000000000000000000000000000000000000000000000007", + "0x5fbff498aa938ce739b8e022fbafef40563f6e6a3472fc2a514c0ce9dae23b7e", + "0x8000000000000000000000000000000150fe8a1892976154c59cfc193accf5b3", + + "0x0000000000000000000000000000000000000000000000000000000000000002", + "0x08e2a8a0e65147d4bd6316030e16d19c85c97f0a9ca267122b96abbcea7e8fc8", + "0x01" + }, + { + "GOST2001-CryptoPro-A", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd97", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd94", + "0x00000000000000000000000000000000000000000000000000000000000000a6", + "0xffffffffffffffffffffffffffffffff6c611070995ad10045841b09b761b893", + "0x0000000000000000000000000000000000000000000000000000000000000001", + "0x8d91e471e0989cda27df505a453f2b7635294f2ddf23e3b122acc99c9e9f1e14", + "0x01" + }, + { + "GOST2001-CryptoPro-B", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x8000000000000000000000000000000000000000000000000000000000000c99", + "0x8000000000000000000000000000000000000000000000000000000000000c96", + "0x3e1af419a269a5f866a7d3c25c3df80ae979259373ff2b182f49d4ce7e1bbc8b", + "0x800000000000000000000000000000015f700cfff1a624e5e497161bcc8a198f", + "0x0000000000000000000000000000000000000000000000000000000000000001", + "0x3fa8124359f96680b83d1c3eb2c070e5c545c9858d03ecfb744bf8d717717efc", + "0x01" + }, + { + "GOST2001-CryptoPro-C", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x9b9f605f5a858107ab1ec85e6b41c8aacf846e86789051d37998f7b9022d759b", + 
"0x9b9f605f5a858107ab1ec85e6b41c8aacf846e86789051d37998f7b9022d7598", + "0x000000000000000000000000000000000000000000000000000000000000805a", + "0x9b9f605f5a858107ab1ec85e6b41c8aa582ca3511eddfb74f02f3a6598980bb9", + "0x0000000000000000000000000000000000000000000000000000000000000000", + "0x41ece55743711a8c3cbf3783cd08c0ee4d4dc440d4641a8f366e550dfdb3bb67", + "0x01" + }, + { + "GOST2012-test", 511, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x4531acd1fe0023c7550d267b6b2fee80922b14b2ffb90f04d4eb7c09b5d2d15d" + "f1d852741af4704a0458047e80e4546d35b8336fac224dd81664bbf528be6373", + "0x0000000000000000000000000000000000000000000000000000000000000007", + "0x1cff0806a31116da29d8cfa54e57eb748bc5f377e49400fdd788b649eca1ac4" + "361834013b2ad7322480a89ca58e0cf74bc9e540c2add6897fad0a3084f302adc", + "0x4531acd1fe0023c7550d267b6b2fee80922b14b2ffb90f04d4eb7c09b5d2d15d" + "a82f2d7ecb1dbac719905c5eecc423f1d86e25edbe23c595d644aaf187e6e6df", + + "0x24d19cc64572ee30f396bf6ebbfd7a6c5213b3b3d7057cc825f91093a68cd762" + "fd60611262cd838dc6b60aa7eee804e28bc849977fac33b4b530f1b120248a9a", + "0x2bb312a43bd2ce6e0d020613c857acddcfbf061e91e5f2c3f32447c259f39b2" + "c83ab156d77f1496bf7eb3351e1ee4e43dc1a18b91b24640b6dbb92cb1add371e", + "0x01" + }, + { + "GOST2012-tc26-A", 512, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc7", + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc4", + "0xe8c2505dedfc86ddc1bd0b2b6667f1da34b82574761cb0e879bd081cfd0b6265" + "ee3cb090f30d27614cb4574010da90dd862ef9d4ebee4761503190785a71c760", + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "27e69532f48d89116ff22b8d4e0560609b4b38abfad2b85dcacdb1411f10b275", + "0x0000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000003", + "0x7503cfe87a836ae3a61b8816e25450e6ce5e1c93acf1abc1778064fdcbefa921" + "df1626be4fd036e93d75e6a50e3a41e98028fe5fc235f5b889a589cb5215f2a4", + "0x01" + }, + { + "GOST2012-tc26-B", 512, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x8000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000006f", + "0x8000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000006c", + "0x687d1b459dc841457e3e06cf6f5e2517b97c7d614af138bcbf85dc806c4b289f" + "3e965d2db1416d217f8b276fad1ab69c50f78bee1fa3106efb8ccbc7c5140116", + "0x8000000000000000000000000000000000000000000000000000000000000001" + "49a1ec142565a545acfdb77bd9d40cfa8b996712101bea0ec6346c54374f25bd", + "0x0000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000002", + "0x1a8f7eda389b094c2c071e3647a8940f3c123b697578c213be6dd9e6c8ec7335" + "dcb228fd1edf4a39152cbcaaf8c0398828041055f94ceeec7e21340780fe41bd", + "0x01" + }, + + { + "secp256k1", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F", + "0x0000000000000000000000000000000000000000000000000000000000000000", + "0x0000000000000000000000000000000000000000000000000000000000000007", + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", + "0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798", + 
"0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8", + "0x01" + }, + + { NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } + }; + + + + +/* Return a copy of POINT. */ +static gcry_mpi_point_t +point_copy (gcry_mpi_point_t point) +{ + gcry_mpi_point_t newpoint; + + if (point) + { + newpoint = mpi_point_new (0); + point_set (newpoint, point); + } + else + newpoint = NULL; + return newpoint; +} + + +/* Helper to scan a hex string. */ +static gcry_mpi_t +scanval (const char *string) +{ + gpg_err_code_t rc; + gcry_mpi_t val; + + rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL); + if (rc) + log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc)); + return val; +} + + +/* Return the index of the domain_parms table for a curve with NAME. + Return -1 if not found. */ +static int +find_domain_parms_idx (const char *name) +{ + int idx, aliasno; + + /* First check our native curves. */ + for (idx = 0; domain_parms[idx].desc; idx++) + if (!strcmp (name, domain_parms[idx].desc)) + return idx; + + /* If not found consult the alias table. */ + if (!domain_parms[idx].desc) + { + for (aliasno = 0; curve_aliases[aliasno].name; aliasno++) + if (!strcmp (name, curve_aliases[aliasno].other)) + break; + if (curve_aliases[aliasno].name) + { + for (idx = 0; domain_parms[idx].desc; idx++) + if (!strcmp (curve_aliases[aliasno].name, domain_parms[idx].desc)) + return idx; + } + } + + return -1; +} + + +/* Generate the crypto system setup. This function takes the NAME of + a curve or the desired number of bits and stores at R_CURVE the + parameters of the named curve or those of a suitable curve. If + R_NBITS is not NULL, the chosen number of bits is stored there. + NULL may be given for R_CURVE, if the value is not required and for + example only a quick test for availability is desired. Note that + the curve fields should be initialized to zero because fields which + are not NULL are skipped. */ +gpg_err_code_t +_gcry_ecc_fill_in_curve (unsigned int nbits, const char *name, + elliptic_curve_t *curve, unsigned int *r_nbits) +{ + int idx; + const char *resname = NULL; /* Set to a found curve name. */ + + if (name) + idx = find_domain_parms_idx (name); + else + { + for (idx = 0; domain_parms[idx].desc; idx++) + if (nbits == domain_parms[idx].nbits + && domain_parms[idx].model == MPI_EC_WEIERSTRASS) + break; + if (!domain_parms[idx].desc) + idx = -1; + } + if (idx < 0) + return GPG_ERR_UNKNOWN_CURVE; + + resname = domain_parms[idx].desc; + + /* In fips mode we only support NIST curves. Note that it is + possible to bypass this check by specifying the curve parameters + directly. 
*/ + if (fips_mode () && !domain_parms[idx].fips ) + return GPG_ERR_NOT_SUPPORTED; + + switch (domain_parms[idx].model) + { + case MPI_EC_WEIERSTRASS: + case MPI_EC_EDWARDS: + case MPI_EC_MONTGOMERY: + break; + default: + return GPG_ERR_BUG; + } + + + if (r_nbits) + *r_nbits = domain_parms[idx].nbits; + + if (curve) + { + curve->model = domain_parms[idx].model; + curve->dialect = domain_parms[idx].dialect; + if (!curve->p) + curve->p = scanval (domain_parms[idx].p); + if (!curve->a) + { + curve->a = scanval (domain_parms[idx].a); + if (curve->a->sign) + mpi_add (curve->a, curve->p, curve->a); + } + if (!curve->b) + { + curve->b = scanval (domain_parms[idx].b); + if (curve->b->sign) + mpi_add (curve->b, curve->p, curve->b); + } + if (!curve->n) + curve->n = scanval (domain_parms[idx].n); + if (!curve->h) + curve->h = scanval (domain_parms[idx].h); + if (!curve->G.x) + curve->G.x = scanval (domain_parms[idx].g_x); + if (!curve->G.y) + curve->G.y = scanval (domain_parms[idx].g_y); + if (!curve->G.z) + curve->G.z = mpi_alloc_set_ui (1); + if (!curve->name) + curve->name = resname; + } + + return 0; +} + + +/* Give the name of the curve NAME, store the curve parameters into P, + A, B, G, N, and H if they point to NULL value. Note that G is returned + in standard uncompressed format. Also update MODEL and DIALECT if + they are not NULL. */ +gpg_err_code_t +_gcry_ecc_update_curve_param (const char *name, + enum gcry_mpi_ec_models *model, + enum ecc_dialects *dialect, + gcry_mpi_t *p, gcry_mpi_t *a, gcry_mpi_t *b, + gcry_mpi_t *g, gcry_mpi_t *n, gcry_mpi_t *h) +{ + int idx; + + idx = find_domain_parms_idx (name); + if (idx < 0) + return GPG_ERR_UNKNOWN_CURVE; + + if (g) + { + char *buf; + size_t len; + + len = 4; + len += strlen (domain_parms[idx].g_x+2); + len += strlen (domain_parms[idx].g_y+2); + len++; + buf = xtrymalloc (len); + if (!buf) + return gpg_err_code_from_syserror (); + strcpy (stpcpy (stpcpy (buf, "0x04"), domain_parms[idx].g_x+2), + domain_parms[idx].g_y+2); + _gcry_mpi_release (*g); + *g = scanval (buf); + xfree (buf); + } + if (model) + *model = domain_parms[idx].model; + if (dialect) + *dialect = domain_parms[idx].dialect; + if (p) + { + _gcry_mpi_release (*p); + *p = scanval (domain_parms[idx].p); + } + if (a) + { + _gcry_mpi_release (*a); + *a = scanval (domain_parms[idx].a); + } + if (b) + { + _gcry_mpi_release (*b); + *b = scanval (domain_parms[idx].b); + } + if (n) + { + _gcry_mpi_release (*n); + *n = scanval (domain_parms[idx].n); + } + if (h) + { + _gcry_mpi_release (*h); + *h = scanval (domain_parms[idx].h); + } + return 0; +} + + +/* Return the name matching the parameters in PKEY. This works only + with curves described by the Weierstrass equation. */ +const char * +_gcry_ecc_get_curve (gcry_sexp_t keyparms, int iterator, unsigned int *r_nbits) +{ + gpg_err_code_t rc; + const char *result = NULL; + elliptic_curve_t E; + gcry_mpi_t mpi_g = NULL; + gcry_mpi_t tmp = NULL; + int idx; + + memset (&E, 0, sizeof E); + + if (r_nbits) + *r_nbits = 0; + + if (!keyparms) + { + idx = iterator; + if (idx >= 0 && idx < DIM (domain_parms)) + { + result = domain_parms[idx].desc; + if (r_nbits) + *r_nbits = domain_parms[idx].nbits; + } + return result; + } + + + /* + * Extract the curve parameters.. + */ + rc = gpg_err_code (sexp_extract_param (keyparms, NULL, "-pabgnh", + &E.p, &E.a, &E.b, &mpi_g, &E.n, &E.h, + NULL)); + if (rc == GPG_ERR_NO_OBJ) + { + /* This might be the second use case of checking whether a + specific curve given by name is supported. 
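That second use case looks like this from the public API (mirroring how the library's own test suite uses it): a key S-expression carrying only a curve name comes back with the canonical name, with aliases such as secp256r1 resolved through the alias table above.

#include <stdio.h>
#include <gcrypt.h>

int main (void)
{
  gcry_sexp_t keyparms;
  const char *name;
  unsigned int nbits;

  if (!gcry_check_version (NULL))
    return 1;
  if (gcry_sexp_build (&keyparms, NULL,
                       "(public-key (ecc (curve secp256r1)))"))
    return 1;
  name = gcry_pk_get_curve (keyparms, 0, &nbits);
  printf ("%s (%u bits)\n", name ? name : "unknown", nbits); /* NIST P-256 */
  gcry_sexp_release (keyparms);
  return 0;
}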
*/ + gcry_sexp_t l1; + char *name; + + l1 = sexp_find_token (keyparms, "curve", 5); + if (!l1) + goto leave; /* No curve name parameter. */ + + name = sexp_nth_string (l1, 1); + sexp_release (l1); + if (!name) + goto leave; /* Name missing or out of core. */ + + idx = find_domain_parms_idx (name); + xfree (name); + if (idx >= 0) /* Curve found. */ + { + result = domain_parms[idx].desc; + if (r_nbits) + *r_nbits = domain_parms[idx].nbits; + } + return result; + } + + if (rc) + goto leave; + + if (mpi_g) + { + _gcry_mpi_point_init (&E.G); + if (_gcry_ecc_os2ec (&E.G, mpi_g)) + goto leave; + } + + for (idx = 0; domain_parms[idx].desc; idx++) + { + mpi_free (tmp); + tmp = scanval (domain_parms[idx].p); + if (!mpi_cmp (tmp, E.p)) + { + mpi_free (tmp); + tmp = scanval (domain_parms[idx].a); + if (!mpi_cmp (tmp, E.a)) + { + mpi_free (tmp); + tmp = scanval (domain_parms[idx].b); + if (!mpi_cmp (tmp, E.b)) + { + mpi_free (tmp); + tmp = scanval (domain_parms[idx].n); + if (!mpi_cmp (tmp, E.n)) + { + mpi_free (tmp); + tmp = scanval (domain_parms[idx].h); + if (!mpi_cmp (tmp, E.h)) + { + mpi_free (tmp); + tmp = scanval (domain_parms[idx].g_x); + if (!mpi_cmp (tmp, E.G.x)) + { + mpi_free (tmp); + tmp = scanval (domain_parms[idx].g_y); + if (!mpi_cmp (tmp, E.G.y)) + { + result = domain_parms[idx].desc; + if (r_nbits) + *r_nbits = domain_parms[idx].nbits; + goto leave; + } + } + } + } + } + } + } + } + + leave: + _gcry_mpi_release (tmp); + _gcry_mpi_release (E.p); + _gcry_mpi_release (E.a); + _gcry_mpi_release (E.b); + _gcry_mpi_release (mpi_g); + _gcry_mpi_point_free_parts (&E.G); + _gcry_mpi_release (E.n); + _gcry_mpi_release (E.h); + return result; +} + + +/* Helper to extract an MPI from key parameters. */ +static gpg_err_code_t +mpi_from_keyparam (gcry_mpi_t *r_a, gcry_sexp_t keyparam, const char *name) +{ + gcry_err_code_t ec = 0; + gcry_sexp_t l1; + + l1 = sexp_find_token (keyparam, name, 0); + if (l1) + { + *r_a = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + if (!*r_a) + ec = GPG_ERR_INV_OBJ; + } + return ec; +} + +/* Helper to extract a point from key parameters. If no parameter + with NAME is found, the functions tries to find a non-encoded point + by appending ".x", ".y" and ".z" to NAME. ".z" is in this case + optional and defaults to 1. EC is the context which at this point + may not be fully initialized. 
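+
+ For example (illustrative), with NAME = "q" both forms are accepted:
+
+   (q #04<X><Y>#)           encoded point, decoded below via
+                            _gcry_ecc_os2ec or the Ed25519 decoder
+   (q.x #..#) (q.y #..#)    affine coordinates; "q.z" is optional
+                            and defaults to 1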
*/ +static gpg_err_code_t +point_from_keyparam (gcry_mpi_point_t *r_a, + gcry_sexp_t keyparam, const char *name, mpi_ec_t ec) +{ + gcry_err_code_t rc; + gcry_sexp_t l1; + gcry_mpi_point_t point; + + l1 = sexp_find_token (keyparam, name, 0); + if (l1) + { + gcry_mpi_t a; + + a = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_OPAQUE); + sexp_release (l1); + if (!a) + return GPG_ERR_INV_OBJ; + + point = mpi_point_new (0); + if (ec && ec->dialect == ECC_DIALECT_ED25519) + rc = _gcry_ecc_eddsa_decodepoint (a, ec, point, NULL, NULL); + else + rc = _gcry_ecc_os2ec (point, a); + mpi_free (a); + if (rc) + { + mpi_point_release (point); + return rc; + } + } + else + { + char *tmpname; + gcry_mpi_t x = NULL; + gcry_mpi_t y = NULL; + gcry_mpi_t z = NULL; + + tmpname = xtrymalloc (strlen (name) + 2 + 1); + if (!tmpname) + return gpg_err_code_from_syserror (); + strcpy (stpcpy (tmpname, name), ".x"); + rc = mpi_from_keyparam (&x, keyparam, tmpname); + if (rc) + { + xfree (tmpname); + return rc; + } + strcpy (stpcpy (tmpname, name), ".y"); + rc = mpi_from_keyparam (&y, keyparam, tmpname); + if (rc) + { + mpi_free (x); + xfree (tmpname); + return rc; + } + strcpy (stpcpy (tmpname, name), ".z"); + rc = mpi_from_keyparam (&z, keyparam, tmpname); + if (rc) + { + mpi_free (y); + mpi_free (x); + xfree (tmpname); + return rc; + } + if (!z) + z = mpi_set_ui (NULL, 1); + if (x && y) + point = mpi_point_snatch_set (NULL, x, y, z); + else + { + mpi_free (x); + mpi_free (y); + mpi_free (z); + point = NULL; + } + xfree (tmpname); + } + + if (point) + *r_a = point; + return 0; +} + + +/* This function creates a new context for elliptic curve operations. + Either KEYPARAM or CURVENAME must be given. If both are given and + KEYPARAM has no curve parameter, CURVENAME is used to add missing + parameters. On success 0 is returned and the new context stored at + R_CTX. On error NULL is stored at R_CTX and an error code is + returned. The context needs to be released using + gcry_ctx_release. */ +gpg_err_code_t +_gcry_mpi_ec_new (gcry_ctx_t *r_ctx, + gcry_sexp_t keyparam, const char *curvename) +{ + gpg_err_code_t errc; + gcry_ctx_t ctx = NULL; + enum gcry_mpi_ec_models model = MPI_EC_WEIERSTRASS; + enum ecc_dialects dialect = ECC_DIALECT_STANDARD; + gcry_mpi_t p = NULL; + gcry_mpi_t a = NULL; + gcry_mpi_t b = NULL; + gcry_mpi_point_t G = NULL; + gcry_mpi_t n = NULL; + gcry_mpi_t h = NULL; + gcry_mpi_point_t Q = NULL; + gcry_mpi_t d = NULL; + int flags = 0; + gcry_sexp_t l1; + + *r_ctx = NULL; + + if (keyparam) + { + /* Parse an optional flags list. */ + l1 = sexp_find_token (keyparam, "flags", 0); + if (l1) + { + errc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + sexp_release (l1); + l1 = NULL; + if (errc) + goto leave; + } + + /* Check whether a curve name was given. */ + l1 = sexp_find_token (keyparam, "curve", 5); + + /* If we don't have a curve name or if override parameters have + explicitly been requested, parse them. */ + if (!l1 || (flags & PUBKEY_FLAG_PARAM)) + { + errc = mpi_from_keyparam (&p, keyparam, "p"); + if (errc) + goto leave; + errc = mpi_from_keyparam (&a, keyparam, "a"); + if (errc) + goto leave; + errc = mpi_from_keyparam (&b, keyparam, "b"); + if (errc) + goto leave; + errc = point_from_keyparam (&G, keyparam, "g", NULL); + if (errc) + goto leave; + errc = mpi_from_keyparam (&n, keyparam, "n"); + if (errc) + goto leave; + errc = mpi_from_keyparam (&h, keyparam, "h"); + if (errc) + goto leave; + } + } + else + l1 = NULL; /* No curvename. 
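+ In this case the caller must have supplied
+ CURVENAME instead, e.g. (illustrative)
+
+   rc = _gcry_mpi_ec_new (&ctx, NULL, "NIST P-256");
+
+ which is then handled by the curvename branch below.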
*/ + + /* Check whether a curve parameter is available and use that to fill + in missing values. If no curve parameter is available try an + optional provided curvename. If only the curvename has been + given use that one. */ + if (l1 || curvename) + { + char *name; + elliptic_curve_t *E; + + if (l1) + { + name = sexp_nth_string (l1, 1); + sexp_release (l1); + if (!name) + { + errc = GPG_ERR_INV_OBJ; /* Name missing or out of core. */ + goto leave; + } + } + else + name = NULL; + + E = xtrycalloc (1, sizeof *E); + if (!E) + { + errc = gpg_err_code_from_syserror (); + xfree (name); + goto leave; + } + + errc = _gcry_ecc_fill_in_curve (0, name? name : curvename, E, NULL); + xfree (name); + if (errc) + { + xfree (E); + goto leave; + } + + model = E->model; + dialect = E->dialect; + + if (!p) + { + p = E->p; + E->p = NULL; + } + if (!a) + { + a = E->a; + E->a = NULL; + } + if (!b) + { + b = E->b; + E->b = NULL; + } + if (!G) + { + G = mpi_point_snatch_set (NULL, E->G.x, E->G.y, E->G.z); + E->G.x = NULL; + E->G.y = NULL; + E->G.z = NULL; + } + if (!n) + { + n = E->n; + E->n = NULL; + } + if (!h) + { + h = E->h; + E->h = NULL; + } + _gcry_ecc_curve_free (E); + xfree (E); + } + + + errc = _gcry_mpi_ec_p_new (&ctx, model, dialect, flags, p, a, b); + if (!errc) + { + mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC); + + if (b) + { + mpi_free (ec->b); + ec->b = b; + b = NULL; + } + if (G) + { + ec->G = G; + G = NULL; + } + if (n) + { + ec->n = n; + n = NULL; + } + if (h) + { + ec->h = h; + h = NULL; + } + + /* Now that we know the curve name we can look for the public key + Q. point_from_keyparam needs to know the curve parameters so + that it is able to use the correct decompression. Parsing + the private key D could have been done earlier but it is less + surprising if we do it here as well. */ + if (keyparam) + { + errc = point_from_keyparam (&Q, keyparam, "q", ec); + if (errc) + goto leave; + errc = mpi_from_keyparam (&d, keyparam, "d"); + if (errc) + goto leave; + } + + if (Q) + { + ec->Q = Q; + Q = NULL; + } + if (d) + { + ec->d = d; + d = NULL; + } + + *r_ctx = ctx; + ctx = NULL; + } + + leave: + _gcry_ctx_release (ctx); + mpi_free (p); + mpi_free (a); + mpi_free (b); + _gcry_mpi_point_release (G); + mpi_free (n); + mpi_free (h); + _gcry_mpi_point_release (Q); + mpi_free (d); + return errc; +} + + +/* Return the parameters of the curve NAME as an S-expression. */ +gcry_sexp_t +_gcry_ecc_get_param_sexp (const char *name) +{ + unsigned int nbits; + elliptic_curve_t E; + mpi_ec_t ctx; + gcry_mpi_t g_x, g_y; + gcry_mpi_t pkey[7]; + gcry_sexp_t result; + int i; + + memset (&E, 0, sizeof E); + if (_gcry_ecc_fill_in_curve (0, name, &E, &nbits)) + return NULL; + + g_x = mpi_new (0); + g_y = mpi_new (0); + ctx = _gcry_mpi_ec_p_internal_new (MPI_EC_WEIERSTRASS, + ECC_DIALECT_STANDARD, + 0, + E.p, E.a, NULL); + if (_gcry_mpi_ec_get_affine (g_x, g_y, &E.G, ctx)) + log_fatal ("ecc get param: Failed to get affine coordinates\n"); + _gcry_mpi_ec_free (ctx); + _gcry_mpi_point_free_parts (&E.G); + + pkey[0] = E.p; + pkey[1] = E.a; + pkey[2] = E.b; + pkey[3] = _gcry_ecc_ec2os (g_x, g_y, E.p); + pkey[4] = E.n; + pkey[5] = E.h; + pkey[6] = NULL; + + mpi_free (g_x); + mpi_free (g_y); + + if (sexp_build (&result, NULL, + "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%m)))", + pkey[0], pkey[1], pkey[2], pkey[3], pkey[4], pkey[5])) + result = NULL; + + for (i=0; pkey[i]; i++) + _gcry_mpi_release (pkey[i]); + + return result; +} + + +/* Return an MPI (or opaque MPI) described by NAME and the context EC. 
+ If COPY is true a copy is returned, if not a const MPI may be + returned. In any case mpi_free must be used. */ +gcry_mpi_t +_gcry_ecc_get_mpi (const char *name, mpi_ec_t ec, int copy) +{ + if (!*name) + return NULL; + + if (!strcmp (name, "p") && ec->p) + return mpi_is_const (ec->p) && !copy? ec->p : mpi_copy (ec->p); + if (!strcmp (name, "a") && ec->a) + return mpi_is_const (ec->a) && !copy? ec->a : mpi_copy (ec->a); + if (!strcmp (name, "b") && ec->b) + return mpi_is_const (ec->b) && !copy? ec->b : mpi_copy (ec->b); + if (!strcmp (name, "n") && ec->n) + return mpi_is_const (ec->n) && !copy? ec->n : mpi_copy (ec->n); + if (!strcmp (name, "h") && ec->h) + return mpi_is_const (ec->h) && !copy? ec->h : mpi_copy (ec->h); + if (!strcmp (name, "d") && ec->d) + return mpi_is_const (ec->d) && !copy? ec->d : mpi_copy (ec->d); + + /* Return a requested point coordinate. */ + if (!strcmp (name, "g.x") && ec->G && ec->G->x) + return mpi_is_const (ec->G->x) && !copy? ec->G->x : mpi_copy (ec->G->x); + if (!strcmp (name, "g.y") && ec->G && ec->G->y) + return mpi_is_const (ec->G->y) && !copy? ec->G->y : mpi_copy (ec->G->y); + if (!strcmp (name, "q.x") && ec->Q && ec->Q->x) + return mpi_is_const (ec->Q->x) && !copy? ec->Q->x : mpi_copy (ec->Q->x); + if (!strcmp (name, "q.y") && ec->Q && ec->Q->y) + return mpi_is_const (ec->Q->y) && !copy? ec->Q->y : mpi_copy (ec->Q->y); + + /* If the base point has been requested, return it in standard + encoding. */ + if (!strcmp (name, "g") && ec->G) + return _gcry_mpi_ec_ec2os (ec->G, ec); + + /* If the public key has been requested, return it by default in + standard uncompressed encoding or if requested in other + encodings. */ + if (*name == 'q' && (!name[1] || name[1] == '@')) + { + /* If only the private key is given, compute the public key. */ + if (!ec->Q) + ec->Q = _gcry_ecc_compute_public (NULL, ec, NULL, NULL); + + if (!ec->Q) + return NULL; + + if (name[1] != '@') + return _gcry_mpi_ec_ec2os (ec->Q, ec); + + if (!strcmp (name+2, "eddsa") && ec->model == MPI_EC_EDWARDS) + { + unsigned char *encpk; + unsigned int encpklen; + + if (!_gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0, + &encpk, &encpklen)) + return mpi_set_opaque (NULL, encpk, encpklen*8); + } + } + + return NULL; +} + + +/* Return a point described by NAME and the context EC. */ +gcry_mpi_point_t +_gcry_ecc_get_point (const char *name, mpi_ec_t ec) +{ + if (!strcmp (name, "g") && ec->G) + return point_copy (ec->G); + if (!strcmp (name, "q")) + { + /* If only the private key is given, compute the public key. */ + if (!ec->Q) + ec->Q = _gcry_ecc_compute_public (NULL, ec, NULL, NULL); + + if (ec->Q) + return point_copy (ec->Q); + } + + return NULL; +} + + +/* Store the MPI NEWVALUE into the context EC under NAME. 
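+ Valid names are "p", "a", "b", "n", "h", "q" and "d" as handled
+ below; e.g. (illustrative) setting "d" stores a new secret key and
+ drops a cached public key Q, which might no longer match.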
*/ +gpg_err_code_t +_gcry_ecc_set_mpi (const char *name, gcry_mpi_t newvalue, mpi_ec_t ec) +{ + gpg_err_code_t rc = 0; + + if (!*name) + ; + else if (!strcmp (name, "p")) + { + mpi_free (ec->p); + ec->p = mpi_copy (newvalue); + _gcry_mpi_ec_get_reset (ec); + } + else if (!strcmp (name, "a")) + { + mpi_free (ec->a); + ec->a = mpi_copy (newvalue); + _gcry_mpi_ec_get_reset (ec); + } + else if (!strcmp (name, "b")) + { + mpi_free (ec->b); + ec->b = mpi_copy (newvalue); + } + else if (!strcmp (name, "n")) + { + mpi_free (ec->n); + ec->n = mpi_copy (newvalue); + } + else if (!strcmp (name, "h")) + { + mpi_free (ec->h); + ec->h = mpi_copy (newvalue); + } + else if (*name == 'q' && (!name[1] || name[1] == '@')) + { + if (newvalue) + { + if (!ec->Q) + ec->Q = mpi_point_new (0); + if (ec->dialect == ECC_DIALECT_ED25519) + rc = _gcry_ecc_eddsa_decodepoint (newvalue, ec, ec->Q, NULL, NULL); + else + rc = _gcry_ecc_os2ec (ec->Q, newvalue); + } + if (rc || !newvalue) + { + _gcry_mpi_point_release (ec->Q); + ec->Q = NULL; + } + /* Note: We assume that Q matches d and thus do not reset d. */ + } + else if (!strcmp (name, "d")) + { + mpi_free (ec->d); + ec->d = mpi_copy (newvalue); + if (ec->d) + { + /* We need to reset the public key because it may not + anymore match. */ + _gcry_mpi_point_release (ec->Q); + ec->Q = NULL; + } + } + else + rc = GPG_ERR_UNKNOWN_NAME; + + return rc; +} + + +/* Store the point NEWVALUE into the context EC under NAME. */ +gpg_err_code_t +_gcry_ecc_set_point (const char *name, gcry_mpi_point_t newvalue, mpi_ec_t ec) +{ + if (!strcmp (name, "g")) + { + _gcry_mpi_point_release (ec->G); + ec->G = point_copy (newvalue); + } + else if (!strcmp (name, "q")) + { + _gcry_mpi_point_release (ec->Q); + ec->Q = point_copy (newvalue); + } + else + return GPG_ERR_UNKNOWN_NAME; + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/ecc-ecdsa.c b/libotr/libgcrypt-1.8.7/cipher/ecc-ecdsa.c new file mode 100644 index 0000000..56846f4 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/ecc-ecdsa.c @@ -0,0 +1,255 @@ +/* ecc-ecdsa.c - Elliptic Curve ECDSA signatures + * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "pubkey-internal.h" +#include "ecc-common.h" + + +/* Compute an ECDSA signature. + * Return the signature struct (r,s) from the message hash. The caller + * must have allocated R and S. 
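+ *
+ * Sketch of the computation below (illustrative summary):
+ *
+ *   k = random nonce (or RFC 6979 derived) in [1, n-1]
+ *   r = x([k]G) mod n
+ *   s = k^(-1) * (hash + d*r) mod n
+ *
+ * The terms involving the secret D are additionally blinded with a
+ * random B, i.e. s = B^(-1) * k^(-1) * (B*hash + B*d*r) mod n,
+ * which cancels out and leaves the result unchanged.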
+ */ +gpg_err_code_t +_gcry_ecc_ecdsa_sign (gcry_mpi_t input, ECC_secret_key *skey, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo) +{ + gpg_err_code_t rc = 0; + int extraloops = 0; + gcry_mpi_t k, dr, sum, k_1, x; + mpi_point_struct I; + gcry_mpi_t hash; + const void *abuf; + unsigned int abits, qbits; + mpi_ec_t ctx; + gcry_mpi_t b; /* Random number needed for blinding. */ + gcry_mpi_t bi; /* multiplicative inverse of B. */ + + if (DBG_CIPHER) + log_mpidump ("ecdsa sign hash ", input ); + + qbits = mpi_get_nbits (skey->E.n); + + /* Convert the INPUT into an MPI if needed. */ + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; + + b = mpi_snew (qbits); + bi = mpi_snew (qbits); + do + { + _gcry_mpi_randomize (b, qbits, GCRY_WEAK_RANDOM); + mpi_mod (b, b, skey->E.n); + } + while (!mpi_invm (bi, b, skey->E.n)); + + k = NULL; + dr = mpi_alloc (0); + sum = mpi_alloc (0); + k_1 = mpi_alloc (0); + x = mpi_alloc (0); + point_init (&I); + + ctx = _gcry_mpi_ec_p_internal_new (skey->E.model, skey->E.dialect, 0, + skey->E.p, skey->E.a, skey->E.b); + + /* Two loops to avoid R or S are zero. This is more of a joke than + a real demand because the probability of them being zero is less + than any hardware failure. Some specs however require it. */ + do + { + do + { + mpi_free (k); + k = NULL; + if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo) + { + /* Use Pornin's method for deterministic DSA. If this + flag is set, it is expected that HASH is an opaque + MPI with the to be signed hash. That hash is also + used as h1 from 3.2.a. */ + if (!mpi_is_opaque (input)) + { + rc = GPG_ERR_CONFLICT; + goto leave; + } + + abuf = mpi_get_opaque (input, &abits); + rc = _gcry_dsa_gen_rfc6979_k (&k, skey->E.n, skey->d, + abuf, (abits+7)/8, + hashalgo, extraloops); + if (rc) + goto leave; + extraloops++; + } + else + k = _gcry_dsa_gen_k (skey->E.n, GCRY_STRONG_RANDOM); + + mpi_invm (k_1, k, skey->E.n); /* k_1 = k^(-1) mod n */ + + _gcry_dsa_modify_k (k, skey->E.n, qbits); + + _gcry_mpi_ec_mul_point (&I, k, &skey->E.G, ctx); + if (_gcry_mpi_ec_get_affine (x, NULL, &I, ctx)) + { + if (DBG_CIPHER) + log_debug ("ecc sign: Failed to get affine coordinates\n"); + rc = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + mpi_mod (r, x, skey->E.n); /* r = x mod n */ + } + while (!mpi_cmp_ui (r, 0)); + + /* Computation of dr, sum, and s are blinded with b. */ + mpi_mulm (dr, b, skey->d, skey->E.n); + mpi_mulm (dr, dr, r, skey->E.n); /* dr = d*r mod n */ + mpi_mulm (sum, b, hash, skey->E.n); + mpi_addm (sum, sum, dr, skey->E.n); /* sum = hash + (d*r) mod n */ + mpi_mulm (s, k_1, sum, skey->E.n); /* s = k^(-1)*(hash+(d*r)) mod n */ + /* Undo blinding by b^-1 */ + mpi_mulm (s, bi, s, skey->E.n); + } + while (!mpi_cmp_ui (s, 0)); + + if (DBG_CIPHER) + { + log_mpidump ("ecdsa sign result r ", r); + log_mpidump ("ecdsa sign result s ", s); + } + + leave: + mpi_free (b); + mpi_free (bi); + _gcry_mpi_ec_free (ctx); + point_free (&I); + mpi_free (x); + mpi_free (k_1); + mpi_free (sum); + mpi_free (dr); + mpi_free (k); + + if (hash != input) + mpi_free (hash); + + return rc; +} + + +/* Verify an ECDSA signature. + * Check if R and S verifies INPUT. + */ +gpg_err_code_t +_gcry_ecc_ecdsa_verify (gcry_mpi_t input, ECC_public_key *pkey, + gcry_mpi_t r, gcry_mpi_t s) +{ + gpg_err_code_t err = 0; + gcry_mpi_t hash, h, h1, h2, x; + mpi_point_struct Q, Q1, Q2; + mpi_ec_t ctx; + unsigned int nbits; + + if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, pkey->E.n) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. 
*/ + if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, pkey->E.n) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed. */ + + nbits = mpi_get_nbits (pkey->E.n); + err = _gcry_dsa_normalize_hash (input, &hash, nbits); + if (err) + return err; + + h = mpi_alloc (0); + h1 = mpi_alloc (0); + h2 = mpi_alloc (0); + x = mpi_alloc (0); + point_init (&Q); + point_init (&Q1); + point_init (&Q2); + + ctx = _gcry_mpi_ec_p_internal_new (pkey->E.model, pkey->E.dialect, 0, + pkey->E.p, pkey->E.a, pkey->E.b); + + /* h = s^(-1) (mod n) */ + mpi_invm (h, s, pkey->E.n); + /* h1 = hash * s^(-1) (mod n) */ + mpi_mulm (h1, hash, h, pkey->E.n); + /* Q1 = [ hash * s^(-1) ]G */ + _gcry_mpi_ec_mul_point (&Q1, h1, &pkey->E.G, ctx); + /* h2 = r * s^(-1) (mod n) */ + mpi_mulm (h2, r, h, pkey->E.n); + /* Q2 = [ r * s^(-1) ]Q */ + _gcry_mpi_ec_mul_point (&Q2, h2, &pkey->Q, ctx); + /* Q = ([hash * s^(-1)]G) + ([r * s^(-1)]Q) */ + _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ctx); + + if (!mpi_cmp_ui (Q.z, 0)) + { + if (DBG_CIPHER) + log_debug ("ecc verify: Rejected\n"); + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + if (_gcry_mpi_ec_get_affine (x, NULL, &Q, ctx)) + { + if (DBG_CIPHER) + log_debug ("ecc verify: Failed to get affine coordinates\n"); + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + mpi_mod (x, x, pkey->E.n); /* x = x mod E_n */ + if (mpi_cmp (x, r)) /* x != r */ + { + if (DBG_CIPHER) + { + log_mpidump (" x", x); + log_mpidump (" r", r); + log_mpidump (" s", s); + } + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + leave: + _gcry_mpi_ec_free (ctx); + point_free (&Q2); + point_free (&Q1); + point_free (&Q); + mpi_free (x); + mpi_free (h2); + mpi_free (h1); + mpi_free (h); + if (hash != input) + mpi_free (hash); + + return err; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/ecc-eddsa.c b/libotr/libgcrypt-1.8.7/cipher/ecc-eddsa.c new file mode 100644 index 0000000..89b708a --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/ecc-eddsa.c @@ -0,0 +1,864 @@ +/* ecc-eddsa.c - Elliptic Curve EdDSA signatures + * Copyright (C) 2013, 2014 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "ecc-common.h" + + + +static void +reverse_buffer (unsigned char *buffer, unsigned int length) +{ + unsigned int tmp, i; + + for (i=0; i < length/2; i++) + { + tmp = buffer[i]; + buffer[i] = buffer[length-1-i]; + buffer[length-1-i] = tmp; + } +} + + +/* Helper to scan a hex string. 
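+ E.g. (illustrative) scanval ("1F") yields an MPI of value 31;
+ malformed input aborts via log_fatal, which is acceptable because
+ the function is only applied to compile-time constants.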
*/ +static gcry_mpi_t +scanval (const char *string) +{ + gpg_err_code_t rc; + gcry_mpi_t val; + + rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL); + if (rc) + log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc)); + return val; +} + + + +/* Encode MPI using the EdDSA scheme. MINLEN specifies the required + length of the buffer in bytes. On success 0 is returned an a + malloced buffer with the encoded point is stored at R_BUFFER; the + length of this buffer is stored at R_BUFLEN. */ +static gpg_err_code_t +eddsa_encodempi (gcry_mpi_t mpi, unsigned int minlen, + unsigned char **r_buffer, unsigned int *r_buflen) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + + rawmpi = _gcry_mpi_get_buffer (mpi, minlen, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + + *r_buffer = rawmpi; + *r_buflen = rawmpilen; + return 0; +} + + +/* Encode (X,Y) using the EdDSA scheme. MINLEN is the required length + in bytes for the result. If WITH_PREFIX is set the returned buffer + is prefixed with a 0x40 byte. On success 0 is returned and a + malloced buffer with the encoded point is stored at R_BUFFER; the + length of this buffer is stored at R_BUFLEN. */ +static gpg_err_code_t +eddsa_encode_x_y (gcry_mpi_t x, gcry_mpi_t y, unsigned int minlen, + int with_prefix, + unsigned char **r_buffer, unsigned int *r_buflen) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + int off = with_prefix? 1:0; + + rawmpi = _gcry_mpi_get_buffer_extra (y, minlen, off?-1:0, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + if (mpi_test_bit (x, 0) && rawmpilen) + rawmpi[off + rawmpilen - 1] |= 0x80; /* Set sign bit. */ + if (off) + rawmpi[0] = 0x40; + + *r_buffer = rawmpi; + *r_buflen = rawmpilen + off; + return 0; +} + +/* Encode POINT using the EdDSA scheme. X and Y are either scratch + variables supplied by the caller or NULL. CTX is the usual + context. If WITH_PREFIX is set the returned buffer is prefixed + with a 0x40 byte. On success 0 is returned and a malloced buffer + with the encoded point is stored at R_BUFFER; the length of this + buffer is stored at R_BUFLEN. */ +gpg_err_code_t +_gcry_ecc_eddsa_encodepoint (mpi_point_t point, mpi_ec_t ec, + gcry_mpi_t x_in, gcry_mpi_t y_in, + int with_prefix, + unsigned char **r_buffer, unsigned int *r_buflen) +{ + gpg_err_code_t rc; + gcry_mpi_t x, y; + + x = x_in? x_in : mpi_new (0); + y = y_in? y_in : mpi_new (0); + + if (_gcry_mpi_ec_get_affine (x, y, point, ec)) + { + log_error ("eddsa_encodepoint: Failed to get affine coordinates\n"); + rc = GPG_ERR_INTERNAL; + } + else + rc = eddsa_encode_x_y (x, y, ec->nbits/8, with_prefix, r_buffer, r_buflen); + + if (!x_in) + mpi_free (x); + if (!y_in) + mpi_free (y); + return rc; +} + + +/* Make sure that the opaque MPI VALUE is in compact EdDSA format. + This function updates MPI if needed. */ +gpg_err_code_t +_gcry_ecc_eddsa_ensure_compact (gcry_mpi_t value, unsigned int nbits) +{ + gpg_err_code_t rc; + const unsigned char *buf; + unsigned int rawmpilen; + gcry_mpi_t x, y; + unsigned char *enc; + unsigned int enclen; + + if (!mpi_is_opaque (value)) + return GPG_ERR_INV_OBJ; + buf = mpi_get_opaque (value, &rawmpilen); + if (!buf) + return GPG_ERR_INV_OBJ; + rawmpilen = (rawmpilen + 7)/8; + + if (rawmpilen > 1 && (rawmpilen%2)) + { + if (buf[0] == 0x04) + { + /* Buffer is in SEC1 uncompressed format. Extract y and + compress. 
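+
+ For a 256-bit curve the input layout is (illustrative)
+
+   04 || X (32 bytes) || Y (32 bytes)
+
+ which eddsa_encode_x_y below re-encodes as the 32-byte Y with the
+ parity of X folded into the top bit of the final byte.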
*/ + rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_STD, + buf+1, (rawmpilen-1)/2, NULL); + if (rc) + return rc; + rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_STD, + buf+1+(rawmpilen-1)/2, (rawmpilen-1)/2, NULL); + if (rc) + { + mpi_free (x); + return rc; + } + + rc = eddsa_encode_x_y (x, y, nbits/8, 0, &enc, &enclen); + mpi_free (x); + mpi_free (y); + if (rc) + return rc; + + mpi_set_opaque (value, enc, 8*enclen); + } + else if (buf[0] == 0x40) + { + /* Buffer is compressed but with our SEC1 alike compression + indicator. Remove that byte. FIXME: We should write and + use a function to manipulate an opaque MPI in place. */ + if (!_gcry_mpi_set_opaque_copy (value, buf + 1, (rawmpilen - 1)*8)) + return gpg_err_code_from_syserror (); + } + } + + return 0; +} + + +/* Recover X from Y and SIGN (which actually is a parity bit). */ +gpg_err_code_t +_gcry_ecc_eddsa_recover_x (gcry_mpi_t x, gcry_mpi_t y, int sign, mpi_ec_t ec) +{ + gpg_err_code_t rc = 0; + gcry_mpi_t u, v, v3, t; + static gcry_mpi_t p58, seven; + + if (ec->dialect != ECC_DIALECT_ED25519) + return GPG_ERR_NOT_IMPLEMENTED; + + if (!p58) + p58 = scanval ("0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD"); + if (!seven) + seven = mpi_set_ui (NULL, 7); + + u = mpi_new (0); + v = mpi_new (0); + v3 = mpi_new (0); + t = mpi_new (0); + + /* Compute u and v */ + /* u = y^2 */ + mpi_mulm (u, y, y, ec->p); + /* v = b*y^2 */ + mpi_mulm (v, ec->b, u, ec->p); + /* u = y^2-1 */ + mpi_sub_ui (u, u, 1); + /* v = b*y^2+1 */ + mpi_add_ui (v, v, 1); + + /* Compute sqrt(u/v) */ + /* v3 = v^3 */ + mpi_powm (v3, v, mpi_const (MPI_C_THREE), ec->p); + /* t = v3 * v3 * u * v = u * v^7 */ + mpi_powm (t, v, seven, ec->p); + mpi_mulm (t, t, u, ec->p); + /* t = t^((p-5)/8) = (u * v^7)^((p-5)/8) */ + mpi_powm (t, t, p58, ec->p); + /* x = t * u * v^3 = (u * v^3) * (u * v^7)^((p-5)/8) */ + mpi_mulm (t, t, u, ec->p); + mpi_mulm (x, t, v3, ec->p); + + /* Adjust if needed. */ + /* t = v * x^2 */ + mpi_mulm (t, x, x, ec->p); + mpi_mulm (t, t, v, ec->p); + /* -t == u ? x = x * sqrt(-1) */ + mpi_sub (t, ec->p, t); + if (!mpi_cmp (t, u)) + { + static gcry_mpi_t m1; /* Fixme: this is not thread-safe. */ + if (!m1) + m1 = scanval ("2B8324804FC1DF0B2B4D00993DFBD7A7" + "2F431806AD2FE478C4EE1B274A0EA0B0"); + mpi_mulm (x, x, m1, ec->p); + /* t = v * x^2 */ + mpi_mulm (t, x, x, ec->p); + mpi_mulm (t, t, v, ec->p); + /* -t == u ? x = x * sqrt(-1) */ + mpi_sub (t, ec->p, t); + if (!mpi_cmp (t, u)) + rc = GPG_ERR_INV_OBJ; + } + + /* Choose the desired square root according to parity */ + if (mpi_test_bit (x, 0) != !!sign) + mpi_sub (x, ec->p, x); + + mpi_free (t); + mpi_free (v3); + mpi_free (v); + mpi_free (u); + + return rc; +} + + +/* Decode the EdDSA style encoded PK and set it into RESULT. CTX is + the usual curve context. If R_ENCPK is not NULL, the encoded PK is + stored at that address; this is a new copy to be released by the + caller. In contrast to the supplied PK, this is not an MPI and + thus guaranteed to be properly padded. R_ENCPKLEN receives the + length of that encoded key. */ +gpg_err_code_t +_gcry_ecc_eddsa_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result, + unsigned char **r_encpk, unsigned int *r_encpklen) +{ + gpg_err_code_t rc; + unsigned char *rawmpi; + unsigned int rawmpilen; + int sign; + + if (mpi_is_opaque (pk)) + { + const unsigned char *buf; + + buf = mpi_get_opaque (pk, &rawmpilen); + if (!buf) + return GPG_ERR_INV_OBJ; + rawmpilen = (rawmpilen + 7)/8; + + /* Handle compression prefixes. 
The size of the buffer will be + odd in this case. */ + if (rawmpilen > 1 && (rawmpilen%2)) + { + /* First check whether the public key has been given in + standard uncompressed format (SEC1). No need to recover + x in this case. */ + if (buf[0] == 0x04) + { + gcry_mpi_t x, y; + + rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_STD, + buf+1, (rawmpilen-1)/2, NULL); + if (rc) + return rc; + rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_STD, + buf+1+(rawmpilen-1)/2, (rawmpilen-1)/2,NULL); + if (rc) + { + mpi_free (x); + return rc; + } + + if (r_encpk) + { + rc = eddsa_encode_x_y (x, y, ctx->nbits/8, 0, + r_encpk, r_encpklen); + if (rc) + { + mpi_free (x); + mpi_free (y); + return rc; + } + } + mpi_snatch (result->x, x); + mpi_snatch (result->y, y); + mpi_set_ui (result->z, 1); + return 0; + } + + /* Check whether the public key has been prefixed with a 0x40 + byte to explicitly indicate compressed format using a SEC1 + alike prefix byte. This is a Libgcrypt extension. */ + if (buf[0] == 0x40) + { + rawmpilen--; + buf++; + } + } + + /* EdDSA compressed point. */ + rawmpi = xtrymalloc (rawmpilen? rawmpilen:1); + if (!rawmpi) + return gpg_err_code_from_syserror (); + memcpy (rawmpi, buf, rawmpilen); + reverse_buffer (rawmpi, rawmpilen); + } + else + { + /* Note: Without using an opaque MPI it is not reliable possible + to find out whether the public key has been given in + uncompressed format. Thus we expect native EdDSA format. */ + rawmpi = _gcry_mpi_get_buffer (pk, ctx->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + } + + if (rawmpilen) + { + sign = !!(rawmpi[0] & 0x80); + rawmpi[0] &= 0x7f; + } + else + sign = 0; + _gcry_mpi_set_buffer (result->y, rawmpi, rawmpilen, 0); + if (r_encpk) + { + /* Revert to little endian. */ + if (sign && rawmpilen) + rawmpi[0] |= 0x80; + reverse_buffer (rawmpi, rawmpilen); + *r_encpk = rawmpi; + if (r_encpklen) + *r_encpklen = rawmpilen; + } + else + xfree (rawmpi); + + rc = _gcry_ecc_eddsa_recover_x (result->x, result->y, sign, ctx); + mpi_set_ui (result->z, 1); + + return rc; +} + + +/* Compute the A value as used by EdDSA. The caller needs to provide + the context EC and the actual secret D as an MPI. The function + returns a newly allocated 64 byte buffer at r_digest; the first 32 + bytes represent the A value. NULL is returned on error and NULL + stored at R_DIGEST. */ +gpg_err_code_t +_gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest, + gcry_mpi_t d, mpi_ec_t ec) +{ + gpg_err_code_t rc; + unsigned char *rawmpi = NULL; + unsigned int rawmpilen; + unsigned char *digest; + gcry_buffer_t hvec[2]; + int hashalgo, b; + + *r_digest = NULL; + + hashalgo = GCRY_MD_SHA512; + if (hashalgo != GCRY_MD_SHA512) + return GPG_ERR_DIGEST_ALGO; + + b = (ec->nbits+7)/8; + if (b != 256/8) + return GPG_ERR_INTERNAL; /* We only support 256 bit. */ + + /* Note that we clear DIGEST so we can use it as input to left pad + the key with zeroes for hashing. */ + digest = xtrycalloc_secure (2, b); + if (!digest) + return gpg_err_code_from_syserror (); + + memset (hvec, 0, sizeof hvec); + + rawmpi = _gcry_mpi_get_buffer (d, 0, &rawmpilen, NULL); + if (!rawmpi) + { + xfree (digest); + return gpg_err_code_from_syserror (); + } + + hvec[0].data = digest; + hvec[0].off = 0; + hvec[0].len = b > rawmpilen? b - rawmpilen : 0; + hvec[1].data = rawmpi; + hvec[1].off = 0; + hvec[1].len = rawmpilen; + rc = _gcry_md_hash_buffers (hashalgo, 0, digest, hvec, 2); + xfree (rawmpi); + if (rc) + { + xfree (digest); + return rc; + } + + /* Compute the A value. 
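+ Per the EdDSA construction the scalar is "clamped"
+ (illustrative): the three low bits are cleared so A is a multiple
+ of the cofactor 8, bit 255 is cleared and bit 254 is set; the
+ three statements below do exactly that on the byte-reversed
+ (now big-endian) first half of the hash.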
*/ + reverse_buffer (digest, 32); /* Only the first half of the hash. */ + digest[0] = (digest[0] & 0x7f) | 0x40; + digest[31] &= 0xf8; + + *r_digest = digest; + return 0; +} + + +/** + * _gcry_ecc_eddsa_genkey - EdDSA version of the key generation. + * + * @sk: A struct to receive the secret key. + * @E: Parameters of the curve. + * @ctx: Elliptic curve computation context. + * @flags: Flags controlling aspects of the creation. + * + * Return: An error code. + * + * The only @flags bit used by this function is %PUBKEY_FLAG_TRANSIENT + * to use a faster RNG. + */ +gpg_err_code_t +_gcry_ecc_eddsa_genkey (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx, + int flags) +{ + gpg_err_code_t rc; + int b = 256/8; /* The only size we currently support. */ + gcry_mpi_t a, x, y; + mpi_point_struct Q; + gcry_random_level_t random_level; + char *dbuf; + size_t dlen; + gcry_buffer_t hvec[1]; + unsigned char *hash_d = NULL; + + point_init (&Q); + memset (hvec, 0, sizeof hvec); + + if ((flags & PUBKEY_FLAG_TRANSIENT_KEY)) + random_level = GCRY_STRONG_RANDOM; + else + random_level = GCRY_VERY_STRONG_RANDOM; + + a = mpi_snew (0); + x = mpi_new (0); + y = mpi_new (0); + + /* Generate a secret. */ + hash_d = xtrymalloc_secure (2*b); + if (!hash_d) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + dlen = b; + dbuf = _gcry_random_bytes_secure (dlen, random_level); + + /* Compute the A value. */ + hvec[0].data = dbuf; + hvec[0].len = dlen; + rc = _gcry_md_hash_buffers (GCRY_MD_SHA512, 0, hash_d, hvec, 1); + if (rc) + goto leave; + sk->d = _gcry_mpi_set_opaque (NULL, dbuf, dlen*8); + dbuf = NULL; + reverse_buffer (hash_d, 32); /* Only the first half of the hash. */ + hash_d[0] = (hash_d[0] & 0x7f) | 0x40; + hash_d[31] &= 0xf8; + _gcry_mpi_set_buffer (a, hash_d, 32, 0); + xfree (hash_d); hash_d = NULL; + /* log_printmpi ("ecgen a", a); */ + + /* Compute Q. */ + _gcry_mpi_ec_mul_point (&Q, a, &E->G, ctx); + if (DBG_CIPHER) + log_printpnt ("ecgen pk", &Q, ctx); + + /* Copy the stuff to the key structures. */ + sk->E.model = E->model; + sk->E.dialect = E->dialect; + sk->E.p = mpi_copy (E->p); + sk->E.a = mpi_copy (E->a); + sk->E.b = mpi_copy (E->b); + point_init (&sk->E.G); + point_set (&sk->E.G, &E->G); + sk->E.n = mpi_copy (E->n); + sk->E.h = mpi_copy (E->h); + point_init (&sk->Q); + point_set (&sk->Q, &Q); + + leave: + point_free (&Q); + _gcry_mpi_release (a); + _gcry_mpi_release (x); + _gcry_mpi_release (y); + xfree (hash_d); + return rc; +} + + +/* Compute an EdDSA signature. See: + * [ed25519] 23pp. (PDF) Daniel J. Bernstein, Niels Duif, Tanja + * Lange, Peter Schwabe, Bo-Yin Yang. High-speed high-security + * signatures. Journal of Cryptographic Engineering 2 (2012), 77-89. + * Document ID: a1a62a2f76d23f65d622484ddd09caf8. + * URL: http://cr.yp.to/papers.html#ed25519. Date: 2011.09.26. + * + * Despite that this function requires the specification of a hash + * algorithm, we only support what has been specified by the paper. + * This may change in the future. Note that we don't check the used + * curve; the user is responsible to use Ed25519. + * + * Return the signature struct (r,s) from the message hash. The caller + * must have allocated R_R and S. 
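+ *
+ * Sketch of the computation below (illustrative, with n the group
+ * order and G the generator):
+ *
+ *   (a, prefix) = SHA-512(d), a clamped as in compute_h_d
+ *   r = SHA-512(prefix || M)
+ *   R = [r]G
+ *   S = (r + SHA-512(enc(R) || enc(pk) || M) * a) mod n
+ *
+ * enc(R) is stored in R_R and the encoding of S in S, both as
+ * opaque MPIs.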
+ */ +gpg_err_code_t +_gcry_ecc_eddsa_sign (gcry_mpi_t input, ECC_secret_key *skey, + gcry_mpi_t r_r, gcry_mpi_t s, int hashalgo, gcry_mpi_t pk) +{ + int rc; + mpi_ec_t ctx = NULL; + int b; + unsigned int tmp; + unsigned char *digest = NULL; + gcry_buffer_t hvec[3]; + const void *mbuf; + size_t mlen; + unsigned char *rawmpi = NULL; + unsigned int rawmpilen; + unsigned char *encpk = NULL; /* Encoded public key. */ + unsigned int encpklen; + mpi_point_struct I; /* Intermediate value. */ + mpi_point_struct Q; /* Public key. */ + gcry_mpi_t a, x, y, r; + + memset (hvec, 0, sizeof hvec); + + if (!mpi_is_opaque (input)) + return GPG_ERR_INV_DATA; + + /* Initialize some helpers. */ + point_init (&I); + point_init (&Q); + a = mpi_snew (0); + x = mpi_new (0); + y = mpi_new (0); + r = mpi_snew (0); + ctx = _gcry_mpi_ec_p_internal_new (skey->E.model, skey->E.dialect, 0, + skey->E.p, skey->E.a, skey->E.b); + b = (ctx->nbits+7)/8; + if (b != 256/8) { + rc = GPG_ERR_INTERNAL; /* We only support 256 bit. */ + goto leave; + } + + rc = _gcry_ecc_eddsa_compute_h_d (&digest, skey->d, ctx); + if (rc) + goto leave; + _gcry_mpi_set_buffer (a, digest, 32, 0); + + /* Compute the public key if it has not been supplied as optional + parameter. */ + if (pk) + { + rc = _gcry_ecc_eddsa_decodepoint (pk, ctx, &Q, &encpk, &encpklen); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printhex ("* e_pk", encpk, encpklen); + if (!_gcry_mpi_ec_curve_point (&Q, ctx)) + { + rc = GPG_ERR_BROKEN_PUBKEY; + goto leave; + } + } + else + { + _gcry_mpi_ec_mul_point (&Q, a, &skey->E.G, ctx); + rc = _gcry_ecc_eddsa_encodepoint (&Q, ctx, x, y, 0, &encpk, &encpklen); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printhex (" e_pk", encpk, encpklen); + } + + /* Compute R. */ + mbuf = mpi_get_opaque (input, &tmp); + mlen = (tmp +7)/8; + if (DBG_CIPHER) + log_printhex (" m", mbuf, mlen); + + hvec[0].data = digest; + hvec[0].off = 32; + hvec[0].len = 32; + hvec[1].data = (char*)mbuf; + hvec[1].len = mlen; + rc = _gcry_md_hash_buffers (hashalgo, 0, digest, hvec, 2); + if (rc) + goto leave; + reverse_buffer (digest, 64); + if (DBG_CIPHER) + log_printhex (" r", digest, 64); + _gcry_mpi_set_buffer (r, digest, 64, 0); + _gcry_mpi_ec_mul_point (&I, r, &skey->E.G, ctx); + if (DBG_CIPHER) + log_printpnt (" r", &I, ctx); + + /* Convert R into affine coordinates and apply encoding. */ + rc = _gcry_ecc_eddsa_encodepoint (&I, ctx, x, y, 0, &rawmpi, &rawmpilen); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printhex (" e_r", rawmpi, rawmpilen); + + /* S = r + a * H(encodepoint(R) + encodepoint(pk) + m) mod n */ + hvec[0].data = rawmpi; /* (this is R) */ + hvec[0].off = 0; + hvec[0].len = rawmpilen; + hvec[1].data = encpk; + hvec[1].off = 0; + hvec[1].len = encpklen; + hvec[2].data = (char*)mbuf; + hvec[2].off = 0; + hvec[2].len = mlen; + rc = _gcry_md_hash_buffers (hashalgo, 0, digest, hvec, 3); + if (rc) + goto leave; + + /* No more need for RAWMPI thus we now transfer it to R_R. 
*/ + mpi_set_opaque (r_r, rawmpi, rawmpilen*8); + rawmpi = NULL; + + reverse_buffer (digest, 64); + if (DBG_CIPHER) + log_printhex (" H(R+)", digest, 64); + _gcry_mpi_set_buffer (s, digest, 64, 0); + mpi_mulm (s, s, a, skey->E.n); + mpi_addm (s, s, r, skey->E.n); + rc = eddsa_encodempi (s, b, &rawmpi, &rawmpilen); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printhex (" e_s", rawmpi, rawmpilen); + mpi_set_opaque (s, rawmpi, rawmpilen*8); + rawmpi = NULL; + + rc = 0; + + leave: + _gcry_mpi_release (a); + _gcry_mpi_release (x); + _gcry_mpi_release (y); + _gcry_mpi_release (r); + xfree (digest); + _gcry_mpi_ec_free (ctx); + point_free (&I); + point_free (&Q); + xfree (encpk); + xfree (rawmpi); + return rc; +} + + +/* Verify an EdDSA signature. See sign_eddsa for the reference. + * Check if R_IN and S_IN verifies INPUT. PKEY has the curve + * parameters and PK is the EdDSA style encoded public key. + */ +gpg_err_code_t +_gcry_ecc_eddsa_verify (gcry_mpi_t input, ECC_public_key *pkey, + gcry_mpi_t r_in, gcry_mpi_t s_in, int hashalgo, + gcry_mpi_t pk) +{ + int rc; + mpi_ec_t ctx = NULL; + int b; + unsigned int tmp; + mpi_point_struct Q; /* Public key. */ + unsigned char *encpk = NULL; /* Encoded public key. */ + unsigned int encpklen; + const void *mbuf, *rbuf; + unsigned char *tbuf = NULL; + size_t mlen, rlen; + unsigned int tlen; + unsigned char digest[64]; + gcry_buffer_t hvec[3]; + gcry_mpi_t h, s; + mpi_point_struct Ia, Ib; + + if (!mpi_is_opaque (input) || !mpi_is_opaque (r_in) || !mpi_is_opaque (s_in)) + return GPG_ERR_INV_DATA; + if (hashalgo != GCRY_MD_SHA512) + return GPG_ERR_DIGEST_ALGO; + + point_init (&Q); + point_init (&Ia); + point_init (&Ib); + h = mpi_new (0); + s = mpi_new (0); + + ctx = _gcry_mpi_ec_p_internal_new (pkey->E.model, pkey->E.dialect, 0, + pkey->E.p, pkey->E.a, pkey->E.b); + b = ctx->nbits/8; + if (b != 256/8) + { + rc = GPG_ERR_INTERNAL; /* We only support 256 bit. */ + goto leave; + } + + /* Decode and check the public key. */ + rc = _gcry_ecc_eddsa_decodepoint (pk, ctx, &Q, &encpk, &encpklen); + if (rc) + goto leave; + if (!_gcry_mpi_ec_curve_point (&Q, ctx)) + { + rc = GPG_ERR_BROKEN_PUBKEY; + goto leave; + } + if (DBG_CIPHER) + log_printhex (" e_pk", encpk, encpklen); + if (encpklen != b) + { + rc = GPG_ERR_INV_LENGTH; + goto leave; + } + + /* Convert the other input parameters. */ + mbuf = mpi_get_opaque (input, &tmp); + mlen = (tmp +7)/8; + if (DBG_CIPHER) + log_printhex (" m", mbuf, mlen); + rbuf = mpi_get_opaque (r_in, &tmp); + rlen = (tmp +7)/8; + if (DBG_CIPHER) + log_printhex (" r", rbuf, rlen); + if (rlen != b) + { + rc = GPG_ERR_INV_LENGTH; + goto leave; + } + + /* h = H(encodepoint(R) + encodepoint(pk) + m) */ + hvec[0].data = (char*)rbuf; + hvec[0].off = 0; + hvec[0].len = rlen; + hvec[1].data = encpk; + hvec[1].off = 0; + hvec[1].len = encpklen; + hvec[2].data = (char*)mbuf; + hvec[2].off = 0; + hvec[2].len = mlen; + rc = _gcry_md_hash_buffers (hashalgo, 0, digest, hvec, 3); + if (rc) + goto leave; + reverse_buffer (digest, 64); + if (DBG_CIPHER) + log_printhex (" H(R+)", digest, 64); + _gcry_mpi_set_buffer (h, digest, 64, 0); + + /* According to the paper the best way for verification is: + encodepoint(sG - h·Q) = encodepoint(r) + because we don't need to decode R. 
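+
+ With S = r + h*a and pk = [a]G this holds because (illustrative)
+
+   [S]G - [h]pk = [r]G + [h*a]G - [h*a]G = [r]G = R;
+
+ the subtraction is realized below by negating Ib.x before the
+ point addition.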
*/
+ {
+ void *sbuf;
+ unsigned int slen;
+
+ sbuf = _gcry_mpi_get_opaque_copy (s_in, &tmp);
+ slen = (tmp +7)/8;
+ reverse_buffer (sbuf, slen);
+ if (DBG_CIPHER)
+ log_printhex (" s", sbuf, slen);
+ _gcry_mpi_set_buffer (s, sbuf, slen, 0);
+ xfree (sbuf);
+ if (slen != b)
+ {
+ rc = GPG_ERR_INV_LENGTH;
+ goto leave;
+ }
+ }
+
+ _gcry_mpi_ec_mul_point (&Ia, s, &pkey->E.G, ctx);
+ _gcry_mpi_ec_mul_point (&Ib, h, &Q, ctx);
+ _gcry_mpi_sub (Ib.x, ctx->p, Ib.x);
+ _gcry_mpi_ec_add_points (&Ia, &Ia, &Ib, ctx);
+ rc = _gcry_ecc_eddsa_encodepoint (&Ia, ctx, s, h, 0, &tbuf, &tlen);
+ if (rc)
+ goto leave;
+ if (tlen != rlen || memcmp (tbuf, rbuf, tlen))
+ {
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+
+ rc = 0;
+
+ leave:
+ xfree (encpk);
+ xfree (tbuf);
+ _gcry_mpi_ec_free (ctx);
+ _gcry_mpi_release (s);
+ _gcry_mpi_release (h);
+ point_free (&Ia);
+ point_free (&Ib);
+ point_free (&Q);
+ return rc;
+}
diff --git a/libotr/libgcrypt-1.8.7/cipher/ecc-gost.c b/libotr/libgcrypt-1.8.7/cipher/ecc-gost.c
new file mode 100644
index 0000000..0362a6c
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/ecc-gost.c
@@ -0,0 +1,235 @@
+/* ecc-gost.c - Elliptic Curve GOST signatures
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+#include "pubkey-internal.h"
+
+
+/* Compute a GOST R 34.10-01/-12 signature.
+ * Return the signature struct (r,s) from the message hash. The caller
+ * must have allocated R and S.
+ */
+gpg_err_code_t
+_gcry_ecc_gost_sign (gcry_mpi_t input, ECC_secret_key *skey,
+ gcry_mpi_t r, gcry_mpi_t s)
+{
+ gpg_err_code_t rc = 0;
+ gcry_mpi_t k, dr, sum, ke, x, e;
+ mpi_point_struct I;
+ gcry_mpi_t hash;
+ const void *abuf;
+ unsigned int abits, qbits;
+ mpi_ec_t ctx;
+
+ if (DBG_CIPHER)
+ log_mpidump ("gost sign hash ", input );
+
+ qbits = mpi_get_nbits (skey->E.n);
+
+ /* Convert the INPUT into an MPI if needed. */
+ if (mpi_is_opaque (input))
+ {
+ abuf = mpi_get_opaque (input, &abits);
+ rc = _gcry_mpi_scan (&hash, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL);
+ if (rc)
+ return rc;
+ if (abits > qbits)
+ mpi_rshift (hash, hash, abits - qbits);
+ }
+ else
+ hash = input;
+
+
+ k = NULL;
+ dr = mpi_alloc (0);
+ sum = mpi_alloc (0);
+ ke = mpi_alloc (0);
+ e = mpi_alloc (0);
+ x = mpi_alloc (0);
+ point_init (&I);
+
+ ctx = _gcry_mpi_ec_p_internal_new (skey->E.model, skey->E.dialect, 0,
+ skey->E.p, skey->E.a, skey->E.b);
+
+ mpi_mod (e, input, skey->E.n); /* e = hash mod n */
+
+ if (!mpi_cmp_ui (e, 0))
+ mpi_set_ui (e, 1);
+
+ /* Two loops to avoid that R or S is zero. This is more of a joke than
+ a real demand because the probability of them being zero is less
+ than any hardware failure. Some specs however require it. */
+ do
+ {
+ do
+ {
+ mpi_free (k);
+ k = _gcry_dsa_gen_k (skey->E.n, GCRY_STRONG_RANDOM);
+
+ _gcry_dsa_modify_k (k, skey->E.n, qbits);
+
+ _gcry_mpi_ec_mul_point (&I, k, &skey->E.G, ctx);
+ if (_gcry_mpi_ec_get_affine (x, NULL, &I, ctx))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc sign: Failed to get affine coordinates\n");
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ mpi_mod (r, x, skey->E.n); /* r = x mod n */
+ }
+ while (!mpi_cmp_ui (r, 0));
+ mpi_mulm (dr, skey->d, r, skey->E.n); /* dr = d*r mod n */
+ mpi_mulm (ke, k, e, skey->E.n); /* ke = k*e mod n */
+ mpi_addm (s, ke, dr, skey->E.n); /* s = (k*e + d*r) mod n */
+ }
+ while (!mpi_cmp_ui (s, 0));
+
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("gost sign result r ", r);
+ log_mpidump ("gost sign result s ", s);
+ }
+
+ leave:
+ _gcry_mpi_ec_free (ctx);
+ point_free (&I);
+ mpi_free (x);
+ mpi_free (e);
+ mpi_free (ke);
+ mpi_free (sum);
+ mpi_free (dr);
+ mpi_free (k);
+
+ if (hash != input)
+ mpi_free (hash);
+
+ return rc;
+}
+
+
+/* Verify a GOST R 34.10-01/-12 signature.
+ * Check whether R and S verify INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_gost_verify (gcry_mpi_t input, ECC_public_key *pkey,
+ gcry_mpi_t r, gcry_mpi_t s)
+{
+ gpg_err_code_t err = 0;
+ gcry_mpi_t e, x, z1, z2, v, rv, zero;
+ mpi_point_struct Q, Q1, Q2;
+ mpi_ec_t ctx;
+
+ if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, pkey->E.n) < 0) )
+ return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. */
+ if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, pkey->E.n) < 0) )
+ return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed. */
+
+ x = mpi_alloc (0);
+ e = mpi_alloc (0);
+ z1 = mpi_alloc (0);
+ z2 = mpi_alloc (0);
+ v = mpi_alloc (0);
+ rv = mpi_alloc (0);
+ zero = mpi_alloc (0);
+
+ point_init (&Q);
+ point_init (&Q1);
+ point_init (&Q2);
+
+ ctx = _gcry_mpi_ec_p_internal_new (pkey->E.model, pkey->E.dialect, 0,
+ pkey->E.p, pkey->E.a, pkey->E.b);
+
+ mpi_mod (e, input, pkey->E.n); /* e = hash mod n */
+ if (!mpi_cmp_ui (e, 0))
+ mpi_set_ui (e, 1);
+ mpi_invm (v, e, pkey->E.n); /* v = e^(-1) (mod n) */
+ mpi_mulm (z1, s, v, pkey->E.n); /* z1 = s*v (mod n) */
+ mpi_mulm (rv, r, v, pkey->E.n); /* rv = r*v (mod n) */
+ mpi_subm (z2, zero, rv, pkey->E.n); /* z2 = -r*v (mod n) */
+
+ _gcry_mpi_ec_mul_point (&Q1, z1, &pkey->E.G, ctx);
+/* log_mpidump ("Q1.x", Q1.x); */
+/* log_mpidump ("Q1.y", Q1.y); */
+/* log_mpidump ("Q1.z", Q1.z); */
+ _gcry_mpi_ec_mul_point (&Q2, z2, &pkey->Q, ctx);
+/* log_mpidump ("Q2.x", Q2.x); */
+/* log_mpidump ("Q2.y", Q2.y); */
+/* log_mpidump ("Q2.z", Q2.z); */
+ _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ctx);
+/* log_mpidump (" Q.x", Q.x); */
+/* log_mpidump (" Q.y", Q.y); */
+/* log_mpidump (" Q.z", Q.z); */
+
+ if (!mpi_cmp_ui (Q.z, 0))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc verify: Rejected\n");
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ if (_gcry_mpi_ec_get_affine (x, NULL, &Q, ctx))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc verify: Failed to get affine coordinates\n");
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ mpi_mod (x, x, pkey->E.n); /* x = x mod E_n */
+ if (mpi_cmp (x, r)) /* x != r */
+ {
+ if (DBG_CIPHER)
+ {
+ log_mpidump (" x", x);
+ log_mpidump (" r", r);
+ log_mpidump (" s", s);
+ log_debug ("ecc verify: Not verified\n");
+ }
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ if (DBG_CIPHER)
+ log_debug ("ecc verify: Accepted\n");
+
+ leave:
+ _gcry_mpi_ec_free
(ctx); + point_free (&Q2); + point_free (&Q1); + point_free (&Q); + mpi_free (zero); + mpi_free (rv); + mpi_free (v); + mpi_free (z2); + mpi_free (z1); + mpi_free (x); + mpi_free (e); + return err; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/ecc-misc.c b/libotr/libgcrypt-1.8.7/cipher/ecc-misc.c new file mode 100644 index 0000000..34dd680 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/ecc-misc.c @@ -0,0 +1,363 @@ +/* ecc-misc.c - Elliptic Curve miscellaneous functions + * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "ecc-common.h" + + +/* + * Release a curve object. + */ +void +_gcry_ecc_curve_free (elliptic_curve_t *E) +{ + mpi_free (E->p); E->p = NULL; + mpi_free (E->a); E->a = NULL; + mpi_free (E->b); E->b = NULL; + _gcry_mpi_point_free_parts (&E->G); + mpi_free (E->n); E->n = NULL; + mpi_free (E->h); E->h = NULL; +} + + +/* + * Return a copy of a curve object. + */ +elliptic_curve_t +_gcry_ecc_curve_copy (elliptic_curve_t E) +{ + elliptic_curve_t R; + + R.model = E.model; + R.dialect = E.dialect; + R.name = E.name; + R.p = mpi_copy (E.p); + R.a = mpi_copy (E.a); + R.b = mpi_copy (E.b); + _gcry_mpi_point_init (&R.G); + point_set (&R.G, &E.G); + R.n = mpi_copy (E.n); + R.h = mpi_copy (E.h); + + return R; +} + + +/* + * Return a description of the curve model. + */ +const char * +_gcry_ecc_model2str (enum gcry_mpi_ec_models model) +{ + const char *str = "?"; + switch (model) + { + case MPI_EC_WEIERSTRASS: str = "Weierstrass"; break; + case MPI_EC_MONTGOMERY: str = "Montgomery"; break; + case MPI_EC_EDWARDS: str = "Edwards"; break; + } + return str; +} + + +/* + * Return a description of the curve dialect. + */ +const char * +_gcry_ecc_dialect2str (enum ecc_dialects dialect) +{ + const char *str = "?"; + switch (dialect) + { + case ECC_DIALECT_STANDARD: str = "Standard"; break; + case ECC_DIALECT_ED25519: str = "Ed25519"; break; + } + return str; +} + + +gcry_mpi_t +_gcry_ecc_ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p) +{ + gpg_err_code_t rc; + int pbytes = (mpi_get_nbits (p)+7)/8; + size_t n; + unsigned char *buf, *ptr; + gcry_mpi_t result; + + buf = xmalloc ( 1 + 2*pbytes ); + *buf = 04; /* Uncompressed point. 
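+
+ The result is the standard SEC1 layout (illustrative, with pbytes
+ bytes per coordinate):
+
+   04 || X left-padded to pbytes || Y left-padded to pbytes
+
+ which is scanned back below into a single MPI of 1+2*pbytes bytes.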
*/ + ptr = buf+1; + rc = _gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, x); + if (rc) + log_fatal ("mpi_print failed: %s\n", gpg_strerror (rc)); + if (n < pbytes) + { + memmove (ptr+(pbytes-n), ptr, n); + memset (ptr, 0, (pbytes-n)); + } + ptr += pbytes; + rc = _gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, y); + if (rc) + log_fatal ("mpi_print failed: %s\n", gpg_strerror (rc)); + if (n < pbytes) + { + memmove (ptr+(pbytes-n), ptr, n); + memset (ptr, 0, (pbytes-n)); + } + + rc = _gcry_mpi_scan (&result, GCRYMPI_FMT_USG, buf, 1+2*pbytes, NULL); + if (rc) + log_fatal ("mpi_scan failed: %s\n", gpg_strerror (rc)); + xfree (buf); + + return result; +} + + +/* Convert POINT into affine coordinates using the context CTX and + return a newly allocated MPI. If the conversion is not possible + NULL is returned. This function won't print an error message. */ +gcry_mpi_t +_gcry_mpi_ec_ec2os (gcry_mpi_point_t point, mpi_ec_t ectx) +{ + gcry_mpi_t g_x, g_y, result; + + g_x = mpi_new (0); + g_y = mpi_new (0); + if (_gcry_mpi_ec_get_affine (g_x, g_y, point, ectx)) + result = NULL; + else + result = _gcry_ecc_ec2os (g_x, g_y, ectx->p); + mpi_free (g_x); + mpi_free (g_y); + + return result; +} + + +/* RESULT must have been initialized and is set on success to the + point given by VALUE. */ +gcry_err_code_t +_gcry_ecc_os2ec (mpi_point_t result, gcry_mpi_t value) +{ + gcry_err_code_t rc; + size_t n; + const unsigned char *buf; + unsigned char *buf_memory; + gcry_mpi_t x, y; + + if (mpi_is_opaque (value)) + { + unsigned int nbits; + + buf = mpi_get_opaque (value, &nbits); + if (!buf) + return GPG_ERR_INV_OBJ; + n = (nbits + 7)/8; + buf_memory = NULL; + } + else + { + n = (mpi_get_nbits (value)+7)/8; + buf_memory = xmalloc (n); + rc = _gcry_mpi_print (GCRYMPI_FMT_USG, buf_memory, n, &n, value); + if (rc) + { + xfree (buf_memory); + return rc; + } + buf = buf_memory; + } + + if (n < 1) + { + xfree (buf_memory); + return GPG_ERR_INV_OBJ; + } + if (*buf != 4) + { + xfree (buf_memory); + return GPG_ERR_NOT_IMPLEMENTED; /* No support for point compression. */ + } + if ( ((n-1)%2) ) + { + xfree (buf_memory); + return GPG_ERR_INV_OBJ; + } + n = (n-1)/2; + rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, buf+1, n, NULL); + if (rc) + { + xfree (buf_memory); + return rc; + } + rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG, buf+1+n, n, NULL); + xfree (buf_memory); + if (rc) + { + mpi_free (x); + return rc; + } + + mpi_set (result->x, x); + mpi_set (result->y, y); + mpi_set_ui (result->z, 1); + + mpi_free (x); + mpi_free (y); + + return 0; +} + + +/* Compute the public key from the the context EC. Obviously a + requirement is that the secret key is available in EC. On success + Q is returned; on error NULL. If Q is NULL a newly allocated point + is returned. If G or D are given they override the values taken + from EC. */ +mpi_point_t +_gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec, + mpi_point_t G, gcry_mpi_t d) +{ + if (!G) + G = ec->G; + if (!d) + d = ec->d; + + if (!d || !G || !ec->p || !ec->a) + return NULL; + if (ec->model == MPI_EC_EDWARDS && !ec->b) + return NULL; + + if (ec->dialect == ECC_DIALECT_ED25519 + && (ec->flags & PUBKEY_FLAG_EDDSA)) + { + gcry_mpi_t a; + unsigned char *digest; + + if (_gcry_ecc_eddsa_compute_h_d (&digest, d, ec)) + return NULL; + + a = mpi_snew (0); + _gcry_mpi_set_buffer (a, digest, 32, 0); + xfree (digest); + + /* And finally the public key. 
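+ Here Q = [a]G with the clamped scalar a derived from D, rather
+ than Q = [d]G as in the generic branch below.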
*/ + if (!Q) + Q = mpi_point_new (0); + if (Q) + _gcry_mpi_ec_mul_point (Q, a, G, ec); + mpi_free (a); + } + else + { + if (!Q) + Q = mpi_point_new (0); + if (Q) + _gcry_mpi_ec_mul_point (Q, d, G, ec); + } + + return Q; +} + + +gpg_err_code_t +_gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + + if (mpi_is_opaque (pk)) + { + const unsigned char *buf; + unsigned char *p; + + buf = mpi_get_opaque (pk, &rawmpilen); + if (!buf) + return GPG_ERR_INV_OBJ; + rawmpilen = (rawmpilen + 7)/8; + + if (rawmpilen > 1 && (rawmpilen%2) && buf[0] == 0x40) + { + rawmpilen--; + buf++; + } + + rawmpi = xtrymalloc (rawmpilen? rawmpilen:1); + if (!rawmpi) + return gpg_err_code_from_syserror (); + + p = rawmpi + rawmpilen; + while (p > rawmpi) + *--p = *buf++; + } + else + { + unsigned int nbytes = (ctx->nbits+7)/8; + + rawmpi = _gcry_mpi_get_buffer (pk, nbytes, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + /* + * It is not reliable to assume that 0x40 means the prefix. + * + * For newer implementation, it is reliable since we always put + * 0x40 for x-only coordinate. + * + * For data with older implementation (non-released development + * version), it is possible to have the 0x40 as a part of data. + * Besides, when data was parsed as MPI, we might have 0x00 + * prefix. + * + * So, we need to check if it's really the prefix or not. + * Only when it's the prefix, we remove it. + */ + /* + * It is possible for data created by older implementation to + * have shorter length when it was parsed as MPI. Those removed + * zero(s) are recovered by _gcry_mpi_get_buffer. + */ + /* + * When we have the prefix (0x40 or 0x00), it comes at the end, + * since it is taken by _gcry_mpi_get_buffer with little endian. + * Just setting RAWMPILEN to NBYTES is enough in this case. + * Othewise, RAWMPILEN is NBYTES already. + */ + rawmpilen = nbytes; + } + + rawmpi[0] &= (1 << (ctx->nbits % 8)) - 1; + _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0); + xfree (rawmpi); + mpi_set_ui (result->z, 1); + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/ecc.c b/libotr/libgcrypt-1.8.7/cipher/ecc.c new file mode 100644 index 0000000..3f221a2 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/ecc.c @@ -0,0 +1,2256 @@ +/* ecc.c - Elliptic Curve Cryptography + * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013, 2015 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */
+
+/* This code is originally based on the Patch 0.1.6 for the gnupg
+   1.4.x branch as retrieved on 2007-03-21 from
+   http://www.calcurco.cat/eccGnuPG/src/gnupg-1.4.6-ecc0.2.0beta1.diff.bz2
+   The original authors are:
+     Written by
+      Sergi Blanch i Torne <d4372211 at alumnes.eup.udl.es>,
+      Ramiro Moreno Chiral <ramiro at eup.udl.es>
+     Maintainers
+      Sergi Blanch i Torne
+      Ramiro Moreno Chiral
+      Mikael Mylnikov (mmr)
+   For use in Libgcrypt the code has been heavily modified and cleaned
+   up.  In fact there is not much left of the original code except for
+   some variable names and the textbook implementation of the sign and
+   verification algorithms.  The arithmetic functions have entirely
+   been rewritten and moved to mpi/ec.c.
+
+   ECDH encrypt and decrypt code written by Andrey Jivsov.
+*/
+
+
+/* TODO:
+
+  - In mpi/ec.c we use mpi_powm for x^2 mod p: Either implement a
+    special case in mpi_powm or check whether mpi_mulm is faster.
+
+*/
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+
+static const char *ecc_names[] =
+  {
+    "ecc",
+    "ecdsa",
+    "ecdh",
+    "eddsa",
+    "gost",
+    NULL,
+  };
+
+
+/* Sample NIST P-256 key from RFC 6979 A.2.5 */
+static const char sample_public_key_secp256[] =
+  "(public-key"
+  " (ecc"
+  "  (curve secp256r1)"
+  "  (q #04"
+  /**/    "60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6"
+  /**/    "7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299#)))";
+
+static const char sample_secret_key_secp256[] =
+  "(private-key"
+  " (ecc"
+  "  (curve secp256r1)"
+  "  (d #C9AFA9D845BA75166B5C215767B1D6934E50C3DB36E89B127B8A622B120F6721#)"
+  "  (q #04"
+  /**/    "60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6"
+  /**/    "7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299#)))";
+
+
+/* Registered progress function and its callback value. */
+static void (*progress_cb) (void *, const char*, int, int, int);
+static void *progress_cb_data;
+
+
+/* Local prototypes. */
+static void test_keys (ECC_secret_key *sk, unsigned int nbits);
+static void test_ecdh_only_keys (ECC_secret_key *sk, unsigned int nbits,
+                                 int flags);
+static unsigned int ecc_get_nbits (gcry_sexp_t parms);
+
+
+
+
+void
+_gcry_register_pk_ecc_progress (void (*cb) (void *, const char *,
+                                            int, int, int),
+                                void *cb_data)
+{
+  progress_cb = cb;
+  progress_cb_data = cb_data;
+}
+
+/* static void */
+/* progress (int c) */
+/* { */
+/*   if (progress_cb) */
+/*     progress_cb (progress_cb_data, "pk_ecc", c, 0, 0); */
+/* } */
+
+
+
+/**
+ * nist_generate_key - Standard version of the ECC key generation.
+ * @sk:  A struct to receive the secret key.
+ * @E:   Parameters of the curve.
+ * @ctx: Elliptic curve computation context.
+ * @flags: Flags controlling aspects of the creation.
+ * @nbits: Only for testing
+ * @r_x: On success this receives an allocated MPI with the affine
+ *       x-coordinate of the public key.  On error NULL is stored.
+ * @r_y: Ditto for the y-coordinate.
+ *
+ * Return: An error code.
+ *
+ * The @flags bits used by this function are %PUBKEY_FLAG_TRANSIENT_KEY
+ * to use a faster RNG, and %PUBKEY_FLAG_NO_KEYTEST to skip the
+ * assertion that the key works as expected.
+ *
+ * FIXME: Check whether N is needed.
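+ *
+ * Usage sketch (added for illustration, not part of the original
+ * sources): through the public API this function is reached by e.g.
+ *
+ *   gcry_sexp_t parms, keypair;
+ *   gcry_sexp_build (&parms, NULL,
+ *                    "(genkey (ecc (curve \"NIST P-256\")"
+ *                    " (flags transient-key)))");
+ *   gcry_pk_genkey (&keypair, parms);
+ *
+ * where "transient-key" sets %PUBKEY_FLAG_TRANSIENT_KEY and thus
+ * selects GCRY_STRONG_RANDOM below.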
+ */ +static gpg_err_code_t +nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx, + int flags, unsigned int nbits, + gcry_mpi_t *r_x, gcry_mpi_t *r_y) +{ + mpi_point_struct Q; + gcry_random_level_t random_level; + gcry_mpi_t x, y; + const unsigned int pbits = mpi_get_nbits (E->p); + + point_init (&Q); + + if ((flags & PUBKEY_FLAG_TRANSIENT_KEY)) + random_level = GCRY_STRONG_RANDOM; + else + random_level = GCRY_VERY_STRONG_RANDOM; + + /* Generate a secret. */ + if (ctx->dialect == ECC_DIALECT_ED25519 || (flags & PUBKEY_FLAG_DJB_TWEAK)) + { + char *rndbuf; + + sk->d = mpi_snew (256); + rndbuf = _gcry_random_bytes_secure (32, random_level); + rndbuf[0] &= 0x7f; /* Clear bit 255. */ + rndbuf[0] |= 0x40; /* Set bit 254. */ + rndbuf[31] &= 0xf8; /* Clear bits 2..0 so that d mod 8 == 0 */ + _gcry_mpi_set_buffer (sk->d, rndbuf, 32, 0); + xfree (rndbuf); + } + else + sk->d = _gcry_dsa_gen_k (E->n, random_level); + + + /* Compute Q. */ + _gcry_mpi_ec_mul_point (&Q, sk->d, &E->G, ctx); + + /* Copy the stuff to the key structures. */ + sk->E.model = E->model; + sk->E.dialect = E->dialect; + sk->E.p = mpi_copy (E->p); + sk->E.a = mpi_copy (E->a); + sk->E.b = mpi_copy (E->b); + point_init (&sk->E.G); + point_set (&sk->E.G, &E->G); + sk->E.n = mpi_copy (E->n); + sk->E.h = mpi_copy (E->h); + point_init (&sk->Q); + + x = mpi_new (pbits); + if (r_y == NULL) + y = NULL; + else + y = mpi_new (pbits); + if (_gcry_mpi_ec_get_affine (x, y, &Q, ctx)) + log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q"); + + /* We want the Q=(x,y) be a "compliant key" in terms of the + * http://tools.ietf.org/html/draft-jivsov-ecc-compact, which simply + * means that we choose either Q=(x,y) or -Q=(x,p-y) such that we + * end up with the min(y,p-y) as the y coordinate. Such a public + * key allows the most efficient compression: y can simply be + * dropped because we know that it's a minimum of the two + * possibilities without any loss of security. Note that we don't + * do that for Ed25519 so that we do not violate the special + * construction of the secret key. */ + if (r_y == NULL || E->dialect == ECC_DIALECT_ED25519) + point_set (&sk->Q, &Q); + else + { + gcry_mpi_t negative; + + negative = mpi_new (pbits); + + if (E->model == MPI_EC_WEIERSTRASS) + mpi_sub (negative, E->p, y); /* negative = p - y */ + else + mpi_sub (negative, E->p, x); /* negative = p - x */ + + if (mpi_cmp (negative, y) < 0) /* p - y < p */ + { + /* We need to end up with -Q; this assures that new Q's y is + the smallest one */ + if (E->model == MPI_EC_WEIERSTRASS) + { + mpi_free (y); + y = negative; + } + else + { + mpi_free (x); + x = negative; + } + mpi_sub (sk->d, E->n, sk->d); /* d = order - d */ + mpi_point_set (&sk->Q, x, y, mpi_const (MPI_C_ONE)); + + if (DBG_CIPHER) + log_debug ("ecgen converted Q to a compliant point\n"); + } + else /* p - y >= p */ + { + /* No change is needed exactly 50% of the time: just copy. */ + mpi_free (negative); + point_set (&sk->Q, &Q); + if (DBG_CIPHER) + log_debug ("ecgen didn't need to convert Q to a compliant point\n"); + } + } + + *r_x = x; + if (r_y) + *r_y = y; + + point_free (&Q); + /* Now we can test our keys (this should never fail!). */ + if ((flags & PUBKEY_FLAG_NO_KEYTEST)) + ; /* User requested to skip the test. */ + else if (sk->E.model != MPI_EC_MONTGOMERY) + test_keys (sk, nbits - 64); + else + test_ecdh_only_keys (sk, nbits - 64, flags); + + return 0; +} + + +/* + * To verify correct skey it use a random information. 
+ * First, encrypt and decrypt this dummy value, + * test if the information is recuperated. + * Second, test with the sign and verify functions. + */ +static void +test_keys (ECC_secret_key *sk, unsigned int nbits) +{ + ECC_public_key pk; + gcry_mpi_t test = mpi_new (nbits); + mpi_point_struct R_; + gcry_mpi_t c = mpi_new (nbits); + gcry_mpi_t out = mpi_new (nbits); + gcry_mpi_t r = mpi_new (nbits); + gcry_mpi_t s = mpi_new (nbits); + + if (DBG_CIPHER) + log_debug ("Testing key.\n"); + + point_init (&R_); + + pk.E = _gcry_ecc_curve_copy (sk->E); + point_init (&pk.Q); + point_set (&pk.Q, &sk->Q); + + _gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM); + + if (_gcry_ecc_ecdsa_sign (test, sk, r, s, 0, 0) ) + log_fatal ("ECDSA operation: sign failed\n"); + + if (_gcry_ecc_ecdsa_verify (test, &pk, r, s)) + { + log_fatal ("ECDSA operation: sign, verify failed\n"); + } + + if (DBG_CIPHER) + log_debug ("ECDSA operation: sign, verify ok.\n"); + + point_free (&pk.Q); + _gcry_ecc_curve_free (&pk.E); + + point_free (&R_); + mpi_free (s); + mpi_free (r); + mpi_free (out); + mpi_free (c); + mpi_free (test); +} + + +static void +test_ecdh_only_keys (ECC_secret_key *sk, unsigned int nbits, int flags) +{ + ECC_public_key pk; + gcry_mpi_t test; + mpi_point_struct R_; + gcry_mpi_t x0, x1; + mpi_ec_t ec; + + if (DBG_CIPHER) + log_debug ("Testing ECDH only key.\n"); + + point_init (&R_); + + pk.E = _gcry_ecc_curve_copy (sk->E); + point_init (&pk.Q); + point_set (&pk.Q, &sk->Q); + + if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + { + char *rndbuf; + + test = mpi_new (256); + rndbuf = _gcry_random_bytes (32, GCRY_WEAK_RANDOM); + rndbuf[0] &= 0x7f; /* Clear bit 255. */ + rndbuf[0] |= 0x40; /* Set bit 254. */ + rndbuf[31] &= 0xf8; /* Clear bits 2..0 so that d mod 8 == 0 */ + _gcry_mpi_set_buffer (test, rndbuf, 32, 0); + xfree (rndbuf); + } + else + { + test = mpi_new (nbits); + _gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM); + } + + ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, flags, + pk.E.p, pk.E.a, pk.E.b); + x0 = mpi_new (0); + x1 = mpi_new (0); + + /* R_ = hkQ <=> R_ = hkdG */ + _gcry_mpi_ec_mul_point (&R_, test, &pk.Q, ec); + if (!(flags & PUBKEY_FLAG_DJB_TWEAK)) + _gcry_mpi_ec_mul_point (&R_, ec->h, &R_, ec); + if (_gcry_mpi_ec_get_affine (x0, NULL, &R_, ec)) + log_fatal ("ecdh: Failed to get affine coordinates for hkQ\n"); + + _gcry_mpi_ec_mul_point (&R_, test, &pk.E.G, ec); + _gcry_mpi_ec_mul_point (&R_, sk->d, &R_, ec); + /* R_ = hdkG */ + if (!(flags & PUBKEY_FLAG_DJB_TWEAK)) + _gcry_mpi_ec_mul_point (&R_, ec->h, &R_, ec); + + if (_gcry_mpi_ec_get_affine (x1, NULL, &R_, ec)) + log_fatal ("ecdh: Failed to get affine coordinates for hdkG\n"); + + if (mpi_cmp (x0, x1)) + { + log_fatal ("ECDH test failed.\n"); + } + + mpi_free (x0); + mpi_free (x1); + _gcry_mpi_ec_free (ec); + + point_free (&pk.Q); + _gcry_ecc_curve_free (&pk.E); + + point_free (&R_); + mpi_free (test); +} + + +/* + * To check the validity of the value, recalculate the correspondence + * between the public value and the secret one. 
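+ *
+ * (Illustration, not original text.)  Besides the sanity checks on G,
+ * the core identity verified below is
+ *
+ *    Q ?= [d]G   over E(F_p)
+ *
+ * i.e. the stored public point must equal a fresh scalar
+ * multiplication of the base point by the secret scalar.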
+ */ +static int +check_secret_key (ECC_secret_key *sk, mpi_ec_t ec, int flags) +{ + int rc = 1; + mpi_point_struct Q; + gcry_mpi_t x1, y1; + gcry_mpi_t x2 = NULL; + gcry_mpi_t y2 = NULL; + + point_init (&Q); + x1 = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y1 = NULL; + else + y1 = mpi_new (0); + + /* G in E(F_p) */ + if (!_gcry_mpi_ec_curve_point (&sk->E.G, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: Point 'G' does not belong to curve 'E'!\n"); + goto leave; + } + + /* G != PaI */ + if (!mpi_cmp_ui (sk->E.G.z, 0)) + { + if (DBG_CIPHER) + log_debug ("Bad check: 'G' cannot be Point at Infinity!\n"); + goto leave; + } + + /* Check order of curve. */ + if (sk->E.dialect != ECC_DIALECT_ED25519 && !(flags & PUBKEY_FLAG_DJB_TWEAK)) + { + _gcry_mpi_ec_mul_point (&Q, sk->E.n, &sk->E.G, ec); + if (mpi_cmp_ui (Q.z, 0)) + { + if (DBG_CIPHER) + log_debug ("check_secret_key: E is not a curve of order n\n"); + goto leave; + } + } + + /* Pubkey cannot be PaI */ + if (!mpi_cmp_ui (sk->Q.z, 0)) + { + if (DBG_CIPHER) + log_debug ("Bad check: Q can not be a Point at Infinity!\n"); + goto leave; + } + + /* pubkey = [d]G over E */ + if (!_gcry_ecc_compute_public (&Q, ec, &sk->E.G, sk->d)) + { + if (DBG_CIPHER) + log_debug ("Bad check: computation of dG failed\n"); + goto leave; + } + if (_gcry_mpi_ec_get_affine (x1, y1, &Q, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: Q can not be a Point at Infinity!\n"); + goto leave; + } + + if ((flags & PUBKEY_FLAG_EDDSA)) + ; /* Fixme: EdDSA is special. */ + else if (!mpi_cmp_ui (sk->Q.z, 1)) + { + /* Fast path if Q is already in affine coordinates. */ + if (mpi_cmp (x1, sk->Q.x) || (y1 && mpi_cmp (y1, sk->Q.y))) + { + if (DBG_CIPHER) + log_debug + ("Bad check: There is NO correspondence between 'd' and 'Q'!\n"); + goto leave; + } + } + else + { + x2 = mpi_new (0); + y2 = mpi_new (0); + if (_gcry_mpi_ec_get_affine (x2, y2, &sk->Q, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: Q can not be a Point at Infinity!\n"); + goto leave; + } + + if (mpi_cmp (x1, x2) || mpi_cmp (y1, y2)) + { + if (DBG_CIPHER) + log_debug + ("Bad check: There is NO correspondence between 'd' and 'Q'!\n"); + goto leave; + } + } + rc = 0; /* Okay. */ + + leave: + mpi_free (x2); + mpi_free (x1); + mpi_free (y1); + mpi_free (y2); + point_free (&Q); + return rc; +} + + + +/********************************************* + ************** interface ****************** + *********************************************/ + +static gcry_err_code_t +ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) +{ + gpg_err_code_t rc; + unsigned int nbits; + elliptic_curve_t E; + ECC_secret_key sk; + gcry_mpi_t Gx = NULL; + gcry_mpi_t Gy = NULL; + gcry_mpi_t Qx = NULL; + gcry_mpi_t Qy = NULL; + char *curve_name = NULL; + gcry_sexp_t l1; + mpi_ec_t ctx = NULL; + gcry_sexp_t curve_info = NULL; + gcry_sexp_t curve_flags = NULL; + gcry_mpi_t base = NULL; + gcry_mpi_t public = NULL; + gcry_mpi_t secret = NULL; + int flags = 0; + + memset (&E, 0, sizeof E); + memset (&sk, 0, sizeof sk); + + rc = _gcry_pk_util_get_nbits (genparms, &nbits); + if (rc) + return rc; + + /* Parse the optional "curve" parameter. */ + l1 = sexp_find_token (genparms, "curve", 0); + if (l1) + { + curve_name = _gcry_sexp_nth_string (l1, 1); + sexp_release (l1); + if (!curve_name) + return GPG_ERR_INV_OBJ; /* No curve name or value too large. */ + } + + /* Parse the optional flags list. 
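+     For example (added for illustration) a request such as
+       (genkey (ecc (curve "Ed25519") (flags eddsa)))
+     arrives here with a "flags" list from which
+     _gcry_pk_util_parse_flaglist derives PUBKEY_FLAG_EDDSA.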
+ */
+  l1 = sexp_find_token (genparms, "flags", 0);
+  if (l1)
+    {
+      rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+      sexp_release (l1);
+      if (rc)
+        goto leave;
+    }
+
+  /* Parse the deprecated optional transient-key flag.  */
+  l1 = sexp_find_token (genparms, "transient-key", 0);
+  if (l1)
+    {
+      flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+      sexp_release (l1);
+    }
+
+  /* NBITS is required if no curve name has been given.  */
+  if (!nbits && !curve_name)
+    return GPG_ERR_NO_OBJ; /* No NBITS parameter. */
+
+  rc = _gcry_ecc_fill_in_curve (nbits, curve_name, &E, &nbits);
+  if (rc)
+    goto leave;
+
+  if (DBG_CIPHER)
+    {
+      log_debug ("ecgen curve info: %s/%s\n",
+                 _gcry_ecc_model2str (E.model),
+                 _gcry_ecc_dialect2str (E.dialect));
+      if (E.name)
+        log_debug ("ecgen curve used: %s\n", E.name);
+      log_printmpi ("ecgen curve p", E.p);
+      log_printmpi ("ecgen curve a", E.a);
+      log_printmpi ("ecgen curve b", E.b);
+      log_printmpi ("ecgen curve n", E.n);
+      log_printmpi ("ecgen curve h", E.h);
+      log_printpnt ("ecgen curve G", &E.G, NULL);
+    }
+
+  ctx = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, flags, E.p, E.a, E.b);
+
+  if (E.model == MPI_EC_MONTGOMERY)
+    rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, NULL);
+  else if ((flags & PUBKEY_FLAG_EDDSA))
+    rc = _gcry_ecc_eddsa_genkey (&sk, &E, ctx, flags);
+  else
+    rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, &Qy);
+  if (rc)
+    goto leave;
+
+  /* Copy data to the result.  */
+  Gx = mpi_new (0);
+  Gy = mpi_new (0);
+  if (E.model != MPI_EC_MONTGOMERY)
+    {
+      if (_gcry_mpi_ec_get_affine (Gx, Gy, &sk.E.G, ctx))
+        log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G");
+      base = _gcry_ecc_ec2os (Gx, Gy, sk.E.p);
+    }
+  if ((sk.E.dialect == ECC_DIALECT_ED25519 || E.model == MPI_EC_MONTGOMERY)
+      && !(flags & PUBKEY_FLAG_NOCOMP))
+    {
+      unsigned char *encpk;
+      unsigned int encpklen;
+
+      if (E.model != MPI_EC_MONTGOMERY)
+        /* (Gx and Gy are used as scratch variables)  */
+        rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, Gx, Gy,
+                                          !!(flags & PUBKEY_FLAG_COMP),
+                                          &encpk, &encpklen);
+      else
+        {
+          encpk = _gcry_mpi_get_buffer_extra (Qx, nbits/8,
+                                              -1, &encpklen, NULL);
+          if (encpk == NULL)
+            rc = gpg_err_code_from_syserror ();
+          else
+            {
+              encpk[0] = 0x40;
+              encpklen++;
+            }
+        }
+      if (rc)
+        goto leave;
+      public = mpi_new (0);
+      mpi_set_opaque (public, encpk, encpklen*8);
+    }
+  else
+    {
+      if (!Qx)
+        {
+          /* This is the case for a key from _gcry_ecc_eddsa_generate
+             with no compression.  */
+          Qx = mpi_new (0);
+          Qy = mpi_new (0);
+          if (_gcry_mpi_ec_get_affine (Qx, Qy, &sk.Q, ctx))
+            log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");
+        }
+      public = _gcry_ecc_ec2os (Qx, Qy, sk.E.p);
+    }
+  secret = sk.d; sk.d = NULL;
+  if (E.name)
+    {
+      rc = sexp_build (&curve_info, NULL, "(curve %s)", E.name);
+      if (rc)
+        goto leave;
+    }
+
+  if ((flags & PUBKEY_FLAG_PARAM) || (flags & PUBKEY_FLAG_EDDSA)
+      || (flags & PUBKEY_FLAG_DJB_TWEAK))
+    {
+      rc = sexp_build
+        (&curve_flags, NULL,
+         ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))?
+         "(flags param eddsa)" :
+         ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_DJB_TWEAK))?
+         "(flags param djb-tweak)" :
+         ((flags & PUBKEY_FLAG_PARAM))?
+         "(flags param)" : ((flags & PUBKEY_FLAG_EDDSA))?
+ "(flags eddsa)" : "(flags djb-tweak)" ); + if (rc) + goto leave; + } + + if ((flags & PUBKEY_FLAG_PARAM) && E.name) + rc = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (ecc%S%S(p%m)(a%m)(b%m)(g%m)(n%m)(h%m)(q%m)))" + " (private-key" + " (ecc%S%S(p%m)(a%m)(b%m)(g%m)(n%m)(h%m)(q%m)(d%m)))" + " )", + curve_info, curve_flags, + sk.E.p, sk.E.a, sk.E.b, base, sk.E.n, sk.E.h, public, + curve_info, curve_flags, + sk.E.p, sk.E.a, sk.E.b, base, sk.E.n, sk.E.h, public, + secret); + else + rc = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (ecc%S%S(q%m)))" + " (private-key" + " (ecc%S%S(q%m)(d%m)))" + " )", + curve_info, curve_flags, + public, + curve_info, curve_flags, + public, secret); + if (rc) + goto leave; + + if (DBG_CIPHER) + { + log_printmpi ("ecgen result p", sk.E.p); + log_printmpi ("ecgen result a", sk.E.a); + log_printmpi ("ecgen result b", sk.E.b); + log_printmpi ("ecgen result G", base); + log_printmpi ("ecgen result n", sk.E.n); + log_printmpi ("ecgen result h", sk.E.h); + log_printmpi ("ecgen result Q", public); + log_printmpi ("ecgen result d", secret); + if ((flags & PUBKEY_FLAG_EDDSA)) + log_debug ("ecgen result using Ed25519+EdDSA\n"); + } + + leave: + mpi_free (secret); + mpi_free (public); + mpi_free (base); + { + _gcry_ecc_curve_free (&sk.E); + point_free (&sk.Q); + mpi_free (sk.d); + } + _gcry_ecc_curve_free (&E); + mpi_free (Gx); + mpi_free (Gy); + mpi_free (Qx); + mpi_free (Qy); + _gcry_mpi_ec_free (ctx); + xfree (curve_name); + sexp_release (curve_flags); + sexp_release (curve_info); + return rc; +} + + +static gcry_err_code_t +ecc_check_secret_key (gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + gcry_sexp_t l1 = NULL; + int flags = 0; + char *curvename = NULL; + gcry_mpi_t mpi_g = NULL; + gcry_mpi_t mpi_q = NULL; + ECC_secret_key sk; + mpi_ec_t ec = NULL; + + memset (&sk, 0, sizeof sk); + + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + + /* Extract the parameters. */ + if ((flags & PUBKEY_FLAG_PARAM)) + rc = sexp_extract_param (keyparms, NULL, "-p?a?b?g?n?h?/q?+d", + &sk.E.p, &sk.E.a, &sk.E.b, &mpi_g, &sk.E.n, + &sk.E.h, &mpi_q, &sk.d, NULL); + else + rc = sexp_extract_param (keyparms, NULL, "/q?+d", + &mpi_q, &sk.d, NULL); + if (rc) + goto leave; + + /* Add missing parameters using the optional curve parameter. */ + sexp_release (l1); + l1 = sexp_find_token (keyparms, "curve", 5); + if (l1) + { + curvename = sexp_nth_string (l1, 1); + if (curvename) + { + rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL); + if (rc) + goto leave; + } + } + if (mpi_g) + { + if (!sk.E.G.x) + point_init (&sk.E.G); + rc = _gcry_ecc_os2ec (&sk.E.G, mpi_g); + if (rc) + goto leave; + } + + /* Guess required fields if a curve parameter has not been given. + FIXME: This is a crude hacks. We need to fix that. */ + if (!curvename) + { + sk.E.model = ((flags & PUBKEY_FLAG_EDDSA) + ? MPI_EC_EDWARDS + : MPI_EC_WEIERSTRASS); + sk.E.dialect = ((flags & PUBKEY_FLAG_EDDSA) + ? 
ECC_DIALECT_ED25519 + : ECC_DIALECT_STANDARD); + if (!sk.E.h) + sk.E.h = mpi_const (MPI_C_ONE); + } + if (DBG_CIPHER) + { + log_debug ("ecc_testkey inf: %s/%s\n", + _gcry_ecc_model2str (sk.E.model), + _gcry_ecc_dialect2str (sk.E.dialect)); + if (sk.E.name) + log_debug ("ecc_testkey nam: %s\n", sk.E.name); + log_printmpi ("ecc_testkey p", sk.E.p); + log_printmpi ("ecc_testkey a", sk.E.a); + log_printmpi ("ecc_testkey b", sk.E.b); + log_printpnt ("ecc_testkey g", &sk.E.G, NULL); + log_printmpi ("ecc_testkey n", sk.E.n); + log_printmpi ("ecc_testkey h", sk.E.h); + log_printmpi ("ecc_testkey q", mpi_q); + if (!fips_mode ()) + log_printmpi ("ecc_testkey d", sk.d); + } + if (!sk.E.p || !sk.E.a || !sk.E.b || !sk.E.G.x || !sk.E.n || !sk.E.h || !sk.d) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, flags, + sk.E.p, sk.E.a, sk.E.b); + + if (mpi_q) + { + point_init (&sk.Q); + if (ec->dialect == ECC_DIALECT_ED25519) + rc = _gcry_ecc_eddsa_decodepoint (mpi_q, ec, &sk.Q, NULL, NULL); + else if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (mpi_q, ec, &sk.Q); + else + rc = _gcry_ecc_os2ec (&sk.Q, mpi_q); + if (rc) + goto leave; + } + else + { + /* The secret key test requires Q. */ + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + if (check_secret_key (&sk, ec, flags)) + rc = GPG_ERR_BAD_SECKEY; + + leave: + _gcry_mpi_ec_free (ec); + _gcry_mpi_release (sk.E.p); + _gcry_mpi_release (sk.E.a); + _gcry_mpi_release (sk.E.b); + _gcry_mpi_release (mpi_g); + point_free (&sk.E.G); + _gcry_mpi_release (sk.E.n); + _gcry_mpi_release (sk.E.h); + _gcry_mpi_release (mpi_q); + point_free (&sk.Q); + _gcry_mpi_release (sk.d); + xfree (curvename); + sexp_release (l1); + if (DBG_CIPHER) + log_debug ("ecc_testkey => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t data = NULL; + gcry_sexp_t l1 = NULL; + char *curvename = NULL; + gcry_mpi_t mpi_g = NULL; + gcry_mpi_t mpi_q = NULL; + ECC_secret_key sk; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + + memset (&sk, 0, sizeof sk); + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, 0); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("ecc_sign data", data); + + /* + * Extract the key. + */ + if ((ctx.flags & PUBKEY_FLAG_PARAM)) + rc = sexp_extract_param (keyparms, NULL, "-p?a?b?g?n?h?/q?+d", + &sk.E.p, &sk.E.a, &sk.E.b, &mpi_g, &sk.E.n, + &sk.E.h, &mpi_q, &sk.d, NULL); + else + rc = sexp_extract_param (keyparms, NULL, "/q?+d", + &mpi_q, &sk.d, NULL); + if (rc) + goto leave; + if (mpi_g) + { + point_init (&sk.E.G); + rc = _gcry_ecc_os2ec (&sk.E.G, mpi_g); + if (rc) + goto leave; + } + /* Add missing parameters using the optional curve parameter. */ + l1 = sexp_find_token (keyparms, "curve", 5); + if (l1) + { + curvename = sexp_nth_string (l1, 1); + if (curvename) + { + rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL); + if (rc) + goto leave; + } + } + /* Guess required fields if a curve parameter has not been given. + FIXME: This is a crude hacks. We need to fix that. */ + if (!curvename) + { + sk.E.model = ((ctx.flags & PUBKEY_FLAG_EDDSA) + ? MPI_EC_EDWARDS + : MPI_EC_WEIERSTRASS); + sk.E.dialect = ((ctx.flags & PUBKEY_FLAG_EDDSA) + ? 
ECC_DIALECT_ED25519 + : ECC_DIALECT_STANDARD); + if (!sk.E.h) + sk.E.h = mpi_const (MPI_C_ONE); + } + if (DBG_CIPHER) + { + log_debug ("ecc_sign info: %s/%s%s\n", + _gcry_ecc_model2str (sk.E.model), + _gcry_ecc_dialect2str (sk.E.dialect), + (ctx.flags & PUBKEY_FLAG_EDDSA)? "+EdDSA":""); + if (sk.E.name) + log_debug ("ecc_sign name: %s\n", sk.E.name); + log_printmpi ("ecc_sign p", sk.E.p); + log_printmpi ("ecc_sign a", sk.E.a); + log_printmpi ("ecc_sign b", sk.E.b); + log_printpnt ("ecc_sign g", &sk.E.G, NULL); + log_printmpi ("ecc_sign n", sk.E.n); + log_printmpi ("ecc_sign h", sk.E.h); + log_printmpi ("ecc_sign q", mpi_q); + if (!fips_mode ()) + log_printmpi ("ecc_sign d", sk.d); + } + if (!sk.E.p || !sk.E.a || !sk.E.b || !sk.E.G.x || !sk.E.n || !sk.E.h || !sk.d) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + + sig_r = mpi_new (0); + sig_s = mpi_new (0); + if ((ctx.flags & PUBKEY_FLAG_EDDSA)) + { + /* EdDSA requires the public key. */ + rc = _gcry_ecc_eddsa_sign (data, &sk, sig_r, sig_s, ctx.hash_algo, mpi_q); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(eddsa(r%M)(s%M)))", sig_r, sig_s); + } + else if ((ctx.flags & PUBKEY_FLAG_GOST)) + { + rc = _gcry_ecc_gost_sign (data, &sk, sig_r, sig_s); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(gost(r%M)(s%M)))", sig_r, sig_s); + } + else + { + rc = _gcry_ecc_ecdsa_sign (data, &sk, sig_r, sig_s, + ctx.flags, ctx.hash_algo); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(ecdsa(r%M)(s%M)))", sig_r, sig_s); + } + + + leave: + _gcry_mpi_release (sk.E.p); + _gcry_mpi_release (sk.E.a); + _gcry_mpi_release (sk.E.b); + _gcry_mpi_release (mpi_g); + point_free (&sk.E.G); + _gcry_mpi_release (sk.E.n); + _gcry_mpi_release (sk.E.h); + _gcry_mpi_release (mpi_q); + point_free (&sk.Q); + _gcry_mpi_release (sk.d); + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + xfree (curvename); + _gcry_mpi_release (data); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("ecc_sign => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + char *curvename = NULL; + gcry_mpi_t mpi_g = NULL; + gcry_mpi_t mpi_q = NULL; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + gcry_mpi_t data = NULL; + ECC_public_key pk; + int sigflags; + + memset (&pk, 0, sizeof pk); + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, + ecc_get_nbits (s_keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("ecc_verify data", data); + + /* + * Extract the signature value. + */ + rc = _gcry_pk_util_preparse_sigval (s_sig, ecc_names, &l1, &sigflags); + if (rc) + goto leave; + rc = sexp_extract_param (l1, NULL, (sigflags & PUBKEY_FLAG_EDDSA)? "/rs":"rs", + &sig_r, &sig_s, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("ecc_verify s_r", sig_r); + log_mpidump ("ecc_verify s_s", sig_s); + } + if ((ctx.flags & PUBKEY_FLAG_EDDSA) ^ (sigflags & PUBKEY_FLAG_EDDSA)) + { + rc = GPG_ERR_CONFLICT; /* Inconsistent use of flag/algoname. */ + goto leave; + } + + + /* + * Extract the key. 
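+ *
+ * (Illustration, not original text.)  Two key shapes are accepted:
+ * either a full parameter set
+ *
+ *   (public-key (ecc (p %m)(a %m)(b %m)(g %m)(n %m)(h %m)(q %m)))
+ *
+ * or the shorthand with a named curve
+ *
+ *   (public-key (ecc (curve "NIST P-256") (q %m)))
+ *
+ * in which case _gcry_ecc_fill_in_curve supplies the missing values.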
+ */ + if ((ctx.flags & PUBKEY_FLAG_PARAM)) + rc = sexp_extract_param (s_keyparms, NULL, "-p?a?b?g?n?h?/q", + &pk.E.p, &pk.E.a, &pk.E.b, &mpi_g, &pk.E.n, + &pk.E.h, &mpi_q, NULL); + else + rc = sexp_extract_param (s_keyparms, NULL, "/q", + &mpi_q, NULL); + if (rc) + goto leave; + if (mpi_g) + { + point_init (&pk.E.G); + rc = _gcry_ecc_os2ec (&pk.E.G, mpi_g); + if (rc) + goto leave; + } + /* Add missing parameters using the optional curve parameter. */ + sexp_release (l1); + l1 = sexp_find_token (s_keyparms, "curve", 5); + if (l1) + { + curvename = sexp_nth_string (l1, 1); + if (curvename) + { + rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL); + if (rc) + goto leave; + } + } + /* Guess required fields if a curve parameter has not been given. + FIXME: This is a crude hacks. We need to fix that. */ + if (!curvename) + { + pk.E.model = ((sigflags & PUBKEY_FLAG_EDDSA) + ? MPI_EC_EDWARDS + : MPI_EC_WEIERSTRASS); + pk.E.dialect = ((sigflags & PUBKEY_FLAG_EDDSA) + ? ECC_DIALECT_ED25519 + : ECC_DIALECT_STANDARD); + if (!pk.E.h) + pk.E.h = mpi_const (MPI_C_ONE); + } + + if (DBG_CIPHER) + { + log_debug ("ecc_verify info: %s/%s%s\n", + _gcry_ecc_model2str (pk.E.model), + _gcry_ecc_dialect2str (pk.E.dialect), + (sigflags & PUBKEY_FLAG_EDDSA)? "+EdDSA":""); + if (pk.E.name) + log_debug ("ecc_verify name: %s\n", pk.E.name); + log_printmpi ("ecc_verify p", pk.E.p); + log_printmpi ("ecc_verify a", pk.E.a); + log_printmpi ("ecc_verify b", pk.E.b); + log_printpnt ("ecc_verify g", &pk.E.G, NULL); + log_printmpi ("ecc_verify n", pk.E.n); + log_printmpi ("ecc_verify h", pk.E.h); + log_printmpi ("ecc_verify q", mpi_q); + } + if (!pk.E.p || !pk.E.a || !pk.E.b || !pk.E.G.x || !pk.E.n || !pk.E.h || !mpi_q) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + + /* + * Verify the signature. + */ + if ((sigflags & PUBKEY_FLAG_EDDSA)) + { + rc = _gcry_ecc_eddsa_verify (data, &pk, sig_r, sig_s, + ctx.hash_algo, mpi_q); + } + else if ((sigflags & PUBKEY_FLAG_GOST)) + { + point_init (&pk.Q); + rc = _gcry_ecc_os2ec (&pk.Q, mpi_q); + if (rc) + goto leave; + + rc = _gcry_ecc_gost_verify (data, &pk, sig_r, sig_s); + } + else + { + point_init (&pk.Q); + if (pk.E.dialect == ECC_DIALECT_ED25519) + { + mpi_ec_t ec; + + /* Fixme: Factor the curve context setup out of eddsa_verify + and ecdsa_verify. So that we don't do it twice. 
*/ + ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, 0, + pk.E.p, pk.E.a, pk.E.b); + + rc = _gcry_ecc_eddsa_decodepoint (mpi_q, ec, &pk.Q, NULL, NULL); + _gcry_mpi_ec_free (ec); + } + else + { + rc = _gcry_ecc_os2ec (&pk.Q, mpi_q); + } + if (rc) + goto leave; + + if (mpi_is_opaque (data)) + { + const void *abuf; + unsigned int abits, qbits; + gcry_mpi_t a; + + qbits = mpi_get_nbits (pk.E.n); + + abuf = mpi_get_opaque (data, &abits); + rc = _gcry_mpi_scan (&a, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL); + if (!rc) + { + if (abits > qbits) + mpi_rshift (a, a, abits - qbits); + + rc = _gcry_ecc_ecdsa_verify (a, &pk, sig_r, sig_s); + _gcry_mpi_release (a); + } + } + else + rc = _gcry_ecc_ecdsa_verify (data, &pk, sig_r, sig_s); + } + + leave: + _gcry_mpi_release (pk.E.p); + _gcry_mpi_release (pk.E.a); + _gcry_mpi_release (pk.E.b); + _gcry_mpi_release (mpi_g); + point_free (&pk.E.G); + _gcry_mpi_release (pk.E.n); + _gcry_mpi_release (pk.E.h); + _gcry_mpi_release (mpi_q); + point_free (&pk.Q); + _gcry_mpi_release (data); + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + xfree (curvename); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("ecc_verify => %s\n", rc?gpg_strerror (rc):"Good"); + return rc; +} + + +/* ecdh raw is classic 2-round DH protocol published in 1976. + * + * Overview of ecc_encrypt_raw and ecc_decrypt_raw. + * + * As with any PK operation, encrypt version uses a public key and + * decrypt -- private. + * + * Symbols used below: + * G - field generator point + * d - private long-term scalar + * dG - public long-term key + * k - ephemeral scalar + * kG - ephemeral public key + * dkG - shared secret + * + * ecc_encrypt_raw description: + * input: + * data[0] : private scalar (k) + * output: A new S-expression with the parameters: + * s : shared point (kdG) + * e : generated ephemeral public key (kG) + * + * ecc_decrypt_raw description: + * input: + * data[0] : a point kG (ephemeral public key) + * output: + * result[0] : shared point (kdG) + */ +static gcry_err_code_t +ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + unsigned int nbits; + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + char *curvename = NULL; + gcry_mpi_t mpi_g = NULL; + gcry_mpi_t mpi_q = NULL; + gcry_mpi_t mpi_s = NULL; + gcry_mpi_t mpi_e = NULL; + gcry_mpi_t data = NULL; + ECC_public_key pk; + mpi_ec_t ec = NULL; + int flags = 0; + + memset (&pk, 0, sizeof pk); + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, + (nbits = ecc_get_nbits (keyparms))); + + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + sexp_release (l1); + l1 = NULL; + + /* + * Extract the data. + */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* + * Extract the key. + */ + rc = sexp_extract_param (keyparms, NULL, + (flags & PUBKEY_FLAG_DJB_TWEAK)? + "-p?a?b?g?n?h?/q" : "-p?a?b?g?n?h?+q", + &pk.E.p, &pk.E.a, &pk.E.b, &mpi_g, &pk.E.n, &pk.E.h, + &mpi_q, NULL); + if (rc) + goto leave; + if (mpi_g) + { + point_init (&pk.E.G); + rc = _gcry_ecc_os2ec (&pk.E.G, mpi_g); + if (rc) + goto leave; + } + /* Add missing parameters using the optional curve parameter. 
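+     (Example added for illustration.)  A caller may thus supply only
+       (public-key (ecc (curve "Curve25519") (flags djb-tweak) (q ...)))
+     and p, a, b, G, n and h are filled in from the built-in curve
+     table by _gcry_ecc_fill_in_curve below.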
*/ + l1 = sexp_find_token (keyparms, "curve", 5); + if (l1) + { + curvename = sexp_nth_string (l1, 1); + if (curvename) + { + rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL); + if (rc) + goto leave; + } + } + /* Guess required fields if a curve parameter has not been given. */ + if (!curvename) + { + pk.E.model = MPI_EC_WEIERSTRASS; + pk.E.dialect = ECC_DIALECT_STANDARD; + if (!pk.E.h) + pk.E.h = mpi_const (MPI_C_ONE); + } + + /* + * Tweak the scalar bits by cofactor and number of bits of the field. + * It assumes the cofactor is a power of 2. + */ + if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + { + int i; + + for (i = 0; i < mpi_get_nbits (pk.E.h) - 1; i++) + mpi_clear_bit (data, i); + mpi_set_highbit (data, mpi_get_nbits (pk.E.p) - 1); + } + if (DBG_CIPHER) + log_mpidump ("ecc_encrypt data", data); + + if (DBG_CIPHER) + { + log_debug ("ecc_encrypt info: %s/%s\n", + _gcry_ecc_model2str (pk.E.model), + _gcry_ecc_dialect2str (pk.E.dialect)); + if (pk.E.name) + log_debug ("ecc_encrypt name: %s\n", pk.E.name); + log_printmpi ("ecc_encrypt p", pk.E.p); + log_printmpi ("ecc_encrypt a", pk.E.a); + log_printmpi ("ecc_encrypt b", pk.E.b); + log_printpnt ("ecc_encrypt g", &pk.E.G, NULL); + log_printmpi ("ecc_encrypt n", pk.E.n); + log_printmpi ("ecc_encrypt h", pk.E.h); + log_printmpi ("ecc_encrypt q", mpi_q); + } + if (!pk.E.p || !pk.E.a || !pk.E.b || !pk.E.G.x || !pk.E.n || !pk.E.h || !mpi_q) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + /* Compute the encrypted value. */ + ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, flags, + pk.E.p, pk.E.a, pk.E.b); + + /* Convert the public key. */ + if (mpi_q) + { + point_init (&pk.Q); + if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (mpi_q, ec, &pk.Q); + else + rc = _gcry_ecc_os2ec (&pk.Q, mpi_q); + if (rc) + goto leave; + } + + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */ + { + mpi_point_struct R; /* Result that we return. */ + gcry_mpi_t x, y; + unsigned char *rawmpi; + unsigned int rawmpilen; + + rc = 0; + x = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y = NULL; + else + y = mpi_new (0); + + point_init (&R); + + /* R = kQ <=> R = kdG */ + _gcry_mpi_ec_mul_point (&R, data, &pk.Q, ec); + + if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) + { + /* + * Here, X is 0. In the X25519 computation on Curve25519, X0 + * function maps infinity to zero. So, when PUBKEY_FLAG_DJB_TWEAK + * is enabled, return the result of 0 not raising an error. + * + * This is a corner case. It never occurs with properly + * generated public keys, but it might happen with blindly + * imported public key which might not follow the key + * generation procedure. + */ + if (!(flags & PUBKEY_FLAG_DJB_TWEAK)) + { /* It's not for X25519, then, the input data was simply wrong. 
*/ + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + } + if (y) + mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p); + else + { + rawmpi = _gcry_mpi_get_buffer_extra (x, nbits/8, -1, &rawmpilen, NULL); + if (!rawmpi) + rc = gpg_err_code_from_syserror (); + else + { + rawmpi[0] = 0x40; + rawmpilen++; + mpi_s = mpi_new (0); + mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8); + } + } + + /* R = kG */ + _gcry_mpi_ec_mul_point (&R, data, &pk.E.G, ec); + + if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + if (y) + mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p); + else + { + rawmpi = _gcry_mpi_get_buffer_extra (x, nbits/8, -1, &rawmpilen, NULL); + if (!rawmpi) + rc = gpg_err_code_from_syserror (); + else + { + rawmpi[0] = 0x40; + rawmpilen++; + mpi_e = mpi_new (0); + mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8); + } + } + + leave_main: + mpi_free (x); + mpi_free (y); + point_free (&R); + if (rc) + goto leave; + } + + if (!rc) + rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); + + leave: + _gcry_mpi_release (pk.E.p); + _gcry_mpi_release (pk.E.a); + _gcry_mpi_release (pk.E.b); + _gcry_mpi_release (mpi_g); + point_free (&pk.E.G); + _gcry_mpi_release (pk.E.n); + _gcry_mpi_release (pk.E.h); + _gcry_mpi_release (mpi_q); + point_free (&pk.Q); + _gcry_mpi_release (data); + _gcry_mpi_release (mpi_s); + _gcry_mpi_release (mpi_e); + xfree (curvename); + sexp_release (l1); + _gcry_mpi_ec_free (ec); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("ecc_encrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +/* input: + * data[0] : a point kG (ephemeral public key) + * output: + * resaddr[0] : shared point kdG + * + * see ecc_encrypt_raw for details. + */ +static gcry_err_code_t +ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + unsigned int nbits; + gpg_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t data_e = NULL; + ECC_secret_key sk; + gcry_mpi_t mpi_g = NULL; + char *curvename = NULL; + mpi_ec_t ec = NULL; + mpi_point_struct kG; + mpi_point_struct R; + gcry_mpi_t r = NULL; + int flags = 0; + + memset (&sk, 0, sizeof sk); + point_init (&kG); + point_init (&R); + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, + (nbits = ecc_get_nbits (keyparms))); + + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + sexp_release (l1); + l1 = NULL; + + /* + * Extract the data. + */ + rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx); + if (rc) + goto leave; + rc = sexp_extract_param (l1, NULL, "e", &data_e, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt d_e", data_e); + if (mpi_is_opaque (data_e)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* + * Extract the key. + */ + rc = sexp_extract_param (keyparms, NULL, "-p?a?b?g?n?h?+d", + &sk.E.p, &sk.E.a, &sk.E.b, &mpi_g, &sk.E.n, + &sk.E.h, &sk.d, NULL); + if (rc) + goto leave; + if (mpi_g) + { + point_init (&sk.E.G); + rc = _gcry_ecc_os2ec (&sk.E.G, mpi_g); + if (rc) + goto leave; + } + /* Add missing parameters using the optional curve parameter. */ + sexp_release (l1); + l1 = sexp_find_token (keyparms, "curve", 5); + if (l1) + { + curvename = sexp_nth_string (l1, 1); + if (curvename) + { + rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL); + if (rc) + goto leave; + } + } + /* Guess required fields if a curve parameter has not been given. 
*/ + if (!curvename) + { + sk.E.model = MPI_EC_WEIERSTRASS; + sk.E.dialect = ECC_DIALECT_STANDARD; + if (!sk.E.h) + sk.E.h = mpi_const (MPI_C_ONE); + } + if (DBG_CIPHER) + { + log_debug ("ecc_decrypt info: %s/%s\n", + _gcry_ecc_model2str (sk.E.model), + _gcry_ecc_dialect2str (sk.E.dialect)); + if (sk.E.name) + log_debug ("ecc_decrypt name: %s\n", sk.E.name); + log_printmpi ("ecc_decrypt p", sk.E.p); + log_printmpi ("ecc_decrypt a", sk.E.a); + log_printmpi ("ecc_decrypt b", sk.E.b); + log_printpnt ("ecc_decrypt g", &sk.E.G, NULL); + log_printmpi ("ecc_decrypt n", sk.E.n); + log_printmpi ("ecc_decrypt h", sk.E.h); + if (!fips_mode ()) + log_printmpi ("ecc_decrypt d", sk.d); + } + if (!sk.E.p || !sk.E.a || !sk.E.b || !sk.E.G.x || !sk.E.n || !sk.E.h || !sk.d) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + + ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, flags, + sk.E.p, sk.E.a, sk.E.b); + + /* + * Compute the plaintext. + */ + if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (data_e, ec, &kG); + else + rc = _gcry_ecc_os2ec (&kG, data_e); + if (rc) + goto leave; + + if (DBG_CIPHER) + log_printpnt ("ecc_decrypt kG", &kG, NULL); + + if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + { + /* For X25519, by its definition, validation should not be done. */ + /* (Instead, we do output check.) + * + * However, to mitigate secret key leak from our implementation, + * we also do input validation here. For constant-time + * implementation, we can remove this input validation. + */ + if (_gcry_mpi_ec_bad_point (&kG, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + } + else if (!_gcry_mpi_ec_curve_point (&kG, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* R = dkG */ + _gcry_mpi_ec_mul_point (&R, sk.d, &kG, ec); + + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so: */ + { + gcry_mpi_t x, y; + + x = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y = NULL; + else + y = mpi_new (0); + + if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + /* + * Note for X25519. + * + * By the definition of X25519, this is the case where X25519 + * returns 0, mapping infinity to zero. However, we + * deliberately let it return an error. + * + * For X25519 ECDH, comming here means that it might be + * decrypted by anyone with the shared secret of 0 (the result + * of this function could be always 0 by other scalar values, + * other than the private key of SK.D). + * + * So, it looks like an encrypted message but it can be + * decrypted by anyone, or at least something wrong + * happens. Recipient should not proceed as if it were + * properly encrypted message. + * + * This handling is needed for our major usage of GnuPG, + * where it does the One-Pass Diffie-Hellman method, + * C(1, 1, ECC CDH), with an ephemeral key. 
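+ *
+ * (Note added for illustration.)  In RFC 7748 notation the operation
+ * computed here is K = X25519(d, U), and the all-zero K caught above
+ * is exactly the value that RFC 7748, section 6.1 says
+ * implementations may check for and abort on.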
+ */ + } + + if (y) + r = _gcry_ecc_ec2os (x, y, sk.E.p); + else + { + unsigned char *rawmpi; + unsigned int rawmpilen; + + rawmpi = _gcry_mpi_get_buffer_extra (x, nbits/8, -1, + &rawmpilen, NULL); + if (!rawmpi) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + else + { + rawmpi[0] = 0x40; + rawmpilen++; + r = mpi_new (0); + mpi_set_opaque (r, rawmpi, rawmpilen*8); + } + } + if (!r) + rc = gpg_err_code_from_syserror (); + else + rc = 0; + mpi_free (x); + mpi_free (y); + } + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt res", r); + + if (!rc) + rc = sexp_build (r_plain, NULL, "(value %m)", r); + + leave: + point_free (&R); + point_free (&kG); + _gcry_mpi_release (r); + _gcry_mpi_release (sk.E.p); + _gcry_mpi_release (sk.E.a); + _gcry_mpi_release (sk.E.b); + _gcry_mpi_release (mpi_g); + point_free (&sk.E.G); + _gcry_mpi_release (sk.E.n); + _gcry_mpi_release (sk.E.h); + _gcry_mpi_release (sk.d); + _gcry_mpi_release (data_e); + xfree (curvename); + sexp_release (l1); + _gcry_mpi_ec_free (ec); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("ecc_decrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +/* Return the number of bits for the key described by PARMS. On error + * 0 is returned. The format of PARMS starts with the algorithm name; + * for example: + * + * (ecc + * (curve <name>) + * (p <mpi>) + * (a <mpi>) + * (b <mpi>) + * (g <mpi>) + * (n <mpi>) + * (q <mpi>)) + * + * More parameters may be given. Either P or CURVE is needed. + */ +static unsigned int +ecc_get_nbits (gcry_sexp_t parms) +{ + gcry_sexp_t l1; + gcry_mpi_t p; + unsigned int nbits = 0; + char *curve; + + l1 = sexp_find_token (parms, "p", 1); + if (!l1) + { /* Parameter P not found - check whether we have "curve". */ + l1 = sexp_find_token (parms, "curve", 5); + if (!l1) + return 0; /* Neither P nor CURVE found. */ + + curve = sexp_nth_string (l1, 1); + sexp_release (l1); + if (!curve) + return 0; /* No curve name given (or out of core). */ + + if (_gcry_ecc_fill_in_curve (0, curve, NULL, &nbits)) + nbits = 0; + xfree (curve); + } + else + { + p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + if (p) + { + nbits = mpi_get_nbits (p); + _gcry_mpi_release (p); + } + } + return nbits; +} + + +/* See rsa.c for a description of this function. */ +static gpg_err_code_t +compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparms) +{ +#define N_COMPONENTS 7 + static const char names[N_COMPONENTS] = "pabgnhq"; + gpg_err_code_t rc; + gcry_sexp_t l1; + gcry_mpi_t values[N_COMPONENTS]; + int idx; + char *curvename = NULL; + int flags = 0; + enum gcry_mpi_ec_models model = 0; + enum ecc_dialects dialect = 0; + + /* Clear the values first. */ + for (idx=0; idx < N_COMPONENTS; idx++) + values[idx] = NULL; + + + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + + /* Extract the parameters. 
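+     (Illustration, not original text.)  Each parameter collected here
+     is later fed into the keygrip hash in the canonical form
+     "(1:<name><len>:<raw value>)"; a 32-byte Q, for instance,
+     contributes "(1:q32:<32 bytes>)".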
*/ + if ((flags & PUBKEY_FLAG_PARAM)) + { + if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + rc = sexp_extract_param (keyparms, NULL, "p?a?b?g?n?h?/q", + &values[0], &values[1], &values[2], + &values[3], &values[4], &values[5], + &values[6], NULL); + else + rc = sexp_extract_param (keyparms, NULL, "p?a?b?g?n?h?q", + &values[0], &values[1], &values[2], + &values[3], &values[4], &values[5], + &values[6], NULL); + } + else + { + if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + rc = sexp_extract_param (keyparms, NULL, "/q", + &values[6], NULL); + else + rc = sexp_extract_param (keyparms, NULL, "q", + &values[6], NULL); + } + if (rc) + goto leave; + + /* Check whether a curve parameter is available and use that to fill + in missing values. */ + sexp_release (l1); + l1 = sexp_find_token (keyparms, "curve", 5); + if (l1) + { + curvename = sexp_nth_string (l1, 1); + if (curvename) + { + rc = _gcry_ecc_update_curve_param (curvename, + &model, &dialect, + &values[0], &values[1], &values[2], + &values[3], &values[4], &values[5]); + if (rc) + goto leave; + } + } + + /* Guess required fields if a curve parameter has not been given. + FIXME: This is a crude hacks. We need to fix that. */ + if (!curvename) + { + model = ((flags & PUBKEY_FLAG_EDDSA) + ? MPI_EC_EDWARDS + : MPI_EC_WEIERSTRASS); + dialect = ((flags & PUBKEY_FLAG_EDDSA) + ? ECC_DIALECT_ED25519 + : ECC_DIALECT_STANDARD); + if (!values[5]) + values[5] = mpi_const (MPI_C_ONE); + } + + /* Check that all parameters are known and normalize all MPIs (that + should not be required but we use an internal function later and + thus we better make 100% sure that they are normalized). */ + for (idx = 0; idx < N_COMPONENTS; idx++) + if (!values[idx]) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + else + _gcry_mpi_normalize (values[idx]); + + /* Uncompress the public key with the exception of EdDSA where + compression is the default and we thus compute the keygrip using + the compressed version. Because we don't support any non-eddsa + compression, the only thing we need to do is to compress + EdDSA. */ + if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + { + rc = _gcry_ecc_eddsa_ensure_compact (values[6], 256); + if (rc) + goto leave; + } + + /* Hash them all. */ + for (idx = 0; idx < N_COMPONENTS; idx++) + { + char buf[30]; + + if (idx == 5) + continue; /* Skip cofactor. */ + + if (mpi_is_opaque (values[idx])) + { + const unsigned char *raw; + unsigned int n; + + raw = mpi_get_opaque (values[idx], &n); + n = (n + 7)/8; + snprintf (buf, sizeof buf, "(1:%c%u:", names[idx], n); + _gcry_md_write (md, buf, strlen (buf)); + _gcry_md_write (md, raw, n); + _gcry_md_write (md, ")", 1); + } + else + { + unsigned char *rawmpi; + unsigned int rawmpilen; + + rawmpi = _gcry_mpi_get_buffer (values[idx], 0, &rawmpilen, NULL); + if (!rawmpi) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + snprintf (buf, sizeof buf, "(1:%c%u:", names[idx], rawmpilen); + _gcry_md_write (md, buf, strlen (buf)); + _gcry_md_write (md, rawmpi, rawmpilen); + _gcry_md_write (md, ")", 1); + xfree (rawmpi); + } + } + + leave: + xfree (curvename); + sexp_release (l1); + for (idx = 0; idx < N_COMPONENTS; idx++) + _gcry_mpi_release (values[idx]); + + return rc; +#undef N_COMPONENTS +} + + + +/* + Low-level API helper functions. + */ + +/* This is the worker function for gcry_pubkey_get_sexp for ECC + algorithms. Note that the caller has already stored NULL at + R_SEXP. 
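+
+   (Usage sketch added for illustration.)  This worker is reached via
+   the public API roughly as:
+
+     gcry_ctx_t ctx;
+     gcry_sexp_t key;
+     gcry_mpi_ec_new (&ctx, keyparam, NULL);  /* KEYPARAM: an ecc key.  */
+     gcry_pubkey_get_sexp (&key, GCRY_PK_GET_PUBKEY, ctx);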
+ */
+gpg_err_code_t
+_gcry_pk_ecc_get_sexp (gcry_sexp_t *r_sexp, int mode, mpi_ec_t ec)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t mpi_G = NULL;
+  gcry_mpi_t mpi_Q = NULL;
+
+  if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->h)
+    return GPG_ERR_BAD_CRYPT_CTX;
+
+  if (mode == GCRY_PK_GET_SECKEY && !ec->d)
+    return GPG_ERR_NO_SECKEY;
+
+  /* Compute the public point if it is missing.  */
+  if (!ec->Q && ec->d)
+    ec->Q = _gcry_ecc_compute_public (NULL, ec, NULL, NULL);
+
+  /* Encode G and Q.  */
+  mpi_G = _gcry_mpi_ec_ec2os (ec->G, ec);
+  if (!mpi_G)
+    {
+      rc = GPG_ERR_BROKEN_PUBKEY;
+      goto leave;
+    }
+  if (!ec->Q)
+    {
+      rc = GPG_ERR_BAD_CRYPT_CTX;
+      goto leave;
+    }
+
+  if (ec->dialect == ECC_DIALECT_ED25519)
+    {
+      unsigned char *encpk;
+      unsigned int encpklen;
+
+      rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0,
+                                        &encpk, &encpklen);
+      if (rc)
+        goto leave;
+      mpi_Q = mpi_set_opaque (NULL, encpk, encpklen*8);
+      encpk = NULL;
+    }
+  else
+    {
+      mpi_Q = _gcry_mpi_ec_ec2os (ec->Q, ec);
+    }
+  if (!mpi_Q)
+    {
+      rc = GPG_ERR_BROKEN_PUBKEY;
+      goto leave;
+    }
+
+  /* Fixme: We should return a curve name instead of the parameters if
+     we know that they match a curve.  */
+
+  if (ec->d && (!mode || mode == GCRY_PK_GET_SECKEY))
+    {
+      /* Let's return a private key. */
+      rc = sexp_build (r_sexp, NULL,
+                       "(private-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%m)(q%m)(d%m)))",
+                       ec->p, ec->a, ec->b, mpi_G, ec->n, ec->h, mpi_Q, ec->d);
+    }
+  else if (ec->Q)
+    {
+      /* Let's return a public key.  */
+      rc = sexp_build (r_sexp, NULL,
+                       "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%m)(q%m)))",
+                       ec->p, ec->a, ec->b, mpi_G, ec->n, ec->h, mpi_Q);
+    }
+  else
+    rc = GPG_ERR_BAD_CRYPT_CTX;
+
+ leave:
+  mpi_free (mpi_Q);
+  mpi_free (mpi_G);
+  return rc;
+}
+
+
+
+/*
+   Self-test section.
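+
+   (Note added for illustration.)  The known-answer test below relies
+   on the deterministic nonce scheme of RFC 6979 (flag "rfc6979"), so
+   the ECDSA signature is reproducible and the computed r and s can be
+   compared directly against the published vector A.2.5.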
+ */ + +static const char * +selftest_sign (gcry_sexp_t pkey, gcry_sexp_t skey) +{ + /* Sample data from RFC 6979 section A.2.5, hash is of message "sample" */ + static const char sample_data[] = + "(data (flags rfc6979)" + " (hash sha256 #af2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e98915" + /**/ "62113d8a62add1bf#))"; + static const char sample_data_bad[] = + "(data (flags rfc6979)" + " (hash sha256 #bf2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e98915" + /**/ "62113d8a62add1bf#))"; + static const char signature_r[] = + "efd48b2aacb6a8fd1140dd9cd45e81d69d2c877b56aaf991c34d0ea84eaf3716"; + static const char signature_s[] = + "f7cb1c942d657c41d436c7a1b6e29f65f3e900dbb9aff4064dc4ab2f843acda8"; + + const char *errtxt = NULL; + gcry_error_t err; + gcry_sexp_t data = NULL; + gcry_sexp_t data_bad = NULL; + gcry_sexp_t sig = NULL; + gcry_sexp_t l1 = NULL; + gcry_sexp_t l2 = NULL; + gcry_mpi_t r = NULL; + gcry_mpi_t s = NULL; + gcry_mpi_t calculated_r = NULL; + gcry_mpi_t calculated_s = NULL; + int cmp; + + err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data)); + if (!err) + err = sexp_sscan (&data_bad, NULL, + sample_data_bad, strlen (sample_data_bad)); + if (!err) + err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL); + if (!err) + err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL); + + if (err) + { + errtxt = "converting data failed"; + goto leave; + } + + err = _gcry_pk_sign (&sig, data, skey); + if (err) + { + errtxt = "signing failed"; + goto leave; + } + + /* check against known signature */ + errtxt = "signature validity failed"; + l1 = _gcry_sexp_find_token (sig, "sig-val", 0); + if (!l1) + goto leave; + l2 = _gcry_sexp_find_token (l1, "ecdsa", 0); + if (!l2) + goto leave; + + sexp_release (l1); + l1 = l2; + + l2 = _gcry_sexp_find_token (l1, "r", 0); + if (!l2) + goto leave; + calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG); + if (!calculated_r) + goto leave; + + sexp_release (l2); + l2 = _gcry_sexp_find_token (l1, "s", 0); + if (!l2) + goto leave; + calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG); + if (!calculated_s) + goto leave; + + errtxt = "known sig check failed"; + + cmp = _gcry_mpi_cmp (r, calculated_r); + if (cmp) + goto leave; + cmp = _gcry_mpi_cmp (s, calculated_s); + if (cmp) + goto leave; + + errtxt = NULL; + + /* verify generated signature */ + err = _gcry_pk_verify (sig, data, pkey); + if (err) + { + errtxt = "verify failed"; + goto leave; + } + err = _gcry_pk_verify (sig, data_bad, pkey); + if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE) + { + errtxt = "bad signature not detected"; + goto leave; + } + + + leave: + sexp_release (sig); + sexp_release (data_bad); + sexp_release (data); + sexp_release (l1); + sexp_release (l2); + mpi_release (r); + mpi_release (s); + mpi_release (calculated_r); + mpi_release (calculated_s); + return errtxt; +} + + +static gpg_err_code_t +selftests_ecdsa (selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + gcry_error_t err; + gcry_sexp_t skey = NULL; + gcry_sexp_t pkey = NULL; + + what = "convert"; + err = sexp_sscan (&skey, NULL, sample_secret_key_secp256, + strlen (sample_secret_key_secp256)); + if (!err) + err = sexp_sscan (&pkey, NULL, sample_public_key_secp256, + strlen (sample_public_key_secp256)); + if (err) + { + errtxt = _gcry_strerror (err); + goto failed; + } + + what = "key consistency"; + err = ecc_check_secret_key(skey); + if (err) + { + errtxt = _gcry_strerror (err); + goto failed; + } + + what = "sign"; + errtxt = selftest_sign (pkey, skey); + 
if (errtxt) + goto failed; + + sexp_release(pkey); + sexp_release(skey); + return 0; /* Succeeded. */ + + failed: + sexp_release(pkey); + sexp_release(skey); + if (report) + report ("pubkey", GCRY_PK_ECC, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + (void)extended; + + if (algo != GCRY_PK_ECC) + return GPG_ERR_PUBKEY_ALGO; + + return selftests_ecdsa (report); +} + + + + +gcry_pk_spec_t _gcry_pubkey_spec_ecc = + { + GCRY_PK_ECC, { 0, 1 }, + (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR), + "ECC", ecc_names, + "pabgnhq", "pabgnhqd", "sw", "rs", "pabgnhq", + ecc_generate, + ecc_check_secret_key, + ecc_encrypt_raw, + ecc_decrypt_raw, + ecc_sign, + ecc_verify, + ecc_get_nbits, + run_selftests, + compute_keygrip, + _gcry_ecc_get_curve, + _gcry_ecc_get_param_sexp + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/elgamal.c b/libotr/libgcrypt-1.8.7/cipher/elgamal.c new file mode 100644 index 0000000..4eb52d6 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/elgamal.c @@ -0,0 +1,1149 @@ +/* Elgamal.c - Elgamal Public Key encryption + * Copyright (C) 1998, 2000, 2001, 2002, 2003, + * 2008 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * For a description of the algorithm, see: + * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996. + * ISBN 0-471-11709-9. Pages 476 ff. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "pubkey-internal.h" + + +/* Blinding is used to mitigate side-channel attacks. You may undef + this to speed up the operation in case the system is secured + against physical and network mounted side-channel attacks. 
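+
+   (Sketch added for illustration; the actual formula lives in the
+   decrypt code below.)  A standard multiplicative blinding of the
+   Elgamal decryption m = b * a^(-x) mod p picks a random r and
+   computes
+
+      m = b * r^x * ((a*r)^x)^(-1)  mod p
+
+   so the secret exponentiation never operates directly on the
+   attacker-supplied ciphertext component a.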
*/ +#define USE_BLINDING 1 + + +typedef struct +{ + gcry_mpi_t p; /* prime */ + gcry_mpi_t g; /* group generator */ + gcry_mpi_t y; /* g^x mod p */ +} ELG_public_key; + + +typedef struct +{ + gcry_mpi_t p; /* prime */ + gcry_mpi_t g; /* group generator */ + gcry_mpi_t y; /* g^x mod p */ + gcry_mpi_t x; /* secret exponent */ +} ELG_secret_key; + + +static const char *elg_names[] = + { + "elg", + "openpgp-elg", + "openpgp-elg-sig", + NULL, + }; + + +static int test_keys (ELG_secret_key *sk, unsigned int nbits, int nodie); +static gcry_mpi_t gen_k (gcry_mpi_t p, int small_k); +static gcry_err_code_t generate (ELG_secret_key *sk, unsigned nbits, + gcry_mpi_t **factors); +static int check_secret_key (ELG_secret_key *sk); +static void do_encrypt (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, + ELG_public_key *pkey); +static void decrypt (gcry_mpi_t output, gcry_mpi_t a, gcry_mpi_t b, + ELG_secret_key *skey); +static void sign (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, + ELG_secret_key *skey); +static int verify (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, + ELG_public_key *pkey); +static unsigned int elg_get_nbits (gcry_sexp_t parms); + + +static void (*progress_cb) (void *, const char *, int, int, int); +static void *progress_cb_data; + +void +_gcry_register_pk_elg_progress (void (*cb) (void *, const char *, + int, int, int), + void *cb_data) +{ + progress_cb = cb; + progress_cb_data = cb_data; +} + + +static void +progress (int c) +{ + if (progress_cb) + progress_cb (progress_cb_data, "pk_elg", c, 0, 0); +} + + +/**************** + * Michael Wiener's table on subgroup sizes to match field sizes. + * (floating around somewhere, probably based on the paper from + * Eurocrypt 96, page 332) + */ +static unsigned int +wiener_map( unsigned int n ) +{ + static struct { unsigned int p_n, q_n; } t[] = + { /* p q attack cost */ + { 512, 119 }, /* 9 x 10^17 */ + { 768, 145 }, /* 6 x 10^21 */ + { 1024, 165 }, /* 7 x 10^24 */ + { 1280, 183 }, /* 3 x 10^27 */ + { 1536, 198 }, /* 7 x 10^29 */ + { 1792, 212 }, /* 9 x 10^31 */ + { 2048, 225 }, /* 8 x 10^33 */ + { 2304, 237 }, /* 5 x 10^35 */ + { 2560, 249 }, /* 3 x 10^37 */ + { 2816, 259 }, /* 1 x 10^39 */ + { 3072, 269 }, /* 3 x 10^40 */ + { 3328, 279 }, /* 8 x 10^41 */ + { 3584, 288 }, /* 2 x 10^43 */ + { 3840, 296 }, /* 4 x 10^44 */ + { 4096, 305 }, /* 7 x 10^45 */ + { 4352, 313 }, /* 1 x 10^47 */ + { 4608, 320 }, /* 2 x 10^48 */ + { 4864, 328 }, /* 2 x 10^49 */ + { 5120, 335 }, /* 3 x 10^50 */ + { 0, 0 } + }; + int i; + + for(i=0; t[i].p_n; i++ ) + { + if( n <= t[i].p_n ) + return t[i].q_n; + } + /* Not in table - use an arbitrary high number. */ + return n / 8 + 200; +} + +static int +test_keys ( ELG_secret_key *sk, unsigned int nbits, int nodie ) +{ + ELG_public_key pk; + gcry_mpi_t test = mpi_new ( 0 ); + gcry_mpi_t out1_a = mpi_new ( nbits ); + gcry_mpi_t out1_b = mpi_new ( nbits ); + gcry_mpi_t out2 = mpi_new ( nbits ); + int failed = 0; + + pk.p = sk->p; + pk.g = sk->g; + pk.y = sk->y; + + _gcry_mpi_randomize ( test, nbits, GCRY_WEAK_RANDOM ); + + do_encrypt ( out1_a, out1_b, test, &pk ); + decrypt ( out2, out1_a, out1_b, sk ); + if ( mpi_cmp( test, out2 ) ) + failed |= 1; + + sign ( out1_a, out1_b, test, sk ); + if ( !verify( out1_a, out1_b, test, &pk ) ) + failed |= 2; + + _gcry_mpi_release ( test ); + _gcry_mpi_release ( out1_a ); + _gcry_mpi_release ( out1_b ); + _gcry_mpi_release ( out2 ); + + if (failed && !nodie) + log_fatal ("Elgamal test key for %s %s failed\n", + (failed & 1)? "encrypt+decrypt":"", + (failed & 2)? 
"sign+verify":""); + if (failed && DBG_CIPHER) + log_debug ("Elgamal test key for %s %s failed\n", + (failed & 1)? "encrypt+decrypt":"", + (failed & 2)? "sign+verify":""); + + return failed; +} + + +/**************** + * Generate a random secret exponent k from prime p, so that k is + * relatively prime to p-1. With SMALL_K set, k will be selected for + * better encryption performance - this must never be used signing! + */ +static gcry_mpi_t +gen_k( gcry_mpi_t p, int small_k ) +{ + gcry_mpi_t k = mpi_alloc_secure( 0 ); + gcry_mpi_t temp = mpi_alloc( mpi_get_nlimbs(p) ); + gcry_mpi_t p_1 = mpi_copy(p); + unsigned int orig_nbits = mpi_get_nbits(p); + unsigned int nbits, nbytes; + char *rndbuf = NULL; + + if (small_k) + { + /* Using a k much lesser than p is sufficient for encryption and + * it greatly improves the encryption performance. We use + * Wiener's table and add a large safety margin. */ + nbits = wiener_map( orig_nbits ) * 3 / 2; + if( nbits >= orig_nbits ) + BUG(); + } + else + nbits = orig_nbits; + + + nbytes = (nbits+7)/8; + if( DBG_CIPHER ) + log_debug("choosing a random k\n"); + mpi_sub_ui( p_1, p, 1); + for(;;) + { + if( !rndbuf || nbits < 32 ) + { + xfree(rndbuf); + rndbuf = _gcry_random_bytes_secure( nbytes, GCRY_STRONG_RANDOM ); + } + else + { + /* Change only some of the higher bits. We could improve + this by directly requesting more memory at the first call + to get_random_bytes() and use this the here maybe it is + easier to do this directly in random.c Anyway, it is + highly inlikely that we will ever reach this code. */ + char *pp = _gcry_random_bytes_secure( 4, GCRY_STRONG_RANDOM ); + memcpy( rndbuf, pp, 4 ); + xfree(pp); + } + _gcry_mpi_set_buffer( k, rndbuf, nbytes, 0 ); + + for(;;) + { + if( !(mpi_cmp( k, p_1 ) < 0) ) /* check: k < (p-1) */ + { + if( DBG_CIPHER ) + progress('+'); + break; /* no */ + } + if( !(mpi_cmp_ui( k, 0 ) > 0) ) /* check: k > 0 */ + { + if( DBG_CIPHER ) + progress('-'); + break; /* no */ + } + if (mpi_gcd( temp, k, p_1 )) + goto found; /* okay, k is relative prime to (p-1) */ + mpi_add_ui( k, k, 1 ); + if( DBG_CIPHER ) + progress('.'); + } + } + found: + xfree (rndbuf); + if( DBG_CIPHER ) + progress('\n'); + mpi_free(p_1); + mpi_free(temp); + + return k; +} + +/**************** + * Generate a key pair with a key of size NBITS + * Returns: 2 structures filled with all needed values + * and an array with n-1 factors of (p-1) + */ +static gcry_err_code_t +generate ( ELG_secret_key *sk, unsigned int nbits, gcry_mpi_t **ret_factors ) +{ + gcry_err_code_t rc; + gcry_mpi_t p; /* the prime */ + gcry_mpi_t p_min1; + gcry_mpi_t g; + gcry_mpi_t x; /* the secret exponent */ + gcry_mpi_t y; + unsigned int qbits; + unsigned int xbits; + byte *rndbuf; + + p_min1 = mpi_new ( nbits ); + qbits = wiener_map( nbits ); + if( qbits & 1 ) /* better have a even one */ + qbits++; + g = mpi_alloc(1); + rc = _gcry_generate_elg_prime (0, nbits, qbits, g, &p, ret_factors); + if (rc) + { + mpi_free (p_min1); + mpi_free (g); + return rc; + } + mpi_sub_ui(p_min1, p, 1); + + + /* Select a random number which has these properties: + * 0 < x < p-1 + * This must be a very good random number because this is the + * secret part. The prime is public and may be shared anyway, + * so a random generator level of 1 is used for the prime. + * + * I don't see a reason to have a x of about the same size + * as the p. It should be sufficient to have one about the size + * of q or the later used k plus a large safety margin. Decryption + * will be much faster with such an x. 
+   */
+  xbits = qbits * 3 / 2;
+  if( xbits >= nbits )
+    BUG();
+  x = mpi_snew ( xbits );
+  if( DBG_CIPHER )
+    log_debug("choosing a random x of size %u\n", xbits );
+  rndbuf = NULL;
+  do
+    {
+      if( DBG_CIPHER )
+        progress('.');
+      if( rndbuf )
+        { /* Change only some of the higher bits */
+          if( xbits < 16 ) /* should never happen ... */
+            {
+              xfree(rndbuf);
+              rndbuf = _gcry_random_bytes_secure ((xbits+7)/8,
+                                                  GCRY_VERY_STRONG_RANDOM);
+            }
+          else
+            {
+              char *r = _gcry_random_bytes_secure (2, GCRY_VERY_STRONG_RANDOM);
+              memcpy(rndbuf, r, 2 );
+              xfree (r);
+            }
+        }
+      else
+        {
+          rndbuf = _gcry_random_bytes_secure ((xbits+7)/8,
+                                              GCRY_VERY_STRONG_RANDOM );
+        }
+      _gcry_mpi_set_buffer( x, rndbuf, (xbits+7)/8, 0 );
+      mpi_clear_highbit( x, xbits+1 );
+    }
+  while( !( mpi_cmp_ui( x, 0 )>0 && mpi_cmp( x, p_min1 )<0 ) );
+  xfree(rndbuf);
+
+  y = mpi_new (nbits);
+  mpi_powm( y, g, x, p );
+
+  if( DBG_CIPHER )
+    {
+      progress ('\n');
+      log_mpidump ("elg  p", p );
+      log_mpidump ("elg  g", g );
+      log_mpidump ("elg  y", y );
+      log_mpidump ("elg  x", x );
+    }
+
+  /* Copy the stuff to the key structures */
+  sk->p = p;
+  sk->g = g;
+  sk->y = y;
+  sk->x = x;
+
+  _gcry_mpi_release ( p_min1 );
+
+  /* Now we can test our keys (this should never fail!) */
+  test_keys ( sk, nbits - 64, 0 );
+
+  return 0;
+}
+
+
+/* Generate a key pair with a key of size NBITS not using a random
+   value for the secret key but the one given as X.  This is useful
+   for implementing a passphrase-based decryption on top of a public
+   key based encryption.  It has applications in backup systems.
+
+   Returns: A structure filled with all needed values and an array
+            with n-1 factors of (p-1).  */
+static gcry_err_code_t
+generate_using_x (ELG_secret_key *sk, unsigned int nbits, gcry_mpi_t x,
+                  gcry_mpi_t **ret_factors )
+{
+  gcry_err_code_t rc;
+  gcry_mpi_t p;      /* The prime.  */
+  gcry_mpi_t p_min1; /* The prime minus 1.  */
+  gcry_mpi_t g;      /* The generator.  */
+  gcry_mpi_t y;      /* g^x mod p.  */
+  unsigned int qbits;
+  unsigned int xbits;
+
+  sk->p = NULL;
+  sk->g = NULL;
+  sk->y = NULL;
+  sk->x = NULL;
+
+  /* Do a quick check to see whether X is suitable.  */
+  xbits = mpi_get_nbits (x);
+  if ( xbits < 64 || xbits >= nbits )
+    return GPG_ERR_INV_VALUE;
+
+  p_min1 = mpi_new ( nbits );
+  qbits  = wiener_map ( nbits );
+  if ( (qbits & 1) ) /* Better have an even one.  */
+    qbits++;
+  g = mpi_alloc (1);
+  rc = _gcry_generate_elg_prime (0, nbits, qbits, g, &p, ret_factors );
+  if (rc)
+    {
+      mpi_free (p_min1);
+      mpi_free (g);
+      return rc;
+    }
+  mpi_sub_ui (p_min1, p, 1);
+
+  if (DBG_CIPHER)
+    log_debug ("using a supplied x of size %u", xbits );
+  if ( !(mpi_cmp_ui ( x, 0 ) > 0 && mpi_cmp ( x, p_min1 ) <0 ) )
+    {
+      _gcry_mpi_release ( p_min1 );
+      _gcry_mpi_release ( p );
+      _gcry_mpi_release ( g );
+      return GPG_ERR_INV_VALUE;
+    }
+
+  y = mpi_new (nbits);
+  mpi_powm ( y, g, x, p );
+
+  if ( DBG_CIPHER )
+    {
+      progress ('\n');
+      log_mpidump ("elg  p", p );
+      log_mpidump ("elg  g", g );
+      log_mpidump ("elg  y", y );
+      log_mpidump ("elg  x", x );
+    }
+
+  /* Copy the stuff to the key structures */
+  sk->p = p;
+  sk->g = g;
+  sk->y = y;
+  sk->x = mpi_copy (x);
+
+  _gcry_mpi_release ( p_min1 );
+
+  /* Now we can test our keys.  */
+  if ( test_keys ( sk, nbits - 64, 1 ) )
+    {
+      _gcry_mpi_release ( sk->p ); sk->p = NULL;
+      _gcry_mpi_release ( sk->g ); sk->g = NULL;
+      _gcry_mpi_release ( sk->y ); sk->y = NULL;
+      _gcry_mpi_release ( sk->x ); sk->x = NULL;
+      return GPG_ERR_BAD_SECKEY;
+    }
+
+  return 0;
+}
+
+
+/****************
+ * Test whether the secret key is valid.
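+ * The check recomputes the public value from the secret exponent and
+ * compares it with the stored one, i.e. it verifies the defining
+ * relation of the key:
+ *
+ *    y == g^x mod p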
+ * Returns: true if this is a valid key.
+ */
+static int
+check_secret_key( ELG_secret_key *sk )
+{
+  int rc;
+  gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs(sk->y) );
+
+  mpi_powm (y, sk->g, sk->x, sk->p);
+  rc = !mpi_cmp( y, sk->y );
+  mpi_free( y );
+  return rc;
+}
+
+
+static void
+do_encrypt(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_public_key *pkey )
+{
+  gcry_mpi_t k;
+
+  /* Note: maybe we should change the interface, so that it
+   * is possible to check that input is < p and return an
+   * error code.
+   */
+
+  k = gen_k( pkey->p, 1 );
+  mpi_powm (a, pkey->g, k, pkey->p);
+
+  /* b = (y^k * input) mod p
+   *   = ((y^k mod p) * (input mod p)) mod p
+   * and because input is < p
+   *   = ((y^k mod p) * input) mod p
+   */
+  mpi_powm (b, pkey->y, k, pkey->p);
+  mpi_mulm (b, b, input, pkey->p);
+#if 0
+  if( DBG_CIPHER )
+    {
+      log_mpidump("elg encrypted y", pkey->y);
+      log_mpidump("elg encrypted p", pkey->p);
+      log_mpidump("elg encrypted k", k);
+      log_mpidump("elg encrypted M", input);
+      log_mpidump("elg encrypted a", a);
+      log_mpidump("elg encrypted b", b);
+    }
+#endif
+  mpi_free(k);
+}
+
+
+
+
+static void
+decrypt (gcry_mpi_t output, gcry_mpi_t a, gcry_mpi_t b, ELG_secret_key *skey )
+{
+  gcry_mpi_t t1, t2, r;
+  unsigned int nbits = mpi_get_nbits (skey->p);
+
+  mpi_normalize (a);
+  mpi_normalize (b);
+
+  t1 = mpi_snew (nbits);
+
+#ifdef USE_BLINDING
+
+  t2 = mpi_snew (nbits);
+  r  = mpi_new (nbits);
+
+  /* We need a random number of about the prime size.  The random
+     number merely needs to be unpredictable; thus we use level 0.  */
+  _gcry_mpi_randomize (r, nbits, GCRY_WEAK_RANDOM);
+
+  /* t1 = r^x mod p */
+  mpi_powm (t1, r, skey->x, skey->p);
+  /* t2 = (a * r)^-x mod p */
+  mpi_mulm (t2, a, r, skey->p);
+  mpi_powm (t2, t2, skey->x, skey->p);
+  mpi_invm (t2, t2, skey->p);
+  /* t1 = (t1 * t2) mod p*/
+  mpi_mulm (t1, t1, t2, skey->p);
+
+  mpi_free (r);
+  mpi_free (t2);
+
+#else /*!USE_BLINDING*/
+
+  /* output = b/(a^x) mod p */
+  mpi_powm (t1, a, skey->x, skey->p);
+  mpi_invm (t1, t1, skey->p);
+
+#endif /*!USE_BLINDING*/
+
+  mpi_mulm (output, b, t1, skey->p);
+
+#if 0
+  if( DBG_CIPHER )
+    {
+      log_mpidump ("elg decrypted x", skey->x);
+      log_mpidump ("elg decrypted p", skey->p);
+      log_mpidump ("elg decrypted a", a);
+      log_mpidump ("elg decrypted b", b);
+      log_mpidump ("elg decrypted M", output);
+    }
+#endif
+  mpi_free (t1);
+}
+
+
+/****************
+ * Make an Elgamal signature out of INPUT
+ */
+
+static void
+sign(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_secret_key *skey )
+{
+  gcry_mpi_t k;
+  gcry_mpi_t t   = mpi_alloc( mpi_get_nlimbs(a) );
+  gcry_mpi_t inv = mpi_alloc( mpi_get_nlimbs(a) );
+  gcry_mpi_t p_1 = mpi_copy(skey->p);
+
+  /*
+   * b = (t * inv) mod (p-1)
+   *   = (t * inv(k, p-1)) mod (p-1)
+   *   = (((M - x*a) mod (p-1)) * inv(k, p-1)) mod (p-1)
+   */
+  mpi_sub_ui(p_1, p_1, 1);
+  k = gen_k( skey->p, 0 /* no small K ! */ );
+  mpi_powm( a, skey->g, k, skey->p );
+  mpi_mul(t, skey->x, a );
+  mpi_subm(t, input, t, p_1 );
+  mpi_invm(inv, k, p_1 );
+  mpi_mulm(b, t, inv, p_1 );
+
+#if 0
+  if( DBG_CIPHER )
+    {
+      log_mpidump ("elg sign p", skey->p);
+      log_mpidump ("elg sign g", skey->g);
+      log_mpidump ("elg sign y", skey->y);
+      log_mpidump ("elg sign x", skey->x);
+      log_mpidump ("elg sign k", k);
+      log_mpidump ("elg sign M", input);
+      log_mpidump ("elg sign a", a);
+      log_mpidump ("elg sign b", b);
+    }
+#endif
+  mpi_free(k);
+  mpi_free(t);
+  mpi_free(inv);
+  mpi_free(p_1);
+}
+
+
+/****************
+ * Returns true if the signature composed of A and B is valid.
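+ * The textbook condition is  0 < a < p  together with
+ *
+ *    g^M == y^a * a^b  (mod p).
+ *
+ * The default path below checks the equivalent relation
+ * g^-M * y^a * a^b == 1 (mod p) with a single mpi_mulpowm call,
+ * which is why t2 is first set to the inverse of g.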
+ */ +static int +verify(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_public_key *pkey ) +{ + int rc; + gcry_mpi_t t1; + gcry_mpi_t t2; + gcry_mpi_t base[4]; + gcry_mpi_t ex[4]; + + if( !(mpi_cmp_ui( a, 0 ) > 0 && mpi_cmp( a, pkey->p ) < 0) ) + return 0; /* assertion 0 < a < p failed */ + + t1 = mpi_alloc( mpi_get_nlimbs(a) ); + t2 = mpi_alloc( mpi_get_nlimbs(a) ); + +#if 0 + /* t1 = (y^a mod p) * (a^b mod p) mod p */ + gcry_mpi_powm( t1, pkey->y, a, pkey->p ); + gcry_mpi_powm( t2, a, b, pkey->p ); + mpi_mulm( t1, t1, t2, pkey->p ); + + /* t2 = g ^ input mod p */ + gcry_mpi_powm( t2, pkey->g, input, pkey->p ); + + rc = !mpi_cmp( t1, t2 ); +#elif 0 + /* t1 = (y^a mod p) * (a^b mod p) mod p */ + base[0] = pkey->y; ex[0] = a; + base[1] = a; ex[1] = b; + base[2] = NULL; ex[2] = NULL; + mpi_mulpowm( t1, base, ex, pkey->p ); + + /* t2 = g ^ input mod p */ + gcry_mpi_powm( t2, pkey->g, input, pkey->p ); + + rc = !mpi_cmp( t1, t2 ); +#else + /* t1 = g ^ - input * y ^ a * a ^ b mod p */ + mpi_invm(t2, pkey->g, pkey->p ); + base[0] = t2 ; ex[0] = input; + base[1] = pkey->y; ex[1] = a; + base[2] = a; ex[2] = b; + base[3] = NULL; ex[3] = NULL; + mpi_mulpowm( t1, base, ex, pkey->p ); + rc = !mpi_cmp_ui( t1, 1 ); + +#endif + + mpi_free(t1); + mpi_free(t2); + return rc; +} + +/********************************************* + ************** interface ****************** + *********************************************/ + +static gpg_err_code_t +elg_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) +{ + gpg_err_code_t rc; + unsigned int nbits; + ELG_secret_key sk; + gcry_mpi_t xvalue = NULL; + gcry_sexp_t l1; + gcry_mpi_t *factors = NULL; + gcry_sexp_t misc_info = NULL; + + memset (&sk, 0, sizeof sk); + + rc = _gcry_pk_util_get_nbits (genparms, &nbits); + if (rc) + return rc; + + /* Parse the optional xvalue element. 
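  For illustration, a generate
+     request carrying this element has roughly the shape (mpi octets
+     omitted; the list names follow the usual genkey conventions):
+
+       (genkey
+        (elg
+         (nbits 4:2048)
+         (xvalue <secret mpi>)))
+
+     When present, the value becomes the secret exponent via
+     generate_using_x instead of a freshly drawn x.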
*/ + l1 = sexp_find_token (genparms, "xvalue", 0); + if (l1) + { + xvalue = sexp_nth_mpi (l1, 1, 0); + sexp_release (l1); + if (!xvalue) + return GPG_ERR_BAD_MPI; + } + + if (xvalue) + { + rc = generate_using_x (&sk, nbits, xvalue, &factors); + mpi_free (xvalue); + } + else + { + rc = generate (&sk, nbits, &factors); + } + if (rc) + goto leave; + + if (factors && factors[0]) + { + int nfac; + void **arg_list; + char *buffer, *p; + + for (nfac = 0; factors[nfac]; nfac++) + ; + arg_list = xtrycalloc (nfac+1, sizeof *arg_list); + if (!arg_list) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + buffer = xtrymalloc (30 + nfac*2 + 2 + 1); + if (!buffer) + { + rc = gpg_err_code_from_syserror (); + xfree (arg_list); + goto leave; + } + p = stpcpy (buffer, "(misc-key-info(pm1-factors"); + for(nfac = 0; factors[nfac]; nfac++) + { + p = stpcpy (p, "%m"); + arg_list[nfac] = factors + nfac; + } + p = stpcpy (p, "))"); + rc = sexp_build_array (&misc_info, NULL, buffer, arg_list); + xfree (arg_list); + xfree (buffer); + if (rc) + goto leave; + } + + rc = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (elg(p%m)(g%m)(y%m)))" + " (private-key" + " (elg(p%m)(g%m)(y%m)(x%m)))" + " %S)", + sk.p, sk.g, sk.y, + sk.p, sk.g, sk.y, sk.x, + misc_info); + + leave: + mpi_free (sk.p); + mpi_free (sk.g); + mpi_free (sk.y); + mpi_free (sk.x); + sexp_release (misc_info); + if (factors) + { + gcry_mpi_t *mp; + for (mp = factors; *mp; mp++) + mpi_free (*mp); + xfree (factors); + } + + return rc; +} + + +static gcry_err_code_t +elg_check_secret_key (gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + ELG_secret_key sk = {NULL, NULL, NULL, NULL}; + + rc = sexp_extract_param (keyparms, NULL, "pgyx", + &sk.p, &sk.g, &sk.y, &sk.x, + NULL); + if (rc) + goto leave; + + if (!check_secret_key (&sk)) + rc = GPG_ERR_BAD_SECKEY; + + leave: + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + if (DBG_CIPHER) + log_debug ("elg_testkey => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +elg_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t mpi_a = NULL; + gcry_mpi_t mpi_b = NULL; + gcry_mpi_t data = NULL; + ELG_public_key pk = { NULL, NULL, NULL }; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, + elg_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("elg_encrypt data", data); + if (mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "pgy", + &pk.p, &pk.g, &pk.y, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("elg_encrypt p", pk.p); + log_mpidump ("elg_encrypt g", pk.g); + log_mpidump ("elg_encrypt y", pk.y); + } + + /* Do Elgamal computation and build result. 
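  The returned pair is the
+     classic Elgamal ciphertext
+
+       a = g^k mod p,   b = y^k * data mod p
+
+     with the ephemeral k drawn by gen_k in small-k mode; elg_decrypt
+     later recovers data = b * a^-x mod p from both halves.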
*/ + mpi_a = mpi_new (0); + mpi_b = mpi_new (0); + do_encrypt (mpi_a, mpi_b, data, &pk); + rc = sexp_build (r_ciph, NULL, "(enc-val(elg(a%m)(b%m)))", mpi_a, mpi_b); + + leave: + _gcry_mpi_release (mpi_a); + _gcry_mpi_release (mpi_b); + _gcry_mpi_release (pk.p); + _gcry_mpi_release (pk.g); + _gcry_mpi_release (pk.y); + _gcry_mpi_release (data); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("elg_encrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +elg_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gpg_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t data_a = NULL; + gcry_mpi_t data_b = NULL; + ELG_secret_key sk = {NULL, NULL, NULL, NULL}; + gcry_mpi_t plain = NULL; + unsigned char *unpad = NULL; + size_t unpadlen = 0; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, + elg_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_preparse_encval (s_data, elg_names, &l1, &ctx); + if (rc) + goto leave; + rc = sexp_extract_param (l1, NULL, "ab", &data_a, &data_b, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("elg_decrypt d_a", data_a); + log_printmpi ("elg_decrypt d_b", data_b); + } + if (mpi_is_opaque (data_a) || mpi_is_opaque (data_b)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "pgyx", + &sk.p, &sk.g, &sk.y, &sk.x, + NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("elg_decrypt p", sk.p); + log_printmpi ("elg_decrypt g", sk.g); + log_printmpi ("elg_decrypt y", sk.y); + if (!fips_mode ()) + log_printmpi ("elg_decrypt x", sk.x); + } + + plain = mpi_snew (ctx.nbits); + decrypt (plain, data_a, data_b, &sk); + if (DBG_CIPHER) + log_printmpi ("elg_decrypt res", plain); + + /* Reverse the encoding and build the s-expression. */ + switch (ctx.encoding) + { + case PUBKEY_ENC_PKCS1: + rc = _gcry_rsa_pkcs1_decode_for_enc (&unpad, &unpadlen, ctx.nbits, plain); + mpi_free (plain); plain = NULL; + if (!rc) + rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad); + break; + + case PUBKEY_ENC_OAEP: + rc = _gcry_rsa_oaep_decode (&unpad, &unpadlen, + ctx.nbits, ctx.hash_algo, plain, + ctx.label, ctx.labellen); + mpi_free (plain); plain = NULL; + if (!rc) + rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad); + break; + + default: + /* Raw format. For backward compatibility we need to assume a + signed mpi by using the sexp format string "%m". */ + rc = sexp_build (r_plain, NULL, + (ctx.flags & PUBKEY_FLAG_LEGACYRESULT) + ? "%m" : "(value %m)", + plain); + break; + } + + + leave: + xfree (unpad); + _gcry_mpi_release (plain); + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + _gcry_mpi_release (data_a); + _gcry_mpi_release (data_b); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("elg_decrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +elg_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t data = NULL; + ELG_secret_key sk = {NULL, NULL, NULL, NULL}; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, + elg_get_nbits (keyparms)); + + /* Extract the data. 
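  A raw-mode signing request,
+     for example, arrives as an s-expression of the general form
+
+       (data
+        (flags raw)
+        (value <mpi>))
+
+     which _gcry_pk_util_data_to_mpi converts into the message
+     representative used as M by sign().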
*/
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("elg_sign   data", data);
+  if (mpi_is_opaque (data))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
+
+  /* Extract the key.  */
+  rc = sexp_extract_param (keyparms, NULL, "pgyx",
+                           &sk.p, &sk.g, &sk.y, &sk.x, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("elg_sign      p", sk.p);
+      log_mpidump ("elg_sign      g", sk.g);
+      log_mpidump ("elg_sign      y", sk.y);
+      if (!fips_mode ())
+        log_mpidump ("elg_sign      x", sk.x);
+    }
+
+  sig_r = mpi_new (0);
+  sig_s = mpi_new (0);
+  sign (sig_r, sig_s, data, &sk);
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("elg_sign  sig_r", sig_r);
+      log_mpidump ("elg_sign  sig_s", sig_s);
+    }
+  rc = sexp_build (r_sig, NULL, "(sig-val(elg(r%M)(s%M)))", sig_r, sig_s);
+
+ leave:
+  _gcry_mpi_release (sig_r);
+  _gcry_mpi_release (sig_s);
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.g);
+  _gcry_mpi_release (sk.y);
+  _gcry_mpi_release (sk.x);
+  _gcry_mpi_release (data);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("elg_sign      => %s\n", gpg_strerror (rc));
+  return rc;
+}
+
+
+static gcry_err_code_t
+elg_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
+{
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_sexp_t l1 = NULL;
+  gcry_mpi_t sig_r = NULL;
+  gcry_mpi_t sig_s = NULL;
+  gcry_mpi_t data = NULL;
+  ELG_public_key pk = { NULL, NULL, NULL };
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY,
+                                   elg_get_nbits (s_keyparms));
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("elg_verify data", data);
+  if (mpi_is_opaque (data))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
+
+  /* Extract the signature value.  */
+  rc = _gcry_pk_util_preparse_sigval (s_sig, elg_names, &l1, NULL);
+  if (rc)
+    goto leave;
+  rc = sexp_extract_param (l1, NULL, "rs", &sig_r, &sig_s, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("elg_verify  s_r", sig_r);
+      log_mpidump ("elg_verify  s_s", sig_s);
+    }
+
+  /* Extract the key.  */
+  rc = sexp_extract_param (s_keyparms, NULL, "pgy",
+                           &pk.p, &pk.g, &pk.y, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("elg_verify    p", pk.p);
+      log_mpidump ("elg_verify    g", pk.g);
+      log_mpidump ("elg_verify    y", pk.y);
+    }
+
+  /* Verify the signature.  */
+  if (!verify (sig_r, sig_s, data, &pk))
+    rc = GPG_ERR_BAD_SIGNATURE;
+
+ leave:
+  _gcry_mpi_release (pk.p);
+  _gcry_mpi_release (pk.g);
+  _gcry_mpi_release (pk.y);
+  _gcry_mpi_release (data);
+  _gcry_mpi_release (sig_r);
+  _gcry_mpi_release (sig_s);
+  sexp_release (l1);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("elg_verify    => %s\n", rc?gpg_strerror (rc):"Good");
+  return rc;
+}
+
+
+/* Return the number of bits for the key described by PARMS.  On error
+ * 0 is returned.  The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ * (elg
+ *  (p <mpi>)
+ *  (g <mpi>)
+ *  (y <mpi>))
+ *
+ * More parameters may be given but we only need P here.
+ */
+static unsigned int
+elg_get_nbits (gcry_sexp_t parms)
+{
+  gcry_sexp_t l1;
+  gcry_mpi_t p;
+  unsigned int nbits;
+
+  l1 = sexp_find_token (parms, "p", 1);
+  if (!l1)
+    return 0; /* Parameter P not found.  */
+
+  p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+  sexp_release (l1);
+  nbits = p?
mpi_get_nbits (p) : 0; + _gcry_mpi_release (p); + return nbits; +} + + + +gcry_pk_spec_t _gcry_pubkey_spec_elg = + { + GCRY_PK_ELG, { 0, 0 }, + (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR), + "ELG", elg_names, + "pgy", "pgyx", "ab", "rs", "pgy", + elg_generate, + elg_check_secret_key, + elg_encrypt, + elg_decrypt, + elg_sign, + elg_verify, + elg_get_nbits, + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/gost-s-box.c b/libotr/libgcrypt-1.8.7/cipher/gost-s-box.c new file mode 100644 index 0000000..7aa5444 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/gost-s-box.c @@ -0,0 +1,257 @@ +/* gost-s-box.c - GOST 28147-89 S-Box expander + * Copyright (C) 2013 Dmitry Eremin-Solenikov + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> + +#define DIM(v) (sizeof(v)/sizeof((v)[0])) + +struct gost_sbox +{ + const char *name; + const char *oid; + unsigned char sbox[16*8]; +} gost_sboxes[] = { + { "test_3411", "1.2.643.2.2.30.0", { + 0x4, 0xE, 0x5, 0x7, 0x6, 0x4, 0xD, 0x1, + 0xA, 0xB, 0x8, 0xD, 0xC, 0xB, 0xB, 0xF, + 0x9, 0x4, 0x1, 0xA, 0x7, 0xA, 0x4, 0xD, + 0x2, 0xC, 0xD, 0x1, 0x1, 0x0, 0x1, 0x0, + + 0xD, 0x6, 0xA, 0x0, 0x5, 0x7, 0x3, 0x5, + 0x8, 0xD, 0x3, 0x8, 0xF, 0x2, 0xF, 0x7, + 0x0, 0xF, 0x4, 0x9, 0xD, 0x1, 0x5, 0xA, + 0xE, 0xA, 0x2, 0xF, 0x8, 0xD, 0x9, 0x4, + + 0x6, 0x2, 0xE, 0xE, 0x4, 0x3, 0x0, 0x9, + 0xB, 0x3, 0xF, 0x4, 0xA, 0x6, 0xA, 0x2, + 0x1, 0x8, 0xC, 0x6, 0x9, 0x8, 0xE, 0x3, + 0xC, 0x1, 0x7, 0xC, 0xE, 0x5, 0x7, 0xE, + + 0x7, 0x0, 0x6, 0xB, 0x0, 0x9, 0x6, 0x6, + 0xF, 0x7, 0x0, 0x2, 0x3, 0xC, 0x8, 0xB, + 0x5, 0x5, 0x9, 0x5, 0xB, 0xF, 0x2, 0x8, + 0x3, 0x9, 0xB, 0x3, 0x2, 0xE, 0xC, 0xC, + } + }, + { "CryptoPro_3411", "1.2.643.2.2.30.1", { + 0xA, 0x5, 0x7, 0x4, 0x7, 0x7, 0xD, 0x1, + 0x4, 0xF, 0xF, 0xA, 0x6, 0x6, 0xE, 0x3, + 0x5, 0x4, 0xC, 0x7, 0x4, 0x2, 0x4, 0xA, + 0x6, 0x0, 0xE, 0xC, 0xB, 0x4, 0x1, 0x9, + + 0x8, 0x2, 0x9, 0x0, 0x9, 0xD, 0x7, 0x5, + 0x1, 0xD, 0x4, 0xF, 0xC, 0x9, 0x0, 0xB, + 0x3, 0xB, 0x1, 0x2, 0x2, 0xF, 0x5, 0x4, + 0x7, 0x9, 0x0, 0x8, 0xA, 0x0, 0xA, 0xF, + + 0xD, 0x1, 0x3, 0xE, 0x1, 0xA, 0x3, 0x8, + 0xC, 0x7, 0xB, 0x1, 0x8, 0x1, 0xC, 0x6, + 0xE, 0x6, 0x5, 0x6, 0x0, 0x5, 0x8, 0x7, + 0x0, 0x3, 0x2, 0x5, 0xE, 0xB, 0xF, 0xE, + + 0x9, 0xC, 0x6, 0xD, 0xF, 0x8, 0x6, 0xD, + 0x2, 0xE, 0xA, 0xB, 0xD, 0xE, 0x2, 0x0, + 0xB, 0xA, 0x8, 0x9, 0x3, 0xC, 0x9, 0x2, + 0xF, 0x8, 0xD, 0x3, 0x5, 0x3, 0xB, 0xC, + } + }, + { "Test_89", "1.2.643.2.2.31.0", { + 0x4, 0xC, 0xD, 0xE, 0x3, 0x8, 0x9, 0xC, + 0x2, 0x9, 0x8, 0x9, 0xE, 0xF, 0xB, 0x6, + 0xF, 0xF, 0xE, 0xB, 0x5, 0x6, 0xC, 0x5, + 0x5, 0xE, 0xC, 0x2, 0x9, 0xB, 0x0, 0x2, + + 0x9, 0x8, 0x7, 0x5, 0x6, 0x1, 0x3, 0xB, + 0x1, 0x1, 0x3, 0xF, 0x8, 0x9, 0x6, 0x0, + 0x0, 0x3, 0x9, 0x7, 0x0, 0xC, 0x7, 0x9, + 0x8, 0xA, 0xA, 0x1, 0xD, 0x5, 0x5, 0xD, + + 0xE, 0x2, 0x1, 0x0, 0xA, 0xD, 0x4, 0x3, + 0x3, 0x7, 0x5, 0xD, 0xB, 0x3, 0x8, 0xE, + 0xB, 0x4, 0x2, 0xC, 0x7, 0x7, 
0xE, 0x7, + 0xC, 0xD, 0x4, 0x6, 0xC, 0xA, 0xF, 0xA, + + 0xD, 0x6, 0x6, 0xA, 0x2, 0x0, 0x1, 0xF, + 0x7, 0x0, 0xF, 0x4, 0x1, 0xE, 0xA, 0x4, + 0xA, 0xB, 0x0, 0x3, 0xF, 0x2, 0x2, 0x1, + 0x6, 0x5, 0xB, 0x8, 0x4, 0x4, 0xD, 0x8, + } + }, + { "CryptoPro_A", "1.2.643.2.2.31.1", { + 0x9, 0x3, 0xE, 0xE, 0xB, 0x3, 0x1, 0xB, + 0x6, 0x7, 0x4, 0x7, 0x5, 0xA, 0xD, 0xA, + 0x3, 0xE, 0x6, 0xA, 0x1, 0xD, 0x2, 0xF, + 0x2, 0x9, 0x2, 0xC, 0x9, 0xC, 0x9, 0x5, + + 0x8, 0x8, 0xB, 0xD, 0x8, 0x1, 0x7, 0x0, + 0xB, 0xA, 0x3, 0x1, 0xD, 0x2, 0xA, 0xC, + 0x1, 0xF, 0xD, 0x3, 0xF, 0x0, 0x6, 0xE, + 0x7, 0x0, 0x8, 0x9, 0x0, 0xB, 0x0, 0x8, + + 0xA, 0x5, 0xC, 0x0, 0xE, 0x7, 0x8, 0x6, + 0x4, 0x2, 0xF, 0x2, 0x4, 0x5, 0xC, 0x2, + 0xE, 0x6, 0x5, 0xB, 0x2, 0x9, 0x4, 0x3, + 0xF, 0xC, 0xA, 0x4, 0x3, 0x4, 0x5, 0x9, + + 0xC, 0xB, 0x0, 0xF, 0xC, 0x8, 0xF, 0x1, + 0x0, 0x4, 0x7, 0x8, 0x7, 0xF, 0x3, 0x7, + 0xD, 0xD, 0x1, 0x5, 0xA, 0xE, 0xB, 0xD, + 0x5, 0x1, 0x9, 0x6, 0x6, 0x6, 0xE, 0x4, + } + }, + { "CryptoPro_B", "1.2.643.2.2.31.2", { + 0x8, 0x0, 0xE, 0x7, 0x2, 0x8, 0x5, 0x0, + 0x4, 0x1, 0xC, 0x5, 0x7, 0x3, 0x2, 0x4, + 0xB, 0x2, 0x0, 0x0, 0xC, 0x2, 0xA, 0xB, + 0x1, 0xA, 0xA, 0xD, 0xF, 0x6, 0xB, 0xE, + + 0x3, 0x4, 0x9, 0xB, 0x9, 0x4, 0x9, 0x8, + 0x5, 0xD, 0x2, 0x6, 0x5, 0xD, 0x1, 0x3, + 0x0, 0x5, 0xD, 0x1, 0xA, 0xE, 0xC, 0x7, + 0x9, 0xC, 0xB, 0x2, 0xB, 0xB, 0x3, 0x1, + + 0x2, 0x9, 0x7, 0x3, 0x1, 0xC, 0x7, 0xA, + 0xE, 0x7, 0x5, 0xA, 0x4, 0x1, 0x4, 0x2, + 0xA, 0x3, 0x8, 0xC, 0x0, 0x7, 0xD, 0x9, + 0xC, 0xF, 0xF, 0xF, 0xD, 0xF, 0x0, 0x6, + + 0xD, 0xB, 0x3, 0x4, 0x6, 0xA, 0x6, 0xF, + 0x6, 0x8, 0x6, 0xE, 0x8, 0x0, 0xF, 0xD, + 0x7, 0x6, 0x1, 0x9, 0xE, 0x9, 0x8, 0x5, + 0xF, 0xE, 0x4, 0x8, 0x3, 0x5, 0xE, 0xC, + } + }, + { "CryptoPro_C", "1.2.643.2.2.31.3", { + 0x1, 0x0, 0x8, 0x3, 0x8, 0xC, 0xA, 0x7, + 0xB, 0x1, 0x2, 0x6, 0xD, 0x9, 0x9, 0x4, + 0xC, 0x7, 0x5, 0x0, 0xB, 0xB, 0x6, 0x0, + 0x2, 0xD, 0x0, 0x1, 0x0, 0x1, 0x8, 0x5, + + 0x9, 0xB, 0x4, 0x5, 0x4, 0x8, 0xD, 0xA, + 0xD, 0x4, 0x9, 0xD, 0x5, 0xE, 0xE, 0x2, + 0x0, 0x5, 0xF, 0xA, 0x1, 0x2, 0x2, 0xF, + 0xF, 0x2, 0xA, 0x8, 0x2, 0x4, 0x0, 0xE, + + 0x4, 0x8, 0x3, 0xB, 0x9, 0x7, 0xF, 0xC, + 0x5, 0xE, 0x7, 0x2, 0x3, 0x3, 0x3, 0x6, + 0x8, 0xF, 0xC, 0x9, 0xC, 0x6, 0x5, 0x1, + 0xE, 0xC, 0xD, 0x7, 0xE, 0x5, 0xB, 0xB, + + 0xA, 0x9, 0x6, 0xE, 0x6, 0xA, 0x4, 0xD, + 0x7, 0xA, 0xE, 0xF, 0xF, 0x0, 0x1, 0x9, + 0x6, 0x6, 0x1, 0xC, 0xA, 0xF, 0xC, 0x3, + 0x3, 0x3, 0xB, 0x4, 0x7, 0xD, 0x7, 0x8, + } + }, + { "CryptoPro_D", "1.2.643.2.2.31.4", { + 0xF, 0xB, 0x1, 0x1, 0x0, 0x8, 0x3, 0x1, + 0xC, 0x6, 0xC, 0x5, 0xC, 0x0, 0x0, 0xA, + 0x2, 0x3, 0xB, 0xE, 0x8, 0xF, 0x6, 0x6, + 0xA, 0x4, 0x0, 0xC, 0x9, 0x3, 0xF, 0x8, + + 0x6, 0xC, 0xF, 0xA, 0xD, 0x2, 0x1, 0xF, + 0x4, 0xF, 0xE, 0x7, 0x2, 0x5, 0xE, 0xB, + 0x5, 0xE, 0x6, 0x0, 0xA, 0xE, 0x9, 0x0, + 0x0, 0x2, 0x5, 0xD, 0xB, 0xB, 0x2, 0x4, + + 0x7, 0x7, 0xA, 0x6, 0x7, 0x1, 0xD, 0xC, + 0x9, 0xD, 0xD, 0x2, 0x3, 0xA, 0x8, 0x3, + 0xE, 0x8, 0x4, 0xB, 0x6, 0x4, 0xC, 0x5, + 0xD, 0x0, 0x8, 0x4, 0x5, 0x7, 0x4, 0x9, + + 0x1, 0x5, 0x9, 0x9, 0x4, 0xC, 0xB, 0x7, + 0xB, 0xA, 0x3, 0x3, 0xE, 0x9, 0xA, 0xD, + 0x8, 0x9, 0x7, 0xF, 0xF, 0xD, 0x5, 0x2, + 0x3, 0x1, 0x2, 0x8, 0x1, 0x6, 0x7, 0xE, + } + }, + { "TC26_Z", "1.2.643.7.1.2.5.1.1", { + 0xc, 0x6, 0xb, 0xc, 0x7, 0x5, 0x8, 0x1, + 0x4, 0x8, 0x3, 0x8, 0xf, 0xd, 0xe, 0x7, + 0x6, 0x2, 0x5, 0x2, 0x5, 0xf, 0x2, 0xe, + 0x2, 0x3, 0x8, 0x1, 0xa, 0x6, 0x5, 0xd, + + 0xa, 0x9, 0x2, 0xd, 0x8, 0x9, 0x6, 0x0, + 0x5, 0xa, 0xf, 0x4, 0x1, 0x2, 0x9, 0x5, + 0xb, 0x5, 0xa, 0xf, 0x6, 0xc, 0x1, 0x8, + 0x9, 0xc, 0xd, 0x6, 0xd, 0xa, 0xc, 0x3, + + 0xe, 0x1, 0xe, 0x7, 0x0, 0xb, 
0xf, 0x4, + 0x8, 0xe, 0x1, 0x0, 0x9, 0x7, 0x4, 0xf, + 0xd, 0x4, 0x7, 0xa, 0x3, 0x8, 0xb, 0xa, + 0x7, 0x7, 0x4, 0x5, 0xe, 0x1, 0x0, 0x6, + + 0x0, 0xb, 0xc, 0x3, 0xb, 0x4, 0xd, 0x9, + 0x3, 0xd, 0x9, 0xe, 0x4, 0x3, 0xa, 0xc, + 0xf, 0x0, 0x6, 0x9, 0x2, 0xe, 0x3, 0xb, + 0x1, 0xf, 0x0, 0xb, 0xc, 0x0, 0x7, 0x2, + } + }, +}; + +int main(int argc, char **argv) +{ + unsigned int i, j, s; + FILE *f; + + if (argc == 1) + f = stdin; + else + f = fopen(argv[1], "w"); + + if (!f) + { + perror("fopen"); + exit(1); + } + + for (s = 0; s < DIM(gost_sboxes); s++) + { + unsigned char *sbox = gost_sboxes[s].sbox; + fprintf (f, "static const u32 sbox_%s[4*256] =\n {", gost_sboxes[s].name); + for (i = 0; i < 4; i++) { + fprintf (f, "\n /* %d */\n ", i); + for (j = 0; j < 256; j++) { + unsigned int val; + if (j % 4 == 0 && j != 0) + fprintf (f, "\n "); + val = sbox[ (j & 0xf) * 8 + 2 * i + 0] | + (sbox[ (j >> 4) * 8 + 2 * i + 1] << 4); + val <<= (8*i); + val = (val << 11) | (val >> 21); + fprintf (f, " 0x%08x,", val); + } + } + fprintf (f, "\n };\n\n"); + } + + fprintf (f, "static struct\n{\n const char *oid;\n const u32 *sbox;\n} gost_oid_map[] = {\n"); + + for (s = 0; s < DIM(gost_sboxes); s++) + { + fprintf (f, " { \"%s\", sbox_%s },\n", gost_sboxes[s].oid, gost_sboxes[s].name ); + } + + fprintf(f, " { NULL, NULL }\n};\n"); + + fclose (f); + + return 0; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/gost.h b/libotr/libgcrypt-1.8.7/cipher/gost.h new file mode 100644 index 0000000..025119c --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/gost.h @@ -0,0 +1,32 @@ +/* gost.h - GOST 28147-89 implementation + * Copyright (C) 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _GCRY_GOST_H +#define _GCRY_GOST_H + +typedef struct { + u32 key[8]; + const u32 *sbox; +} GOST28147_context; + +/* This is a simple interface that will be used by GOST R 34.11-94 */ +unsigned int _gcry_gost_enc_data (GOST28147_context *c, const u32 *key, + u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro); + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/gost28147.c b/libotr/libgcrypt-1.8.7/cipher/gost28147.c new file mode 100644 index 0000000..4ff80b4 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/gost28147.c @@ -0,0 +1,227 @@ +/* gost28147.c - GOST 28147-89 implementation for Libgcrypt + * Copyright (C) 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* GOST 28147-89 defines several modes of encryption: + * - ECB which should be used only for key transfer + * - CFB mode + * - OFB-like mode with additional transformation on keystream + * RFC 5830 names this 'counter encryption' mode + * Original GOST text uses the term 'gammirovanie' + * - MAC mode + * + * This implementation handles ECB and CFB modes via usual libgcrypt handling. + * OFB-like and MAC modes are unsupported. + */ + +#include <config.h> +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" + +#include "gost.h" +#include "gost-sb.h" + +static gcry_err_code_t +gost_setkey (void *c, const byte *key, unsigned keylen) +{ + int i; + GOST28147_context *ctx = c; + + if (keylen != 256 / 8) + return GPG_ERR_INV_KEYLEN; + + if (!ctx->sbox) + ctx->sbox = sbox_test_3411; + + for (i = 0; i < 8; i++) + { + ctx->key[i] = buf_get_le32(&key[4*i]); + } + return GPG_ERR_NO_ERROR; +} + +static u32 +gost_val (GOST28147_context *ctx, u32 cm1, int subkey) +{ + cm1 += ctx->key[subkey]; + cm1 = ctx->sbox[0*256 + ((cm1 >> 0) & 0xff)] | + ctx->sbox[1*256 + ((cm1 >> 8) & 0xff)] | + ctx->sbox[2*256 + ((cm1 >> 16) & 0xff)] | + ctx->sbox[3*256 + ((cm1 >> 24) & 0xff)]; + return cm1; +} + +static unsigned int +_gost_encrypt_data (void *c, u32 *o1, u32 *o2, u32 n1, u32 n2) +{ + GOST28147_context *ctx = c; + + n2 ^= gost_val (ctx, n1, 0); n1 ^= gost_val (ctx, n2, 1); + n2 ^= gost_val (ctx, n1, 2); n1 ^= gost_val (ctx, n2, 3); + n2 ^= gost_val (ctx, n1, 4); n1 ^= gost_val (ctx, n2, 5); + n2 ^= gost_val (ctx, n1, 6); n1 ^= gost_val (ctx, n2, 7); + + n2 ^= gost_val (ctx, n1, 0); n1 ^= gost_val (ctx, n2, 1); + n2 ^= gost_val (ctx, n1, 2); n1 ^= gost_val (ctx, n2, 3); + n2 ^= gost_val (ctx, n1, 4); n1 ^= gost_val (ctx, n2, 5); + n2 ^= gost_val (ctx, n1, 6); n1 ^= gost_val (ctx, n2, 7); + + n2 ^= gost_val (ctx, n1, 0); n1 ^= gost_val (ctx, n2, 1); + n2 ^= gost_val (ctx, n1, 2); n1 ^= gost_val (ctx, n2, 3); + n2 ^= gost_val (ctx, n1, 4); n1 ^= gost_val (ctx, n2, 5); + n2 ^= gost_val (ctx, n1, 6); n1 ^= gost_val (ctx, n2, 7); + + n2 ^= gost_val (ctx, n1, 7); n1 ^= gost_val (ctx, n2, 6); + n2 ^= gost_val (ctx, n1, 5); n1 ^= gost_val (ctx, n2, 4); + n2 ^= gost_val (ctx, n1, 3); n1 ^= gost_val (ctx, n2, 2); + n2 ^= gost_val (ctx, n1, 1); n1 ^= gost_val (ctx, n2, 0); + + *o1 = n2; + *o2 = n1; + + return /* burn_stack */ 4*sizeof(void*) /* func call */ + + 3*sizeof(void*) /* stack */ + + 4*sizeof(void*) /* gost_val call */; +} + +static unsigned int +gost_encrypt_block (void *c, byte *outbuf, const byte *inbuf) +{ + GOST28147_context *ctx = c; + u32 n1, n2; + unsigned int burn; + + n1 = buf_get_le32 (inbuf); + n2 = buf_get_le32 (inbuf+4); + + burn = _gost_encrypt_data(ctx, &n1, &n2, n1, n2); + + buf_put_le32 (outbuf+0, n1); + buf_put_le32 (outbuf+4, n2); + + return /* burn_stack */ burn + 6*sizeof(void*) /* func call */; +} + +unsigned int _gcry_gost_enc_data (GOST28147_context *c, const u32 *key, + u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro) +{ + if (cryptopro) + c->sbox = sbox_CryptoPro_3411; + else + c->sbox = sbox_test_3411; + memcpy (c->key, key, 8*4); + return _gost_encrypt_data (c, o1, o2, n1, n2) + 7 * sizeof(void *); +} + +static unsigned int +gost_decrypt_block (void *c, byte *outbuf, const byte *inbuf) +{ + GOST28147_context *ctx = c; + u32 n1, n2; + + 
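 /* Decryption applies the same rounds as gost_encrypt_block but with
+     the subkey schedule inverted: one forward pass over K1..K8
+     followed by three reversed passes over K8..K1, instead of three
+     forward passes and one reversed pass.  */
+
+ 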
n1 = buf_get_le32 (inbuf); + n2 = buf_get_le32 (inbuf+4); + + n2 ^= gost_val (ctx, n1, 0); n1 ^= gost_val (ctx, n2, 1); + n2 ^= gost_val (ctx, n1, 2); n1 ^= gost_val (ctx, n2, 3); + n2 ^= gost_val (ctx, n1, 4); n1 ^= gost_val (ctx, n2, 5); + n2 ^= gost_val (ctx, n1, 6); n1 ^= gost_val (ctx, n2, 7); + + n2 ^= gost_val (ctx, n1, 7); n1 ^= gost_val (ctx, n2, 6); + n2 ^= gost_val (ctx, n1, 5); n1 ^= gost_val (ctx, n2, 4); + n2 ^= gost_val (ctx, n1, 3); n1 ^= gost_val (ctx, n2, 2); + n2 ^= gost_val (ctx, n1, 1); n1 ^= gost_val (ctx, n2, 0); + + n2 ^= gost_val (ctx, n1, 7); n1 ^= gost_val (ctx, n2, 6); + n2 ^= gost_val (ctx, n1, 5); n1 ^= gost_val (ctx, n2, 4); + n2 ^= gost_val (ctx, n1, 3); n1 ^= gost_val (ctx, n2, 2); + n2 ^= gost_val (ctx, n1, 1); n1 ^= gost_val (ctx, n2, 0); + + n2 ^= gost_val (ctx, n1, 7); n1 ^= gost_val (ctx, n2, 6); + n2 ^= gost_val (ctx, n1, 5); n1 ^= gost_val (ctx, n2, 4); + n2 ^= gost_val (ctx, n1, 3); n1 ^= gost_val (ctx, n2, 2); + n2 ^= gost_val (ctx, n1, 1); n1 ^= gost_val (ctx, n2, 0); + + buf_put_le32 (outbuf+0, n2); + buf_put_le32 (outbuf+4, n1); + + return /* burn_stack */ 4*sizeof(void*) /* func call */ + + 3*sizeof(void*) /* stack */ + + 4*sizeof(void*) /* gost_val call */; +} + +static gpg_err_code_t +gost_set_sbox (GOST28147_context *ctx, const char *oid) +{ + int i; + + for (i = 0; gost_oid_map[i].oid; i++) + { + if (!strcmp(gost_oid_map[i].oid, oid)) + { + ctx->sbox = gost_oid_map[i].sbox; + return 0; + } + } + return GPG_ERR_VALUE_NOT_FOUND; +} + +static gpg_err_code_t +gost_set_extra_info (void *c, int what, const void *buffer, size_t buflen) +{ + GOST28147_context *ctx = c; + gpg_err_code_t ec = 0; + + (void)buffer; + (void)buflen; + + switch (what) + { + case GCRYCTL_SET_SBOX: + ec = gost_set_sbox (ctx, buffer); + break; + + default: + ec = GPG_ERR_INV_OP; + break; + } + return ec; +} + +static gcry_cipher_oid_spec_t oids_gost28147[] = + { + /* { "1.2.643.2.2.31.0", GCRY_CIPHER_MODE_CNTGOST }, */ + { "1.2.643.2.2.31.1", GCRY_CIPHER_MODE_CFB }, + { "1.2.643.2.2.31.2", GCRY_CIPHER_MODE_CFB }, + { "1.2.643.2.2.31.3", GCRY_CIPHER_MODE_CFB }, + { "1.2.643.2.2.31.4", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_gost28147 = + { + GCRY_CIPHER_GOST28147, {0, 0}, + "GOST28147", NULL, oids_gost28147, 8, 256, + sizeof (GOST28147_context), + gost_setkey, + gost_encrypt_block, + gost_decrypt_block, + NULL, NULL, NULL, gost_set_extra_info, + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/gostr3411-94.c b/libotr/libgcrypt-1.8.7/cipher/gostr3411-94.c new file mode 100644 index 0000000..a782427 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/gostr3411-94.c @@ -0,0 +1,355 @@ +/* gostr3411-94.c - GOST R 34.11-94 hash function + * Copyright (C) 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher.h" +#include "hash-common.h" + +#include "gost.h" + +#define max(a, b) (((a) > (b)) ? (a) : (b)) + +typedef struct { + gcry_md_block_ctx_t bctx; + GOST28147_context hd; + union { + u32 h[8]; + byte result[32]; + }; + u32 sigma[8]; + u32 len; + int cryptopro; +} GOSTR3411_CONTEXT; + +static unsigned int +transform (void *c, const unsigned char *data, size_t nblks); + +static void +gost3411_init (void *context, unsigned int flags) +{ + GOSTR3411_CONTEXT *hd = context; + + (void)flags; + + memset (&hd->hd, 0, sizeof(hd->hd)); + memset (hd->h, 0, 32); + memset (hd->sigma, 0, 32); + + hd->bctx.nblocks = 0; + hd->bctx.count = 0; + hd->bctx.blocksize = 32; + hd->bctx.bwrite = transform; + hd->cryptopro = 0; +} + +static void +gost3411_cp_init (void *context, unsigned int flags) +{ + GOSTR3411_CONTEXT *hd = context; + gost3411_init (context, flags); + hd->cryptopro = 1; +} + +static void +do_p (u32 *p, u32 *u, u32 *v) +{ + int k; + u32 t[8]; + + for (k = 0; k < 8; k++) + t[k] = u[k] ^ v[k]; + + for (k = 0; k < 4; k++) + { + p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 | + ((t[2] >> (8*k)) & 0xff) << 8 | + ((t[4] >> (8*k)) & 0xff) << 16 | + ((t[6] >> (8*k)) & 0xff) << 24; + p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 | + ((t[3] >> (8*k)) & 0xff) << 8 | + ((t[5] >> (8*k)) & 0xff) << 16 | + ((t[7] >> (8*k)) & 0xff) << 24; + } +} + +static void +do_a (u32 *u) +{ + u32 t[2]; + int i; + memcpy(t, u, 2*4); + for (i = 0; i < 6; i++) + u[i] = u[i+2]; + u[6] = u[0] ^ t[0]; + u[7] = u[1] ^ t[1]; +} +/* apply do_a twice: 1 2 3 4 -> 3 4 1^2 2^3 */ +static void +do_a2 (u32 *u) +{ + u32 t[4]; + int i; + memcpy (t, u, 16); + memcpy (u, u + 4, 16); + for (i = 0; i < 2; i++) + { + u[4+i] = t[i] ^ t[i + 2]; + u[6+i] = u[i] ^ t[i + 2]; + } +} + +static void +do_apply_c2 (u32 *u) +{ + u[ 0] ^= 0xff00ff00; + u[ 1] ^= 0xff00ff00; + u[ 2] ^= 0x00ff00ff; + u[ 3] ^= 0x00ff00ff; + u[ 4] ^= 0x00ffff00; + u[ 5] ^= 0xff0000ff; + u[ 6] ^= 0x000000ff; + u[ 7] ^= 0xff00ffff; +} + +#define do_chi_step12(e) \ + e[6] ^= ((e[6] >> 16) ^ e[7] ^ (e[7] >> 16) ^ e[4] ^ (e[5] >>16)) & 0xffff; + +#define do_chi_step13(e) \ + e[6] ^= ((e[7] ^ (e[7] >> 16) ^ e[0] ^ (e[4] >> 16) ^ e[6]) & 0xffff) << 16; + +#define do_chi_doublestep(e, i) \ + e[i] ^= (e[i] >> 16) ^ (e[(i+1)%8] << 16) ^ e[(i+1)%8] ^ (e[(i+1)%8] >> 16) ^ (e[(i+2)%8] << 16) ^ e[(i+6)%8] ^ (e[(i+7)%8] >> 16); \ + e[i] ^= (e[i] << 16); + +static void +do_chi_submix12 (u32 *e, u32 *x) +{ + e[6] ^= x[0]; + e[7] ^= x[1]; + e[0] ^= x[2]; + e[1] ^= x[3]; + e[2] ^= x[4]; + e[3] ^= x[5]; + e[4] ^= x[6]; + e[5] ^= x[7]; +} + +static void +do_chi_submix13 (u32 *e, u32 *x) +{ + e[6] ^= (x[0] << 16) | (x[7] >> 16); + e[7] ^= (x[1] << 16) | (x[0] >> 16); + e[0] ^= (x[2] << 16) | (x[1] >> 16); + e[1] ^= (x[3] << 16) | (x[2] >> 16); + e[2] ^= (x[4] << 16) | (x[3] >> 16); + e[3] ^= (x[5] << 16) | (x[4] >> 16); + e[4] ^= (x[6] << 16) | (x[5] >> 16); + e[5] ^= (x[7] << 16) | (x[6] >> 16); +} + +static void +do_add (u32 *s, u32 *a) +{ + u32 carry = 0; + int i; + + for (i = 0; i < 8; i++) + { + u32 op = carry + a[i]; + s[i] += op; + carry = (a[i] > op) || (op > s[i]); + } +} + +static unsigned int +do_hash_step (GOSTR3411_CONTEXT *hd, u32 *h, u32 *m) +{ + u32 u[8], v[8]; + u32 s[8]; + u32 k[8]; + unsigned int burn; + int i; + + memcpy (u, h, 32); + memcpy (v, m, 32); + + for (i = 0; i < 4; i++) { + do_p (k, u, v); + + burn = 
_gcry_gost_enc_data (&hd->hd, k, &s[2*i], &s[2*i+1], h[2*i], h[2*i+1], hd->cryptopro);
+
+      do_a (u);
+      if (i == 1)
+        do_apply_c2 (u);
+      do_a2 (v);
+    }
+
+  for (i = 0; i < 5; i++)
+    {
+      do_chi_doublestep (s, 0);
+      do_chi_doublestep (s, 1);
+      do_chi_doublestep (s, 2);
+      do_chi_doublestep (s, 3);
+      do_chi_doublestep (s, 4);
+      /* That is in total 12 + 1 + 61 = 74 = 16 * 4 + 10 rounds */
+      if (i == 4)
+        break;
+      do_chi_doublestep (s, 5);
+      if (i == 0)
+        do_chi_submix12(s, m);
+      do_chi_step12 (s);
+      if (i == 0)
+        do_chi_submix13(s, h);
+      do_chi_step13 (s);
+      do_chi_doublestep (s, 7);
+    }
+
+  memcpy (h, s+5, 12);
+  memcpy (h+3, s, 20);
+
+  return /* burn_stack */ 4 * sizeof(void*) /* func call (ret addr + args) */
+          + 4 * 32 + 2 * sizeof(int) /* stack */
+          + max(burn /* _gcry_gost_enc_one */,
+                sizeof(void*) * 2 /* do_a2 call */ +
+                16 + sizeof(int) /* do_a2 stack */ );
+}
+
+static unsigned int
+transform_blk (void *ctx, const unsigned char *data)
+{
+  GOSTR3411_CONTEXT *hd = ctx;
+  u32 m[8];
+  unsigned int burn;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    m[i] = buf_get_le32(data + i*4);
+  burn = do_hash_step (hd, hd->h, m);
+  do_add (hd->sigma, m);
+
+  return /* burn_stack */ burn + 3 * sizeof(void*) + 32 + 2 * sizeof(void*);
+}
+
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+  unsigned int burn;
+
+  do
+    {
+      burn = transform_blk (c, data);
+      data += 32;
+    }
+  while (--nblks);
+
+  return burn;
+}
+
+
+/*
+   This routine finally terminates the computation and returns the
+   digest.  The handle is prepared for a new cycle, but adding bytes
+   to the handle will destroy the returned buffer.
+   Returns: 32 bytes with the message digest.  */
+static void
+gost3411_final (void *context)
+{
+  GOSTR3411_CONTEXT *hd = context;
+  size_t padlen = 0;
+  u32 l[8];
+  int i;
+  MD_NBLOCKS_TYPE nblocks;
+
+  if (hd->bctx.count > 0)
+    {
+      padlen = 32 - hd->bctx.count;
+      memset (hd->bctx.buf + hd->bctx.count, 0, padlen);
+      hd->bctx.count += padlen;
+      _gcry_md_block_write (hd, NULL, 0); /* flush */;
+    }
+
+  if (hd->bctx.count != 0)
+    return; /* Something went wrong */
+
+  memset (l, 0, 32);
+
+  nblocks = hd->bctx.nblocks;
+  if (padlen)
+    {
+      nblocks --;
+      l[0] = 256 - padlen * 8;
+    }
+  l[0] |= nblocks << 8;
+  nblocks >>= 24;
+
+  for (i = 1; i < 8 && nblocks != 0; i++)
+    {
+      l[i] = nblocks;
+      nblocks >>= 24;
+    }
+
+  do_hash_step (hd, hd->h, l);
+  do_hash_step (hd, hd->h, hd->sigma);
+  for (i = 0; i < 8; i++)
+    hd->h[i] = le_bswap32(hd->h[i]);
+}
+
+static byte *
+gost3411_read (void *context)
+{
+  GOSTR3411_CONTEXT *hd = context;
+
+  return hd->result;
+}
+
+static unsigned char asn[6] = /* Object ID is 1.2.643.2.2.3 */
+  { 0x2a, 0x85, 0x03, 0x02, 0x02, 0x03 };
+
+static gcry_md_oid_spec_t oid_spec_gostr3411[] =
+  {
+    /* iso.member-body.ru.rans.cryptopro.3 (gostR3411-94-with-gostR3410-2001) */
+    { "1.2.643.2.2.3" },
+    /* iso.member-body.ru.rans.cryptopro.9 (gostR3411-94) */
+    { "1.2.643.2.2.9" },
+    {NULL},
+  };
+
+gcry_md_spec_t _gcry_digest_spec_gost3411_94 =
+  {
+    GCRY_MD_GOSTR3411_94, {0, 0},
+    "GOSTR3411_94", NULL, 0, NULL, 32,
+    gost3411_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL,
+    sizeof (GOSTR3411_CONTEXT)
+  };
+gcry_md_spec_t _gcry_digest_spec_gost3411_cp =
+  {
+    GCRY_MD_GOSTR3411_CP, {0, 0},
+    "GOSTR3411_CP", asn, DIM (asn), oid_spec_gostr3411, 32,
+    gost3411_cp_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL,
+    sizeof (GOSTR3411_CONTEXT)
+  };
diff --git a/libotr/libgcrypt-1.8.7/cipher/hash-common.c
b/libotr/libgcrypt-1.8.7/cipher/hash-common.c
new file mode 100644
index 0000000..a750d64
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/hash-common.c
@@ -0,0 +1,167 @@
+/* hash-common.c - Common code for hash algorithms
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#include "g10lib.h"
+#include "hash-common.h"
+
+
+/* Run a selftest for hash algorithm ALGO.  If the resulting digest
+   matches EXPECT/EXPECTLEN and everything else is fine as well,
+   return NULL.  If an error occurs, return a static text string
+   describing the error.
+
+   DATAMODE controls what will be hashed according to this table:
+
+     0 - Hash the supplied DATA of DATALEN.
+     1 - Hash one million times an 'a'.  DATA and DATALEN are ignored.
+
+*/
+const char *
+_gcry_hash_selftest_check_one (int algo,
+                               int datamode, const void *data, size_t datalen,
+                               const void *expect, size_t expectlen)
+{
+  const char *result = NULL;
+  gcry_error_t err = 0;
+  gcry_md_hd_t hd;
+  unsigned char *digest;
+  char aaa[1000];
+  int xof = 0;
+
+  if (_gcry_md_get_algo_dlen (algo) == 0)
+    xof = 1;
+  else if (_gcry_md_get_algo_dlen (algo) != expectlen)
+    return "digest size does not match expected size";
+
+  err = _gcry_md_open (&hd, algo, 0);
+  if (err)
+    return "gcry_md_open failed";
+
+  switch (datamode)
+    {
+    case 0:
+      _gcry_md_write (hd, data, datalen);
+      break;
+
+    case 1: /* Hash one million times an "a". */
+      {
+        int i;
+
+        /* Write in odd size chunks so that we test the buffering.  */
+        memset (aaa, 'a', 1000);
+        for (i = 0; i < 1000; i++)
+          _gcry_md_write (hd, aaa, 1000);
+      }
+      break;
+
+    default:
+      result = "invalid DATAMODE";
+    }
+
+  if (!result)
+    {
+      if (!xof)
+        {
+          digest = _gcry_md_read (hd, algo);
+
+          if ( memcmp (digest, expect, expectlen) )
+            result = "digest mismatch";
+        }
+      else
+        {
+          gcry_assert(expectlen <= sizeof(aaa));
+
+          err = _gcry_md_extract (hd, algo, aaa, expectlen);
+          if (err)
+            result = "error extracting output from XOF";
+          else if ( memcmp (aaa, expect, expectlen) )
+            result = "digest mismatch";
+        }
+    }
+
+  _gcry_md_close (hd);
+
+  return result;
+}
+
+
+/* Common function to write a chunk of data to the transform function
+   of a hash algorithm.  Note that the use of the term "block" does
+   not imply a fixed size block.  Note that we explicitly allow using
+   this function after the context has been finalized; the result does
+   not have any meaning but writing after finalize is sometimes
+   helpful to mitigate timing attacks.
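  The code below proceeds in
+   three steps: a buffer left exactly full by a previous call is
+   flushed first; then any pending partial block is topped up from the
+   input (recursing once with INBUF == NULL to flush it); finally all
+   remaining whole blocks go to hd->bwrite in a single call and the
+   tail is buffered for later.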
*/ +void +_gcry_md_block_write (void *context, const void *inbuf_arg, size_t inlen) +{ + const unsigned char *inbuf = inbuf_arg; + gcry_md_block_ctx_t *hd = context; + unsigned int stack_burn = 0; + const unsigned int blocksize = hd->blocksize; + size_t inblocks; + + if (sizeof(hd->buf) < blocksize) + BUG(); + + if (!hd->bwrite) + return; + + if (hd->count == blocksize) /* Flush the buffer. */ + { + stack_burn = hd->bwrite (hd, hd->buf, 1); + _gcry_burn_stack (stack_burn); + stack_burn = 0; + hd->count = 0; + if (!++hd->nblocks) + hd->nblocks_high++; + } + if (!inbuf) + return; + + if (hd->count) + { + for (; inlen && hd->count < blocksize; inlen--) + hd->buf[hd->count++] = *inbuf++; + _gcry_md_block_write (hd, NULL, 0); + if (!inlen) + return; + } + + if (inlen >= blocksize) + { + inblocks = inlen / blocksize; + stack_burn = hd->bwrite (hd, inbuf, inblocks); + hd->count = 0; + hd->nblocks_high += (hd->nblocks + inblocks < inblocks); + hd->nblocks += inblocks; + inlen -= inblocks * blocksize; + inbuf += inblocks * blocksize; + } + _gcry_burn_stack (stack_burn); + for (; inlen && hd->count < blocksize; inlen--) + hd->buf[hd->count++] = *inbuf++; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/hash-common.h b/libotr/libgcrypt-1.8.7/cipher/hash-common.h new file mode 100644 index 0000000..23f81ed --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/hash-common.h @@ -0,0 +1,59 @@ +/* hash-common.h - Declarations of common code for hash algorithms. + * Copyright (C) 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef GCRY_HASH_COMMON_H +#define GCRY_HASH_COMMON_H + +#include "types.h" + + +const char * _gcry_hash_selftest_check_one +/**/ (int algo, + int datamode, const void *data, size_t datalen, + const void *expect, size_t expectlen); + +/* Type for the md_write helper function. */ +typedef unsigned int (*_gcry_md_block_write_t) (void *c, + const unsigned char *blks, + size_t nblks); + +#if (defined(USE_SHA512) || defined(USE_WHIRLPOOL)) +/* SHA-512 and Whirlpool needs u64. SHA-512 needs larger buffer. */ +# define MD_BLOCK_MAX_BLOCKSIZE 128 +# define MD_NBLOCKS_TYPE u64 +#else +# define MD_BLOCK_MAX_BLOCKSIZE 64 +# define MD_NBLOCKS_TYPE u32 +#endif + +typedef struct gcry_md_block_ctx +{ + byte buf[MD_BLOCK_MAX_BLOCKSIZE]; + MD_NBLOCKS_TYPE nblocks; + MD_NBLOCKS_TYPE nblocks_high; + int count; + size_t blocksize; + _gcry_md_block_write_t bwrite; +} gcry_md_block_ctx_t; + + +void +_gcry_md_block_write( void *context, const void *inbuf_arg, size_t inlen); + +#endif /*GCRY_HASH_COMMON_H*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/hmac-tests.c b/libotr/libgcrypt-1.8.7/cipher/hmac-tests.c new file mode 100644 index 0000000..78d260a --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/hmac-tests.c @@ -0,0 +1,1160 @@ +/* hmac-tests.c - HMAC selftests. 
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+   Although algorithm self-tests are usually implemented in the module
+   implementing the algorithm, the case for HMAC is different because
+   HMAC is implemented on a higher level using a special feature of the
+   gcry_md_ functions.  It would be possible to do this also in the
+   digest algorithm modules, but that would blow up the code too much
+   and spread the HMAC tests over several modules.
+
+   Thus we implement all HMAC tests in this test module and provide a
+   function to run the tests.
+
+   To run all the Libgcrypt selftests in verbose mode, use
+
+     $ tests/basic --selftest
+
+*/
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "hmac256.h"
+
+/* Check one HMAC with digest ALGO using the regular HMAC
+ * API. (DATA,DATALEN) is the data to be MACed, (KEY,KEYLEN) the key
+ * and (EXPECT,EXPECTLEN) the expected result.  If TRUNC is set, the
+ * EXPECTLEN may be less than the digest length.  Returns NULL on
+ * success or a string describing the failure.
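+ *
+ * A typical invocation, taken from the FIPS-198a A.1 case in
+ * selftests_sha1 below (expected_digest standing in for the 20
+ * literal digest bytes spelled out there), is
+ *
+ *   errtxt = check_one (GCRY_MD_SHA1, "Sample #1", 9, key, 64,
+ *                       expected_digest, 20, 0);
+ *
+ * i.e. TRUNC is 0 and EXPECTLEN equals the full SHA-1 digest length.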
*/ +static const char * +check_one (int algo, + const void *data, size_t datalen, + const void *key, size_t keylen, + const void *expect, size_t expectlen, int trunc) +{ + gcry_md_hd_t hd; + const unsigned char *digest; + +/* printf ("HMAC algo %d\n", algo); */ + if (trunc) + { + if (_gcry_md_get_algo_dlen (algo) < expectlen) + return "invalid tests data"; + } + else + { + if (_gcry_md_get_algo_dlen (algo) != expectlen) + return "invalid tests data"; + } + if (_gcry_md_open (&hd, algo, GCRY_MD_FLAG_HMAC)) + return "gcry_md_open failed"; + if (_gcry_md_setkey (hd, key, keylen)) + { + _gcry_md_close (hd); + return "gcry_md_setkey failed"; + } + _gcry_md_write (hd, data, datalen); + digest = _gcry_md_read (hd, algo); + if (!digest) + { + _gcry_md_close (hd); + return "gcry_md_read failed"; + } + if (memcmp (digest, expect, expectlen)) + { +/* int i; */ + +/* fputs (" {", stdout); */ +/* for (i=0; i < expectlen-1; i++) */ +/* { */ +/* if (i && !(i % 8)) */ +/* fputs ("\n ", stdout); */ +/* printf (" 0x%02x,", digest[i]); */ +/* } */ +/* printf (" 0x%02x } },\n", digest[i]); */ + + _gcry_md_close (hd); + return "does not match"; + } + _gcry_md_close (hd); + return NULL; +} + + +static gpg_err_code_t +selftests_sha1 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + unsigned char key[128]; + int i, j; + + what = "FIPS-198a, A.1"; + for (i=0; i < 64; i++) + key[i] = i; + errtxt = check_one (GCRY_MD_SHA1, + "Sample #1", 9, + key, 64, + "\x4f\x4c\xa3\xd5\xd6\x8b\xa7\xcc\x0a\x12" + "\x08\xc9\xc6\x1e\x9c\x5d\xa0\x40\x3c\x0a", 20, 0); + if (errtxt) + goto failed; + + if (extended) + { + what = "FIPS-198a, A.2"; + for (i=0, j=0x30; i < 20; i++) + key[i] = j++; + errtxt = check_one (GCRY_MD_SHA1, + "Sample #2", 9, + key, 20, + "\x09\x22\xd3\x40\x5f\xaa\x3d\x19\x4f\x82" + "\xa4\x58\x30\x73\x7d\x5c\xc6\xc7\x5d\x24", 20, 0); + if (errtxt) + goto failed; + + what = "FIPS-198a, A.3"; + for (i=0, j=0x50; i < 100; i++) + key[i] = j++; + errtxt = check_one (GCRY_MD_SHA1, + "Sample #3", 9, + key, 100, + "\xbc\xf4\x1e\xab\x8b\xb2\xd8\x02\xf3\xd0" + "\x5c\xaf\x7c\xb0\x92\xec\xf8\xd1\xa3\xaa", 20, 0); + if (errtxt) + goto failed; + + what = "FIPS-198a, A.4"; + for (i=0, j=0x70; i < 49; i++) + key[i] = j++; + errtxt = check_one (GCRY_MD_SHA1, + "Sample #4", 9, + key, 49, + "\x9e\xa8\x86\xef\xe2\x68\xdb\xec\xce\x42" + "\x0c\x75\x24\xdf\x32\xe0\x75\x1a\x2a\x26", 20, 0); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. 
*/ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA1, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +static gpg_err_code_t +selftests_sha224 (int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect[28]; + } tv[] = + { + { "data-28 key-4", + "what do ya want for nothing?", + "Jefe", + { 0xa3, 0x0e, 0x01, 0x09, 0x8b, 0xc6, 0xdb, 0xbf, + 0x45, 0x69, 0x0f, 0x3a, 0x7e, 0x9e, 0x6d, 0x0f, + 0x8b, 0xbe, 0xa2, 0xa3, 0x9e, 0x61, 0x48, 0x00, + 0x8f, 0xd0, 0x5e, 0x44 } }, + + { "data-9 key-20", + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + { 0x89, 0x6f, 0xb1, 0x12, 0x8a, 0xbb, 0xdf, 0x19, + 0x68, 0x32, 0x10, 0x7c, 0xd4, 0x9d, 0xf3, 0x3f, + 0x47, 0xb4, 0xb1, 0x16, 0x99, 0x12, 0xba, 0x4f, + 0x53, 0x68, 0x4b, 0x22 } }, + + { "data-50 key-20", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + { 0x7f, 0xb3, 0xcb, 0x35, 0x88, 0xc6, 0xc1, 0xf6, + 0xff, 0xa9, 0x69, 0x4d, 0x7d, 0x6a, 0xd2, 0x64, + 0x93, 0x65, 0xb0, 0xc1, 0xf6, 0x5d, 0x69, 0xd1, + 0xec, 0x83, 0x33, 0xea } }, + + { "data-50 key-26", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + { 0x6c, 0x11, 0x50, 0x68, 0x74, 0x01, 0x3c, 0xac, + 0x6a, 0x2a, 0xbc, 0x1b, 0xb3, 0x82, 0x62, 0x7c, + 0xec, 0x6a, 0x90, 0xd8, 0x6e, 0xfc, 0x01, 0x2d, + 0xe7, 0xaf, 0xec, 0x5a } }, + + { "data-54 key-131", + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x95, 0xe9, 0xa0, 0xdb, 0x96, 0x20, 0x95, 0xad, + 0xae, 0xbe, 0x9b, 0x2d, 0x6f, 0x0d, 0xbc, 0xe2, + 0xd4, 0x99, 0xf1, 0x12, 0xf2, 0xd2, 0xb7, 0x27, + 0x3f, 0xa6, 0x87, 0x0e } }, + + { "data-152 key-131", + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x3a, 0x85, 0x41, 0x66, 0xac, 0x5d, 0x9f, 0x02, + 0x3f, 0x54, 0xd5, 0x17, 0xd0, 0xb3, 0x9d, 0xbd, + 0x94, 0x67, 0x70, 0xdb, 0x9c, 0x2b, 0x95, 0xc9, + 0xf6, 0xf5, 0x65, 0xd1 } }, + + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MD_SHA224, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, DIM (tv[tvidx].expect), 0); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA224, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +static gpg_err_code_t +selftests_sha256 (int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect[32]; + } tv[] = + { + { "data-28 key-4", + "what do ya want for nothing?", + "Jefe", + { 0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e, + 0x6a, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xc7, + 0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83, + 0x9d, 0xec, 0x58, 0xb9, 0x64, 0xec, 0x38, 0x43 } }, + + { "data-9 key-20", + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + { 0xb0, 0x34, 0x4c, 0x61, 0xd8, 0xdb, 0x38, 0x53, + 0x5c, 0xa8, 0xaf, 0xce, 0xaf, 0x0b, 0xf1, 0x2b, + 0x88, 0x1d, 0xc2, 0x00, 0xc9, 0x83, 0x3d, 0xa7, + 0x26, 0xe9, 0x37, 0x6c, 0x2e, 0x32, 0xcf, 0xf7 } }, + + { "data-50 key-20", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + { 0x77, 0x3e, 0xa9, 0x1e, 0x36, 0x80, 0x0e, 0x46, + 0x85, 0x4d, 0xb8, 0xeb, 0xd0, 0x91, 0x81, 0xa7, + 0x29, 0x59, 0x09, 0x8b, 0x3e, 0xf8, 0xc1, 0x22, + 0xd9, 0x63, 0x55, 0x14, 0xce, 0xd5, 0x65, 0xfe } }, + + { "data-50 key-26", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + { 0x82, 0x55, 0x8a, 0x38, 0x9a, 0x44, 0x3c, 0x0e, + 0xa4, 0xcc, 0x81, 0x98, 0x99, 0xf2, 0x08, 0x3a, + 0x85, 0xf0, 0xfa, 0xa3, 0xe5, 0x78, 0xf8, 0x07, + 0x7a, 0x2e, 0x3f, 0xf4, 0x67, 0x29, 0x66, 0x5b } }, + + { "data-54 key-131", + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + 
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f, + 0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f, + 0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14, + 0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54 } }, + + { "data-152 key-131", + "This is a test using a larger than block-size key and a larger " + "than block-size data. The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb, + 0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44, + 0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93, + 0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2 } }, + + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + hmac256_context_t hmachd; + const unsigned char *digest; + size_t dlen; + + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MD_SHA256, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, DIM (tv[tvidx].expect), 0); + if (errtxt) + goto failed; + + hmachd = _gcry_hmac256_new (tv[tvidx].key, strlen (tv[tvidx].key)); + if (!hmachd) + { + errtxt = "_gcry_hmac256_new failed"; + goto failed; + } + _gcry_hmac256_update (hmachd, tv[tvidx].data, strlen (tv[tvidx].data)); + digest = _gcry_hmac256_finalize (hmachd, &dlen); + if (!digest) + { + errtxt = "_gcry_hmac256_finalize failed"; + _gcry_hmac256_release (hmachd); + goto failed; + } + if (dlen != DIM (tv[tvidx].expect) + || memcmp (digest, tv[tvidx].expect, DIM (tv[tvidx].expect))) + { + errtxt = "does not match in second implementation"; + _gcry_hmac256_release (hmachd); + goto failed; + } + _gcry_hmac256_release (hmachd); + + if (!extended) + break; + } + + return 0; /* Succeeded. 
*/ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA256, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +static gpg_err_code_t +selftests_sha384 (int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect[48]; + } tv[] = + { + { "data-28 key-4", + "what do ya want for nothing?", + "Jefe", + { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31, + 0x61, 0x7f, 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b, + 0x9c, 0x7e, 0xf4, 0x64, 0xf5, 0xa0, 0x1b, 0x47, + 0xe4, 0x2e, 0xc3, 0x73, 0x63, 0x22, 0x44, 0x5e, + 0x8e, 0x22, 0x40, 0xca, 0x5e, 0x69, 0xe2, 0xc7, + 0x8b, 0x32, 0x39, 0xec, 0xfa, 0xb2, 0x16, 0x49 } }, + + { "data-9 key-20", + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62, + 0x6b, 0x08, 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f, + 0x15, 0xf9, 0xda, 0xdb, 0xe4, 0x10, 0x1e, 0xc6, + 0x82, 0xaa, 0x03, 0x4c, 0x7c, 0xeb, 0xc5, 0x9c, + 0xfa, 0xea, 0x9e, 0xa9, 0x07, 0x6e, 0xde, 0x7f, + 0x4a, 0xf1, 0x52, 0xe8, 0xb2, 0xfa, 0x9c, 0xb6 } }, + + { "data-50 key-20", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + { 0x88, 0x06, 0x26, 0x08, 0xd3, 0xe6, 0xad, 0x8a, + 0x0a, 0xa2, 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f, + 0x0a, 0xa6, 0x35, 0xd9, 0x47, 0xac, 0x9f, 0xeb, + 0xe8, 0x3e, 0xf4, 0xe5, 0x59, 0x66, 0x14, 0x4b, + 0x2a, 0x5a, 0xb3, 0x9d, 0xc1, 0x38, 0x14, 0xb9, + 0x4e, 0x3a, 0xb6, 0xe1, 0x01, 0xa3, 0x4f, 0x27 } }, + + { "data-50 key-26", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + { 0x3e, 0x8a, 0x69, 0xb7, 0x78, 0x3c, 0x25, 0x85, + 0x19, 0x33, 0xab, 0x62, 0x90, 0xaf, 0x6c, 0xa7, + 0x7a, 0x99, 0x81, 0x48, 0x08, 0x50, 0x00, 0x9c, + 0xc5, 0x57, 0x7c, 0x6e, 0x1f, 0x57, 0x3b, 0x4e, + 0x68, 0x01, 0xdd, 0x23, 0xc4, 0xa7, 0xd6, 0x79, + 0xcc, 0xf8, 0xa3, 0x86, 0xc6, 0x74, 0xcf, 0xfb } }, + + { "data-54 key-131", + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x4e, 0xce, 0x08, 0x44, 0x85, 0x81, 0x3e, 0x90, + 0x88, 0xd2, 0xc6, 0x3a, 0x04, 0x1b, 0xc5, 0xb4, + 0x4f, 0x9e, 0xf1, 0x01, 0x2a, 0x2b, 0x58, 0x8f, + 0x3c, 0xd1, 0x1f, 0x05, 0x03, 0x3a, 0xc4, 0xc6, + 0x0c, 0x2e, 0xf6, 0xab, 0x40, 0x30, 0xfe, 0x82, + 0x96, 0x24, 0x8d, 0xf1, 0x63, 0xf4, 0x49, 0x52 } }, + + { "data-152 key-131", + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x66, 0x17, 0x17, 0x8e, 0x94, 0x1f, 0x02, 0x0d, + 0x35, 0x1e, 0x2f, 0x25, 0x4e, 0x8f, 0xd3, 0x2c, + 0x60, 0x24, 0x20, 0xfe, 0xb0, 0xb8, 0xfb, 0x9a, + 0xdc, 0xce, 0xbb, 0x82, 0x46, 0x1e, 0x99, 0xc5, + 0xa6, 0x78, 0xcc, 0x31, 0xe7, 0x99, 0x17, 0x6d, + 0x38, 0x60, 0xe6, 0x11, 0x0c, 0x46, 0x52, 0x3e } }, + + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MD_SHA384, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, DIM (tv[tvidx].expect), 0); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA384, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +static gpg_err_code_t +selftests_sha512 (int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect[64]; + } tv[] = + { + { "data-28 key-4", + "what do ya want for nothing?", + "Jefe", + { 0x16, 0x4b, 0x7a, 0x7b, 0xfc, 0xf8, 0x19, 0xe2, + 0xe3, 0x95, 0xfb, 0xe7, 0x3b, 0x56, 0xe0, 0xa3, + 0x87, 0xbd, 0x64, 0x22, 0x2e, 0x83, 0x1f, 0xd6, + 0x10, 0x27, 0x0c, 0xd7, 0xea, 0x25, 0x05, 0x54, + 0x97, 0x58, 0xbf, 0x75, 0xc0, 0x5a, 0x99, 0x4a, + 0x6d, 0x03, 0x4f, 0x65, 0xf8, 0xf0, 0xe6, 0xfd, + 0xca, 0xea, 0xb1, 0xa3, 0x4d, 0x4a, 0x6b, 0x4b, + 0x63, 0x6e, 0x07, 0x0a, 0x38, 0xbc, 0xe7, 0x37 } }, + + { "data-9 key-20", + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + { 0x87, 0xaa, 0x7c, 0xde, 0xa5, 0xef, 0x61, 0x9d, + 0x4f, 0xf0, 0xb4, 0x24, 0x1a, 0x1d, 0x6c, 0xb0, + 0x23, 0x79, 0xf4, 0xe2, 0xce, 0x4e, 0xc2, 0x78, + 0x7a, 0xd0, 0xb3, 0x05, 0x45, 0xe1, 0x7c, 0xde, + 0xda, 0xa8, 0x33, 0xb7, 0xd6, 0xb8, 0xa7, 0x02, + 0x03, 0x8b, 0x27, 0x4e, 0xae, 0xa3, 0xf4, 0xe4, + 0xbe, 0x9d, 0x91, 0x4e, 0xeb, 0x61, 0xf1, 0x70, + 0x2e, 0x69, 0x6c, 0x20, 0x3a, 0x12, 0x68, 0x54 } }, + + { "data-50 key-20", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + { 0xfa, 0x73, 0xb0, 0x08, 0x9d, 0x56, 0xa2, 0x84, + 0xef, 0xb0, 0xf0, 0x75, 0x6c, 0x89, 0x0b, 0xe9, + 0xb1, 0xb5, 0xdb, 0xdd, 0x8e, 0xe8, 0x1a, 0x36, + 0x55, 0xf8, 0x3e, 0x33, 0xb2, 0x27, 0x9d, 0x39, + 0xbf, 0x3e, 0x84, 0x82, 0x79, 0xa7, 0x22, 0xc8, + 0x06, 0xb4, 0x85, 0xa4, 0x7e, 0x67, 0xc8, 0x07, + 0xb9, 0x46, 0xa3, 0x37, 0xbe, 0xe8, 0x94, 0x26, + 0x74, 0x27, 0x88, 0x59, 0xe1, 0x32, 0x92, 0xfb } }, + + { "data-50 key-26", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + 
"\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + { 0xb0, 0xba, 0x46, 0x56, 0x37, 0x45, 0x8c, 0x69, + 0x90, 0xe5, 0xa8, 0xc5, 0xf6, 0x1d, 0x4a, 0xf7, + 0xe5, 0x76, 0xd9, 0x7f, 0xf9, 0x4b, 0x87, 0x2d, + 0xe7, 0x6f, 0x80, 0x50, 0x36, 0x1e, 0xe3, 0xdb, + 0xa9, 0x1c, 0xa5, 0xc1, 0x1a, 0xa2, 0x5e, 0xb4, + 0xd6, 0x79, 0x27, 0x5c, 0xc5, 0x78, 0x80, 0x63, + 0xa5, 0xf1, 0x97, 0x41, 0x12, 0x0c, 0x4f, 0x2d, + 0xe2, 0xad, 0xeb, 0xeb, 0x10, 0xa2, 0x98, 0xdd } }, + + { "data-54 key-131", + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x80, 0xb2, 0x42, 0x63, 0xc7, 0xc1, 0xa3, 0xeb, + 0xb7, 0x14, 0x93, 0xc1, 0xdd, 0x7b, 0xe8, 0xb4, + 0x9b, 0x46, 0xd1, 0xf4, 0x1b, 0x4a, 0xee, 0xc1, + 0x12, 0x1b, 0x01, 0x37, 0x83, 0xf8, 0xf3, 0x52, + 0x6b, 0x56, 0xd0, 0x37, 0xe0, 0x5f, 0x25, 0x98, + 0xbd, 0x0f, 0xd2, 0x21, 0x5d, 0x6a, 0x1e, 0x52, + 0x95, 0xe6, 0x4f, 0x73, 0xf6, 0x3f, 0x0a, 0xec, + 0x8b, 0x91, 0x5a, 0x98, 0x5d, 0x78, 0x65, 0x98 } }, + + { "data-152 key-131", + "This is a test using a larger than block-size key and a larger " + "than block-size data. The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0xe3, 0x7b, 0x6a, 0x77, 0x5d, 0xc8, 0x7d, 0xba, + 0xa4, 0xdf, 0xa9, 0xf9, 0x6e, 0x5e, 0x3f, 0xfd, + 0xde, 0xbd, 0x71, 0xf8, 0x86, 0x72, 0x89, 0x86, + 0x5d, 0xf5, 0xa3, 0x2d, 0x20, 0xcd, 0xc9, 0x44, + 0xb6, 0x02, 0x2c, 0xac, 0x3c, 0x49, 0x82, 0xb1, + 0x0d, 0x5e, 0xeb, 0x55, 0xc3, 0xe4, 0xde, 0x15, + 0x13, 0x46, 0x76, 0xfb, 0x6d, 0xe0, 0x44, 0x60, + 0x65, 0xc9, 0x74, 0x40, 0xfa, 0x8c, 0x6a, 0x58 } }, + + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MD_SHA512, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, DIM (tv[tvidx].expect), 0); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA512, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +/* Test for the SHA3 algorithms. 
Vectors taken on 2017-07-18 from + * http://www.wolfgang-ehrhardt.de/hmac-sha3-testvectors.html */ +static gpg_err_code_t +selftests_sha3 (int hashalgo, int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect_224[28]; + const char expect_256[32]; + const char expect_384[48]; + const char expect_512[64]; + unsigned char trunc; + } tv[] = + { + { "data-9 key-20", /* Test 1 */ + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + + { 0x3b, 0x16, 0x54, 0x6b, 0xbc, 0x7b, 0xe2, 0x70, + 0x6a, 0x03, 0x1d, 0xca, 0xfd, 0x56, 0x37, 0x3d, + 0x98, 0x84, 0x36, 0x76, 0x41, 0xd8, 0xc5, 0x9a, + 0xf3, 0xc8, 0x60, 0xf7 }, + { 0xba, 0x85, 0x19, 0x23, 0x10, 0xdf, 0xfa, 0x96, + 0xe2, 0xa3, 0xa4, 0x0e, 0x69, 0x77, 0x43, 0x51, + 0x14, 0x0b, 0xb7, 0x18, 0x5e, 0x12, 0x02, 0xcd, + 0xcc, 0x91, 0x75, 0x89, 0xf9, 0x5e, 0x16, 0xbb }, + { 0x68, 0xd2, 0xdc, 0xf7, 0xfd, 0x4d, 0xdd, 0x0a, + 0x22, 0x40, 0xc8, 0xa4, 0x37, 0x30, 0x5f, 0x61, + 0xfb, 0x73, 0x34, 0xcf, 0xb5, 0xd0, 0x22, 0x6e, + 0x1b, 0xc2, 0x7d, 0xc1, 0x0a, 0x2e, 0x72, 0x3a, + 0x20, 0xd3, 0x70, 0xb4, 0x77, 0x43, 0x13, 0x0e, + 0x26, 0xac, 0x7e, 0x3d, 0x53, 0x28, 0x86, 0xbd }, + { 0xeb, 0x3f, 0xbd, 0x4b, 0x2e, 0xaa, 0xb8, 0xf5, + 0xc5, 0x04, 0xbd, 0x3a, 0x41, 0x46, 0x5a, 0xac, + 0xec, 0x15, 0x77, 0x0a, 0x7c, 0xab, 0xac, 0x53, + 0x1e, 0x48, 0x2f, 0x86, 0x0b, 0x5e, 0xc7, 0xba, + 0x47, 0xcc, 0xb2, 0xc6, 0xf2, 0xaf, 0xce, 0x8f, + 0x88, 0xd2, 0x2b, 0x6d, 0xc6, 0x13, 0x80, 0xf2, + 0x3a, 0x66, 0x8f, 0xd3, 0x88, 0x8b, 0xb8, 0x05, + 0x37, 0xc0, 0xa0, 0xb8, 0x64, 0x07, 0x68, 0x9e } + }, + + { "data-28 key-4", /* Test 2 */ + /* Test with a key shorter than the length of the HMAC output. */ + "what do ya want for nothing?", + "Jefe", + + { 0x7f, 0xdb, 0x8d, 0xd8, 0x8b, 0xd2, 0xf6, 0x0d, + 0x1b, 0x79, 0x86, 0x34, 0xad, 0x38, 0x68, 0x11, + 0xc2, 0xcf, 0xc8, 0x5b, 0xfa, 0xf5, 0xd5, 0x2b, + 0xba, 0xce, 0x5e, 0x66 }, + { 0xc7, 0xd4, 0x07, 0x2e, 0x78, 0x88, 0x77, 0xae, + 0x35, 0x96, 0xbb, 0xb0, 0xda, 0x73, 0xb8, 0x87, + 0xc9, 0x17, 0x1f, 0x93, 0x09, 0x5b, 0x29, 0x4a, + 0xe8, 0x57, 0xfb, 0xe2, 0x64, 0x5e, 0x1b, 0xa5 }, + { 0xf1, 0x10, 0x1f, 0x8c, 0xbf, 0x97, 0x66, 0xfd, + 0x67, 0x64, 0xd2, 0xed, 0x61, 0x90, 0x3f, 0x21, + 0xca, 0x9b, 0x18, 0xf5, 0x7c, 0xf3, 0xe1, 0xa2, + 0x3c, 0xa1, 0x35, 0x08, 0xa9, 0x32, 0x43, 0xce, + 0x48, 0xc0, 0x45, 0xdc, 0x00, 0x7f, 0x26, 0xa2, + 0x1b, 0x3f, 0x5e, 0x0e, 0x9d, 0xf4, 0xc2, 0x0a }, + { 0x5a, 0x4b, 0xfe, 0xab, 0x61, 0x66, 0x42, 0x7c, + 0x7a, 0x36, 0x47, 0xb7, 0x47, 0x29, 0x2b, 0x83, + 0x84, 0x53, 0x7c, 0xdb, 0x89, 0xaf, 0xb3, 0xbf, + 0x56, 0x65, 0xe4, 0xc5, 0xe7, 0x09, 0x35, 0x0b, + 0x28, 0x7b, 0xae, 0xc9, 0x21, 0xfd, 0x7c, 0xa0, + 0xee, 0x7a, 0x0c, 0x31, 0xd0, 0x22, 0xa9, 0x5e, + 0x1f, 0xc9, 0x2b, 0xa9, 0xd7, 0x7d, 0xf8, 0x83, + 0x96, 0x02, 0x75, 0xbe, 0xb4, 0xe6, 0x20, 0x24 } + }, + + { "data-50 key-20", /* Test 3 */ + /* Test with a combined length of key and data that is larger + * than 64 bytes (= block-size of SHA-224 and SHA-256). 
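+       * (This wording is carried over from the original SHA-2
+       * vectors; the actual SHA3 block sizes are listed in the note
+       * at the top of this function.)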
*/ + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + + { 0x67, 0x6c, 0xfc, 0x7d, 0x16, 0x15, 0x36, 0x38, + 0x78, 0x03, 0x90, 0x69, 0x2b, 0xe1, 0x42, 0xd2, + 0xdf, 0x7c, 0xe9, 0x24, 0xb9, 0x09, 0xc0, 0xc0, + 0x8d, 0xbf, 0xdc, 0x1a }, + { 0x84, 0xec, 0x79, 0x12, 0x4a, 0x27, 0x10, 0x78, + 0x65, 0xce, 0xdd, 0x8b, 0xd8, 0x2d, 0xa9, 0x96, + 0x5e, 0x5e, 0xd8, 0xc3, 0x7b, 0x0a, 0xc9, 0x80, + 0x05, 0xa7, 0xf3, 0x9e, 0xd5, 0x8a, 0x42, 0x07 }, + { 0x27, 0x5c, 0xd0, 0xe6, 0x61, 0xbb, 0x8b, 0x15, + 0x1c, 0x64, 0xd2, 0x88, 0xf1, 0xf7, 0x82, 0xfb, + 0x91, 0xa8, 0xab, 0xd5, 0x68, 0x58, 0xd7, 0x2b, + 0xab, 0xb2, 0xd4, 0x76, 0xf0, 0x45, 0x83, 0x73, + 0xb4, 0x1b, 0x6a, 0xb5, 0xbf, 0x17, 0x4b, 0xec, + 0x42, 0x2e, 0x53, 0xfc, 0x31, 0x35, 0xac, 0x6e }, + { 0x30, 0x9e, 0x99, 0xf9, 0xec, 0x07, 0x5e, 0xc6, + 0xc6, 0xd4, 0x75, 0xed, 0xa1, 0x18, 0x06, 0x87, + 0xfc, 0xf1, 0x53, 0x11, 0x95, 0x80, 0x2a, 0x99, + 0xb5, 0x67, 0x74, 0x49, 0xa8, 0x62, 0x51, 0x82, + 0x85, 0x1c, 0xb3, 0x32, 0xaf, 0xb6, 0xa8, 0x9c, + 0x41, 0x13, 0x25, 0xfb, 0xcb, 0xcd, 0x42, 0xaf, + 0xcb, 0x7b, 0x6e, 0x5a, 0xab, 0x7e, 0xa4, 0x2c, + 0x66, 0x0f, 0x97, 0xfd, 0x85, 0x84, 0xbf, 0x03 } + }, + + { "data-50 key-25", /* Test 4 */ + /* Test with a combined length of key and data that is larger + * than 64 bytes (= block-size of SHA-224 and SHA-256). */ + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + + { 0xa9, 0xd7, 0x68, 0x5a, 0x19, 0xc4, 0xe0, 0xdb, + 0xd9, 0xdf, 0x25, 0x56, 0xcc, 0x8a, 0x7d, 0x2a, + 0x77, 0x33, 0xb6, 0x76, 0x25, 0xce, 0x59, 0x4c, + 0x78, 0x27, 0x0e, 0xeb }, + { 0x57, 0x36, 0x6a, 0x45, 0xe2, 0x30, 0x53, 0x21, + 0xa4, 0xbc, 0x5a, 0xa5, 0xfe, 0x2e, 0xf8, 0xa9, + 0x21, 0xf6, 0xaf, 0x82, 0x73, 0xd7, 0xfe, 0x7b, + 0xe6, 0xcf, 0xed, 0xb3, 0xf0, 0xae, 0xa6, 0xd7 }, + { 0x3a, 0x5d, 0x7a, 0x87, 0x97, 0x02, 0xc0, 0x86, + 0xbc, 0x96, 0xd1, 0xdd, 0x8a, 0xa1, 0x5d, 0x9c, + 0x46, 0x44, 0x6b, 0x95, 0x52, 0x13, 0x11, 0xc6, + 0x06, 0xfd, 0xc4, 0xe3, 0x08, 0xf4, 0xb9, 0x84, + 0xda, 0x2d, 0x0f, 0x94, 0x49, 0xb3, 0xba, 0x84, + 0x25, 0xec, 0x7f, 0xb8, 0xc3, 0x1b, 0xc1, 0x36 }, + { 0xb2, 0x7e, 0xab, 0x1d, 0x6e, 0x8d, 0x87, 0x46, + 0x1c, 0x29, 0xf7, 0xf5, 0x73, 0x9d, 0xd5, 0x8e, + 0x98, 0xaa, 0x35, 0xf8, 0xe8, 0x23, 0xad, 0x38, + 0xc5, 0x49, 0x2a, 0x20, 0x88, 0xfa, 0x02, 0x81, + 0x99, 0x3b, 0xbf, 0xff, 0x9a, 0x0e, 0x9c, 0x6b, + 0xf1, 0x21, 0xae, 0x9e, 0xc9, 0xbb, 0x09, 0xd8, + 0x4a, 0x5e, 0xba, 0xc8, 0x17, 0x18, 0x2e, 0xa9, + 0x74, 0x67, 0x3f, 0xb1, 0x33, 0xca, 0x0d, 0x1d } + }, + + { "data-20 key-20 trunc", /* Test 5 */ + /* Test with a truncation of output to 128 bits. 
*/ + "Test With Truncation", + "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c" + "\x0c\x0c\x0c\x0c", + + { 0x49, 0xfd, 0xd3, 0xab, 0xd0, 0x05, 0xeb, 0xb8, + 0xae, 0x63, 0xfe, 0xa9, 0x46, 0xd1, 0x88, 0x3c }, + { 0x6e, 0x02, 0xc6, 0x45, 0x37, 0xfb, 0x11, 0x80, + 0x57, 0xab, 0xb7, 0xfb, 0x66, 0xa2, 0x3b, 0x3c }, + { 0x47, 0xc5, 0x1a, 0xce, 0x1f, 0xfa, 0xcf, 0xfd, + 0x74, 0x94, 0x72, 0x46, 0x82, 0x61, 0x57, 0x83 }, + { 0x0f, 0xa7, 0x47, 0x59, 0x48, 0xf4, 0x3f, 0x48, + 0xca, 0x05, 0x16, 0x67, 0x1e, 0x18, 0x97, 0x8c }, + 16 + }, + + { "data-54 key-131", /* Test 6 */ + /* Test with a key larger than 128 bytes (= block-size of + * SHA-384 and SHA-512). */ + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + + { 0xb4, 0xa1, 0xf0, 0x4c, 0x00, 0x28, 0x7a, 0x9b, + 0x7f, 0x60, 0x75, 0xb3, 0x13, 0xd2, 0x79, 0xb8, + 0x33, 0xbc, 0x8f, 0x75, 0x12, 0x43, 0x52, 0xd0, + 0x5f, 0xb9, 0x99, 0x5f }, + { 0xed, 0x73, 0xa3, 0x74, 0xb9, 0x6c, 0x00, 0x52, + 0x35, 0xf9, 0x48, 0x03, 0x2f, 0x09, 0x67, 0x4a, + 0x58, 0xc0, 0xce, 0x55, 0x5c, 0xfc, 0x1f, 0x22, + 0x3b, 0x02, 0x35, 0x65, 0x60, 0x31, 0x2c, 0x3b }, + { 0x0f, 0xc1, 0x95, 0x13, 0xbf, 0x6b, 0xd8, 0x78, + 0x03, 0x70, 0x16, 0x70, 0x6a, 0x0e, 0x57, 0xbc, + 0x52, 0x81, 0x39, 0x83, 0x6b, 0x9a, 0x42, 0xc3, + 0xd4, 0x19, 0xe4, 0x98, 0xe0, 0xe1, 0xfb, 0x96, + 0x16, 0xfd, 0x66, 0x91, 0x38, 0xd3, 0x3a, 0x11, + 0x05, 0xe0, 0x7c, 0x72, 0xb6, 0x95, 0x3b, 0xcc }, + { 0x00, 0xf7, 0x51, 0xa9, 0xe5, 0x06, 0x95, 0xb0, + 0x90, 0xed, 0x69, 0x11, 0xa4, 0xb6, 0x55, 0x24, + 0x95, 0x1c, 0xdc, 0x15, 0xa7, 0x3a, 0x5d, 0x58, + 0xbb, 0x55, 0x21, 0x5e, 0xa2, 0xcd, 0x83, 0x9a, + 0xc7, 0x9d, 0x2b, 0x44, 0xa3, 0x9b, 0xaf, 0xab, + 0x27, 0xe8, 0x3f, 0xde, 0x9e, 0x11, 0xf6, 0x34, + 0x0b, 0x11, 0xd9, 0x91, 0xb1, 0xb9, 0x1b, 0xf2, + 0xee, 0xe7, 0xfc, 0x87, 0x24, 0x26, 0xc3, 0xa4 } + }, + + { "data-54 key-147", /* Test 6a */ + /* Test with a key larger than 144 bytes (= block-size of + * SHA3-224). 
*/ + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + + { 0xb9, 0x6d, 0x73, 0x0c, 0x14, 0x8c, 0x2d, 0xaa, + 0xd8, 0x64, 0x9d, 0x83, 0xde, 0xfa, 0xa3, 0x71, + 0x97, 0x38, 0xd3, 0x47, 0x75, 0x39, 0x7b, 0x75, + 0x71, 0xc3, 0x85, 0x15 }, + { 0xa6, 0x07, 0x2f, 0x86, 0xde, 0x52, 0xb3, 0x8b, + 0xb3, 0x49, 0xfe, 0x84, 0xcd, 0x6d, 0x97, 0xfb, + 0x6a, 0x37, 0xc4, 0xc0, 0xf6, 0x2a, 0xae, 0x93, + 0x98, 0x11, 0x93, 0xa7, 0x22, 0x9d, 0x34, 0x67 }, + { 0x71, 0x3d, 0xff, 0x03, 0x02, 0xc8, 0x50, 0x86, + 0xec, 0x5a, 0xd0, 0x76, 0x8d, 0xd6, 0x5a, 0x13, + 0xdd, 0xd7, 0x90, 0x68, 0xd8, 0xd4, 0xc6, 0x21, + 0x2b, 0x71, 0x2e, 0x41, 0x64, 0x94, 0x49, 0x11, + 0x14, 0x80, 0x23, 0x00, 0x44, 0x18, 0x5a, 0x99, + 0x10, 0x3e, 0xd8, 0x20, 0x04, 0xdd, 0xbf, 0xcc }, + { 0xb1, 0x48, 0x35, 0xc8, 0x19, 0xa2, 0x90, 0xef, + 0xb0, 0x10, 0xac, 0xe6, 0xd8, 0x56, 0x8d, 0xc6, + 0xb8, 0x4d, 0xe6, 0x0b, 0xc4, 0x9b, 0x00, 0x4c, + 0x3b, 0x13, 0xed, 0xa7, 0x63, 0x58, 0x94, 0x51, + 0xe5, 0xdd, 0x74, 0x29, 0x28, 0x84, 0xd1, 0xbd, + 0xce, 0x64, 0xe6, 0xb9, 0x19, 0xdd, 0x61, 0xdc, + 0x9c, 0x56, 0xa2, 0x82, 0xa8, 0x1c, 0x0b, 0xd1, + 0x4f, 0x1f, 0x36, 0x5b, 0x49, 0xb8, 0x3a, 0x5b } + }, + + { "data-152 key-131", /* Test 7 */ + /* Test with a key and data that is larger than 128 bytes (= + * block-size of SHA-384 and SHA-512). */ + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + + { 0x05, 0xd8, 0xcd, 0x6d, 0x00, 0xfa, 0xea, 0x8d, + 0x1e, 0xb6, 0x8a, 0xde, 0x28, 0x73, 0x0b, 0xbd, + 0x3c, 0xba, 0xb6, 0x92, 0x9f, 0x0a, 0x08, 0x6b, + 0x29, 0xcd, 0x62, 0xa0 }, + { 0x65, 0xc5, 0xb0, 0x6d, 0x4c, 0x3d, 0xe3, 0x2a, + 0x7a, 0xef, 0x87, 0x63, 0x26, 0x1e, 0x49, 0xad, + 0xb6, 0xe2, 0x29, 0x3e, 0xc8, 0xe7, 0xc6, 0x1e, + 0x8d, 0xe6, 0x17, 0x01, 0xfc, 0x63, 0xe1, 0x23 }, + { 0x02, 0x6f, 0xdf, 0x6b, 0x50, 0x74, 0x1e, 0x37, + 0x38, 0x99, 0xc9, 0xf7, 0xd5, 0x40, 0x6d, 0x4e, + 0xb0, 0x9f, 0xc6, 0x66, 0x56, 0x36, 0xfc, 0x1a, + 0x53, 0x00, 0x29, 0xdd, 0xf5, 0xcf, 0x3c, 0xa5, + 0xa9, 0x00, 0xed, 0xce, 0x01, 0xf5, 0xf6, 0x1e, + 0x2f, 0x40, 0x8c, 0xdf, 0x2f, 0xd3, 0xe7, 0xe8 }, + { 0x38, 0xa4, 0x56, 0xa0, 0x04, 0xbd, 0x10, 0xd3, + 0x2c, 0x9a, 0xb8, 0x33, 0x66, 0x84, 0x11, 0x28, + 0x62, 0xc3, 0xdb, 0x61, 0xad, 0xcc, 0xa3, 0x18, + 0x29, 0x35, 0x5e, 0xaf, 0x46, 0xfd, 0x5c, 0x73, + 0xd0, 0x6a, 0x1f, 0x0d, 0x13, 0xfe, 0xc9, 0xa6, + 0x52, 0xfb, 0x38, 0x11, 0xb5, 0x77, 0xb1, 0xb1, + 0xd1, 0xb9, 0x78, 0x9f, 0x97, 0xae, 0x5b, 0x83, + 0xc6, 0xf4, 0x4d, 0xfc, 0xf1, 0xd6, 0x7e, 0xba } + }, + + { "data-152 key-147", /* Test 7a */ + /* Test with a key larger than 144 bytes (= block-size of + * SHA3-224). */ + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + + { 0xc7, 0x9c, 0x9b, 0x09, 0x34, 0x24, 0xe5, 0x88, + 0xa9, 0x87, 0x8b, 0xbc, 0xb0, 0x89, 0xe0, 0x18, + 0x27, 0x00, 0x96, 0xe9, 0xb4, 0xb1, 0xa9, 0xe8, + 0x22, 0x0c, 0x86, 0x6a }, + { 0xe6, 0xa3, 0x6d, 0x9b, 0x91, 0x5f, 0x86, 0xa0, + 0x93, 0xca, 0xc7, 0xd1, 0x10, 0xe9, 0xe0, 0x4c, + 0xf1, 0xd6, 0x10, 0x0d, 0x30, 0x47, 0x55, 0x09, + 0xc2, 0x47, 0x5f, 0x57, 0x1b, 0x75, 0x8b, 0x5a }, + { 0xca, 0xd1, 0x8a, 0x8f, 0xf6, 0xc4, 0xcc, 0x3a, + 0xd4, 0x87, 0xb9, 0x5f, 0x97, 0x69, 0xe9, 0xb6, + 0x1c, 0x06, 0x2a, 0xef, 0xd6, 0x95, 0x25, 0x69, + 0xe6, 0xe6, 0x42, 0x18, 0x97, 0x05, 0x4c, 0xfc, + 0x70, 0xb5, 0xfd, 0xc6, 0x60, 0x5c, 0x18, 0x45, + 0x71, 0x12, 0xfc, 0x6a, 0xaa, 0xd4, 0x55, 0x85 }, + { 0xdc, 0x03, 0x0e, 0xe7, 0x88, 0x70, 0x34, 0xf3, + 0x2c, 0xf4, 0x02, 0xdf, 0x34, 0x62, 0x2f, 0x31, + 0x1f, 0x3e, 0x6c, 0xf0, 0x48, 0x60, 0xc6, 0xbb, + 0xd7, 0xfa, 0x48, 0x86, 0x74, 0x78, 0x2b, 0x46, + 0x59, 0xfd, 0xbd, 0xf3, 0xfd, 0x87, 0x78, 0x52, + 0x88, 0x5c, 0xfe, 0x6e, 0x22, 0x18, 0x5f, 0xe7, + 0xb2, 0xee, 0x95, 0x20, 0x43, 0x62, 0x9b, 0xc9, + 0xd5, 0xf3, 0x29, 0x8a, 0x41, 0xd0, 0x2c, 0x66 } + }/*,*/ + + /* Our API does not allow to specify a bit count and thus we + * can't use the following test. */ + /* { "data-5bit key-4", /\* Test 8 *\/ */ + /* /\* Test with data bit size no multiple of 8, the data bits are */ + /* * '11001' from the NIST example using SHA-3 order (= 5 bits */ + /* * from LSB hex byte 13 or 5 bits from MSB hex byte c8). 
*\/ */ + /* "\xc8", */ + /* "Jefe", */ + + /* { 0x5f, 0x8c, 0x0e, 0xa7, 0xfa, 0xfe, 0xcd, 0x0c, */ + /* 0x34, 0x63, 0xaa, 0xd0, 0x97, 0x42, 0xce, 0xce, */ + /* 0xb1, 0x42, 0xfe, 0x0a, 0xb6, 0xf4, 0x53, 0x94, */ + /* 0x38, 0xc5, 0x9d, 0xe8 }, */ + /* { 0xec, 0x82, 0x22, 0x77, 0x3f, 0xac, 0x68, 0xb3, */ + /* 0xd3, 0xdc, 0xb1, 0x82, 0xae, 0xc8, 0xb0, 0x50, */ + /* 0x7a, 0xce, 0x44, 0x48, 0xd2, 0x0a, 0x11, 0x47, */ + /* 0xe6, 0x82, 0x11, 0x8d, 0xa4, 0xe3, 0xf4, 0x4c }, */ + /* { 0x21, 0xfb, 0xd3, 0xbf, 0x3e, 0xbb, 0xa3, 0xcf, */ + /* 0xc9, 0xef, 0x64, 0xc0, 0x59, 0x1c, 0x92, 0xc5, */ + /* 0xac, 0xb2, 0x65, 0xe9, 0x2d, 0x87, 0x61, 0xd1, */ + /* 0xf9, 0x1a, 0x52, 0xa1, 0x03, 0xa6, 0xc7, 0x96, */ + /* 0x94, 0xcf, 0xd6, 0x7a, 0x9a, 0x2a, 0xc1, 0x32, */ + /* 0x4f, 0x02, 0xfe, 0xa6, 0x3b, 0x81, 0xef, 0xfc }, */ + /* { 0x27, 0xf9, 0x38, 0x8c, 0x15, 0x67, 0xef, 0x4e, */ + /* 0xf2, 0x00, 0x60, 0x2a, 0x6c, 0xf8, 0x71, 0xd6, */ + /* 0x8a, 0x6f, 0xb0, 0x48, 0xd4, 0x73, 0x7a, 0xc4, */ + /* 0x41, 0x8a, 0x2f, 0x02, 0x12, 0x89, 0xd1, 0x3d, */ + /* 0x1f, 0xd1, 0x12, 0x0f, 0xec, 0xb9, 0xcf, 0x96, */ + /* 0x4c, 0x5b, 0x11, 0x7a, 0xb5, 0xb1, 0x1c, 0x61, */ + /* 0x4b, 0x2d, 0xa3, 0x9d, 0xad, 0xd5, 0x1f, 0x2f, */ + /* 0x5e, 0x22, 0xaa, 0xcc, 0xec, 0x7d, 0x57, 0x6e } */ + /* } */ + + }; + const char *what; + const char *errtxt; + int tvidx; + const char *expect; + int nexpect; + + for (tvidx=0; tvidx < DIM(tv); tvidx++) + { + what = tv[tvidx].desc; + if (hashalgo == GCRY_MD_SHA3_224) + { + expect = tv[tvidx].expect_224; + nexpect = DIM (tv[tvidx].expect_224); + } + else if (hashalgo == GCRY_MD_SHA3_256) + { + expect = tv[tvidx].expect_256; + nexpect = DIM (tv[tvidx].expect_256); + } + else if (hashalgo == GCRY_MD_SHA3_384) + { + expect = tv[tvidx].expect_384; + nexpect = DIM (tv[tvidx].expect_384); + } + else if (hashalgo == GCRY_MD_SHA3_512) + { + expect = tv[tvidx].expect_512; + nexpect = DIM (tv[tvidx].expect_512); + } + else + BUG(); + + if (tv[tvidx].trunc && tv[tvidx].trunc < nexpect) + nexpect = tv[tvidx].trunc; + + errtxt = check_one (hashalgo, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + expect, nexpect, !!tv[tvidx].trunc); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("hmac", hashalgo, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_MD_SHA1: + ec = selftests_sha1 (extended, report); + break; + case GCRY_MD_SHA224: + ec = selftests_sha224 (extended, report); + break; + case GCRY_MD_SHA256: + ec = selftests_sha256 (extended, report); + break; + case GCRY_MD_SHA384: + ec = selftests_sha384 (extended, report); + break; + case GCRY_MD_SHA512: + ec = selftests_sha512 (extended, report); + break; + + case GCRY_MD_SHA3_224: + case GCRY_MD_SHA3_256: + case GCRY_MD_SHA3_384: + case GCRY_MD_SHA3_512: + ec = selftests_sha3 (algo, extended, report); + break; + + default: + ec = GPG_ERR_DIGEST_ALGO; + break; + } + return ec; +} + + + + +/* Run the selftests for HMAC with digest algorithm ALGO with optional + reporting function REPORT. 
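+   If EXTENDED is set, all test vectors for ALGO are run; otherwise
+   only the first one is used.  Returns 0 on success,
+   GPG_ERR_DIGEST_ALGO if ALGO is not available or has no HMAC
+   self-test, and GPG_ERR_SELFTEST_FAILED on a mismatch.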
*/ +gpg_error_t +_gcry_hmac_selftest (int algo, int extended, selftest_report_func_t report) +{ + gcry_err_code_t ec = 0; + + if (!_gcry_md_test_algo (algo)) + { + ec = run_selftests (algo, extended, report); + } + else + { + ec = GPG_ERR_DIGEST_ALGO; + if (report) + report ("hmac", algo, "module", "algorithm not available"); + } + return gpg_error (ec); +} diff --git a/libotr/libgcrypt-1.8.7/cipher/idea.c b/libotr/libgcrypt-1.8.7/cipher/idea.c new file mode 100644 index 0000000..ffe821d --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/idea.c @@ -0,0 +1,379 @@ +/* idea.c - IDEA function + * Copyright 1997, 1998, 1999, 2001 Werner Koch (dd9jn) + * Copyright 2013 g10 Code GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WERNER KOCH BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Werner Koch shall not be + * used in advertising or otherwise to promote the sale, use or other dealings + * in this Software without prior written authorization from Werner Koch. + * + * Patents on IDEA have expired: + * Europe: EP0482154 on 2011-05-16, + * Japan: JP3225440 on 2011-05-16, + * U.S.: 5,214,703 on 2012-01-07. + */ + +/* + * Please see http://www.noepatents.org/ to learn why software patents + * are bad for society and what you can do to fight them. + * + * The code herein is based on the one from: + * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996. + * ISBN 0-471-11709-9. 
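+ *
+ * As a refresher: IDEA operates on 64-bit blocks with a 128-bit key
+ * and 8.5 rounds, combining three group operations on 16-bit words:
+ * XOR, addition modulo 2^16 and multiplication modulo 2^16+1 (with
+ * an all-zero word standing for 2^16).  The MUL macro below
+ * implements that multiplication.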
+ */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" + + +#define IDEA_KEYSIZE 16 +#define IDEA_BLOCKSIZE 8 +#define IDEA_ROUNDS 8 +#define IDEA_KEYLEN (6*IDEA_ROUNDS+4) + +typedef struct { + u16 ek[IDEA_KEYLEN]; + u16 dk[IDEA_KEYLEN]; + int have_dk; +} IDEA_context; + +static const char *selftest(void); + + +static u16 +mul_inv( u16 x ) +{ + u16 t0, t1; + u16 q, y; + + if( x < 2 ) + return x; + t1 = 0x10001UL / x; + y = 0x10001UL % x; + if( y == 1 ) + return (1-t1) & 0xffff; + + t0 = 1; + do { + q = x / y; + x = x % y; + t0 += q * t1; + if( x == 1 ) + return t0; + q = y / x; + y = y % x; + t1 += q * t0; + } while( y != 1 ); + return (1-t1) & 0xffff; +} + + + +static void +expand_key( const byte *userkey, u16 *ek ) +{ + int i,j; + + for(j=0; j < 8; j++ ) { + ek[j] = (*userkey << 8) + userkey[1]; + userkey += 2; + } + for(i=0; j < IDEA_KEYLEN; j++ ) { + i++; + ek[i+7] = ek[i&7] << 9 | ek[(i+1)&7] >> 7; + ek += i & 8; + i &= 7; + } +} + + +static void +invert_key( u16 *ek, u16 dk[IDEA_KEYLEN] ) +{ + int i; + u16 t1, t2, t3; + u16 temp[IDEA_KEYLEN]; + u16 *p = temp + IDEA_KEYLEN; + + t1 = mul_inv( *ek++ ); + t2 = -*ek++; + t3 = -*ek++; + *--p = mul_inv( *ek++ ); + *--p = t3; + *--p = t2; + *--p = t1; + + for(i=0; i < IDEA_ROUNDS-1; i++ ) { + t1 = *ek++; + *--p = *ek++; + *--p = t1; + + t1 = mul_inv( *ek++ ); + t2 = -*ek++; + t3 = -*ek++; + *--p = mul_inv( *ek++ ); + *--p = t2; + *--p = t3; + *--p = t1; + } + t1 = *ek++; + *--p = *ek++; + *--p = t1; + + t1 = mul_inv( *ek++ ); + t2 = -*ek++; + t3 = -*ek++; + *--p = mul_inv( *ek++ ); + *--p = t3; + *--p = t2; + *--p = t1; + memcpy(dk, temp, sizeof(temp) ); + wipememory(temp, sizeof(temp)); +} + + +static void +cipher( byte *outbuf, const byte *inbuf, u16 *key ) +{ + u16 s2, s3; + u16 in[4]; + int r = IDEA_ROUNDS; +#define x1 (in[0]) +#define x2 (in[1]) +#define x3 (in[2]) +#define x4 (in[3]) +#define MUL(x,y) \ + do {u16 _t16; u32 _t32; \ + if( (_t16 = (y)) ) { \ + if( (x = (x)&0xffff) ) { \ + _t32 = (u32)x * _t16; \ + x = _t32 & 0xffff; \ + _t16 = _t32 >> 16; \ + x = ((x)-_t16) + (x<_t16?1:0); \ + } \ + else { \ + x = 1 - _t16; \ + } \ + } \ + else { \ + x = 1 - x; \ + } \ + } while(0) + + memcpy (in, inbuf, sizeof in); +#ifndef WORDS_BIGENDIAN + x1 = (x1>>8) | (x1<<8); + x2 = (x2>>8) | (x2<<8); + x3 = (x3>>8) | (x3<<8); + x4 = (x4>>8) | (x4<<8); +#endif + do { + MUL(x1, *key++); + x2 += *key++; + x3 += *key++; + MUL(x4, *key++ ); + + s3 = x3; + x3 ^= x1; + MUL(x3, *key++); + s2 = x2; + x2 ^=x4; + x2 += x3; + MUL(x2, *key++); + x3 += x2; + + x1 ^= x2; + x4 ^= x3; + + x2 ^= s3; + x3 ^= s2; + } while( --r ); + MUL(x1, *key++); + x3 += *key++; + x2 += *key++; + MUL(x4, *key); + +#ifndef WORDS_BIGENDIAN + x1 = (x1>>8) | (x1<<8); + x2 = (x2>>8) | (x2<<8); + x3 = (x3>>8) | (x3<<8); + x4 = (x4>>8) | (x4<<8); +#endif + memcpy (outbuf+0, &x1, 2); + memcpy (outbuf+2, &x3, 2); + memcpy (outbuf+4, &x2, 2); + memcpy (outbuf+6, &x4, 2); +#undef MUL +#undef x1 +#undef x2 +#undef x3 +#undef x4 +} + + +static int +do_setkey( IDEA_context *c, const byte *key, unsigned int keylen ) +{ + static int initialized = 0; + static const char *selftest_failed = 0; + + if( !initialized ) { + initialized = 1; + selftest_failed = selftest(); + if( selftest_failed ) + log_error( "%s\n", selftest_failed ); + } + if( selftest_failed ) + return GPG_ERR_SELFTEST_FAILED; + + assert(keylen == 16); + c->have_dk = 0; + expand_key( 
key, c->ek ); + invert_key( c->ek, c->dk ); + return 0; +} + +static gcry_err_code_t +idea_setkey (void *context, const byte *key, unsigned int keylen) +{ + IDEA_context *ctx = context; + int rc = do_setkey (ctx, key, keylen); + _gcry_burn_stack (23+6*sizeof(void*)); + return rc; +} + +static void +encrypt_block( IDEA_context *c, byte *outbuf, const byte *inbuf ) +{ + cipher( outbuf, inbuf, c->ek ); +} + +static unsigned int +idea_encrypt (void *context, byte *out, const byte *in) +{ + IDEA_context *ctx = context; + encrypt_block (ctx, out, in); + return /*burn_stack*/ (24+3*sizeof (void*)); +} + +static void +decrypt_block( IDEA_context *c, byte *outbuf, const byte *inbuf ) +{ + if( !c->have_dk ) { + c->have_dk = 1; + invert_key( c->ek, c->dk ); + } + cipher( outbuf, inbuf, c->dk ); +} + +static unsigned int +idea_decrypt (void *context, byte *out, const byte *in) +{ + IDEA_context *ctx = context; + decrypt_block (ctx, out, in); + return /*burn_stack*/ (24+3*sizeof (void*)); +} + + +static const char * +selftest( void ) +{ +static struct { + byte key[16]; + byte plain[8]; + byte cipher[8]; +} test_vectors[] = { + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03 }, + { 0x11, 0xFB, 0xED, 0x2B, 0x01, 0x98, 0x6D, 0xE5 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, + { 0x54, 0x0E, 0x5F, 0xEA, 0x18, 0xC2, 0xF8, 0xB1 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x00, 0x19, 0x32, 0x4B, 0x64, 0x7D, 0x96, 0xAF }, + { 0x9F, 0x0A, 0x0A, 0xB6, 0xE1, 0x0C, 0xED, 0x78 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0xF5, 0x20, 0x2D, 0x5B, 0x9C, 0x67, 0x1B, 0x08 }, + { 0xCF, 0x18, 0xFD, 0x73, 0x55, 0xE2, 0xC5, 0xC5 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0xFA, 0xE6, 0xD2, 0xBE, 0xAA, 0x96, 0x82, 0x6E }, + { 0x85, 0xDF, 0x52, 0x00, 0x56, 0x08, 0x19, 0x3D } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x0A, 0x14, 0x1E, 0x28, 0x32, 0x3C, 0x46, 0x50 }, + { 0x2F, 0x7D, 0xE7, 0x50, 0x21, 0x2F, 0xB7, 0x34 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x05, 0x0A, 0x0F, 0x14, 0x19, 0x1E, 0x23, 0x28 }, + { 0x7B, 0x73, 0x14, 0x92, 0x5D, 0xE5, 0x9C, 0x09 } }, + { { 0x00, 0x05, 0x00, 0x0A, 0x00, 0x0F, 0x00, 0x14, + 0x00, 0x19, 0x00, 0x1E, 0x00, 0x23, 0x00, 0x28 }, + { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, + { 0x3E, 0xC0, 0x47, 0x80, 0xBE, 0xFF, 0x6E, 0x20 } }, + { { 0x3A, 0x98, 0x4E, 0x20, 0x00, 0x19, 0x5D, 0xB3, + 0x2E, 0xE5, 0x01, 0xC8, 0xC4, 0x7C, 0xEA, 0x60 }, + { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, + { 0x97, 0xBC, 0xD8, 0x20, 0x07, 0x80, 0xDA, 0x86 } }, + { { 0x00, 0x64, 0x00, 0xC8, 0x01, 0x2C, 0x01, 0x90, + 0x01, 0xF4, 0x02, 0x58, 0x02, 0xBC, 0x03, 0x20 }, + { 0x05, 0x32, 0x0A, 0x64, 0x14, 0xC8, 0x19, 0xFA }, + { 0x65, 0xBE, 0x87, 0xE7, 0xA2, 0x53, 0x8A, 0xED } }, + { { 0x9D, 0x40, 0x75, 0xC1, 0x03, 0xBC, 0x32, 0x2A, + 0xFB, 0x03, 0xE7, 0xBE, 0x6A, 0xB3, 0x00, 0x06 }, + { 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 }, + { 0xF5, 0xDB, 0x1A, 0xC4, 0x5E, 0x5E, 0xF9, 0xF9 } } +}; + IDEA_context c; + byte buffer[8]; + int i; 
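+
+    /* Run each test vector: encrypt the plaintext and decrypt the
+       ciphertext, comparing both results with the expected values.  */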
+    for(i=0; i < DIM(test_vectors); i++ ) {
+	do_setkey( &c, test_vectors[i].key, 16 );
+	encrypt_block( &c, buffer, test_vectors[i].plain );
+	if( memcmp( buffer, test_vectors[i].cipher, 8 ) )
+	    return "IDEA test encryption failed.";
+	decrypt_block( &c, buffer, test_vectors[i].cipher );
+	if( memcmp( buffer, test_vectors[i].plain, 8 ) )
+	    return "IDEA test decryption failed.";
+    }
+
+    return NULL;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_idea =
+  {
+    GCRY_CIPHER_IDEA, {0, 0},
+    "IDEA", NULL, NULL, IDEA_BLOCKSIZE, 128,
+    sizeof (IDEA_context),
+    idea_setkey, idea_encrypt, idea_decrypt
+  };
diff --git a/libotr/libgcrypt-1.8.7/cipher/kdf-internal.h b/libotr/libgcrypt-1.8.7/cipher/kdf-internal.h
new file mode 100644
index 0000000..7079860
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/kdf-internal.h
@@ -0,0 +1,40 @@
+/* kdf-internal.h - Internal defs for kdf.c
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_KDF_INTERNAL_H
+#define GCRY_KDF_INTERNAL_H
+
+/*-- kdf.c --*/
+gpg_err_code_t
+_gcry_kdf_pkdf2 (const void *passphrase, size_t passphraselen,
+                 int hashalgo,
+                 const void *salt, size_t saltlen,
+                 unsigned long iterations,
+                 size_t keysize, void *keybuffer);
+
+/*-- scrypt.c --*/
+gcry_err_code_t
+_gcry_kdf_scrypt (const unsigned char *passwd, size_t passwdlen,
+                  int algo, int subalgo,
+                  const unsigned char *salt, size_t saltlen,
+                  unsigned long iterations,
+                  size_t dklen, unsigned char *dk);
+
+
+#endif /*GCRY_KDF_INTERNAL_H*/
diff --git a/libotr/libgcrypt-1.8.7/cipher/kdf.c b/libotr/libgcrypt-1.8.7/cipher/kdf.c
new file mode 100644
index 0000000..27f5789
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/kdf.c
@@ -0,0 +1,307 @@
+/* kdf.c - Key Derivation Functions
+ * Copyright (C) 1998, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "kdf-internal.h"
+
+
+/* Transform a passphrase into a suitable key of length KEYSIZE and
+   store this key in the caller-provided buffer KEYBUFFER.  The caller
+   must provide a HASHALGO, a valid ALGO and, depending on that algo,
+   a SALT of 8 bytes and the number of ITERATIONS.  Code taken from
+   gnupg/agent/protect.c:hash_passphrase.  */
+static gpg_err_code_t
+openpgp_s2k (const void *passphrase, size_t passphraselen,
+             int algo, int hashalgo,
+             const void *salt, size_t saltlen,
+             unsigned long iterations,
+             size_t keysize, void *keybuffer)
+{
+  gpg_err_code_t ec;
+  gcry_md_hd_t md;
+  char *key = keybuffer;
+  int pass, i;
+  int used = 0;
+  int secmode;
+
+  if ((algo == GCRY_KDF_SALTED_S2K || algo == GCRY_KDF_ITERSALTED_S2K)
+      && (!salt || saltlen != 8))
+    return GPG_ERR_INV_VALUE;
+
+  secmode = _gcry_is_secure (passphrase) || _gcry_is_secure (keybuffer);
+
+  ec = _gcry_md_open (&md, hashalgo, secmode? GCRY_MD_FLAG_SECURE : 0);
+  if (ec)
+    return ec;
+
+  for (pass=0; used < keysize; pass++)
+    {
+      if (pass)
+        {
+          _gcry_md_reset (md);
+          for (i=0; i < pass; i++) /* Preset the hash context.  */
+            _gcry_md_putc (md, 0);
+        }
+
+      if (algo == GCRY_KDF_SALTED_S2K || algo == GCRY_KDF_ITERSALTED_S2K)
+        {
+          int len2 = passphraselen + 8;
+          unsigned long count = len2;
+
+          if (algo == GCRY_KDF_ITERSALTED_S2K)
+            {
+              count = iterations;
+              if (count < len2)
+                count = len2;
+            }
+
+          while (count > len2)
+            {
+              _gcry_md_write (md, salt, saltlen);
+              _gcry_md_write (md, passphrase, passphraselen);
+              count -= len2;
+            }
+          if (count < saltlen)
+            _gcry_md_write (md, salt, count);
+          else
+            {
+              _gcry_md_write (md, salt, saltlen);
+              count -= saltlen;
+              _gcry_md_write (md, passphrase, count);
+            }
+        }
+      else
+        _gcry_md_write (md, passphrase, passphraselen);
+
+      _gcry_md_final (md);
+      i = _gcry_md_get_algo_dlen (hashalgo);
+      if (i > keysize - used)
+        i = keysize - used;
+      memcpy (key+used, _gcry_md_read (md, hashalgo), i);
+      used += i;
+    }
+  _gcry_md_close (md);
+  return 0;
+}
+
+
+/* Transform a passphrase into a suitable key of length KEYSIZE and
+   store this key in the caller-provided buffer KEYBUFFER.  The caller
+   must provide HASHALGO which indicates the pseudorandom function to
+   use: this shall be the algorithm id of a hash algorithm; it is
+   used in HMAC mode.  SALT is a salt of length SALTLEN and ITERATIONS
+   gives the number of iterations.  */
+gpg_err_code_t
+_gcry_kdf_pkdf2 (const void *passphrase, size_t passphraselen,
+                 int hashalgo,
+                 const void *salt, size_t saltlen,
+                 unsigned long iterations,
+                 size_t keysize, void *keybuffer)
+{
+  gpg_err_code_t ec;
+  gcry_md_hd_t md;
+  int secmode;
+  unsigned long dklen = keysize;
+  char *dk = keybuffer;
+  unsigned int hlen;   /* Output length of the digest function.  */
+  unsigned int l;      /* Rounded up number of blocks.  */
+  unsigned int r;      /* Number of octets in the last block.  */
+  char *sbuf;          /* Malloced buffer to concatenate salt and iter
+                          as well as space to hold TBUF and UBUF.  */
+  char *tbuf;          /* Buffer for T; ptr into SBUF, size is HLEN.  */
+  char *ubuf;          /* Buffer for U; ptr into SBUF, size is HLEN.  */
+  unsigned int lidx;   /* Current block number.  */
+  unsigned long iter;  /* Current iteration number.  */
+  unsigned int i;
+
+  /* We allow for a saltlen of 0 here to support scrypt.  It is not
+     clear whether rfc2898 allows for this, thus we do a test on
+     saltlen > 0 only in gcry_kdf_derive.  */
+  if (!salt || !iterations || !dklen)
+    return GPG_ERR_INV_VALUE;
+
+  hlen = _gcry_md_get_algo_dlen (hashalgo);
+  if (!hlen)
+    return GPG_ERR_DIGEST_ALGO;
+
+  secmode = _gcry_is_secure (passphrase) || _gcry_is_secure (keybuffer);
+
+  /* Step 1 */
+  /* If dkLen > (2^32 - 1) * hLen, output "derived key too long" and
+   * stop.
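+   * (The computation, per RFC 2898 section 5.2, is
+   *    T_i = U_1 XOR U_2 XOR ... XOR U_c
+   * with U_1 = PRF (P, S || INT (i)) and U_j = PRF (P, U_{j-1});
+   * the loops below implement exactly these steps.)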
+   * We use a stronger inequality, but only if our type can hold
+   * a larger value.  */
+
+#if SIZEOF_UNSIGNED_LONG > 4
+  if (dklen > 0xffffffffU)
+    return GPG_ERR_INV_VALUE;
+#endif
+
+
+  /* Step 2 */
+  l = ((dklen - 1)/ hlen) + 1;
+  r = dklen - (l - 1) * hlen;
+
+  /* Set up buffers and prepare a hash context.  */
+  sbuf = (secmode
+          ? xtrymalloc_secure (saltlen + 4 + hlen + hlen)
+          : xtrymalloc (saltlen + 4 + hlen + hlen));
+  if (!sbuf)
+    return gpg_err_code_from_syserror ();
+  tbuf = sbuf + saltlen + 4;
+  ubuf = tbuf + hlen;
+
+  ec = _gcry_md_open (&md, hashalgo, (GCRY_MD_FLAG_HMAC
+                                      | (secmode?GCRY_MD_FLAG_SECURE:0)));
+  if (ec)
+    {
+      xfree (sbuf);
+      return ec;
+    }
+
+  ec = _gcry_md_setkey (md, passphrase, passphraselen);
+  if (ec)
+    {
+      _gcry_md_close (md);
+      xfree (sbuf);
+      return ec;
+    }
+
+  /* Steps 3 and 4.  */
+  memcpy (sbuf, salt, saltlen);
+  for (lidx = 1; lidx <= l; lidx++)
+    {
+      for (iter = 0; iter < iterations; iter++)
+        {
+          _gcry_md_reset (md);
+          if (!iter) /* Compute U_1:  */
+            {
+              sbuf[saltlen] = (lidx >> 24);
+              sbuf[saltlen + 1] = (lidx >> 16);
+              sbuf[saltlen + 2] = (lidx >> 8);
+              sbuf[saltlen + 3] = lidx;
+              _gcry_md_write (md, sbuf, saltlen + 4);
+              memcpy (ubuf, _gcry_md_read (md, 0), hlen);
+              memcpy (tbuf, ubuf, hlen);
+            }
+          else /* Compute U_(2..c):  */
+            {
+              _gcry_md_write (md, ubuf, hlen);
+              memcpy (ubuf, _gcry_md_read (md, 0), hlen);
+              for (i=0; i < hlen; i++)
+                tbuf[i] ^= ubuf[i];
+            }
+        }
+      if (lidx == l) /* Last block.  */
+        memcpy (dk, tbuf, r);
+      else
+        {
+          memcpy (dk, tbuf, hlen);
+          dk += hlen;
+        }
+    }
+
+  _gcry_md_close (md);
+  xfree (sbuf);
+  return 0;
+}
+
+
+/* Derive a key from a passphrase.  KEYSIZE gives the requested size
+   of the key in octets.  KEYBUFFER is a caller-provided buffer
+   filled on success with the derived key.  The input passphrase is
+   taken from (PASSPHRASE,PASSPHRASELEN) which is an arbitrary memory
+   buffer.  ALGO specifies the KDF algorithm to use; these are the
+   constants GCRY_KDF_*.  SUBALGO specifies an algorithm used
+   internally by the KDF algorithms; this is usually a hash algorithm
+   but certain KDF algorithms may use it differently.  {SALT,SALTLEN}
+   is a salt as needed by most KDF algorithms.  ITERATIONS is a
+   positive integer parameter to most KDFs.  0 is returned on success,
+   or an error code on failure.
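+
+   A minimal usage sketch through the public wrapper gcry_kdf_derive;
+   the passphrase, salt, iteration count and 16-byte key size are
+   illustrative values only:
+
+     char key[16];
+     gpg_error_t err;
+
+     err = gcry_kdf_derive ("passphrase", 10, GCRY_KDF_PBKDF2,
+                            GCRY_MD_SHA256, "NaCl", 4, 4096,
+                            sizeof key, key);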
*/ +gpg_err_code_t +_gcry_kdf_derive (const void *passphrase, size_t passphraselen, + int algo, int subalgo, + const void *salt, size_t saltlen, + unsigned long iterations, + size_t keysize, void *keybuffer) +{ + gpg_err_code_t ec; + + if (!passphrase) + { + ec = GPG_ERR_INV_DATA; + goto leave; + } + + if (!keybuffer || !keysize) + { + ec = GPG_ERR_INV_VALUE; + goto leave; + } + + + switch (algo) + { + case GCRY_KDF_SIMPLE_S2K: + case GCRY_KDF_SALTED_S2K: + case GCRY_KDF_ITERSALTED_S2K: + if (!passphraselen) + ec = GPG_ERR_INV_DATA; + else + ec = openpgp_s2k (passphrase, passphraselen, algo, subalgo, + salt, saltlen, iterations, keysize, keybuffer); + break; + + case GCRY_KDF_PBKDF1: + ec = GPG_ERR_UNSUPPORTED_ALGORITHM; + break; + + case GCRY_KDF_PBKDF2: + if (!saltlen) + ec = GPG_ERR_INV_VALUE; + else + ec = _gcry_kdf_pkdf2 (passphrase, passphraselen, subalgo, + salt, saltlen, iterations, keysize, keybuffer); + break; + + case 41: + case GCRY_KDF_SCRYPT: +#if USE_SCRYPT + ec = _gcry_kdf_scrypt (passphrase, passphraselen, algo, subalgo, + salt, saltlen, iterations, keysize, keybuffer); +#else + ec = GPG_ERR_UNSUPPORTED_ALGORITHM; +#endif /*USE_SCRYPT*/ + break; + + default: + ec = GPG_ERR_UNKNOWN_ALGORITHM; + break; + } + + leave: + return ec; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/keccak-armv7-neon.S b/libotr/libgcrypt-1.8.7/cipher/keccak-armv7-neon.S new file mode 100644 index 0000000..0bec8d5 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/keccak-armv7-neon.S @@ -0,0 +1,945 @@ +/* keccak-armv7-neon.S - ARMv7/NEON implementation of Keccak + * + * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) + +/* Based on public-domain/CC0 implementation from SUPERCOP package + * (keccakc1024/inplace-armv7a-neon/keccak2.s) + * + * Original copyright header follows: + */ + +@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +@ Michaël Peeters and Gilles Van Assche. For more information, feedback or +@ questions, please refer to our website: http://keccak.noekeon.org/ +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. 
+@ http://creativecommons.org/publicdomain/zero/1.0/ + +.text + +.syntax unified +.fpu neon +.arm + + +.extern _gcry_keccak_round_consts_64bit; + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +@// --- offsets in state +.equ Aba, 0*8 +.equ Aga, 1*8 +.equ Aka, 2*8 +.equ Ama, 3*8 +.equ Asa, 4*8 + +@// --- macros + +.macro KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5 + + @Prepare Theta + @Ca = Aba^Aga^Aka^Ama^Asa@ + @Ce = Abe^Age^Ake^Ame^Ase@ + @Ci = Abi^Agi^Aki^Ami^Asi@ + @Co = Abo^Ago^Ako^Amo^Aso@ + @Cu = Abu^Agu^Aku^Amu^Asu@ + @De = Ca^ROL64(Ci, 1)@ + @Di = Ce^ROL64(Co, 1)@ + @Do = Ci^ROL64(Cu, 1)@ + @Du = Co^ROL64(Ca, 1)@ + @Da = Cu^ROL64(Ce, 1)@ + + veor.64 q4, q6, q7 + veor.64 q5, q9, q10 + veor.64 d8, d8, d9 + veor.64 d10, d10, d11 + veor.64 d1, d8, d16 + veor.64 d2, d10, d17 + + veor.64 q4, q11, q12 + veor.64 q5, q14, q15 + veor.64 d8, d8, d9 + veor.64 d10, d10, d11 + veor.64 d3, d8, d26 + + vadd.u64 q4, q1, q1 + veor.64 d4, d10, d27 + vmov.64 d0, d5 + vsri.64 q4, q1, #63 + + vadd.u64 q5, q2, q2 + veor.64 q4, q4, q0 + vsri.64 q5, q2, #63 + vadd.u64 d7, d1, d1 + veor.64 \argA2, \argA2, d8 + veor.64 q5, q5, q1 + + vsri.64 d7, d1, #63 + vshl.u64 d1, \argA2, #44 + veor.64 \argA3, \argA3, d9 + veor.64 d7, d7, d4 + + @Ba = argA1^Da@ + @Be = ROL64((argA2^De), 44)@ + @Bi = ROL64((argA3^Di), 43)@ + @Bo = ROL64((argA4^Do), 21)@ + @Bu = ROL64((argA5^Du), 14)@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + @argA1 = Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@ + vsri.64 d1, \argA2, #64-44 + vshl.u64 d2, \argA3, #43 + vldr.64 d0, [sp, #\argA1] + veor.64 \argA4, \argA4, d10 + vsri.64 d2, \argA3, #64-43 + vshl.u64 d3, \argA4, #21 + veor.64 \argA5, \argA5, d11 + veor.64 d0, d0, d7 + vsri.64 d3, \argA4, #64-21 + vbic.64 d5, d2, d1 + vshl.u64 d4, \argA5, #14 + vbic.64 \argA2, d3, d2 + vld1.64 d6, [ip]! 
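+	@ ip walks the _gcry_keccak_round_consts_64bit table; the
+	@ constant just loaded into d6 is XORed into the new \argA1
+	@ lane below -- the iota step of this round.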
+ veor.64 d5, d0 + vsri.64 d4, \argA5, #64-14 + veor.64 d5, d6 + vbic.64 \argA5, d1, d0 + vbic.64 \argA3, d4, d3 + vbic.64 \argA4, d0, d4 + veor.64 \argA2, d1 + vstr.64 d5, [sp, #\argA1] + veor.64 \argA3, d2 + veor.64 \argA4, d3 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5 + + @d2 = ROL64((argA1^Da), 3)@ + @d3 = ROL64((argA2^De), 45)@ + @d4 = ROL64((argA3^Di), 61)@ + @d0 = ROL64((argA4^Do), 28)@ + @d1 = ROL64((argA5^Du), 20)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d3, \argA2, #45 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d4, \argA3, #61 + veor.64 \argA4, \argA4, d10 + vsri.64 d3, \argA2, #64-45 + veor.64 \argA5, \argA5, d11 + vsri.64 d4, \argA3, #64-61 + vshl.u64 d0, \argA4, #28 + veor.64 d6, d6, d7 + vshl.u64 d1, \argA5, #20 + vbic.64 \argA3, d4, d3 + vsri.64 d0, \argA4, #64-28 + vbic.64 \argA4, d0, d4 + vshl.u64 d2, d6, #3 + vsri.64 d1, \argA5, #64-20 + veor.64 \argA4, d3 + vsri.64 d2, d6, #64-3 + vbic.64 \argA5, d1, d0 + vbic.64 d6, d2, d1 + vbic.64 \argA2, d3, d2 + veor.64 d6, d0 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 \argA3, d2 + veor.64 d5, d6 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5 + + @d4 = ROL64((argA1^Da), 18)@ + @d0 = ROL64((argA2^De), 1)@ + @d1 = ROL64((argA3^Di), 6)@ + @d2 = ROL64((argA4^Do), 25)@ + @d3 = ROL64((argA5^Du), 8)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA3, \argA3, d9 + veor.64 \argA4, \argA4, d10 + vshl.u64 d1, \argA3, #6 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d2, \argA4, #25 + veor.64 \argA5, \argA5, d11 + vsri.64 d1, \argA3, #64-6 + veor.64 \argA2, \argA2, d8 + vsri.64 d2, \argA4, #64-25 + vext.8 d3, \argA5, \argA5, #7 + veor.64 d6, d6, d7 + vbic.64 \argA3, d2, d1 + vadd.u64 d0, \argA2, \argA2 + vbic.64 \argA4, d3, d2 + vsri.64 d0, \argA2, #64-1 + vshl.u64 d4, d6, #18 + veor.64 \argA2, d1, \argA4 + veor.64 \argA3, d0 + vsri.64 d4, d6, #64-18 + vstr.64 \argA3, [sp, #\argA1] + veor.64 d5, \argA3 + vbic.64 \argA5, d1, d0 + vbic.64 \argA3, d4, d3 + vbic.64 \argA4, d0, d4 + veor.64 \argA3, d2 + veor.64 \argA4, d3 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5 + + @d1 = ROL64((argA1^Da), 36)@ + @d2 = ROL64((argA2^De), 10)@ + @d3 = ROL64((argA3^Di), 15)@ + @d4 = ROL64((argA4^Do), 56)@ + @d0 = ROL64((argA5^Du), 27)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d2, \argA2, #10 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d3, \argA3, #15 + veor.64 \argA4, \argA4, d10 + vsri.64 d2, \argA2, #64-10 + vsri.64 d3, \argA3, #64-15 + veor.64 \argA5, \argA5, d11 + vext.8 d4, \argA4, \argA4, #1 + vbic.64 \argA2, d3, d2 + vshl.u64 d0, \argA5, #27 + veor.64 d6, d6, d7 + vbic.64 \argA3, d4, d3 + vsri.64 d0, \argA5, #64-27 + vshl.u64 d1, d6, #36 + veor.64 \argA3, d2 + vbic.64 \argA4, d0, d4 + vsri.64 d1, d6, #64-36 + + veor.64 \argA4, d3 + vbic.64 d6, d2, d1 + vbic.64 \argA5, d1, d0 + veor.64 d6, d0 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 d5, d6 + veor.64 \argA5, d4 + + .endm + +.macro 
KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5 + + @d3 = ROL64((argA1^Da), 41)@ + @d4 = ROL64((argA2^De), 2)@ + @d0 = ROL64((argA3^Di), 62)@ + @d1 = ROL64((argA4^Do), 55)@ + @d2 = ROL64((argA5^Du), 39)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d4, \argA2, #2 + veor.64 \argA5, \argA5, d11 + vshl.u64 d0, \argA3, #62 + vldr.64 d6, [sp, #\argA1] + vsri.64 d4, \argA2, #64-2 + veor.64 \argA4, \argA4, d10 + vsri.64 d0, \argA3, #64-62 + + vshl.u64 d1, \argA4, #55 + veor.64 d6, d6, d7 + vshl.u64 d2, \argA5, #39 + vsri.64 d1, \argA4, #64-55 + vbic.64 \argA4, d0, d4 + vsri.64 d2, \argA5, #64-39 + vbic.64 \argA2, d1, d0 + vshl.u64 d3, d6, #41 + veor.64 \argA5, d4, \argA2 + vbic.64 \argA2, d2, d1 + vsri.64 d3, d6, #64-41 + veor.64 d6, d0, \argA2 + + vbic.64 \argA2, d3, d2 + vbic.64 \argA3, d4, d3 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 d5, d6 + veor.64 \argA3, d2 + veor.64 \argA4, d3 + + .endm + + +@// --- code + +@not callable from C! +.p2align 3 +.type KeccakF_armv7a_neon_asm,%function; +KeccakF_armv7a_neon_asm: @ + +.LroundLoop: + + KeccakThetaRhoPiChiIota Aba, d13, d19, d25, d31 + KeccakThetaRhoPiChi1 Aka, d15, d21, d22, d28 + KeccakThetaRhoPiChi2 Asa, d12, d18, d24, d30 + KeccakThetaRhoPiChi3 Aga, d14, d20, d26, d27 + KeccakThetaRhoPiChi4 Ama, d16, d17, d23, d29 + + KeccakThetaRhoPiChiIota Aba, d15, d18, d26, d29 + KeccakThetaRhoPiChi1 Asa, d14, d17, d25, d28 + KeccakThetaRhoPiChi2 Ama, d13, d21, d24, d27 + KeccakThetaRhoPiChi3 Aka, d12, d20, d23, d31 + KeccakThetaRhoPiChi4 Aga, d16, d19, d22, d30 + + KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30 + KeccakThetaRhoPiChi1 Ama, d12, d19, d26, d28 + KeccakThetaRhoPiChi2 Aga, d15, d17, d24, d31 + KeccakThetaRhoPiChi3 Asa, d13, d20, d22, d29 + KeccakThetaRhoPiChi4 Aka, d16, d18, d25, d27 + + KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27 + KeccakThetaRhoPiChi1 Aga, d13, d18, d23, d28 + KeccakThetaRhoPiChi2 Aka, d14, d19, d24, d29 + ldr r0, [ip] + KeccakThetaRhoPiChi3 Ama, d15, d20, d25, d30 + cmp r0, #0xFFFFFFFF + KeccakThetaRhoPiChi4 Asa, d16, d21, d26, d31 + + bne .LroundLoop + sub ip, #(8*24) + bx lr +.p2align 2 +.ltorg +.size KeccakF_armv7a_neon_asm,.-KeccakF_armv7a_neon_asm; + + +@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state) callable from C +.p2align 3 +.global _gcry_keccak_permute_armv7_neon +.type _gcry_keccak_permute_armv7_neon,%function; +_gcry_keccak_permute_armv7_neon: + + push {ip, lr} + vpush {q4-q7} + sub sp,sp, #5*8 + + vldr.64 d0, [r0, #0*8] + vldr.64 d12, [r0, #1*8] + vldr.64 d17, [r0, #2*8] + vldr.64 d22, [r0, #3*8] + vldr.64 d27, [r0, #4*8] + + GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); + + vldr.64 d1, [r0, #5*8] + vldr.64 d13, [r0, #6*8] + vldr.64 d18, [r0, #7*8] + vldr.64 d23, [r0, #8*8] + vldr.64 d28, [r0, #9*8] + + vldr.64 d2, [r0, #10*8] + vldr.64 d14, [r0, #11*8] + vldr.64 d19, [r0, #12*8] + vldr.64 d24, [r0, #13*8] + vldr.64 d29, [r0, #14*8] + + vldr.64 d3, [r0, #15*8] + vldr.64 d15, [r0, #16*8] + vldr.64 d20, [r0, #17*8] + vldr.64 d25, [r0, #18*8] + vldr.64 d30, [r0, #19*8] + + vldr.64 d4, [r0, #20*8] + vldr.64 d16, [r0, #21*8] + vldr.64 d21, [r0, #22*8] + vldr.64 d26, [r0, #23*8] + vldr.64 d31, [r0, #24*8] + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + mov r1, r0 + vstr.64 d4, [sp, #Asa] + 
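+	@ d5 now holds Aba^Aga^Aka^Ama (from the veor chain above); the
+	@ next veor folds in Asa, giving the column parity Ca needed by
+	@ the first theta step.  The five A?a lanes live on the stack
+	@ because the 25-lane state plus temporaries does not fit into
+	@ the 32 NEON d-registers.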
veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + vpop.64 { d0- d4 } + + vstr.64 d0, [r1, #0*8] + vstr.64 d12, [r1, #1*8] + vstr.64 d17, [r1, #2*8] + vstr.64 d22, [r1, #3*8] + vstr.64 d27, [r1, #4*8] + + vstr.64 d1, [r1, #5*8] + vstr.64 d13, [r1, #6*8] + vstr.64 d18, [r1, #7*8] + vstr.64 d23, [r1, #8*8] + vstr.64 d28, [r1, #9*8] + + vstr.64 d2, [r1, #10*8] + vstr.64 d14, [r1, #11*8] + vstr.64 d19, [r1, #12*8] + vstr.64 d24, [r1, #13*8] + vstr.64 d29, [r1, #14*8] + + vstr.64 d3, [r1, #15*8] + vstr.64 d15, [r1, #16*8] + vstr.64 d20, [r1, #17*8] + vstr.64 d25, [r1, #18*8] + vstr.64 d30, [r1, #19*8] + + vstr.64 d4, [r1, #20*8] + vstr.64 d16, [r1, #21*8] + vstr.64 d21, [r1, #22*8] + vstr.64 d26, [r1, #23*8] + vstr.64 d31, [r1, #24*8] + + mov r0, #112 + vpop {q4-q7} + pop {ip, pc} +.p2align 2 +.ltorg +.size _gcry_keccak_permute_armv7_neon,.-_gcry_keccak_permute_armv7_neon; + +@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state, @r4 +@ int pos, @r1 +@ const byte *lanes, @r2 +@ unsigned int nlanes, @r3 +@ int blocklanes) @ r5 callable from C +.p2align 3 +.global _gcry_keccak_absorb_lanes64_armv7_neon +.type _gcry_keccak_absorb_lanes64_armv7_neon,%function; +_gcry_keccak_absorb_lanes64_armv7_neon: + + cmp r3, #0 @ nlanes == 0 + itt eq + moveq r0, #0 + bxeq lr + + push {r4-r5, ip, lr} + beq .Lout + mov r4, r0 + ldr r5, [sp, #(4*4)] + vpush {q4-q7} + + @ load state + vldr.64 d0, [r4, #0*8] + vldr.64 d12, [r4, #1*8] + vldr.64 d17, [r4, #2*8] + vldr.64 d22, [r4, #3*8] + vldr.64 d27, [r4, #4*8] + + GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); + + vldr.64 d1, [r4, #5*8] + vldr.64 d13, [r4, #6*8] + vldr.64 d18, [r4, #7*8] + vldr.64 d23, [r4, #8*8] + vldr.64 d28, [r4, #9*8] + + vldr.64 d2, [r4, #10*8] + vldr.64 d14, [r4, #11*8] + vldr.64 d19, [r4, #12*8] + vldr.64 d24, [r4, #13*8] + vldr.64 d29, [r4, #14*8] + + vldr.64 d3, [r4, #15*8] + vldr.64 d15, [r4, #16*8] + vldr.64 d20, [r4, #17*8] + vldr.64 d25, [r4, #18*8] + vldr.64 d30, [r4, #19*8] + + vldr.64 d4, [r4, #20*8] + vldr.64 d16, [r4, #21*8] + vldr.64 d21, [r4, #22*8] + vldr.64 d26, [r4, #23*8] + vldr.64 d31, [r4, #24*8] + +.Lmain_loop: + + @ detect absorb mode (full blocks vs lanes) + + cmp r1, #0 @ pos != 0 + bne .Llanes_loop + +.Lmain_loop_pos0: + + @ full blocks mode + + @ switch (blocksize) + cmp r5, #21 + beq .Lfull_block_21 + cmp r5, #18 + beq .Lfull_block_18 + cmp r5, #17 + beq .Lfull_block_17 + cmp r5, #13 + beq .Lfull_block_13 + cmp r5, #9 + beq .Lfull_block_9 + + @ unknown blocksize + b .Llanes_loop + +.Lfull_block_21: + + @ SHAKE128 + + cmp r3, #21 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d8}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + vld1.64 {d9-d11}, [r2]! + veor d29, d5 + + veor d3, d6 + veor d15, d7 + veor d20, d8 + veor d25, d9 + veor d30, d10 + + veor d4, d11 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #21 @ nlanes -= 21 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_21 + +.Lfull_block_18: + + @ SHA3-224 + + cmp r3, #18 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! 
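+	@ One SHA3-224 block is 18 lanes (144 bytes); the lanes loaded
+	@ here are XORed into the state below before each permutation.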
+ veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d8}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + veor d29, d5 + + veor d3, d6 + veor d15, d7 + veor d20, d8 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #18 @ nlanes -= 18 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_18 + +.Lfull_block_17: + + @ SHA3-256 & SHAKE256 + + cmp r3, #17 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d7}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + veor d29, d5 + + veor d3, d6 + veor d15, d7 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #17 @ nlanes -= 17 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_17 + +.Lfull_block_13: + + @ SHA3-384 + + cmp r3, #13 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d10}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + veor d14, d9 + veor d19, d10 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #13 @ nlanes -= 13 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_13 + +.Lfull_block_9: + + @ SHA3-512 + + cmp r3, #9 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d6}, [r2]! 
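+	@ SHA3-512 has the smallest rate of the SHA3 fixed-output
+	@ modes: 9 lanes (72 bytes) absorbed per permutation, leaving
+	@ a 1024-bit capacity.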
+ veor d27, d9 + + veor d1, d10 + veor d13, d11 + veor d18, d5 + veor d23, d6 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #9 @ nlanes -= 9 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_9 + +.Llanes_loop: + + @ per-lane mode + + @ switch (pos) + ldrb r0, [pc, r1] + add pc, pc, r0, lsl #2 +.Lswitch_table: + .byte (.Llane0-.Lswitch_table-4)/4 + .byte (.Llane1-.Lswitch_table-4)/4 + .byte (.Llane2-.Lswitch_table-4)/4 + .byte (.Llane3-.Lswitch_table-4)/4 + .byte (.Llane4-.Lswitch_table-4)/4 + .byte (.Llane5-.Lswitch_table-4)/4 + .byte (.Llane6-.Lswitch_table-4)/4 + .byte (.Llane7-.Lswitch_table-4)/4 + .byte (.Llane8-.Lswitch_table-4)/4 + .byte (.Llane9-.Lswitch_table-4)/4 + .byte (.Llane10-.Lswitch_table-4)/4 + .byte (.Llane11-.Lswitch_table-4)/4 + .byte (.Llane12-.Lswitch_table-4)/4 + .byte (.Llane13-.Lswitch_table-4)/4 + .byte (.Llane14-.Lswitch_table-4)/4 + .byte (.Llane15-.Lswitch_table-4)/4 + .byte (.Llane16-.Lswitch_table-4)/4 + .byte (.Llane17-.Lswitch_table-4)/4 + .byte (.Llane18-.Lswitch_table-4)/4 + .byte (.Llane19-.Lswitch_table-4)/4 + .byte (.Llane20-.Lswitch_table-4)/4 + .byte (.Llane21-.Lswitch_table-4)/4 + .byte (.Llane22-.Lswitch_table-4)/4 + .byte (.Llane23-.Lswitch_table-4)/4 + .byte (.Llane24-.Lswitch_table-4)/4 +.p2align 2 + +#define ABSORB_LANE(label, vreg) \ + label: \ + add r1, #1; \ + vld1.64 d5, [r2]!; \ + cmp r1, r5; /* pos == blocklanes */ \ + veor vreg, vreg, d5; \ + beq .Llanes_permute; \ + subs r3, #1; \ + beq .Ldone; + + ABSORB_LANE(.Llane0, d0) + ABSORB_LANE(.Llane1, d12) + ABSORB_LANE(.Llane2, d17) + ABSORB_LANE(.Llane3, d22) + ABSORB_LANE(.Llane4, d27) + + ABSORB_LANE(.Llane5, d1) + ABSORB_LANE(.Llane6, d13) + ABSORB_LANE(.Llane7, d18) + ABSORB_LANE(.Llane8, d23) + ABSORB_LANE(.Llane9, d28) + + ABSORB_LANE(.Llane10, d2) + ABSORB_LANE(.Llane11, d14) + ABSORB_LANE(.Llane12, d19) + ABSORB_LANE(.Llane13, d24) + ABSORB_LANE(.Llane14, d29) + + ABSORB_LANE(.Llane15, d3) + ABSORB_LANE(.Llane16, d15) + ABSORB_LANE(.Llane17, d20) + ABSORB_LANE(.Llane18, d25) + ABSORB_LANE(.Llane19, d30) + + ABSORB_LANE(.Llane20, d4) + ABSORB_LANE(.Llane21, d16) + ABSORB_LANE(.Llane22, d21) + ABSORB_LANE(.Llane23, d26) + ABSORB_LANE(.Llane24, d31) + + b .Llanes_loop + +.Llanes_permute: + + sub sp,sp, #5*8 + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + mov r1, #0 @ pos <= 0 + subs r3, #1 + + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lmain_loop_pos0 + +.Ldone: + + @ save state + vstr.64 d0, [r4, #0*8] + vstr.64 d12, [r4, #1*8] + vstr.64 d17, [r4, #2*8] + vstr.64 d22, [r4, #3*8] + vstr.64 d27, [r4, #4*8] + + vstr.64 d1, [r4, #5*8] + vstr.64 d13, [r4, #6*8] + vstr.64 d18, [r4, #7*8] + vstr.64 d23, [r4, #8*8] + vstr.64 d28, [r4, #9*8] + + vstr.64 d2, [r4, #10*8] + vstr.64 d14, [r4, #11*8] + vstr.64 d19, [r4, #12*8] + vstr.64 d24, [r4, #13*8] + vstr.64 d29, [r4, #14*8] + + vstr.64 d3, [r4, #15*8] + vstr.64 d15, [r4, #16*8] + vstr.64 d20, [r4, #17*8] + vstr.64 d25, [r4, #18*8] + vstr.64 d30, [r4, #19*8] + + vstr.64 d4, [r4, #20*8] + vstr.64 d16, [r4, #21*8] + vstr.64 d21, [r4, #22*8] + vstr.64 d26, [r4, #23*8] + vstr.64 d31, [r4, #24*8] + + mov r0, #120 + vpop {q4-q7} +.Lout: + pop {r4-r5, ip, pc} +.p2align 2 +.ltorg +.size 
_gcry_keccak_absorb_lanes64_armv7_neon,.-_gcry_keccak_absorb_lanes64_armv7_neon; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/keccak.c b/libotr/libgcrypt-1.8.7/cipher/keccak.c new file mode 100644 index 0000000..0bb3155 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/keccak.c @@ -0,0 +1,1266 @@ +/* keccak.c - SHA3 hash functions + * Copyright (C) 2015 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + +#include <config.h> +#include <string.h> +#include "g10lib.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher.h" +#include "hash-common.h" + + + +/* USE_64BIT indicates whether to use 64-bit generic implementation. + * USE_32BIT indicates whether to use 32-bit generic implementation. */ +#undef USE_64BIT +#if defined(__x86_64__) || SIZEOF_UNSIGNED_LONG == 8 +# define USE_64BIT 1 +#else +# define USE_32BIT 1 +#endif + + +/* USE_64BIT_BMI2 indicates whether to compile with 64-bit Intel BMI2 code. */ +#undef USE_64BIT_BMI2 +#if defined(USE_64BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) +# define USE_64BIT_BMI2 1 +#endif + + +/* USE_64BIT_SHLD indicates whether to compile with 64-bit Intel SHLD code. */ +#undef USE_64BIT_SHLD +#if defined(USE_64BIT) && defined (__GNUC__) && defined(__x86_64__) +# define USE_64BIT_SHLD 1 +#endif + + +/* USE_32BIT_BMI2 indicates whether to compile with 32-bit Intel BMI2 code. */ +#undef USE_32BIT_BMI2 +#if defined(USE_32BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) +# define USE_32BIT_BMI2 1 +#endif + + +/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly + * code. 
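+ * These conditions mirror the guards at the top of
+ * keccak-armv7-neon.S, so the C glue and the assembly cannot be
+ * enabled inconsistently.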
*/ +#undef USE_64BIT_ARM_NEON +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_64BIT_ARM_NEON 1 +# endif +#endif /*ENABLE_NEON_SUPPORT*/ + + +#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON) +# define NEED_COMMON64 1 +#endif + +#ifdef USE_32BIT +# define NEED_COMMON32BI 1 +#endif + + +#define SHA3_DELIMITED_SUFFIX 0x06 +#define SHAKE_DELIMITED_SUFFIX 0x1F + + +typedef struct +{ + union { +#ifdef NEED_COMMON64 + u64 state64[25]; +#endif +#ifdef NEED_COMMON32BI + u32 state32bi[50]; +#endif + } u; +} KECCAK_STATE; + + +typedef struct +{ + unsigned int (*permute)(KECCAK_STATE *hd); + unsigned int (*absorb)(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes); + unsigned int (*extract) (KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen); +} keccak_ops_t; + + +typedef struct KECCAK_CONTEXT_S +{ + KECCAK_STATE state; + unsigned int outlen; + unsigned int blocksize; + unsigned int count; + unsigned int suffix; + const keccak_ops_t *ops; +} KECCAK_CONTEXT; + + + +#ifdef NEED_COMMON64 + +const u64 _gcry_keccak_round_consts_64bit[24 + 1] = +{ + U64_C(0x0000000000000001), U64_C(0x0000000000008082), + U64_C(0x800000000000808A), U64_C(0x8000000080008000), + U64_C(0x000000000000808B), U64_C(0x0000000080000001), + U64_C(0x8000000080008081), U64_C(0x8000000000008009), + U64_C(0x000000000000008A), U64_C(0x0000000000000088), + U64_C(0x0000000080008009), U64_C(0x000000008000000A), + U64_C(0x000000008000808B), U64_C(0x800000000000008B), + U64_C(0x8000000000008089), U64_C(0x8000000000008003), + U64_C(0x8000000000008002), U64_C(0x8000000000000080), + U64_C(0x000000000000800A), U64_C(0x800000008000000A), + U64_C(0x8000000080008081), U64_C(0x8000000000008080), + U64_C(0x0000000080000001), U64_C(0x8000000080008008), + U64_C(0xFFFFFFFFFFFFFFFF) +}; + +static unsigned int +keccak_extract64(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) +{ + unsigned int i; + + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) + { + u64 tmp = hd->u.state64[i]; + buf_put_le64(outbuf, tmp); + outbuf += 8; + } + + return 0; +} + +#endif /* NEED_COMMON64 */ + + +#ifdef NEED_COMMON32BI + +static const u32 round_consts_32bit[2 * 24] = +{ + 0x00000001UL, 0x00000000UL, 0x00000000UL, 0x00000089UL, + 0x00000000UL, 0x8000008bUL, 0x00000000UL, 0x80008080UL, + 0x00000001UL, 0x0000008bUL, 0x00000001UL, 0x00008000UL, + 0x00000001UL, 0x80008088UL, 0x00000001UL, 0x80000082UL, + 0x00000000UL, 0x0000000bUL, 0x00000000UL, 0x0000000aUL, + 0x00000001UL, 0x00008082UL, 0x00000000UL, 0x00008003UL, + 0x00000001UL, 0x0000808bUL, 0x00000001UL, 0x8000000bUL, + 0x00000001UL, 0x8000008aUL, 0x00000001UL, 0x80000081UL, + 0x00000000UL, 0x80000081UL, 0x00000000UL, 0x80000008UL, + 0x00000000UL, 0x00000083UL, 0x00000000UL, 0x80008003UL, + 0x00000001UL, 0x80008088UL, 0x00000000UL, 0x80000088UL, + 0x00000001UL, 0x00008000UL, 0x00000000UL, 0x80008082UL +}; + +static unsigned int +keccak_extract32bi(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) +{ + unsigned int i; + u32 x0; + u32 x1; + u32 t; + + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). 
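+ *
+ * The 32-bit code keeps every 64-bit lane bit-interleaved: the even
+ * bits of a lane live in one u32 and the odd bits in the other, so a
+ * 64-bit rotation decomposes into two independent 32-bit rotations.
+ * The shift/mask cascade below undoes that shuffle when extracting.
+ * A plain, non-optimized sketch of the forward interleave of a
+ * 64-bit value lane, for reference only:
+ *
+ *   u32 even = 0, odd = 0;
+ *   unsigned int i;
+ *   for (i = 0; i < 32; i++)
+ *     {
+ *       even |= (u32)((lane >> (2 * i)) & 1) << i;
+ *       odd  |= (u32)((lane >> (2 * i + 1)) & 1) << i;
+ *     }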
*/ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) + { + x0 = hd->u.state32bi[i * 2 + 0]; + x1 = hd->u.state32bi[i * 2 + 1]; + + t = (x0 & 0x0000FFFFUL) + (x1 << 16); + x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL); + x0 = t; + t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); + t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); + t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); + t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); + t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); + t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); + t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); + t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); + + buf_put_le32(&outbuf[0], x0); + buf_put_le32(&outbuf[4], x1); + outbuf += 8; + } + + return 0; +} + +static inline void +keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1) +{ + u32 t; + + t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); + t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); + t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); + t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); + t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); + t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); + t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); + t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); + lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16); + lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL); +} + +#endif /* NEED_COMMON32BI */ + + +/* Construct generic 64-bit implementation. */ +#ifdef USE_64BIT + +#if __GNUC__ >= 4 && defined(__x86_64__) + +static inline void absorb_lanes64_8(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "movdqu 1*16(%[dst]), %%xmm1\n\t" + "movdqu 1*16(%[in]), %%xmm5\n\t" + "movdqu 2*16(%[dst]), %%xmm2\n\t" + "movdqu 3*16(%[dst]), %%xmm3\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu 2*16(%[in]), %%xmm4\n\t" + "movdqu 3*16(%[in]), %%xmm5\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + "pxor %%xmm4, %%xmm2\n\t" + "movdqu %%xmm1, 1*16(%[dst])\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm2, 2*16(%[dst])\n\t" + "movdqu %%xmm3, 3*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); +} + +static inline void absorb_lanes64_4(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "movdqu 1*16(%[dst]), %%xmm1\n\t" + "movdqu 1*16(%[in]), %%xmm5\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + "movdqu %%xmm1, 1*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm1", "xmm4", "xmm5", "memory"); +} + +static inline void absorb_lanes64_2(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm4", "memory"); +} + +#else /* __x86_64__ */ + +static inline void absorb_lanes64_8(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); + dst[2] ^= buf_get_le64(in + 8 * 2); + dst[3] ^= buf_get_le64(in + 8 * 3); + dst[4] ^= buf_get_le64(in + 8 * 4); + dst[5] ^= buf_get_le64(in + 8 * 5); + dst[6] ^= buf_get_le64(in + 8 * 6); + dst[7] ^= buf_get_le64(in + 8 * 7); +} + +static inline void absorb_lanes64_4(u64 *dst, 
const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); + dst[2] ^= buf_get_le64(in + 8 * 2); + dst[3] ^= buf_get_le64(in + 8 * 3); +} + +static inline void absorb_lanes64_2(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); +} + +#endif /* !__x86_64__ */ + +static inline void absorb_lanes64_1(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); +} + + +# define ANDN64(x, y) (~(x) & (y)) +# define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \ + ((x) >> ((64 - (unsigned int)(n)) & 63))) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64 +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64 +# include "keccak_permute_64.h" + +# undef ANDN64 +# undef ROL64 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME +# undef KECCAK_F1600_ABSORB_FUNC_NAME + +static const keccak_ops_t keccak_generic64_ops = +{ + .permute = keccak_f1600_state_permute64, + .absorb = keccak_absorb_lanes64, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT */ + + +/* Construct 64-bit Intel SHLD implementation. */ +#ifdef USE_64BIT_SHLD + +# define ANDN64(x, y) (~(x) & (y)) +# define ROL64(x, n) ({ \ + u64 tmp = (x); \ + asm ("shldq %1, %0, %0" \ + : "+r" (tmp) \ + : "J" ((n) & 63) \ + : "cc"); \ + tmp; }) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld +# include "keccak_permute_64.h" + +# undef ANDN64 +# undef ROL64 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME +# undef KECCAK_F1600_ABSORB_FUNC_NAME + +static const keccak_ops_t keccak_shld_64_ops = +{ + .permute = keccak_f1600_state_permute64_shld, + .absorb = keccak_absorb_lanes64_shld, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_SHLD */ + + +/* Construct 64-bit Intel BMI2 implementation. */ +#ifdef USE_64BIT_BMI2 + +# define ANDN64(x, y) ({ \ + u64 tmp; \ + asm ("andnq %2, %1, %0" \ + : "=r" (tmp) \ + : "r0" (x), "rm" (y)); \ + tmp; }) + +# define ROL64(x, n) ({ \ + u64 tmp; \ + asm ("rorxq %2, %1, %0" \ + : "=r" (tmp) \ + : "rm0" (x), "J" (64 - ((n) & 63))); \ + tmp; }) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2 +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2 +# include "keccak_permute_64.h" + +# undef ANDN64 +# undef ROL64 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME +# undef KECCAK_F1600_ABSORB_FUNC_NAME + +static const keccak_ops_t keccak_bmi2_64_ops = +{ + .permute = keccak_f1600_state_permute64_bmi2, + .absorb = keccak_absorb_lanes64_bmi2, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_BMI2 */ + + +/* 64-bit ARMv7/NEON implementation. */ +#ifdef USE_64BIT_ARM_NEON + +unsigned int _gcry_keccak_permute_armv7_neon(u64 *state); +unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos, + const byte *lanes, + unsigned int nlanes, + int blocklanes); + +static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd) +{ + return _gcry_keccak_permute_armv7_neon(hd->u.state64); +} + +static unsigned int +keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + if (blocklanes < 0) + { + /* blocklanes == -1, permutationless absorb from keccak_final. 
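+ *
+ * keccak_final absorbs the padding suffix and the closing 0x80 as
+ * single lanes and must not trigger the permutation in between,
+ * hence the plain XOR loop below instead of the assembly routine.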
*/ + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + } + + return 0; + } + else + { + return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes, + nlanes, blocklanes); + } +} + +static const keccak_ops_t keccak_armv7_neon_64_ops = +{ + .permute = keccak_permute64_armv7_neon, + .absorb = keccak_absorb_lanes64_armv7_neon, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_ARM_NEON */ + + +/* Construct generic 32-bit implementation. */ +#ifdef USE_32BIT + +# define ANDN32(x, y) (~(x) & (y)) +# define ROL32(x, n) (((x) << ((unsigned int)n & 31)) | \ + ((x) >> ((32 - (unsigned int)(n)) & 31))) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi +# include "keccak_permute_32.h" + +# undef ANDN32 +# undef ROL32 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME + +static unsigned int +keccak_absorb_lanes32bi(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + unsigned int burn = 0; + + while (nlanes) + { + keccak_absorb_lane32bi(&hd->u.state32bi[pos * 2], + buf_get_le32(lanes + 0), + buf_get_le32(lanes + 4)); + lanes += 8; + nlanes--; + + if (++pos == blocklanes) + { + burn = keccak_f1600_state_permute32bi(hd); + pos = 0; + } + } + + return burn; +} + +static const keccak_ops_t keccak_generic32bi_ops = +{ + .permute = keccak_f1600_state_permute32bi, + .absorb = keccak_absorb_lanes32bi, + .extract = keccak_extract32bi, +}; + +#endif /* USE_32BIT */ + + +/* Construct 32-bit Intel BMI2 implementation. */ +#ifdef USE_32BIT_BMI2 + +# define ANDN32(x, y) ({ \ + u32 tmp; \ + asm ("andnl %2, %1, %0" \ + : "=r" (tmp) \ + : "r0" (x), "rm" (y)); \ + tmp; }) + +# define ROL32(x, n) ({ \ + u32 tmp; \ + asm ("rorxl %2, %1, %0" \ + : "=r" (tmp) \ + : "rm0" (x), "J" (32 - ((n) & 31))); \ + tmp; }) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi_bmi2 +# include "keccak_permute_32.h" + +# undef ANDN32 +# undef ROL32 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME + +static inline u32 pext(u32 x, u32 mask) +{ + u32 tmp; + asm ("pextl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask)); + return tmp; +} + +static inline u32 pdep(u32 x, u32 mask) +{ + u32 tmp; + asm ("pdepl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask)); + return tmp; +} + +static inline void +keccak_absorb_lane32bi_bmi2(u32 *lane, u32 x0, u32 x1) +{ + x0 = pdep(pext(x0, 0x55555555), 0x0000ffff) | (pext(x0, 0xaaaaaaaa) << 16); + x1 = pdep(pext(x1, 0x55555555), 0x0000ffff) | (pext(x1, 0xaaaaaaaa) << 16); + + lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16); + lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL); +} + +static unsigned int +keccak_absorb_lanes32bi_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + unsigned int burn = 0; + + while (nlanes) + { + keccak_absorb_lane32bi_bmi2(&hd->u.state32bi[pos * 2], + buf_get_le32(lanes + 0), + buf_get_le32(lanes + 4)); + lanes += 8; + nlanes--; + + if (++pos == blocklanes) + { + burn = keccak_f1600_state_permute32bi_bmi2(hd); + pos = 0; + } + } + + return burn; +} + +static unsigned int +keccak_extract32bi_bmi2(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) +{ + unsigned int i; + u32 x0; + u32 x1; + u32 t; + + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). 
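+ *
+ * pext/pdep gather and scatter the interleaved bits in a single
+ * instruction each, replacing the xor/shift swap cascade used by
+ * the generic 32-bit extract above.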
*/ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) + { + x0 = hd->u.state32bi[i * 2 + 0]; + x1 = hd->u.state32bi[i * 2 + 1]; + + t = (x0 & 0x0000FFFFUL) + (x1 << 16); + x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL); + x0 = t; + + x0 = pdep(pext(x0, 0xffff0001), 0xaaaaaaab) | pdep(x0 >> 1, 0x55555554); + x1 = pdep(pext(x1, 0xffff0001), 0xaaaaaaab) | pdep(x1 >> 1, 0x55555554); + + buf_put_le32(&outbuf[0], x0); + buf_put_le32(&outbuf[4], x1); + outbuf += 8; + } + + return 0; +} + +static const keccak_ops_t keccak_bmi2_32bi_ops = +{ + .permute = keccak_f1600_state_permute32bi_bmi2, + .absorb = keccak_absorb_lanes32bi_bmi2, + .extract = keccak_extract32bi_bmi2, +}; + +#endif /* USE_32BIT */ + + +static void +keccak_write (void *context, const void *inbuf_arg, size_t inlen) +{ + KECCAK_CONTEXT *ctx = context; + const size_t bsize = ctx->blocksize; + const size_t blocklanes = bsize / 8; + const byte *inbuf = inbuf_arg; + unsigned int nburn, burn = 0; + unsigned int count, i; + unsigned int pos, nlanes; + + count = ctx->count; + + if (inlen && (count % 8)) + { + byte lane[8] = { 0, }; + + /* Complete absorbing partial input lane. */ + + pos = count / 8; + + for (i = count % 8; inlen && i < 8; i++) + { + lane[i] = *inbuf++; + inlen--; + count++; + } + + if (count == bsize) + count = 0; + + nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, + (count % 8) ? -1 : blocklanes); + burn = nburn > burn ? nburn : burn; + } + + /* Absorb full input lanes. */ + + pos = count / 8; + nlanes = inlen / 8; + if (nlanes > 0) + { + nburn = ctx->ops->absorb(&ctx->state, pos, inbuf, nlanes, blocklanes); + burn = nburn > burn ? nburn : burn; + inlen -= nlanes * 8; + inbuf += nlanes * 8; + count += nlanes * 8; + count = count % bsize; + } + + if (inlen) + { + byte lane[8] = { 0, }; + + /* Absorb remaining partial input lane. */ + + pos = count / 8; + + for (i = count % 8; inlen && i < 8; i++) + { + lane[i] = *inbuf++; + inlen--; + count++; + } + + nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, -1); + burn = nburn > burn ? nburn : burn; + + gcry_assert(count < bsize); + } + + ctx->count = count; + + if (burn) + _gcry_burn_stack (burn); +} + + +static void +keccak_init (int algo, void *context, unsigned int flags) +{ + KECCAK_CONTEXT *ctx = context; + KECCAK_STATE *hd = &ctx->state; + unsigned int features = _gcry_get_hw_features (); + + (void)flags; + (void)features; + + memset (hd, 0, sizeof *hd); + + ctx->count = 0; + + /* Select generic implementation. */ +#ifdef USE_64BIT + ctx->ops = &keccak_generic64_ops; +#elif defined USE_32BIT + ctx->ops = &keccak_generic32bi_ops; +#endif + + /* Select optimized implementation based in hw features. */ + if (0) {} +#ifdef USE_64BIT_ARM_NEON + else if (features & HWF_ARM_NEON) + ctx->ops = &keccak_armv7_neon_64_ops; +#endif +#ifdef USE_64BIT_BMI2 + else if (features & HWF_INTEL_BMI2) + ctx->ops = &keccak_bmi2_64_ops; +#endif +#ifdef USE_32BIT_BMI2 + else if (features & HWF_INTEL_BMI2) + ctx->ops = &keccak_bmi2_32bi_ops; +#endif +#ifdef USE_64BIT_SHLD + else if (features & HWF_INTEL_FAST_SHLD) + ctx->ops = &keccak_shld_64_ops; +#endif + + /* Set input block size, in Keccak terms this is called 'rate'. 
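+ *
+ * The rate r and capacity c always satisfy r + c = 1600.  The SHA3
+ * instances use c = 2 * digest bits, so e.g. SHA3-256 gets
+ * r = 1600 - 2*256 = 1088 bits = 136 bytes per block, matching the
+ * table below.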
*/ + + switch (algo) + { + case GCRY_MD_SHA3_224: + ctx->suffix = SHA3_DELIMITED_SUFFIX; + ctx->blocksize = 1152 / 8; + ctx->outlen = 224 / 8; + break; + case GCRY_MD_SHA3_256: + ctx->suffix = SHA3_DELIMITED_SUFFIX; + ctx->blocksize = 1088 / 8; + ctx->outlen = 256 / 8; + break; + case GCRY_MD_SHA3_384: + ctx->suffix = SHA3_DELIMITED_SUFFIX; + ctx->blocksize = 832 / 8; + ctx->outlen = 384 / 8; + break; + case GCRY_MD_SHA3_512: + ctx->suffix = SHA3_DELIMITED_SUFFIX; + ctx->blocksize = 576 / 8; + ctx->outlen = 512 / 8; + break; + case GCRY_MD_SHAKE128: + ctx->suffix = SHAKE_DELIMITED_SUFFIX; + ctx->blocksize = 1344 / 8; + ctx->outlen = 0; + break; + case GCRY_MD_SHAKE256: + ctx->suffix = SHAKE_DELIMITED_SUFFIX; + ctx->blocksize = 1088 / 8; + ctx->outlen = 0; + break; + default: + BUG(); + } +} + +static void +sha3_224_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHA3_224, context, flags); +} + +static void +sha3_256_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHA3_256, context, flags); +} + +static void +sha3_384_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHA3_384, context, flags); +} + +static void +sha3_512_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHA3_512, context, flags); +} + +static void +shake128_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHAKE128, context, flags); +} + +static void +shake256_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHAKE256, context, flags); +} + +/* The routine final terminates the computation and + * returns the digest. + * The handle is prepared for a new cycle, but adding bytes to the + * handle will the destroy the returned buffer. + * Returns: 64 bytes representing the digest. When used for sha384, + * we take the leftmost 48 of those bytes. + */ +static void +keccak_final (void *context) +{ + KECCAK_CONTEXT *ctx = context; + KECCAK_STATE *hd = &ctx->state; + const size_t bsize = ctx->blocksize; + const byte suffix = ctx->suffix; + unsigned int nburn, burn = 0; + unsigned int lastbytes; + byte lane[8]; + + lastbytes = ctx->count; + + /* Do the padding and switch to the squeezing phase */ + + /* Absorb the last few bits and add the first bit of padding (which + coincides with the delimiter in delimited suffix) */ + buf_put_le64(lane, (u64)suffix << ((lastbytes % 8) * 8)); + nburn = ctx->ops->absorb(&ctx->state, lastbytes / 8, lane, 1, -1); + burn = nburn > burn ? nburn : burn; + + /* Add the second bit of padding. */ + buf_put_le64(lane, (u64)0x80 << (((bsize - 1) % 8) * 8)); + nburn = ctx->ops->absorb(&ctx->state, (bsize - 1) / 8, lane, 1, -1); + burn = nburn > burn ? nburn : burn; + + if (suffix == SHA3_DELIMITED_SUFFIX) + { + /* Switch to the squeezing phase. */ + nburn = ctx->ops->permute(hd); + burn = nburn > burn ? nburn : burn; + + /* Squeeze out the SHA3 digest. */ + nburn = ctx->ops->extract(hd, 0, (void *)hd, ctx->outlen); + burn = nburn > burn ? nburn : burn; + } + else + { + /* Output for SHAKE can now be read with md_extract(). 
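+ *
+ * A minimal caller-side sketch using the public API as exported by
+ * libgcrypt 1.8:
+ *
+ *   gcry_md_hd_t hd;
+ *   unsigned char out[32];
+ *
+ *   gcry_md_open (&hd, GCRY_MD_SHAKE128, 0);
+ *   gcry_md_write (hd, "abc", 3);
+ *   gcry_md_extract (hd, GCRY_MD_SHAKE128, out, sizeof out);
+ *   gcry_md_close (hd);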
*/ + + ctx->count = 0; + } + + wipememory(lane, sizeof(lane)); + if (burn) + _gcry_burn_stack (burn); +} + + +static byte * +keccak_read (void *context) +{ + KECCAK_CONTEXT *ctx = (KECCAK_CONTEXT *) context; + KECCAK_STATE *hd = &ctx->state; + return (byte *)&hd->u; +} + + +static void +keccak_extract (void *context, void *out, size_t outlen) +{ + KECCAK_CONTEXT *ctx = context; + KECCAK_STATE *hd = &ctx->state; + const size_t bsize = ctx->blocksize; + unsigned int nburn, burn = 0; + byte *outbuf = out; + unsigned int nlanes; + unsigned int nleft; + unsigned int count; + unsigned int i; + byte lane[8]; + + count = ctx->count; + + while (count && outlen && (outlen < 8 || count % 8)) + { + /* Extract partial lane. */ + nburn = ctx->ops->extract(hd, count / 8, lane, 8); + burn = nburn > burn ? nburn : burn; + + for (i = count % 8; outlen && i < 8; i++) + { + *outbuf++ = lane[i]; + outlen--; + count++; + } + + gcry_assert(count <= bsize); + + if (count == bsize) + count = 0; + } + + if (outlen >= 8 && count) + { + /* Extract tail of partial block. */ + nlanes = outlen / 8; + nleft = (bsize - count) / 8; + nlanes = nlanes < nleft ? nlanes : nleft; + + nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); + burn = nburn > burn ? nburn : burn; + outlen -= nlanes * 8; + outbuf += nlanes * 8; + count += nlanes * 8; + + gcry_assert(count <= bsize); + + if (count == bsize) + count = 0; + } + + while (outlen >= bsize) + { + gcry_assert(count == 0); + + /* Squeeze more. */ + nburn = ctx->ops->permute(hd); + burn = nburn > burn ? nburn : burn; + + /* Extract full block. */ + nburn = ctx->ops->extract(hd, 0, outbuf, bsize); + burn = nburn > burn ? nburn : burn; + + outlen -= bsize; + outbuf += bsize; + } + + if (outlen) + { + gcry_assert(outlen < bsize); + + if (count == 0) + { + /* Squeeze more. */ + nburn = ctx->ops->permute(hd); + burn = nburn > burn ? nburn : burn; + } + + if (outlen >= 8) + { + /* Extract head of partial block. */ + nlanes = outlen / 8; + nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); + burn = nburn > burn ? nburn : burn; + outlen -= nlanes * 8; + outbuf += nlanes * 8; + count += nlanes * 8; + + gcry_assert(count < bsize); + } + + if (outlen) + { + /* Extract head of partial lane. */ + nburn = ctx->ops->extract(hd, count / 8, lane, 8); + burn = nburn > burn ? nburn : burn; + + for (i = count % 8; outlen && i < 8; i++) + { + *outbuf++ = lane[i]; + outlen--; + count++; + } + + gcry_assert(count < bsize); + } + } + + ctx->count = count; + + if (burn) + _gcry_burn_stack (burn); +} + + + +/* + Self-test section. 
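+
+   Each algorithm is checked against the standard "abc" test vector;
+   the extended self-tests additionally verify the 112-byte message
+   and the one-million-'a' vector.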
+ */ + + +static gpg_err_code_t +selftests_keccak (int algo, int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + const char *short_hash; + const char *long_hash; + const char *one_million_a_hash; + int hash_len; + + switch (algo) + { + default: + BUG(); + + case GCRY_MD_SHA3_224: + short_hash = + "\xe6\x42\x82\x4c\x3f\x8c\xf2\x4a\xd0\x92\x34\xee\x7d\x3c\x76\x6f" + "\xc9\xa3\xa5\x16\x8d\x0c\x94\xad\x73\xb4\x6f\xdf"; + long_hash = + "\x54\x3e\x68\x68\xe1\x66\x6c\x1a\x64\x36\x30\xdf\x77\x36\x7a\xe5" + "\xa6\x2a\x85\x07\x0a\x51\xc1\x4c\xbf\x66\x5c\xbc"; + one_million_a_hash = + "\xd6\x93\x35\xb9\x33\x25\x19\x2e\x51\x6a\x91\x2e\x6d\x19\xa1\x5c" + "\xb5\x1c\x6e\xd5\xc1\x52\x43\xe7\xa7\xfd\x65\x3c"; + hash_len = 28; + break; + + case GCRY_MD_SHA3_256: + short_hash = + "\x3a\x98\x5d\xa7\x4f\xe2\x25\xb2\x04\x5c\x17\x2d\x6b\xd3\x90\xbd" + "\x85\x5f\x08\x6e\x3e\x9d\x52\x5b\x46\xbf\xe2\x45\x11\x43\x15\x32"; + long_hash = + "\x91\x6f\x60\x61\xfe\x87\x97\x41\xca\x64\x69\xb4\x39\x71\xdf\xdb" + "\x28\xb1\xa3\x2d\xc3\x6c\xb3\x25\x4e\x81\x2b\xe2\x7a\xad\x1d\x18"; + one_million_a_hash = + "\x5c\x88\x75\xae\x47\x4a\x36\x34\xba\x4f\xd5\x5e\xc8\x5b\xff\xd6" + "\x61\xf3\x2a\xca\x75\xc6\xd6\x99\xd0\xcd\xcb\x6c\x11\x58\x91\xc1"; + hash_len = 32; + break; + + case GCRY_MD_SHA3_384: + short_hash = + "\xec\x01\x49\x82\x88\x51\x6f\xc9\x26\x45\x9f\x58\xe2\xc6\xad\x8d" + "\xf9\xb4\x73\xcb\x0f\xc0\x8c\x25\x96\xda\x7c\xf0\xe4\x9b\xe4\xb2" + "\x98\xd8\x8c\xea\x92\x7a\xc7\xf5\x39\xf1\xed\xf2\x28\x37\x6d\x25"; + long_hash = + "\x79\x40\x7d\x3b\x59\x16\xb5\x9c\x3e\x30\xb0\x98\x22\x97\x47\x91" + "\xc3\x13\xfb\x9e\xcc\x84\x9e\x40\x6f\x23\x59\x2d\x04\xf6\x25\xdc" + "\x8c\x70\x9b\x98\xb4\x3b\x38\x52\xb3\x37\x21\x61\x79\xaa\x7f\xc7"; + one_million_a_hash = + "\xee\xe9\xe2\x4d\x78\xc1\x85\x53\x37\x98\x34\x51\xdf\x97\xc8\xad" + "\x9e\xed\xf2\x56\xc6\x33\x4f\x8e\x94\x8d\x25\x2d\x5e\x0e\x76\x84" + "\x7a\xa0\x77\x4d\xdb\x90\xa8\x42\x19\x0d\x2c\x55\x8b\x4b\x83\x40"; + hash_len = 48; + break; + + case GCRY_MD_SHA3_512: + short_hash = + "\xb7\x51\x85\x0b\x1a\x57\x16\x8a\x56\x93\xcd\x92\x4b\x6b\x09\x6e" + "\x08\xf6\x21\x82\x74\x44\xf7\x0d\x88\x4f\x5d\x02\x40\xd2\x71\x2e" + "\x10\xe1\x16\xe9\x19\x2a\xf3\xc9\x1a\x7e\xc5\x76\x47\xe3\x93\x40" + "\x57\x34\x0b\x4c\xf4\x08\xd5\xa5\x65\x92\xf8\x27\x4e\xec\x53\xf0"; + long_hash = + "\xaf\xeb\xb2\xef\x54\x2e\x65\x79\xc5\x0c\xad\x06\xd2\xe5\x78\xf9" + "\xf8\xdd\x68\x81\xd7\xdc\x82\x4d\x26\x36\x0f\xee\xbf\x18\xa4\xfa" + "\x73\xe3\x26\x11\x22\x94\x8e\xfc\xfd\x49\x2e\x74\xe8\x2e\x21\x89" + "\xed\x0f\xb4\x40\xd1\x87\xf3\x82\x27\x0c\xb4\x55\xf2\x1d\xd1\x85"; + one_million_a_hash = + "\x3c\x3a\x87\x6d\xa1\x40\x34\xab\x60\x62\x7c\x07\x7b\xb9\x8f\x7e" + "\x12\x0a\x2a\x53\x70\x21\x2d\xff\xb3\x38\x5a\x18\xd4\xf3\x88\x59" + "\xed\x31\x1d\x0a\x9d\x51\x41\xce\x9c\xc5\xc6\x6e\xe6\x89\xb2\x66" + "\xa8\xaa\x18\xac\xe8\x28\x2a\x0e\x0d\xb5\x96\xc9\x0b\x0a\x7b\x87"; + hash_len = 64; + break; + + case GCRY_MD_SHAKE128: + short_hash = + "\x58\x81\x09\x2d\xd8\x18\xbf\x5c\xf8\xa3\xdd\xb7\x93\xfb\xcb\xa7" + "\x40\x97\xd5\xc5\x26\xa6\xd3\x5f\x97\xb8\x33\x51\x94\x0f\x2c\xc8"; + long_hash = + "\x7b\x6d\xf6\xff\x18\x11\x73\xb6\xd7\x89\x8d\x7f\xf6\x3f\xb0\x7b" + "\x7c\x23\x7d\xaf\x47\x1a\x5a\xe5\x60\x2a\xdb\xcc\xef\x9c\xcf\x4b"; + one_million_a_hash = + "\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11" + "\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58"; + hash_len = 32; + break; + + case GCRY_MD_SHAKE256: + short_hash = + 
"\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11\x4d" + "\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b\x57\x39"; + long_hash = + "\x98\xbe\x04\x51\x6c\x04\xcc\x73\x59\x3f\xef\x3e\xd0\x35\x2e\xa9" + "\xf6\x44\x39\x42\xd6\x95\x0e\x29\xa3\x72\xa6\x81\xc3\xde\xaf\x45"; + one_million_a_hash = + "\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb" + "\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a"; + hash_len = 32; + break; + } + + what = "short string"; + errtxt = _gcry_hash_selftest_check_one (algo, 0, "abc", 3, short_hash, + hash_len); + if (errtxt) + goto failed; + + if (extended) + { + what = "long string"; + errtxt = _gcry_hash_selftest_check_one + (algo, 0, + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112, + long_hash, hash_len); + if (errtxt) + goto failed; + + what = "one million \"a\""; + errtxt = _gcry_hash_selftest_check_one (algo, 1, NULL, 0, + one_million_a_hash, hash_len); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. */ + +failed: + if (report) + report ("digest", algo, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_MD_SHA3_224: + case GCRY_MD_SHA3_256: + case GCRY_MD_SHA3_384: + case GCRY_MD_SHA3_512: + case GCRY_MD_SHAKE128: + case GCRY_MD_SHAKE256: + ec = selftests_keccak (algo, extended, report); + break; + default: + ec = GPG_ERR_DIGEST_ALGO; + break; + } + + return ec; +} + + + + +static byte sha3_224_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_sha3_224[] = + { + { "2.16.840.1.101.3.4.2.7" }, + /* PKCS#1 sha3_224WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte sha3_256_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_sha3_256[] = + { + { "2.16.840.1.101.3.4.2.8" }, + /* PKCS#1 sha3_256WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte sha3_384_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_sha3_384[] = + { + { "2.16.840.1.101.3.4.2.9" }, + /* PKCS#1 sha3_384WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte sha3_512_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_sha3_512[] = + { + { "2.16.840.1.101.3.4.2.10" }, + /* PKCS#1 sha3_512WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte shake128_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_shake128[] = + { + { "2.16.840.1.101.3.4.2.11" }, + /* PKCS#1 shake128WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte shake256_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_shake256[] = + { + { "2.16.840.1.101.3.4.2.12" }, + /* PKCS#1 shake256WithRSAEncryption */ + { "?" 
}, + { NULL } + }; + +gcry_md_spec_t _gcry_digest_spec_sha3_224 = + { + GCRY_MD_SHA3_224, {0, 1}, + "SHA3-224", sha3_224_asn, DIM (sha3_224_asn), oid_spec_sha3_224, 28, + sha3_224_init, keccak_write, keccak_final, keccak_read, NULL, + sizeof (KECCAK_CONTEXT), + run_selftests + }; +gcry_md_spec_t _gcry_digest_spec_sha3_256 = + { + GCRY_MD_SHA3_256, {0, 1}, + "SHA3-256", sha3_256_asn, DIM (sha3_256_asn), oid_spec_sha3_256, 32, + sha3_256_init, keccak_write, keccak_final, keccak_read, NULL, + sizeof (KECCAK_CONTEXT), + run_selftests + }; +gcry_md_spec_t _gcry_digest_spec_sha3_384 = + { + GCRY_MD_SHA3_384, {0, 1}, + "SHA3-384", sha3_384_asn, DIM (sha3_384_asn), oid_spec_sha3_384, 48, + sha3_384_init, keccak_write, keccak_final, keccak_read, NULL, + sizeof (KECCAK_CONTEXT), + run_selftests + }; +gcry_md_spec_t _gcry_digest_spec_sha3_512 = + { + GCRY_MD_SHA3_512, {0, 1}, + "SHA3-512", sha3_512_asn, DIM (sha3_512_asn), oid_spec_sha3_512, 64, + sha3_512_init, keccak_write, keccak_final, keccak_read, NULL, + sizeof (KECCAK_CONTEXT), + run_selftests + }; +gcry_md_spec_t _gcry_digest_spec_shake128 = + { + GCRY_MD_SHAKE128, {0, 1}, + "SHAKE128", shake128_asn, DIM (shake128_asn), oid_spec_shake128, 0, + shake128_init, keccak_write, keccak_final, NULL, keccak_extract, + sizeof (KECCAK_CONTEXT), + run_selftests + }; +gcry_md_spec_t _gcry_digest_spec_shake256 = + { + GCRY_MD_SHAKE256, {0, 1}, + "SHAKE256", shake256_asn, DIM (shake256_asn), oid_spec_shake256, 0, + shake256_init, keccak_write, keccak_final, NULL, keccak_extract, + sizeof (KECCAK_CONTEXT), + run_selftests + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/keccak_permute_32.h b/libotr/libgcrypt-1.8.7/cipher/keccak_permute_32.h new file mode 100644 index 0000000..1ce42a4 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/keccak_permute_32.h @@ -0,0 +1,536 @@ +/* keccak_permute_32.h - Keccak permute function (simple 32bit bit-interleaved) + * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* The code is based on public-domain/CC0 "keccakc1024/simple32bi/ + * Keccak-simple32BI.c" implementation by Ronny Van Keer from SUPERCOP toolkit + * package. + */ + +/* Function that computes the Keccak-f[1600] permutation on the given state. 
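+ *
+ * Two rounds are unrolled per loop iteration (A -> E, then E -> A),
+ * consuming four u32 round-constant halves per pass.  In this
+ * bit-interleaved form a 64-bit rotation by n turns into ROL32 by
+ * n/2 of both halves when n is even, and into ROL32 by (n+1)/2 and
+ * n/2 with the halves swapped when n is odd, which is why the
+ * rotation counts below come in such pairs.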
*/ +static unsigned int +KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) +{ + const u32 *round_consts = round_consts_32bit; + const u32 *round_consts_end = round_consts_32bit + 2 * 24; + u32 Aba0, Abe0, Abi0, Abo0, Abu0; + u32 Aba1, Abe1, Abi1, Abo1, Abu1; + u32 Aga0, Age0, Agi0, Ago0, Agu0; + u32 Aga1, Age1, Agi1, Ago1, Agu1; + u32 Aka0, Ake0, Aki0, Ako0, Aku0; + u32 Aka1, Ake1, Aki1, Ako1, Aku1; + u32 Ama0, Ame0, Ami0, Amo0, Amu0; + u32 Ama1, Ame1, Ami1, Amo1, Amu1; + u32 Asa0, Ase0, Asi0, Aso0, Asu0; + u32 Asa1, Ase1, Asi1, Aso1, Asu1; + u32 BCa0, BCe0, BCi0, BCo0, BCu0; + u32 BCa1, BCe1, BCi1, BCo1, BCu1; + u32 Da0, De0, Di0, Do0, Du0; + u32 Da1, De1, Di1, Do1, Du1; + u32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; + u32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; + u32 Ega0, Ege0, Egi0, Ego0, Egu0; + u32 Ega1, Ege1, Egi1, Ego1, Egu1; + u32 Eka0, Eke0, Eki0, Eko0, Eku0; + u32 Eka1, Eke1, Eki1, Eko1, Eku1; + u32 Ema0, Eme0, Emi0, Emo0, Emu0; + u32 Ema1, Eme1, Emi1, Emo1, Emu1; + u32 Esa0, Ese0, Esi0, Eso0, Esu0; + u32 Esa1, Ese1, Esi1, Eso1, Esu1; + u32 *state = hd->u.state32bi; + + Aba0 = state[0]; + Aba1 = state[1]; + Abe0 = state[2]; + Abe1 = state[3]; + Abi0 = state[4]; + Abi1 = state[5]; + Abo0 = state[6]; + Abo1 = state[7]; + Abu0 = state[8]; + Abu1 = state[9]; + Aga0 = state[10]; + Aga1 = state[11]; + Age0 = state[12]; + Age1 = state[13]; + Agi0 = state[14]; + Agi1 = state[15]; + Ago0 = state[16]; + Ago1 = state[17]; + Agu0 = state[18]; + Agu1 = state[19]; + Aka0 = state[20]; + Aka1 = state[21]; + Ake0 = state[22]; + Ake1 = state[23]; + Aki0 = state[24]; + Aki1 = state[25]; + Ako0 = state[26]; + Ako1 = state[27]; + Aku0 = state[28]; + Aku1 = state[29]; + Ama0 = state[30]; + Ama1 = state[31]; + Ame0 = state[32]; + Ame1 = state[33]; + Ami0 = state[34]; + Ami1 = state[35]; + Amo0 = state[36]; + Amo1 = state[37]; + Amu0 = state[38]; + Amu1 = state[39]; + Asa0 = state[40]; + Asa1 = state[41]; + Ase0 = state[42]; + Ase1 = state[43]; + Asi0 = state[44]; + Asi1 = state[45]; + Aso0 = state[46]; + Aso1 = state[47]; + Asu0 = state[48]; + Asu1 = state[49]; + + do + { + /* prepareTheta */ + BCa0 = Aba0 ^ Aga0 ^ Aka0 ^ Ama0 ^ Asa0; + BCa1 = Aba1 ^ Aga1 ^ Aka1 ^ Ama1 ^ Asa1; + BCe0 = Abe0 ^ Age0 ^ Ake0 ^ Ame0 ^ Ase0; + BCe1 = Abe1 ^ Age1 ^ Ake1 ^ Ame1 ^ Ase1; + BCi0 = Abi0 ^ Agi0 ^ Aki0 ^ Ami0 ^ Asi0; + BCi1 = Abi1 ^ Agi1 ^ Aki1 ^ Ami1 ^ Asi1; + BCo0 = Abo0 ^ Ago0 ^ Ako0 ^ Amo0 ^ Aso0; + BCo1 = Abo1 ^ Ago1 ^ Ako1 ^ Amo1 ^ Aso1; + BCu0 = Abu0 ^ Agu0 ^ Aku0 ^ Amu0 ^ Asu0; + BCu1 = Abu1 ^ Agu1 ^ Aku1 ^ Amu1 ^ Asu1; + + /* thetaRhoPiChiIota(round , A, E) */ + Da0 = BCu0 ^ ROL32(BCe1, 1); + Da1 = BCu1 ^ BCe0; + De0 = BCa0 ^ ROL32(BCi1, 1); + De1 = BCa1 ^ BCi0; + Di0 = BCe0 ^ ROL32(BCo1, 1); + Di1 = BCe1 ^ BCo0; + Do0 = BCi0 ^ ROL32(BCu1, 1); + Do1 = BCi1 ^ BCu0; + Du0 = BCo0 ^ ROL32(BCa1, 1); + Du1 = BCo1 ^ BCa0; + + Aba0 ^= Da0; + BCa0 = Aba0; + Age0 ^= De0; + BCe0 = ROL32(Age0, 22); + Aki1 ^= Di1; + BCi0 = ROL32(Aki1, 22); + Amo1 ^= Do1; + BCo0 = ROL32(Amo1, 11); + Asu0 ^= Du0; + BCu0 = ROL32(Asu0, 7); + Eba0 = BCa0 ^ ANDN32(BCe0, BCi0); + Eba0 ^= *(round_consts++); + Ebe0 = BCe0 ^ ANDN32(BCi0, BCo0); + Ebi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Ebo0 = BCo0 ^ ANDN32(BCu0, BCa0); + Ebu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Aba1 ^= Da1; + BCa1 = Aba1; + Age1 ^= De1; + BCe1 = ROL32(Age1, 22); + Aki0 ^= Di0; + BCi1 = ROL32(Aki0, 21); + Amo0 ^= Do0; + BCo1 = ROL32(Amo0, 10); + Asu1 ^= Du1; + BCu1 = ROL32(Asu1, 7); + Eba1 = BCa1 ^ ANDN32(BCe1, BCi1); + Eba1 ^= *(round_consts++); + Ebe1 = BCe1 ^ ANDN32(BCi1, BCo1); + Ebi1 = BCi1 ^ 
ANDN32(BCo1, BCu1); + Ebo1 = BCo1 ^ ANDN32(BCu1, BCa1); + Ebu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Abo0 ^= Do0; + BCa0 = ROL32(Abo0, 14); + Agu0 ^= Du0; + BCe0 = ROL32(Agu0, 10); + Aka1 ^= Da1; + BCi0 = ROL32(Aka1, 2); + Ame1 ^= De1; + BCo0 = ROL32(Ame1, 23); + Asi1 ^= Di1; + BCu0 = ROL32(Asi1, 31); + Ega0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ege0 = BCe0 ^ ANDN32(BCi0, BCo0); + Egi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Ego0 = BCo0 ^ ANDN32(BCu0, BCa0); + Egu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Abo1 ^= Do1; + BCa1 = ROL32(Abo1, 14); + Agu1 ^= Du1; + BCe1 = ROL32(Agu1, 10); + Aka0 ^= Da0; + BCi1 = ROL32(Aka0, 1); + Ame0 ^= De0; + BCo1 = ROL32(Ame0, 22); + Asi0 ^= Di0; + BCu1 = ROL32(Asi0, 30); + Ega1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ege1 = BCe1 ^ ANDN32(BCi1, BCo1); + Egi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Ego1 = BCo1 ^ ANDN32(BCu1, BCa1); + Egu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Abe1 ^= De1; + BCa0 = ROL32(Abe1, 1); + Agi0 ^= Di0; + BCe0 = ROL32(Agi0, 3); + Ako1 ^= Do1; + BCi0 = ROL32(Ako1, 13); + Amu0 ^= Du0; + BCo0 = ROL32(Amu0, 4); + Asa0 ^= Da0; + BCu0 = ROL32(Asa0, 9); + Eka0 = BCa0 ^ ANDN32(BCe0, BCi0); + Eke0 = BCe0 ^ ANDN32(BCi0, BCo0); + Eki0 = BCi0 ^ ANDN32(BCo0, BCu0); + Eko0 = BCo0 ^ ANDN32(BCu0, BCa0); + Eku0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Abe0 ^= De0; + BCa1 = Abe0; + Agi1 ^= Di1; + BCe1 = ROL32(Agi1, 3); + Ako0 ^= Do0; + BCi1 = ROL32(Ako0, 12); + Amu1 ^= Du1; + BCo1 = ROL32(Amu1, 4); + Asa1 ^= Da1; + BCu1 = ROL32(Asa1, 9); + Eka1 = BCa1 ^ ANDN32(BCe1, BCi1); + Eke1 = BCe1 ^ ANDN32(BCi1, BCo1); + Eki1 = BCi1 ^ ANDN32(BCo1, BCu1); + Eko1 = BCo1 ^ ANDN32(BCu1, BCa1); + Eku1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Abu1 ^= Du1; + BCa0 = ROL32(Abu1, 14); + Aga0 ^= Da0; + BCe0 = ROL32(Aga0, 18); + Ake0 ^= De0; + BCi0 = ROL32(Ake0, 5); + Ami1 ^= Di1; + BCo0 = ROL32(Ami1, 8); + Aso0 ^= Do0; + BCu0 = ROL32(Aso0, 28); + Ema0 = BCa0 ^ ANDN32(BCe0, BCi0); + Eme0 = BCe0 ^ ANDN32(BCi0, BCo0); + Emi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Emo0 = BCo0 ^ ANDN32(BCu0, BCa0); + Emu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Abu0 ^= Du0; + BCa1 = ROL32(Abu0, 13); + Aga1 ^= Da1; + BCe1 = ROL32(Aga1, 18); + Ake1 ^= De1; + BCi1 = ROL32(Ake1, 5); + Ami0 ^= Di0; + BCo1 = ROL32(Ami0, 7); + Aso1 ^= Do1; + BCu1 = ROL32(Aso1, 28); + Ema1 = BCa1 ^ ANDN32(BCe1, BCi1); + Eme1 = BCe1 ^ ANDN32(BCi1, BCo1); + Emi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Emo1 = BCo1 ^ ANDN32(BCu1, BCa1); + Emu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Abi0 ^= Di0; + BCa0 = ROL32(Abi0, 31); + Ago1 ^= Do1; + BCe0 = ROL32(Ago1, 28); + Aku1 ^= Du1; + BCi0 = ROL32(Aku1, 20); + Ama1 ^= Da1; + BCo0 = ROL32(Ama1, 21); + Ase0 ^= De0; + BCu0 = ROL32(Ase0, 1); + Esa0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ese0 = BCe0 ^ ANDN32(BCi0, BCo0); + Esi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Eso0 = BCo0 ^ ANDN32(BCu0, BCa0); + Esu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Abi1 ^= Di1; + BCa1 = ROL32(Abi1, 31); + Ago0 ^= Do0; + BCe1 = ROL32(Ago0, 27); + Aku0 ^= Du0; + BCi1 = ROL32(Aku0, 19); + Ama0 ^= Da0; + BCo1 = ROL32(Ama0, 20); + Ase1 ^= De1; + BCu1 = ROL32(Ase1, 1); + Esa1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ese1 = BCe1 ^ ANDN32(BCi1, BCo1); + Esi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Eso1 = BCo1 ^ ANDN32(BCu1, BCa1); + Esu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + /* prepareTheta */ + BCa0 = Eba0 ^ Ega0 ^ Eka0 ^ Ema0 ^ Esa0; + BCa1 = Eba1 ^ Ega1 ^ Eka1 ^ Ema1 ^ Esa1; + BCe0 = Ebe0 ^ Ege0 ^ Eke0 ^ Eme0 ^ Ese0; + BCe1 = Ebe1 ^ Ege1 ^ Eke1 ^ Eme1 ^ Ese1; + BCi0 = Ebi0 ^ Egi0 ^ Eki0 ^ Emi0 ^ Esi0; + BCi1 = Ebi1 ^ Egi1 ^ Eki1 ^ Emi1 ^ Esi1; + BCo0 = Ebo0 ^ Ego0 ^ Eko0 ^ Emo0 ^ Eso0; + BCo1 = Ebo1 ^ Ego1 ^ Eko1 ^ Emo1 ^ Eso1; + BCu0 = Ebu0 ^ Egu0 ^ 
Eku0 ^ Emu0 ^ Esu0; + BCu1 = Ebu1 ^ Egu1 ^ Eku1 ^ Emu1 ^ Esu1; + + /* thetaRhoPiChiIota(round+1, E, A) */ + Da0 = BCu0 ^ ROL32(BCe1, 1); + Da1 = BCu1 ^ BCe0; + De0 = BCa0 ^ ROL32(BCi1, 1); + De1 = BCa1 ^ BCi0; + Di0 = BCe0 ^ ROL32(BCo1, 1); + Di1 = BCe1 ^ BCo0; + Do0 = BCi0 ^ ROL32(BCu1, 1); + Do1 = BCi1 ^ BCu0; + Du0 = BCo0 ^ ROL32(BCa1, 1); + Du1 = BCo1 ^ BCa0; + + Eba0 ^= Da0; + BCa0 = Eba0; + Ege0 ^= De0; + BCe0 = ROL32(Ege0, 22); + Eki1 ^= Di1; + BCi0 = ROL32(Eki1, 22); + Emo1 ^= Do1; + BCo0 = ROL32(Emo1, 11); + Esu0 ^= Du0; + BCu0 = ROL32(Esu0, 7); + Aba0 = BCa0 ^ ANDN32(BCe0, BCi0); + Aba0 ^= *(round_consts++); + Abe0 = BCe0 ^ ANDN32(BCi0, BCo0); + Abi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Abo0 = BCo0 ^ ANDN32(BCu0, BCa0); + Abu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Eba1 ^= Da1; + BCa1 = Eba1; + Ege1 ^= De1; + BCe1 = ROL32(Ege1, 22); + Eki0 ^= Di0; + BCi1 = ROL32(Eki0, 21); + Emo0 ^= Do0; + BCo1 = ROL32(Emo0, 10); + Esu1 ^= Du1; + BCu1 = ROL32(Esu1, 7); + Aba1 = BCa1 ^ ANDN32(BCe1, BCi1); + Aba1 ^= *(round_consts++); + Abe1 = BCe1 ^ ANDN32(BCi1, BCo1); + Abi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Abo1 = BCo1 ^ ANDN32(BCu1, BCa1); + Abu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Ebo0 ^= Do0; + BCa0 = ROL32(Ebo0, 14); + Egu0 ^= Du0; + BCe0 = ROL32(Egu0, 10); + Eka1 ^= Da1; + BCi0 = ROL32(Eka1, 2); + Eme1 ^= De1; + BCo0 = ROL32(Eme1, 23); + Esi1 ^= Di1; + BCu0 = ROL32(Esi1, 31); + Aga0 = BCa0 ^ ANDN32(BCe0, BCi0); + Age0 = BCe0 ^ ANDN32(BCi0, BCo0); + Agi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Ago0 = BCo0 ^ ANDN32(BCu0, BCa0); + Agu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Ebo1 ^= Do1; + BCa1 = ROL32(Ebo1, 14); + Egu1 ^= Du1; + BCe1 = ROL32(Egu1, 10); + Eka0 ^= Da0; + BCi1 = ROL32(Eka0, 1); + Eme0 ^= De0; + BCo1 = ROL32(Eme0, 22); + Esi0 ^= Di0; + BCu1 = ROL32(Esi0, 30); + Aga1 = BCa1 ^ ANDN32(BCe1, BCi1); + Age1 = BCe1 ^ ANDN32(BCi1, BCo1); + Agi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Ago1 = BCo1 ^ ANDN32(BCu1, BCa1); + Agu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Ebe1 ^= De1; + BCa0 = ROL32(Ebe1, 1); + Egi0 ^= Di0; + BCe0 = ROL32(Egi0, 3); + Eko1 ^= Do1; + BCi0 = ROL32(Eko1, 13); + Emu0 ^= Du0; + BCo0 = ROL32(Emu0, 4); + Esa0 ^= Da0; + BCu0 = ROL32(Esa0, 9); + Aka0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ake0 = BCe0 ^ ANDN32(BCi0, BCo0); + Aki0 = BCi0 ^ ANDN32(BCo0, BCu0); + Ako0 = BCo0 ^ ANDN32(BCu0, BCa0); + Aku0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Ebe0 ^= De0; + BCa1 = Ebe0; + Egi1 ^= Di1; + BCe1 = ROL32(Egi1, 3); + Eko0 ^= Do0; + BCi1 = ROL32(Eko0, 12); + Emu1 ^= Du1; + BCo1 = ROL32(Emu1, 4); + Esa1 ^= Da1; + BCu1 = ROL32(Esa1, 9); + Aka1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ake1 = BCe1 ^ ANDN32(BCi1, BCo1); + Aki1 = BCi1 ^ ANDN32(BCo1, BCu1); + Ako1 = BCo1 ^ ANDN32(BCu1, BCa1); + Aku1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Ebu1 ^= Du1; + BCa0 = ROL32(Ebu1, 14); + Ega0 ^= Da0; + BCe0 = ROL32(Ega0, 18); + Eke0 ^= De0; + BCi0 = ROL32(Eke0, 5); + Emi1 ^= Di1; + BCo0 = ROL32(Emi1, 8); + Eso0 ^= Do0; + BCu0 = ROL32(Eso0, 28); + Ama0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ame0 = BCe0 ^ ANDN32(BCi0, BCo0); + Ami0 = BCi0 ^ ANDN32(BCo0, BCu0); + Amo0 = BCo0 ^ ANDN32(BCu0, BCa0); + Amu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Ebu0 ^= Du0; + BCa1 = ROL32(Ebu0, 13); + Ega1 ^= Da1; + BCe1 = ROL32(Ega1, 18); + Eke1 ^= De1; + BCi1 = ROL32(Eke1, 5); + Emi0 ^= Di0; + BCo1 = ROL32(Emi0, 7); + Eso1 ^= Do1; + BCu1 = ROL32(Eso1, 28); + Ama1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ame1 = BCe1 ^ ANDN32(BCi1, BCo1); + Ami1 = BCi1 ^ ANDN32(BCo1, BCu1); + Amo1 = BCo1 ^ ANDN32(BCu1, BCa1); + Amu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Ebi0 ^= Di0; + BCa0 = ROL32(Ebi0, 31); + Ego1 ^= Do1; + BCe0 = ROL32(Ego1, 
28); + Eku1 ^= Du1; + BCi0 = ROL32(Eku1, 20); + Ema1 ^= Da1; + BCo0 = ROL32(Ema1, 21); + Ese0 ^= De0; + BCu0 = ROL32(Ese0, 1); + Asa0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ase0 = BCe0 ^ ANDN32(BCi0, BCo0); + Asi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Aso0 = BCo0 ^ ANDN32(BCu0, BCa0); + Asu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Ebi1 ^= Di1; + BCa1 = ROL32(Ebi1, 31); + Ego0 ^= Do0; + BCe1 = ROL32(Ego0, 27); + Eku0 ^= Du0; + BCi1 = ROL32(Eku0, 19); + Ema0 ^= Da0; + BCo1 = ROL32(Ema0, 20); + Ese1 ^= De1; + BCu1 = ROL32(Ese1, 1); + Asa1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ase1 = BCe1 ^ ANDN32(BCi1, BCo1); + Asi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Aso1 = BCo1 ^ ANDN32(BCu1, BCa1); + Asu1 = BCu1 ^ ANDN32(BCa1, BCe1); + } + while (round_consts < round_consts_end); + + state[0] = Aba0; + state[1] = Aba1; + state[2] = Abe0; + state[3] = Abe1; + state[4] = Abi0; + state[5] = Abi1; + state[6] = Abo0; + state[7] = Abo1; + state[8] = Abu0; + state[9] = Abu1; + state[10] = Aga0; + state[11] = Aga1; + state[12] = Age0; + state[13] = Age1; + state[14] = Agi0; + state[15] = Agi1; + state[16] = Ago0; + state[17] = Ago1; + state[18] = Agu0; + state[19] = Agu1; + state[20] = Aka0; + state[21] = Aka1; + state[22] = Ake0; + state[23] = Ake1; + state[24] = Aki0; + state[25] = Aki1; + state[26] = Ako0; + state[27] = Ako1; + state[28] = Aku0; + state[29] = Aku1; + state[30] = Ama0; + state[31] = Ama1; + state[32] = Ame0; + state[33] = Ame1; + state[34] = Ami0; + state[35] = Ami1; + state[36] = Amo0; + state[37] = Amo1; + state[38] = Amu0; + state[39] = Amu1; + state[40] = Asa0; + state[41] = Asa1; + state[42] = Ase0; + state[43] = Ase1; + state[44] = Asi0; + state[45] = Asi1; + state[46] = Aso0; + state[47] = Aso1; + state[48] = Asu0; + state[49] = Asu1; + + return sizeof(void *) * 4 + sizeof(u32) * 12 * 5 * 2; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/keccak_permute_64.h b/libotr/libgcrypt-1.8.7/cipher/keccak_permute_64.h new file mode 100644 index 0000000..b28c871 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/keccak_permute_64.h @@ -0,0 +1,385 @@ +/* keccak_permute_64.h - Keccak permute function (simple 64bit) + * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* The code is based on public-domain/CC0 "keccakc1024/simple/Keccak-simple.c" + * implementation by Ronny Van Keer from SUPERCOP toolkit package. + */ + +/* Function that computes the Keccak-f[1600] permutation on the given state. 
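The 32-bit permutation above operates on bit-interleaved lanes: each 64-bit Keccak lane is stored as two u32 words, one holding the even-numbered bits and one the odd-numbered bits. A 64-bit rotation by 2k then costs two 32-bit rotations by k, while a rotation by 2k+1 also swaps the two halves; that is why the rho offsets above come in pairs such as 22/21 where the plain 64-bit code uses the single offset 43. A minimal sketch of the interleaving transform (an illustrative helper, not part of libgcrypt):

static void
lane_to_interleaved (u64 x, u32 *even, u32 *odd)
{
  u32 e = 0, o = 0;
  unsigned int i;

  /* Word 0 collects the even-numbered bits of the lane, word 1 the
     odd-numbered bits.  ROL64(x, 2k+1) then becomes ROL32(odd, k+1)
     for the new even word and ROL32(even, k) for the new odd word.  */
  for (i = 0; i < 32; i++)
    {
      e |= (u32)((x >> (2 * i)) & 1) << i;
      o |= (u32)((x >> (2 * i + 1)) & 1) << i;
    }
  *even = e;
  *odd = o;
}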
*/ +static unsigned int +KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) +{ + const u64 *round_consts = _gcry_keccak_round_consts_64bit; + const u64 *round_consts_end = _gcry_keccak_round_consts_64bit + 24; + u64 Aba, Abe, Abi, Abo, Abu; + u64 Aga, Age, Agi, Ago, Agu; + u64 Aka, Ake, Aki, Ako, Aku; + u64 Ama, Ame, Ami, Amo, Amu; + u64 Asa, Ase, Asi, Aso, Asu; + u64 BCa, BCe, BCi, BCo, BCu; + u64 Da, De, Di, Do, Du; + u64 Eba, Ebe, Ebi, Ebo, Ebu; + u64 Ega, Ege, Egi, Ego, Egu; + u64 Eka, Eke, Eki, Eko, Eku; + u64 Ema, Eme, Emi, Emo, Emu; + u64 Esa, Ese, Esi, Eso, Esu; + u64 *state = hd->u.state64; + + Aba = state[0]; + Abe = state[1]; + Abi = state[2]; + Abo = state[3]; + Abu = state[4]; + Aga = state[5]; + Age = state[6]; + Agi = state[7]; + Ago = state[8]; + Agu = state[9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + do + { + /* prepareTheta */ + BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; + BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; + BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; + BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; + BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; + + /* thetaRhoPiChiIotaPrepareTheta(round , A, E) */ + Da = BCu ^ ROL64(BCe, 1); + De = BCa ^ ROL64(BCi, 1); + Di = BCe ^ ROL64(BCo, 1); + Do = BCi ^ ROL64(BCu, 1); + Du = BCo ^ ROL64(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = ROL64(Age, 44); + Aki ^= Di; + BCi = ROL64(Aki, 43); + Amo ^= Do; + BCo = ROL64(Amo, 21); + Asu ^= Du; + BCu = ROL64(Asu, 14); + Eba = BCa ^ ANDN64(BCe, BCi); + Eba ^= *(round_consts++); + Ebe = BCe ^ ANDN64(BCi, BCo); + Ebi = BCi ^ ANDN64(BCo, BCu); + Ebo = BCo ^ ANDN64(BCu, BCa); + Ebu = BCu ^ ANDN64(BCa, BCe); + + Abo ^= Do; + BCa = ROL64(Abo, 28); + Agu ^= Du; + BCe = ROL64(Agu, 20); + Aka ^= Da; + BCi = ROL64(Aka, 3); + Ame ^= De; + BCo = ROL64(Ame, 45); + Asi ^= Di; + BCu = ROL64(Asi, 61); + Ega = BCa ^ ANDN64(BCe, BCi); + Ege = BCe ^ ANDN64(BCi, BCo); + Egi = BCi ^ ANDN64(BCo, BCu); + Ego = BCo ^ ANDN64(BCu, BCa); + Egu = BCu ^ ANDN64(BCa, BCe); + + Abe ^= De; + BCa = ROL64(Abe, 1); + Agi ^= Di; + BCe = ROL64(Agi, 6); + Ako ^= Do; + BCi = ROL64(Ako, 25); + Amu ^= Du; + BCo = ROL64(Amu, 8); + Asa ^= Da; + BCu = ROL64(Asa, 18); + Eka = BCa ^ ANDN64(BCe, BCi); + Eke = BCe ^ ANDN64(BCi, BCo); + Eki = BCi ^ ANDN64(BCo, BCu); + Eko = BCo ^ ANDN64(BCu, BCa); + Eku = BCu ^ ANDN64(BCa, BCe); + + Abu ^= Du; + BCa = ROL64(Abu, 27); + Aga ^= Da; + BCe = ROL64(Aga, 36); + Ake ^= De; + BCi = ROL64(Ake, 10); + Ami ^= Di; + BCo = ROL64(Ami, 15); + Aso ^= Do; + BCu = ROL64(Aso, 56); + Ema = BCa ^ ANDN64(BCe, BCi); + Eme = BCe ^ ANDN64(BCi, BCo); + Emi = BCi ^ ANDN64(BCo, BCu); + Emo = BCo ^ ANDN64(BCu, BCa); + Emu = BCu ^ ANDN64(BCa, BCe); + + Abi ^= Di; + BCa = ROL64(Abi, 62); + Ago ^= Do; + BCe = ROL64(Ago, 55); + Aku ^= Du; + BCi = ROL64(Aku, 39); + Ama ^= Da; + BCo = ROL64(Ama, 41); + Ase ^= De; + BCu = ROL64(Ase, 2); + Esa = BCa ^ ANDN64(BCe, BCi); + Ese = BCe ^ ANDN64(BCi, BCo); + Esi = BCi ^ ANDN64(BCo, BCu); + Eso = BCo ^ ANDN64(BCu, BCa); + Esu = BCu ^ ANDN64(BCa, BCe); + + /* prepareTheta */ + BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; + BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; + BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; + BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; + BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; + + /* thetaRhoPiChiIotaPrepareTheta(round+1, E, A) */ + Da = BCu ^ ROL64(BCe, 1); + De = BCa ^ ROL64(BCi, 1); + Di 
= BCe ^ ROL64(BCo, 1); + Do = BCi ^ ROL64(BCu, 1); + Du = BCo ^ ROL64(BCa, 1); + + Eba ^= Da; + BCa = Eba; + Ege ^= De; + BCe = ROL64(Ege, 44); + Eki ^= Di; + BCi = ROL64(Eki, 43); + Emo ^= Do; + BCo = ROL64(Emo, 21); + Esu ^= Du; + BCu = ROL64(Esu, 14); + Aba = BCa ^ ANDN64(BCe, BCi); + Aba ^= *(round_consts++); + Abe = BCe ^ ANDN64(BCi, BCo); + Abi = BCi ^ ANDN64(BCo, BCu); + Abo = BCo ^ ANDN64(BCu, BCa); + Abu = BCu ^ ANDN64(BCa, BCe); + + Ebo ^= Do; + BCa = ROL64(Ebo, 28); + Egu ^= Du; + BCe = ROL64(Egu, 20); + Eka ^= Da; + BCi = ROL64(Eka, 3); + Eme ^= De; + BCo = ROL64(Eme, 45); + Esi ^= Di; + BCu = ROL64(Esi, 61); + Aga = BCa ^ ANDN64(BCe, BCi); + Age = BCe ^ ANDN64(BCi, BCo); + Agi = BCi ^ ANDN64(BCo, BCu); + Ago = BCo ^ ANDN64(BCu, BCa); + Agu = BCu ^ ANDN64(BCa, BCe); + + Ebe ^= De; + BCa = ROL64(Ebe, 1); + Egi ^= Di; + BCe = ROL64(Egi, 6); + Eko ^= Do; + BCi = ROL64(Eko, 25); + Emu ^= Du; + BCo = ROL64(Emu, 8); + Esa ^= Da; + BCu = ROL64(Esa, 18); + Aka = BCa ^ ANDN64(BCe, BCi); + Ake = BCe ^ ANDN64(BCi, BCo); + Aki = BCi ^ ANDN64(BCo, BCu); + Ako = BCo ^ ANDN64(BCu, BCa); + Aku = BCu ^ ANDN64(BCa, BCe); + + Ebu ^= Du; + BCa = ROL64(Ebu, 27); + Ega ^= Da; + BCe = ROL64(Ega, 36); + Eke ^= De; + BCi = ROL64(Eke, 10); + Emi ^= Di; + BCo = ROL64(Emi, 15); + Eso ^= Do; + BCu = ROL64(Eso, 56); + Ama = BCa ^ ANDN64(BCe, BCi); + Ame = BCe ^ ANDN64(BCi, BCo); + Ami = BCi ^ ANDN64(BCo, BCu); + Amo = BCo ^ ANDN64(BCu, BCa); + Amu = BCu ^ ANDN64(BCa, BCe); + + Ebi ^= Di; + BCa = ROL64(Ebi, 62); + Ego ^= Do; + BCe = ROL64(Ego, 55); + Eku ^= Du; + BCi = ROL64(Eku, 39); + Ema ^= Da; + BCo = ROL64(Ema, 41); + Ese ^= De; + BCu = ROL64(Ese, 2); + Asa = BCa ^ ANDN64(BCe, BCi); + Ase = BCe ^ ANDN64(BCi, BCo); + Asi = BCi ^ ANDN64(BCo, BCu); + Aso = BCo ^ ANDN64(BCu, BCa); + Asu = BCu ^ ANDN64(BCa, BCe); + } + while (round_consts < round_consts_end); + + state[0] = Aba; + state[1] = Abe; + state[2] = Abi; + state[3] = Abo; + state[4] = Abu; + state[5] = Aga; + state[6] = Age; + state[7] = Agi; + state[8] = Ago; + state[9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; + + return sizeof(void *) * 4 + sizeof(u64) * 12 * 5; +} + +static unsigned int +KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + unsigned int burn = 0; + + while (nlanes) + { + switch (blocklanes) + { + case 21: + /* SHAKE128 */ + while (pos == 0 && nlanes >= 21) + { + nlanes -= 21; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_4(&hd->u.state64[16], lanes); lanes += 8 * 4; + absorb_lanes64_1(&hd->u.state64[20], lanes); lanes += 8 * 1; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 18: + /* SHA3-224 */ + while (pos == 0 && nlanes >= 18) + { + nlanes -= 18; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_2(&hd->u.state64[16], lanes); lanes += 8 * 2; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 17: + /* SHA3-256 & SHAKE256 */ + while (pos == 0 && nlanes >= 17) + { + nlanes -= 17; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 
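The blocklanes values dispatched on in this absorb routine follow directly from the SHA-3 sponge parameters: the capacity is twice the digest size, the rate is the remainder of the 1600-bit state, and one lane is 64 bits. A quick derivation (illustrative, not part of libgcrypt):

/* Lane count of one rate-sized block for a SHA-3 digest size.  The
   same formula covers the SHAKE cases with the security strength in
   place of digest_bits: 128 -> 21 lanes, 256 -> 17 lanes.  */
static unsigned int
sha3_block_lanes (unsigned int digest_bits)
{
  unsigned int rate_bits = 1600 - 2 * digest_bits;

  return rate_bits / 64;   /* 224 -> 18, 256 -> 17, 384 -> 13, 512 -> 9 */
}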
8; + absorb_lanes64_1(&hd->u.state64[16], lanes); lanes += 8 * 1; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 13: + /* SHA3-384 */ + while (pos == 0 && nlanes >= 13) + { + nlanes -= 13; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_4(&hd->u.state64[8], lanes); lanes += 8 * 4; + absorb_lanes64_1(&hd->u.state64[12], lanes); lanes += 8 * 1; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 9: + /* SHA3-512 */ + while (pos == 0 && nlanes >= 9) + { + nlanes -= 9; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_1(&hd->u.state64[8], lanes); lanes += 8 * 1; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + } + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + + if (++pos == blocklanes) + { + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + pos = 0; + break; + } + } + } + + return burn; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/mac-cmac.c b/libotr/libgcrypt-1.8.7/cipher/mac-cmac.c new file mode 100644 index 0000000..e42a764 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/mac-cmac.c @@ -0,0 +1,226 @@ +/* mac-cmac.c - CMAC glue for MAC API + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "./mac-internal.h" + + +static int +map_mac_algo_to_cipher (int mac_algo) +{ + switch (mac_algo) + { + default: + return GCRY_CIPHER_NONE; + case GCRY_MAC_CMAC_AES: + return GCRY_CIPHER_AES; + case GCRY_MAC_CMAC_3DES: + return GCRY_CIPHER_3DES; + case GCRY_MAC_CMAC_CAMELLIA: + return GCRY_CIPHER_CAMELLIA128; + case GCRY_MAC_CMAC_IDEA: + return GCRY_CIPHER_IDEA; + case GCRY_MAC_CMAC_CAST5: + return GCRY_CIPHER_CAST5; + case GCRY_MAC_CMAC_BLOWFISH: + return GCRY_CIPHER_BLOWFISH; + case GCRY_MAC_CMAC_TWOFISH: + return GCRY_CIPHER_TWOFISH; + case GCRY_MAC_CMAC_SERPENT: + return GCRY_CIPHER_SERPENT128; + case GCRY_MAC_CMAC_SEED: + return GCRY_CIPHER_SEED; + case GCRY_MAC_CMAC_RFC2268: + return GCRY_CIPHER_RFC2268_128; + case GCRY_MAC_CMAC_GOST28147: + return GCRY_CIPHER_GOST28147; + } +} + + +static gcry_err_code_t +cmac_open (gcry_mac_hd_t h) +{ + gcry_err_code_t err; + gcry_cipher_hd_t hd; + int secure = (h->magic == CTX_MAGIC_SECURE); + int cipher_algo; + unsigned int flags; + + cipher_algo = map_mac_algo_to_cipher (h->spec->algo); + flags = (secure ? 
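cmac_open below hands the actual MAC computation to the cipher layer by opening the block cipher in GCRY_CIPHER_MODE_CMAC. For orientation, the step that characterizes CMAC, deriving the subkeys K1 and K2 from E_K(0) by doubling in GF(2^128), has this well-known shape (a sketch of the standard SP 800-38B construction, not the cipher-cmac.c code itself):

/* Double a 128-bit value in GF(2^128): shift left by one bit and, if
   the top bit fell off, reduce with the constant 0x87.  CMAC uses
   K1 = dbl(E_K(0^128)) and K2 = dbl(K1) to mask the final block.  */
static void
cmac_dbl (unsigned char b[16])
{
  unsigned char carry = b[0] & 0x80;
  int i;

  for (i = 0; i < 15; i++)
    b[i] = (b[i] << 1) | (b[i + 1] >> 7);
  b[15] <<= 1;
  if (carry)
    b[15] ^= 0x87;
}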
GCRY_CIPHER_SECURE : 0); + + err = _gcry_cipher_open_internal (&hd, cipher_algo, GCRY_CIPHER_MODE_CMAC, + flags); + if (err) + return err; + + h->u.cmac.cipher_algo = cipher_algo; + h->u.cmac.ctx = hd; + h->u.cmac.blklen = _gcry_cipher_get_algo_blklen (cipher_algo); + return 0; +} + + +static void +cmac_close (gcry_mac_hd_t h) +{ + _gcry_cipher_close (h->u.cmac.ctx); + h->u.cmac.ctx = NULL; +} + + +static gcry_err_code_t +cmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + return _gcry_cipher_setkey (h->u.cmac.ctx, key, keylen); +} + + +static gcry_err_code_t +cmac_reset (gcry_mac_hd_t h) +{ + return _gcry_cipher_reset (h->u.cmac.ctx); +} + + +static gcry_err_code_t +cmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + return _gcry_cipher_cmac_authenticate (h->u.cmac.ctx, buf, buflen); +} + + +static gcry_err_code_t +cmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen) +{ + if (*outlen > h->u.cmac.blklen) + *outlen = h->u.cmac.blklen; + return _gcry_cipher_cmac_get_tag (h->u.cmac.ctx, outbuf, *outlen); +} + + +static gcry_err_code_t +cmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + return _gcry_cipher_cmac_check_tag (h->u.cmac.ctx, buf, buflen); +} + + +static unsigned int +cmac_get_maclen (int algo) +{ + return _gcry_cipher_get_algo_blklen (map_mac_algo_to_cipher (algo)); +} + + +static unsigned int +cmac_get_keylen (int algo) +{ + return _gcry_cipher_get_algo_keylen (map_mac_algo_to_cipher (algo)); +} + + +static gcry_mac_spec_ops_t cmac_ops = { + cmac_open, + cmac_close, + cmac_setkey, + NULL, + cmac_reset, + cmac_write, + cmac_read, + cmac_verify, + cmac_get_maclen, + cmac_get_keylen +}; + + +#if USE_BLOWFISH +gcry_mac_spec_t _gcry_mac_type_spec_cmac_blowfish = { + GCRY_MAC_CMAC_BLOWFISH, {0, 0}, "CMAC_BLOWFISH", + &cmac_ops +}; +#endif +#if USE_DES +gcry_mac_spec_t _gcry_mac_type_spec_cmac_tripledes = { + GCRY_MAC_CMAC_3DES, {0, 1}, "CMAC_3DES", + &cmac_ops +}; +#endif +#if USE_CAST5 +gcry_mac_spec_t _gcry_mac_type_spec_cmac_cast5 = { + GCRY_MAC_CMAC_CAST5, {0, 0}, "CMAC_CAST5", + &cmac_ops +}; +#endif +#if USE_AES +gcry_mac_spec_t _gcry_mac_type_spec_cmac_aes = { + GCRY_MAC_CMAC_AES, {0, 1}, "CMAC_AES", + &cmac_ops +}; +#endif +#if USE_TWOFISH +gcry_mac_spec_t _gcry_mac_type_spec_cmac_twofish = { + GCRY_MAC_CMAC_TWOFISH, {0, 0}, "CMAC_TWOFISH", + &cmac_ops +}; +#endif +#if USE_SERPENT +gcry_mac_spec_t _gcry_mac_type_spec_cmac_serpent = { + GCRY_MAC_CMAC_SERPENT, {0, 0}, "CMAC_SERPENT", + &cmac_ops +}; +#endif +#if USE_RFC2268 +gcry_mac_spec_t _gcry_mac_type_spec_cmac_rfc2268 = { + GCRY_MAC_CMAC_RFC2268, {0, 0}, "CMAC_RFC2268", + &cmac_ops +}; +#endif +#if USE_SEED +gcry_mac_spec_t _gcry_mac_type_spec_cmac_seed = { + GCRY_MAC_CMAC_SEED, {0, 0}, "CMAC_SEED", + &cmac_ops +}; +#endif +#if USE_CAMELLIA +gcry_mac_spec_t _gcry_mac_type_spec_cmac_camellia = { + GCRY_MAC_CMAC_CAMELLIA, {0, 0}, "CMAC_CAMELLIA", + &cmac_ops +}; +#endif +#ifdef USE_IDEA +gcry_mac_spec_t _gcry_mac_type_spec_cmac_idea = { + GCRY_MAC_CMAC_IDEA, {0, 0}, "CMAC_IDEA", + &cmac_ops +}; +#endif +#if USE_GOST28147 +gcry_mac_spec_t _gcry_mac_type_spec_cmac_gost28147 = { + GCRY_MAC_CMAC_GOST28147, {0, 0}, "CMAC_GOST28147", + &cmac_ops +}; +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/mac-gmac.c b/libotr/libgcrypt-1.8.7/cipher/mac-gmac.c new file mode 100644 index 0000000..9bc86d9 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/mac-gmac.c @@ -0,0 +1,185 @@ +/* mac-gmac.c - GMAC glue for MAC API + * Copyright (C) 2013 Jussi 
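Each gcry_mac_spec_t defined above is picked up by the dispatcher in mac.c and becomes reachable through the public gcry_mac_* interface. A typical CMAC-AES computation through that interface looks like this (a usage sketch with error handling omitted; key and message are placeholders):

#include <gcrypt.h>

static void
cmac_aes_example (void)
{
  gcry_mac_hd_t hd;
  unsigned char key[16] = { 0 };   /* example key, all zero */
  unsigned char mac[16];
  size_t maclen = sizeof mac;      /* CMAC tag = cipher block size */

  gcry_mac_open (&hd, GCRY_MAC_CMAC_AES, 0, NULL);
  gcry_mac_setkey (hd, key, sizeof key);
  gcry_mac_write (hd, "hello", 5);
  gcry_mac_read (hd, mac, &maclen);   /* maclen stays 16 */
  gcry_mac_close (hd);
}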
Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "./mac-internal.h" + + +static int +map_mac_algo_to_cipher (int mac_algo) +{ + switch (mac_algo) + { + default: + return GCRY_CIPHER_NONE; + case GCRY_MAC_GMAC_AES: + return GCRY_CIPHER_AES; + case GCRY_MAC_GMAC_CAMELLIA: + return GCRY_CIPHER_CAMELLIA128; + case GCRY_MAC_GMAC_TWOFISH: + return GCRY_CIPHER_TWOFISH; + case GCRY_MAC_GMAC_SERPENT: + return GCRY_CIPHER_SERPENT128; + case GCRY_MAC_GMAC_SEED: + return GCRY_CIPHER_SEED; + } +} + + +static gcry_err_code_t +gmac_open (gcry_mac_hd_t h) +{ + gcry_err_code_t err; + gcry_cipher_hd_t hd; + int secure = (h->magic == CTX_MAGIC_SECURE); + int cipher_algo; + unsigned int flags; + + cipher_algo = map_mac_algo_to_cipher (h->spec->algo); + flags = (secure ? GCRY_CIPHER_SECURE : 0); + + err = _gcry_cipher_open_internal (&hd, cipher_algo, GCRY_CIPHER_MODE_GCM, + flags); + if (err) + return err; + + h->u.gmac.cipher_algo = cipher_algo; + h->u.gmac.ctx = hd; + return 0; +} + + +static void +gmac_close (gcry_mac_hd_t h) +{ + _gcry_cipher_close (h->u.gmac.ctx); + h->u.gmac.ctx = NULL; +} + + +static gcry_err_code_t +gmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + return _gcry_cipher_setkey (h->u.gmac.ctx, key, keylen); +} + + +static gcry_err_code_t +gmac_setiv (gcry_mac_hd_t h, const unsigned char *iv, size_t ivlen) +{ + return _gcry_cipher_setiv (h->u.gmac.ctx, iv, ivlen); +} + + +static gcry_err_code_t +gmac_reset (gcry_mac_hd_t h) +{ + return _gcry_cipher_reset (h->u.gmac.ctx); +} + + +static gcry_err_code_t +gmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + return _gcry_cipher_authenticate (h->u.gmac.ctx, buf, buflen); +} + + +static gcry_err_code_t +gmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen) +{ + if (*outlen > GCRY_GCM_BLOCK_LEN) + *outlen = GCRY_GCM_BLOCK_LEN; + return _gcry_cipher_gettag (h->u.gmac.ctx, outbuf, *outlen); +} + + +static gcry_err_code_t +gmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + return _gcry_cipher_checktag (h->u.gmac.ctx, buf, buflen); +} + + +static unsigned int +gmac_get_maclen (int algo) +{ + (void)algo; + return GCRY_GCM_BLOCK_LEN; +} + + +static unsigned int +gmac_get_keylen (int algo) +{ + return _gcry_cipher_get_algo_keylen (map_mac_algo_to_cipher (algo)); +} + + +static gcry_mac_spec_ops_t gmac_ops = { + gmac_open, + gmac_close, + gmac_setkey, + gmac_setiv, + gmac_reset, + gmac_write, + gmac_read, + gmac_verify, + gmac_get_maclen, + gmac_get_keylen +}; + + +#if USE_AES +gcry_mac_spec_t _gcry_mac_type_spec_gmac_aes = { + GCRY_MAC_GMAC_AES, {0, 1}, "GMAC_AES", + &gmac_ops +}; +#endif +#if USE_TWOFISH 
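Unlike the CMAC glue, GMAC wires up a setiv handler, because GCM cannot run without a nonce. The public-API flow therefore gains one call (usage sketch, error handling omitted):

#include <gcrypt.h>

static void
gmac_aes_example (void)
{
  gcry_mac_hd_t hd;
  unsigned char key[16] = { 0 };    /* example key */
  unsigned char nonce[12] = { 0 };  /* GCM's preferred 96-bit nonce */
  unsigned char tag[16];
  size_t taglen = sizeof tag;

  gcry_mac_open (&hd, GCRY_MAC_GMAC_AES, 0, NULL);
  gcry_mac_setkey (hd, key, sizeof key);
  gcry_mac_setiv (hd, nonce, sizeof nonce);
  gcry_mac_write (hd, "hello", 5);
  gcry_mac_read (hd, tag, &taglen);
  gcry_mac_close (hd);
}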
+gcry_mac_spec_t _gcry_mac_type_spec_gmac_twofish = { + GCRY_MAC_GMAC_TWOFISH, {0, 0}, "GMAC_TWOFISH", + &gmac_ops +}; +#endif +#if USE_SERPENT +gcry_mac_spec_t _gcry_mac_type_spec_gmac_serpent = { + GCRY_MAC_GMAC_SERPENT, {0, 0}, "GMAC_SERPENT", + &gmac_ops +}; +#endif +#if USE_SEED +gcry_mac_spec_t _gcry_mac_type_spec_gmac_seed = { + GCRY_MAC_GMAC_SEED, {0, 0}, "GMAC_SEED", + &gmac_ops +}; +#endif +#if USE_CAMELLIA +gcry_mac_spec_t _gcry_mac_type_spec_gmac_camellia = { + GCRY_MAC_GMAC_CAMELLIA, {0, 0}, "GMAC_CAMELLIA", + &gmac_ops +}; +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/mac-hmac.c b/libotr/libgcrypt-1.8.7/cipher/mac-hmac.c new file mode 100644 index 0000000..9379f4b --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/mac-hmac.c @@ -0,0 +1,317 @@ +/* mac-hmac.c - HMAC glue for MAC API + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "./mac-internal.h" +#include "bufhelp.h" + + +static int +map_mac_algo_to_md (int mac_algo) +{ + switch (mac_algo) + { + default: + return GCRY_MD_NONE; + case GCRY_MAC_HMAC_MD2: + return GCRY_MD_MD2; + case GCRY_MAC_HMAC_MD4: + return GCRY_MD_MD4; + case GCRY_MAC_HMAC_MD5: + return GCRY_MD_MD5; + case GCRY_MAC_HMAC_SHA1: + return GCRY_MD_SHA1; + case GCRY_MAC_HMAC_SHA224: + return GCRY_MD_SHA224; + case GCRY_MAC_HMAC_SHA256: + return GCRY_MD_SHA256; + case GCRY_MAC_HMAC_SHA384: + return GCRY_MD_SHA384; + case GCRY_MAC_HMAC_SHA512: + return GCRY_MD_SHA512; + case GCRY_MAC_HMAC_SHA3_224: + return GCRY_MD_SHA3_224; + case GCRY_MAC_HMAC_SHA3_256: + return GCRY_MD_SHA3_256; + case GCRY_MAC_HMAC_SHA3_384: + return GCRY_MD_SHA3_384; + case GCRY_MAC_HMAC_SHA3_512: + return GCRY_MD_SHA3_512; + case GCRY_MAC_HMAC_RMD160: + return GCRY_MD_RMD160; + case GCRY_MAC_HMAC_TIGER1: + return GCRY_MD_TIGER1; + case GCRY_MAC_HMAC_WHIRLPOOL: + return GCRY_MD_WHIRLPOOL; + case GCRY_MAC_HMAC_GOSTR3411_94: + return GCRY_MD_GOSTR3411_94; + case GCRY_MAC_HMAC_STRIBOG256: + return GCRY_MD_STRIBOG256; + case GCRY_MAC_HMAC_STRIBOG512: + return GCRY_MD_STRIBOG512; + } +} + + +static gcry_err_code_t +hmac_open (gcry_mac_hd_t h) +{ + gcry_err_code_t err; + gcry_md_hd_t hd; + int secure = (h->magic == CTX_MAGIC_SECURE); + unsigned int flags; + int md_algo; + + md_algo = map_mac_algo_to_md (h->spec->algo); + + flags = GCRY_MD_FLAG_HMAC; + flags |= (secure ? 
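The HMAC glue is the thinnest of the three: hmac_open merely opens a message-digest handle with GCRY_MD_FLAG_HMAC, and every later call is forwarded to the md layer. The same construction is reachable directly through the public digest API (usage sketch, error handling omitted):

#include <gcrypt.h>

static void
hmac_sha256_example (void)
{
  gcry_md_hd_t hd;
  unsigned char key[32] = { 0 };   /* example key */
  unsigned char *mac;

  gcry_md_open (&hd, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
  gcry_md_setkey (hd, key, sizeof key);
  gcry_md_write (hd, "hello", 5);
  mac = gcry_md_read (hd, GCRY_MD_SHA256);   /* 32-byte tag */
  (void)mac;                 /* pointer is valid until the close */
  gcry_md_close (hd);
}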
GCRY_MD_FLAG_SECURE : 0); + + err = _gcry_md_open (&hd, md_algo, flags); + if (err) + return err; + + h->u.hmac.md_algo = md_algo; + h->u.hmac.md_ctx = hd; + return 0; +} + + +static void +hmac_close (gcry_mac_hd_t h) +{ + _gcry_md_close (h->u.hmac.md_ctx); + h->u.hmac.md_ctx = NULL; +} + + +static gcry_err_code_t +hmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + return _gcry_md_setkey (h->u.hmac.md_ctx, key, keylen); +} + + +static gcry_err_code_t +hmac_reset (gcry_mac_hd_t h) +{ + _gcry_md_reset (h->u.hmac.md_ctx); + return 0; +} + + +static gcry_err_code_t +hmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + _gcry_md_write (h->u.hmac.md_ctx, buf, buflen); + return 0; +} + + +static gcry_err_code_t +hmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen) +{ + unsigned int dlen; + const unsigned char *digest; + + dlen = _gcry_md_get_algo_dlen (h->u.hmac.md_algo); + digest = _gcry_md_read (h->u.hmac.md_ctx, h->u.hmac.md_algo); + + if (*outlen <= dlen) + buf_cpy (outbuf, digest, *outlen); + else + { + buf_cpy (outbuf, digest, dlen); + *outlen = dlen; + } + + return 0; +} + + +static gcry_err_code_t +hmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + unsigned int dlen; + const unsigned char *digest; + + dlen = _gcry_md_get_algo_dlen (h->u.hmac.md_algo); + digest = _gcry_md_read (h->u.hmac.md_ctx, h->u.hmac.md_algo); + + if (buflen > dlen) + return GPG_ERR_INV_LENGTH; + + return buf_eq_const (buf, digest, buflen) ? 0 : GPG_ERR_CHECKSUM; +} + + +static unsigned int +hmac_get_maclen (int algo) +{ + return _gcry_md_get_algo_dlen (map_mac_algo_to_md (algo)); +} + + +static unsigned int +hmac_get_keylen (int algo) +{ + /* Return blocksize for default key length. */ + switch (algo) + { + case GCRY_MD_SHA3_224: + return 1152 / 8; + case GCRY_MD_SHA3_256: + return 1088 / 8; + case GCRY_MD_SHA3_384: + return 832 / 8; + case GCRY_MD_SHA3_512: + return 576 / 8; + case GCRY_MAC_HMAC_SHA384: + case GCRY_MAC_HMAC_SHA512: + return 128; + case GCRY_MAC_HMAC_GOSTR3411_94: + return 32; + default: + return 64; + } +} + + +static const gcry_mac_spec_ops_t hmac_ops = { + hmac_open, + hmac_close, + hmac_setkey, + NULL, + hmac_reset, + hmac_write, + hmac_read, + hmac_verify, + hmac_get_maclen, + hmac_get_keylen +}; + + +#if USE_SHA1 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha1 = { + GCRY_MAC_HMAC_SHA1, {0, 1}, "HMAC_SHA1", + &hmac_ops +}; +#endif +#if USE_SHA256 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha256 = { + GCRY_MAC_HMAC_SHA256, {0, 1}, "HMAC_SHA256", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha224 = { + GCRY_MAC_HMAC_SHA224, {0, 1}, "HMAC_SHA224", + &hmac_ops +}; +#endif +#if USE_SHA512 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512 = { + GCRY_MAC_HMAC_SHA512, {0, 1}, "HMAC_SHA512", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha384 = { + GCRY_MAC_HMAC_SHA384, {0, 1}, "HMAC_SHA384", + &hmac_ops +}; +#endif +#if USE_SHA3 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_224 = { + GCRY_MAC_HMAC_SHA3_224, {0, 1}, "HMAC_SHA3_224", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_256 = { + GCRY_MAC_HMAC_SHA3_256, {0, 1}, "HMAC_SHA3_256", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_384 = { + GCRY_MAC_HMAC_SHA3_384, {0, 1}, "HMAC_SHA3_384", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_512 = { + GCRY_MAC_HMAC_SHA3_512, {0, 1}, "HMAC_SHA3_512", + &hmac_ops +}; +#endif +#ifdef USE_GOST_R_3411_94 +gcry_mac_spec_t 
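The constants returned by hmac_get_keylen above are the block sizes of the underlying hashes, since the conventional default HMAC key length is one block. For the SHA-3 members they follow from the sponge rate (illustrative derivation, not part of libgcrypt):

/* HMAC block size of a SHA-3 digest: the rate in bytes.  */
static unsigned int
sha3_hmac_block_bytes (unsigned int digest_bits)
{
  return (1600 - 2 * digest_bits) / 8;   /* 224 -> 144, 256 -> 136,
                                            384 -> 104, 512 -> 72 */
}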
_gcry_mac_type_spec_hmac_gost3411_94 = { + GCRY_MAC_HMAC_GOSTR3411_94, {0, 0}, "HMAC_GOSTR3411_94", + &hmac_ops +}; +#endif +#ifdef USE_GOST_R_3411_12 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog256 = { + GCRY_MAC_HMAC_STRIBOG256, {0, 0}, "HMAC_STRIBOG256", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog512 = { + GCRY_MAC_HMAC_STRIBOG512, {0, 0}, "HMAC_STRIBOG512", + &hmac_ops +}; +#endif +#if USE_WHIRLPOOL +gcry_mac_spec_t _gcry_mac_type_spec_hmac_whirlpool = { + GCRY_MAC_HMAC_WHIRLPOOL, {0, 0}, "HMAC_WHIRLPOOL", + &hmac_ops +}; +#endif +#if USE_RMD160 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_rmd160 = { + GCRY_MAC_HMAC_RMD160, {0, 0}, "HMAC_RIPEMD160", + &hmac_ops +}; +#endif +#if USE_TIGER +gcry_mac_spec_t _gcry_mac_type_spec_hmac_tiger1 = { + GCRY_MAC_HMAC_TIGER1, {0, 0}, "HMAC_TIGER", + &hmac_ops +}; +#endif +#if USE_MD5 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_md5 = { + GCRY_MAC_HMAC_MD5, {0, 0}, "HMAC_MD5", + &hmac_ops +}; +#endif +#if USE_MD4 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_md4 = { + GCRY_MAC_HMAC_MD4, {0, 0}, "HMAC_MD4", + &hmac_ops +}; +#endif +#if USE_MD2 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_md2 = { + GCRY_MAC_HMAC_MD2, {0, 0}, "HMAC_MD2", + &hmac_ops +}; +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/mac-internal.h b/libotr/libgcrypt-1.8.7/cipher/mac-internal.h new file mode 100644 index 0000000..2beb284 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/mac-internal.h @@ -0,0 +1,240 @@ +/* mac-internal.h - Internal defs for mac.c + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include "g10lib.h" + + +/* The data object used to hold a handle to an encryption object. */ +struct gcry_mac_handle; + +/* The data object used to hold poly1305-mac context. */ +struct poly1305mac_context_s; + + +/* + * + * Message authentication code related definitions. + * + */ + + +/* Magic values for the context structure. */ +#define CTX_MAGIC_NORMAL 0x59d9b8af +#define CTX_MAGIC_SECURE 0x12c27cd0 + + +/* MAC module functions. 
*/ +typedef gcry_err_code_t (*gcry_mac_open_func_t)(gcry_mac_hd_t h); +typedef void (*gcry_mac_close_func_t)(gcry_mac_hd_t h); +typedef gcry_err_code_t (*gcry_mac_setkey_func_t)(gcry_mac_hd_t h, + const unsigned char *key, + size_t keylen); +typedef gcry_err_code_t (*gcry_mac_setiv_func_t)(gcry_mac_hd_t h, + const unsigned char *iv, + size_t ivlen); +typedef gcry_err_code_t (*gcry_mac_reset_func_t)(gcry_mac_hd_t h); +typedef gcry_err_code_t (*gcry_mac_write_func_t)(gcry_mac_hd_t h, + const unsigned char *inbuf, + size_t inlen); +typedef gcry_err_code_t (*gcry_mac_read_func_t)(gcry_mac_hd_t h, + unsigned char *outbuf, + size_t *outlen); +typedef gcry_err_code_t (*gcry_mac_verify_func_t)(gcry_mac_hd_t h, + const unsigned char *inbuf, + size_t inlen); +typedef unsigned int (*gcry_mac_get_maclen_func_t)(int algo); +typedef unsigned int (*gcry_mac_get_keylen_func_t)(int algo); + + +typedef struct gcry_mac_spec_ops +{ + gcry_mac_open_func_t open; + gcry_mac_close_func_t close; + gcry_mac_setkey_func_t setkey; + gcry_mac_setiv_func_t setiv; + gcry_mac_reset_func_t reset; + gcry_mac_write_func_t write; + gcry_mac_read_func_t read; + gcry_mac_verify_func_t verify; + gcry_mac_get_maclen_func_t get_maclen; + gcry_mac_get_keylen_func_t get_keylen; +} gcry_mac_spec_ops_t; + + +/* Module specification structure for message authentication codes. */ +typedef struct gcry_mac_spec +{ + int algo; + struct { + unsigned int disabled:1; + unsigned int fips:1; + } flags; + const char *name; + const gcry_mac_spec_ops_t *ops; +} gcry_mac_spec_t; + + +/* The handle structure. */ +struct gcry_mac_handle +{ + int magic; + int algo; + const gcry_mac_spec_t *spec; + gcry_ctx_t gcry_ctx; + union { + struct { + gcry_md_hd_t md_ctx; + int md_algo; + } hmac; + struct { + gcry_cipher_hd_t ctx; + int cipher_algo; + unsigned int blklen; + } cmac; + struct { + gcry_cipher_hd_t ctx; + int cipher_algo; + } gmac; + struct { + struct poly1305mac_context_s *ctx; + } poly1305mac; + } u; +}; + + +/* + * The HMAC algorithm specifications (mac-hmac.c). + */ +#if USE_SHA1 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha1; +#endif +#if USE_SHA256 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha256; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha224; +#endif +#if USE_SHA512 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha384; +#endif +#if USE_SHA3 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_224; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_256; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_384; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_512; +#endif +#ifdef USE_GOST_R_3411_94 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_94; +#endif +#ifdef USE_GOST_R_3411_12 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog256; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog512; +#endif +#if USE_WHIRLPOOL +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_whirlpool; +#endif +#if USE_RMD160 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_rmd160; +#endif +#if USE_TIGER +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_tiger1; +#endif +#if USE_MD5 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_md5; +#endif +#if USE_MD4 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_md4; +#endif + +/* + * The CMAC algorithm specifications (mac-cmac.c). 
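Taken together, gcry_mac_spec_ops_t and gcry_mac_spec_t form the whole backend contract: one vtable plus one spec per algorithm, referenced from mac_list in mac.c. A hypothetical backend would be declared roughly as follows; every mymac_* identifier and GCRY_MAC_MYMAC are invented for illustration and do not exist in libgcrypt:

/* Hypothetical backend declaration (illustration only).  The mymac_*
   callbacks stand for the backend's own implementations; mac_open()
   in mac.c refuses specs that leave open, setkey, write, read,
   verify or reset unset, so only setiv may be NULL.  */
static const gcry_mac_spec_ops_t mymac_ops = {
  mymac_open, mymac_close, mymac_setkey,
  NULL,                          /* no setiv: a nonce-less MAC */
  mymac_reset, mymac_write, mymac_read, mymac_verify,
  mymac_get_maclen, mymac_get_keylen
};

gcry_mac_spec_t _gcry_mac_type_spec_mymac = {
  GCRY_MAC_MYMAC, {0, 0}, "MYMAC", &mymac_ops
};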
+ */ +#if USE_BLOWFISH +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_blowfish; +#endif +#if USE_DES +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_tripledes; +#endif +#if USE_CAST5 +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_cast5; +#endif +#if USE_AES +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_aes; +#endif +#if USE_TWOFISH +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_twofish; +#endif +#if USE_SERPENT +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_serpent; +#endif +#if USE_RFC2268 +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_rfc2268; +#endif +#if USE_SEED +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_seed; +#endif +#if USE_CAMELLIA +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_camellia; +#endif +#ifdef USE_IDEA +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_idea; +#endif +#if USE_GOST28147 +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_gost28147; +#endif + +/* + * The GMAC algorithm specifications (mac-gmac.c). + */ +#if USE_AES +extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_aes; +#endif +#if USE_TWOFISH +extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_twofish; +#endif +#if USE_SERPENT +extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_serpent; +#endif +#if USE_SEED +extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_seed; +#endif +#if USE_CAMELLIA +extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_camellia; +#endif + +/* + * The Poly1305 MAC algorithm specifications (mac-poly1305.c). + */ +extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac; +#if USE_AES +extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_aes; +#endif +#if USE_CAMELLIA +extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_camellia; +#endif +#if USE_TWOFISH +extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_twofish; +#endif +#if USE_SERPENT +extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_serpent; +#endif +#if USE_SEED +extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_seed; +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/mac-poly1305.c b/libotr/libgcrypt-1.8.7/cipher/mac-poly1305.c new file mode 100644 index 0000000..b80f87d --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/mac-poly1305.c @@ -0,0 +1,362 @@ +/* mac-poly1305.c - Poly1305 based MACs + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mac-internal.h" +#include "poly1305-internal.h" + + +struct poly1305mac_context_s { + poly1305_context_t ctx; + gcry_cipher_hd_t hd; + struct { + unsigned int key_set:1; + unsigned int nonce_set:1; + unsigned int tag:1; + } marks; + byte tag[POLY1305_TAGLEN]; + byte key[POLY1305_KEYLEN]; +}; + + +static gcry_err_code_t +poly1305mac_open (gcry_mac_hd_t h) +{ + struct poly1305mac_context_s *mac_ctx; + int secure = (h->magic == CTX_MAGIC_SECURE); + unsigned int flags = (secure ? GCRY_CIPHER_SECURE : 0); + gcry_err_code_t err; + int cipher_algo; + + if (secure) + mac_ctx = xtrycalloc_secure (1, sizeof(*mac_ctx)); + else + mac_ctx = xtrycalloc (1, sizeof(*mac_ctx)); + + if (!mac_ctx) + return gpg_err_code_from_syserror (); + + h->u.poly1305mac.ctx = mac_ctx; + + switch (h->spec->algo) + { + default: + /* already checked. */ + case GCRY_MAC_POLY1305: + /* plain Poly1305. */ + cipher_algo = -1; + return 0; + case GCRY_MAC_POLY1305_AES: + cipher_algo = GCRY_CIPHER_AES; + break; + case GCRY_MAC_POLY1305_CAMELLIA: + cipher_algo = GCRY_CIPHER_CAMELLIA128; + break; + case GCRY_MAC_POLY1305_TWOFISH: + cipher_algo = GCRY_CIPHER_TWOFISH; + break; + case GCRY_MAC_POLY1305_SERPENT: + cipher_algo = GCRY_CIPHER_SERPENT128; + break; + case GCRY_MAC_POLY1305_SEED: + cipher_algo = GCRY_CIPHER_SEED; + break; + } + + err = _gcry_cipher_open_internal (&mac_ctx->hd, cipher_algo, + GCRY_CIPHER_MODE_ECB, flags); + if (err) + goto err_free; + + return 0; + +err_free: + xfree(h->u.poly1305mac.ctx); + return err; +} + + +static void +poly1305mac_close (gcry_mac_hd_t h) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + + if (h->spec->algo != GCRY_MAC_POLY1305) + _gcry_cipher_close (mac_ctx->hd); + + xfree(mac_ctx); +} + + +static gcry_err_code_t +poly1305mac_prepare_key (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + size_t block_keylen = keylen - 16; + + /* Need at least 16 + 1 byte key. */ + if (keylen <= 16) + return GPG_ERR_INV_KEYLEN; + + /* For Poly1305-AES, first part of key is passed to Poly1305 as is. */ + memcpy (mac_ctx->key, key + block_keylen, 16); + + /* Remaining part is used as key for the block cipher. */ + return _gcry_cipher_setkey (mac_ctx->hd, key, block_keylen); +} + + +static gcry_err_code_t +poly1305mac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + gcry_err_code_t err; + + memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx)); + memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag)); + memset(&mac_ctx->key, 0, sizeof(mac_ctx->key)); + + mac_ctx->marks.key_set = 0; + mac_ctx->marks.nonce_set = 0; + mac_ctx->marks.tag = 0; + + if (h->spec->algo != GCRY_MAC_POLY1305) + { + err = poly1305mac_prepare_key (h, key, keylen); + if (err) + return err; + + /* Poly1305-AES/etc also need nonce. */ + mac_ctx->marks.key_set = 1; + mac_ctx->marks.nonce_set = 0; + } + else + { + /* For plain Poly1305, key is the nonce and setup is complete now. 
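The split handled by poly1305mac_prepare_key above is the Poly1305-AES key layout: the caller's key is the block-cipher key followed by 16 bytes of Poly1305 'r' material, and the missing 's' half is filled in later by poly1305mac_setiv, which encrypts the 16-byte nonce. The same derivation expressed through the public cipher API (a sketch of the scheme, not the internal code; error handling omitted):

#include <gcrypt.h>
#include <string.h>

/* Assemble the 32-byte Poly1305 key r||s for Poly1305-AES:
   r is taken verbatim from the caller key, s = AES_k(nonce).  */
static void
poly1305_aes_subkey (const unsigned char key[32],   /* 16 AES || 16 r */
                     const unsigned char nonce[16],
                     unsigned char poly_key[32])
{
  gcry_cipher_hd_t hd;

  memcpy (poly_key, key + 16, 16);                        /* r part */
  gcry_cipher_open (&hd, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_ECB, 0);
  gcry_cipher_setkey (hd, key, 16);
  gcry_cipher_encrypt (hd, poly_key + 16, 16, nonce, 16); /* s part */
  gcry_cipher_close (hd);
}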
*/ + + if (keylen != POLY1305_KEYLEN) + return GPG_ERR_INV_KEYLEN; + + memcpy (mac_ctx->key, key, keylen); + + err = _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN); + if (err) + { + memset(&mac_ctx->key, 0, sizeof(mac_ctx->key)); + return err; + } + + mac_ctx->marks.key_set = 1; + mac_ctx->marks.nonce_set = 1; + } + + return 0; +} + + +static gcry_err_code_t +poly1305mac_setiv (gcry_mac_hd_t h, const unsigned char *iv, size_t ivlen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + gcry_err_code_t err; + + if (h->spec->algo == GCRY_MAC_POLY1305) + return GPG_ERR_INV_ARG; + + if (ivlen != 16) + return GPG_ERR_INV_ARG; + + if (!mac_ctx->marks.key_set) + return 0; + + memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx)); + memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag)); + mac_ctx->marks.nonce_set = 0; + mac_ctx->marks.tag = 0; + + /* Prepare second part of the poly1305 key. */ + + err = _gcry_cipher_encrypt (mac_ctx->hd, mac_ctx->key + 16, 16, iv, 16); + if (err) + return err; + + err = _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN); + if (err) + return err; + + mac_ctx->marks.nonce_set = 1; + return 0; +} + + +static gcry_err_code_t +poly1305mac_reset (gcry_mac_hd_t h) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + + if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set) + return GPG_ERR_INV_STATE; + + memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx)); + memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag)); + + mac_ctx->marks.key_set = 1; + mac_ctx->marks.nonce_set = 1; + mac_ctx->marks.tag = 0; + + return _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN); +} + + +static gcry_err_code_t +poly1305mac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + + if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set || + mac_ctx->marks.tag) + return GPG_ERR_INV_STATE; + + _gcry_poly1305_update (&mac_ctx->ctx, buf, buflen); + return 0; +} + + +static gcry_err_code_t +poly1305mac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t *outlen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + + if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set) + return GPG_ERR_INV_STATE; + + if (!mac_ctx->marks.tag) + { + _gcry_poly1305_finish(&mac_ctx->ctx, mac_ctx->tag); + + memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx)); + mac_ctx->marks.tag = 1; + } + + if (*outlen == 0) + return 0; + + if (*outlen <= POLY1305_TAGLEN) + buf_cpy (outbuf, mac_ctx->tag, *outlen); + else + { + buf_cpy (outbuf, mac_ctx->tag, POLY1305_TAGLEN); + *outlen = POLY1305_TAGLEN; + } + + return 0; +} + + +static gcry_err_code_t +poly1305mac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + gcry_err_code_t err; + size_t outlen = 0; + + /* Check and finalize tag. */ + err = poly1305mac_read(h, NULL, &outlen); + if (err) + return err; + + if (buflen > POLY1305_TAGLEN) + return GPG_ERR_INV_LENGTH; + + return buf_eq_const (buf, mac_ctx->tag, buflen) ? 
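Note that verification runs through buf_eq_const rather than memcmp, so the comparison takes the same time no matter where the first mismatching byte sits. The usual shape of such a helper (a sketch; the real one lives in bufhelp.h):

/* Constant-time equality: accumulate all byte differences with OR so
   the loop never exits early.  Returns nonzero when equal.  */
static int
ct_eq (const unsigned char *a, const unsigned char *b, size_t len)
{
  unsigned char diff = 0;
  size_t i;

  for (i = 0; i < len; i++)
    diff |= a[i] ^ b[i];

  return diff == 0;
}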
0 : GPG_ERR_CHECKSUM; +} + + +static unsigned int +poly1305mac_get_maclen (int algo) +{ + (void)algo; + + return POLY1305_TAGLEN; +} + + +static unsigned int +poly1305mac_get_keylen (int algo) +{ + (void)algo; + + return POLY1305_KEYLEN; +} + + +static gcry_mac_spec_ops_t poly1305mac_ops = { + poly1305mac_open, + poly1305mac_close, + poly1305mac_setkey, + poly1305mac_setiv, + poly1305mac_reset, + poly1305mac_write, + poly1305mac_read, + poly1305mac_verify, + poly1305mac_get_maclen, + poly1305mac_get_keylen +}; + + +gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac = { + GCRY_MAC_POLY1305, {0, 0}, "POLY1305", + &poly1305mac_ops +}; +#if USE_AES +gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_aes = { + GCRY_MAC_POLY1305_AES, {0, 0}, "POLY1305_AES", + &poly1305mac_ops +}; +#endif +#if USE_CAMELLIA +gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_camellia = { + GCRY_MAC_POLY1305_CAMELLIA, {0, 0}, "POLY1305_CAMELLIA", + &poly1305mac_ops +}; +#endif +#if USE_TWOFISH +gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_twofish = { + GCRY_MAC_POLY1305_TWOFISH, {0, 0}, "POLY1305_TWOFISH", + &poly1305mac_ops +}; +#endif +#if USE_SERPENT +gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_serpent = { + GCRY_MAC_POLY1305_SERPENT, {0, 0}, "POLY1305_SERPENT", + &poly1305mac_ops +}; +#endif +#if USE_SEED +gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_seed = { + GCRY_MAC_POLY1305_SEED, {0, 0}, "POLY1305_SEED", + &poly1305mac_ops +}; +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/mac.c b/libotr/libgcrypt-1.8.7/cipher/mac.c new file mode 100644 index 0000000..46be7b7 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/mac.c @@ -0,0 +1,502 @@ +/* mac.c - message authentication code dispatcher + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mac-internal.h" + + +/* This is the list of the digest implementations included in + libgcrypt. 
*/ +static gcry_mac_spec_t *mac_list[] = { +#if USE_SHA1 + &_gcry_mac_type_spec_hmac_sha1, +#endif +#if USE_SHA256 + &_gcry_mac_type_spec_hmac_sha256, + &_gcry_mac_type_spec_hmac_sha224, +#endif +#if USE_SHA512 + &_gcry_mac_type_spec_hmac_sha512, + &_gcry_mac_type_spec_hmac_sha384, +#endif +#if USE_SHA3 + &_gcry_mac_type_spec_hmac_sha3_224, + &_gcry_mac_type_spec_hmac_sha3_256, + &_gcry_mac_type_spec_hmac_sha3_384, + &_gcry_mac_type_spec_hmac_sha3_512, +#endif +#ifdef USE_GOST_R_3411_94 + &_gcry_mac_type_spec_hmac_gost3411_94, +#endif +#ifdef USE_GOST_R_3411_12 + &_gcry_mac_type_spec_hmac_stribog256, + &_gcry_mac_type_spec_hmac_stribog512, +#endif +#if USE_WHIRLPOOL + &_gcry_mac_type_spec_hmac_whirlpool, +#endif +#if USE_RMD160 + &_gcry_mac_type_spec_hmac_rmd160, +#endif +#if USE_TIGER + &_gcry_mac_type_spec_hmac_tiger1, +#endif +#if USE_MD5 + &_gcry_mac_type_spec_hmac_md5, +#endif +#if USE_MD4 + &_gcry_mac_type_spec_hmac_md4, +#endif +#if USE_BLOWFISH + &_gcry_mac_type_spec_cmac_blowfish, +#endif +#if USE_DES + &_gcry_mac_type_spec_cmac_tripledes, +#endif +#if USE_CAST5 + &_gcry_mac_type_spec_cmac_cast5, +#endif +#if USE_AES + &_gcry_mac_type_spec_cmac_aes, + &_gcry_mac_type_spec_gmac_aes, + &_gcry_mac_type_spec_poly1305mac_aes, +#endif +#if USE_TWOFISH + &_gcry_mac_type_spec_cmac_twofish, + &_gcry_mac_type_spec_gmac_twofish, + &_gcry_mac_type_spec_poly1305mac_twofish, +#endif +#if USE_SERPENT + &_gcry_mac_type_spec_cmac_serpent, + &_gcry_mac_type_spec_gmac_serpent, + &_gcry_mac_type_spec_poly1305mac_serpent, +#endif +#if USE_RFC2268 + &_gcry_mac_type_spec_cmac_rfc2268, +#endif +#if USE_SEED + &_gcry_mac_type_spec_cmac_seed, + &_gcry_mac_type_spec_gmac_seed, + &_gcry_mac_type_spec_poly1305mac_seed, +#endif +#if USE_CAMELLIA + &_gcry_mac_type_spec_cmac_camellia, + &_gcry_mac_type_spec_gmac_camellia, + &_gcry_mac_type_spec_poly1305mac_camellia, +#endif +#ifdef USE_IDEA + &_gcry_mac_type_spec_cmac_idea, +#endif +#if USE_GOST28147 + &_gcry_mac_type_spec_cmac_gost28147, +#endif + &_gcry_mac_type_spec_poly1305mac, + NULL, +}; + +/* Explicitly initialize this module. */ +gcry_err_code_t +_gcry_mac_init (void) +{ + if (fips_mode()) + { + /* disable algorithms that are disallowed in fips */ + int idx; + gcry_mac_spec_t *spec; + + for (idx = 0; (spec = mac_list[idx]); idx++) + if (!spec->flags.fips) + spec->flags.disabled = 1; + } + + return 0; +} + + +/* Return the spec structure for the MAC algorithm ALGO. For an + unknown algorithm NULL is returned. */ +static gcry_mac_spec_t * +spec_from_algo (int algo) +{ + gcry_mac_spec_t *spec; + int idx; + + for (idx = 0; (spec = mac_list[idx]); idx++) + if (algo == spec->algo) + return spec; + return NULL; +} + + +/* Lookup a mac's spec by its name. */ +static gcry_mac_spec_t * +spec_from_name (const char *name) +{ + gcry_mac_spec_t *spec; + int idx; + + for (idx = 0; (spec = mac_list[idx]); idx++) + if (!stricmp (name, spec->name)) + return spec; + + return NULL; +} + + +/**************** + * Map a string to the mac algo + */ +int +_gcry_mac_map_name (const char *string) +{ + gcry_mac_spec_t *spec; + + if (!string) + return 0; + + /* Not found, search a matching mac name. */ + spec = spec_from_name (string); + if (spec) + return spec->algo; + + return 0; +} + + +/**************** + * This function simply returns the name of the algorithm or some constant + * string when there is no algo. It will never return NULL. + * Use the macro gcry_mac_test_algo() to check whether the algorithm + * is valid. 
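spec_from_algo and spec_from_name above are plain linear scans over mac_list, keyed by the numeric algorithm or by the canonical name compared case-insensitively. Through the public API that gives the usual round trip (sketch):

#include <gcrypt.h>

static void
lookup_example (void)
{
  int algo = gcry_mac_map_name ("HMAC_SHA256");     /* 0 if unknown */
  const char *name = gcry_mac_get_algo_name (algo); /* "?" if unknown */

  (void)name;
}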
+ */ +const char * +_gcry_mac_algo_name (int algorithm) +{ + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algorithm); + return spec ? spec->name : "?"; +} + + +static gcry_err_code_t +check_mac_algo (int algorithm) +{ + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algorithm); + if (spec && !spec->flags.disabled) + return 0; + + return GPG_ERR_MAC_ALGO; +} + + +/**************** + * Open a message digest handle for use with algorithm ALGO. + */ +static gcry_err_code_t +mac_open (gcry_mac_hd_t * hd, int algo, int secure, gcry_ctx_t ctx) +{ + gcry_mac_spec_t *spec; + gcry_err_code_t err; + gcry_mac_hd_t h; + + spec = spec_from_algo (algo); + if (!spec) + return GPG_ERR_MAC_ALGO; + else if (spec->flags.disabled) + return GPG_ERR_MAC_ALGO; + else if (!spec->ops) + return GPG_ERR_MAC_ALGO; + else if (!spec->ops->open || !spec->ops->write || !spec->ops->setkey || + !spec->ops->read || !spec->ops->verify || !spec->ops->reset) + return GPG_ERR_MAC_ALGO; + + if (secure) + h = xtrycalloc_secure (1, sizeof (*h)); + else + h = xtrycalloc (1, sizeof (*h)); + + if (!h) + return gpg_err_code_from_syserror (); + + h->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL; + h->spec = spec; + h->algo = algo; + h->gcry_ctx = ctx; + + err = h->spec->ops->open (h); + if (err) + xfree (h); + else + *hd = h; + + return err; +} + + +static gcry_err_code_t +mac_reset (gcry_mac_hd_t hd) +{ + if (hd->spec->ops->reset) + return hd->spec->ops->reset (hd); + + return 0; +} + + +static void +mac_close (gcry_mac_hd_t hd) +{ + if (hd->spec->ops->close) + hd->spec->ops->close (hd); + + wipememory (hd, sizeof (*hd)); + + xfree (hd); +} + + +static gcry_err_code_t +mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen) +{ + if (!hd->spec->ops->setkey) + return GPG_ERR_INV_ARG; + if (keylen > 0 && !key) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->setkey (hd, key, keylen); +} + + +static gcry_err_code_t +mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen) +{ + if (!hd->spec->ops->setiv) + return GPG_ERR_INV_ARG; + if (ivlen > 0 && !iv) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->setiv (hd, iv, ivlen); +} + + +static gcry_err_code_t +mac_write (gcry_mac_hd_t hd, const void *inbuf, size_t inlen) +{ + if (!hd->spec->ops->write) + return GPG_ERR_INV_ARG; + if (inlen > 0 && !inbuf) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->write (hd, inbuf, inlen); +} + + +static gcry_err_code_t +mac_read (gcry_mac_hd_t hd, void *outbuf, size_t * outlen) +{ + if (!outbuf || !outlen || *outlen == 0 || !hd->spec->ops->read) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->read (hd, outbuf, outlen); +} + + +static gcry_err_code_t +mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen) +{ + if (!buf || buflen == 0 || !hd->spec->ops->verify) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->verify (hd, buf, buflen); +} + + +/* Create a MAC object for algorithm ALGO. FLAGS may be + given as an bitwise OR of the gcry_mac_flags values. + H is guaranteed to be a valid handle or NULL on error. */ +gpg_err_code_t +_gcry_mac_open (gcry_mac_hd_t * h, int algo, unsigned int flags, + gcry_ctx_t ctx) +{ + gcry_err_code_t rc; + gcry_mac_hd_t hd = NULL; + + if ((flags & ~GCRY_MAC_FLAG_SECURE)) + rc = GPG_ERR_INV_ARG; + else + rc = mac_open (&hd, algo, !!(flags & GCRY_MAC_FLAG_SECURE), ctx); + + *h = rc ? 
NULL : hd; + return rc; +} + + +void +_gcry_mac_close (gcry_mac_hd_t hd) +{ + if (hd) + mac_close (hd); +} + + +gcry_err_code_t +_gcry_mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen) +{ + return mac_setkey (hd, key, keylen); +} + + +gcry_err_code_t +_gcry_mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen) +{ + return mac_setiv (hd, iv, ivlen); +} + + +gcry_err_code_t +_gcry_mac_write (gcry_mac_hd_t hd, const void *inbuf, size_t inlen) +{ + return mac_write (hd, inbuf, inlen); +} + + +gcry_err_code_t +_gcry_mac_read (gcry_mac_hd_t hd, void *outbuf, size_t * outlen) +{ + return mac_read (hd, outbuf, outlen); +} + + +gcry_err_code_t +_gcry_mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen) +{ + return mac_verify (hd, buf, buflen); +} + + +int +_gcry_mac_get_algo (gcry_mac_hd_t hd) +{ + return hd->algo; +} + + +unsigned int +_gcry_mac_get_algo_maclen (int algo) +{ + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algo); + if (!spec || !spec->ops || !spec->ops->get_maclen) + return 0; + + return spec->ops->get_maclen (algo); +} + + +unsigned int +_gcry_mac_get_algo_keylen (int algo) +{ + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algo); + if (!spec || !spec->ops || !spec->ops->get_keylen) + return 0; + + return spec->ops->get_keylen (algo); +} + + +gcry_err_code_t +_gcry_mac_ctl (gcry_mac_hd_t hd, int cmd, void *buffer, size_t buflen) +{ + gcry_err_code_t rc; + + /* Currently not used. */ + (void) hd; + (void) buffer; + (void) buflen; + + switch (cmd) + { + case GCRYCTL_RESET: + rc = mac_reset (hd); + break; + default: + rc = GPG_ERR_INV_OP; + } + return rc; +} + + +/* Return information about the given MAC algorithm ALGO. + + GCRYCTL_TEST_ALGO: + Returns 0 if the specified algorithm ALGO is available for use. + BUFFER and NBYTES must be zero. + + Note: Because this function is in most cases used to return an + integer value, we can make it easier for the caller to just look at + the return value. The caller will in all cases consult the value + and thereby detecting whether a error occurred or not (i.e. while + checking the block size) + */ +gcry_err_code_t +_gcry_mac_algo_info (int algo, int what, void *buffer, size_t * nbytes) +{ + gcry_err_code_t rc = 0; + unsigned int ui; + + switch (what) + { + case GCRYCTL_GET_KEYLEN: + if (buffer || (!nbytes)) + rc = GPG_ERR_INV_ARG; + else + { + ui = _gcry_mac_get_algo_keylen (algo); + if (ui > 0) + *nbytes = (size_t) ui; + else + /* The only reason for an error is an invalid algo. */ + rc = GPG_ERR_MAC_ALGO; + } + break; + case GCRYCTL_TEST_ALGO: + if (buffer || nbytes) + rc = GPG_ERR_INV_ARG; + else + rc = check_mac_algo (algo); + break; + + default: + rc = GPG_ERR_INV_OP; + } + + return rc; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/md.c b/libotr/libgcrypt-1.8.7/cipher/md.c new file mode 100644 index 0000000..c1f585f --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/md.c @@ -0,0 +1,1478 @@ +/* md.c - message digest dispatcher + * Copyright (C) 1998, 1999, 2002, 2003, 2006, + * 2008 Free Software Foundation, Inc. + * Copyright (C) 2013, 2014 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
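The GCRYCTL branches of _gcry_mac_algo_info above back the public convenience queries, for example (sketch):

#include <gcrypt.h>

static void
query_example (void)
{
  /* gcry_mac_test_algo() is a macro around GCRYCTL_TEST_ALGO and
     returns 0 when the algorithm is available.  */
  unsigned int klen = gcry_mac_get_algo_keylen (GCRY_MAC_CMAC_AES); /* 16 */
  unsigned int mlen = gcry_mac_get_algo_maclen (GCRY_MAC_CMAC_AES); /* 16 */
  int available = !gcry_mac_test_algo (GCRY_MAC_CMAC_AES);

  (void)klen; (void)mlen; (void)available;
}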
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" + + +/* This is the list of the digest implementations included in + libgcrypt. */ +static gcry_md_spec_t *digest_list[] = + { +#if USE_CRC + &_gcry_digest_spec_crc32, + &_gcry_digest_spec_crc32_rfc1510, + &_gcry_digest_spec_crc24_rfc2440, +#endif +#if USE_SHA1 + &_gcry_digest_spec_sha1, +#endif +#if USE_SHA256 + &_gcry_digest_spec_sha256, + &_gcry_digest_spec_sha224, +#endif +#if USE_SHA512 + &_gcry_digest_spec_sha512, + &_gcry_digest_spec_sha384, +#endif +#if USE_SHA3 + &_gcry_digest_spec_sha3_224, + &_gcry_digest_spec_sha3_256, + &_gcry_digest_spec_sha3_384, + &_gcry_digest_spec_sha3_512, + &_gcry_digest_spec_shake128, + &_gcry_digest_spec_shake256, +#endif +#if USE_GOST_R_3411_94 + &_gcry_digest_spec_gost3411_94, + &_gcry_digest_spec_gost3411_cp, +#endif +#if USE_GOST_R_3411_12 + &_gcry_digest_spec_stribog_256, + &_gcry_digest_spec_stribog_512, +#endif +#if USE_WHIRLPOOL + &_gcry_digest_spec_whirlpool, +#endif +#if USE_RMD160 + &_gcry_digest_spec_rmd160, +#endif +#if USE_TIGER + &_gcry_digest_spec_tiger, + &_gcry_digest_spec_tiger1, + &_gcry_digest_spec_tiger2, +#endif +#if USE_MD5 + &_gcry_digest_spec_md5, +#endif +#if USE_MD4 + &_gcry_digest_spec_md4, +#endif +#if USE_MD2 + &_gcry_digest_spec_md2, +#endif +#if USE_BLAKE2 + &_gcry_digest_spec_blake2b_512, + &_gcry_digest_spec_blake2b_384, + &_gcry_digest_spec_blake2b_256, + &_gcry_digest_spec_blake2b_160, + &_gcry_digest_spec_blake2s_256, + &_gcry_digest_spec_blake2s_224, + &_gcry_digest_spec_blake2s_160, + &_gcry_digest_spec_blake2s_128, +#endif + NULL + }; + + +typedef struct gcry_md_list +{ + gcry_md_spec_t *spec; + struct gcry_md_list *next; + size_t actual_struct_size; /* Allocated size of this structure. */ + PROPERLY_ALIGNED_TYPE context; +} GcryDigestEntry; + +/* This structure is put right after the gcry_md_hd_t buffer, so that + * only one memory block is needed. */ +struct gcry_md_context +{ + int magic; + size_t actual_handle_size; /* Allocated size of this handle. */ + FILE *debug; + struct { + unsigned int secure: 1; + unsigned int finalized:1; + unsigned int bugemu1:1; + unsigned int hmac:1; + } flags; + GcryDigestEntry *list; +}; + + +#define CTX_MAGIC_NORMAL 0x11071961 +#define CTX_MAGIC_SECURE 0x16917011 + +static gcry_err_code_t md_enable (gcry_md_hd_t hd, int algo); +static void md_close (gcry_md_hd_t a); +static void md_write (gcry_md_hd_t a, const void *inbuf, size_t inlen); +static byte *md_read( gcry_md_hd_t a, int algo ); +static int md_get_algo( gcry_md_hd_t a ); +static int md_digest_length( int algo ); +static void md_start_debug ( gcry_md_hd_t a, const char *suffix ); +static void md_stop_debug ( gcry_md_hd_t a ); + + + +static int +map_algo (int algo) +{ + return algo; +} + + +/* Return the spec structure for the hash algorithm ALGO. For an + unknown algorithm NULL is returned. 
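+   An illustrative use (hypothetical caller, not part of the original
+   sources):
+
+     gcry_md_spec_t *spec = spec_from_algo (GCRY_MD_SHA256);
+     if (spec)
+       printf ("%s: %d byte digest\n", spec->name, spec->mdlen);
+
+   which prints "SHA256: 32 byte digest" when SHA-256 support is
+   compiled in.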
*/ +static gcry_md_spec_t * +spec_from_algo (int algo) +{ + int idx; + gcry_md_spec_t *spec; + + algo = map_algo (algo); + + for (idx = 0; (spec = digest_list[idx]); idx++) + if (algo == spec->algo) + return spec; + return NULL; +} + + +/* Lookup a hash's spec by its name. */ +static gcry_md_spec_t * +spec_from_name (const char *name) +{ + gcry_md_spec_t *spec; + int idx; + + for (idx=0; (spec = digest_list[idx]); idx++) + { + if (!stricmp (name, spec->name)) + return spec; + } + + return NULL; +} + + +/* Lookup a hash's spec by its OID. */ +static gcry_md_spec_t * +spec_from_oid (const char *oid) +{ + gcry_md_spec_t *spec; + gcry_md_oid_spec_t *oid_specs; + int idx, j; + + for (idx=0; (spec = digest_list[idx]); idx++) + { + oid_specs = spec->oids; + if (oid_specs) + { + for (j = 0; oid_specs[j].oidstring; j++) + if (!stricmp (oid, oid_specs[j].oidstring)) + return spec; + } + } + + return NULL; +} + + +static gcry_md_spec_t * +search_oid (const char *oid, gcry_md_oid_spec_t *oid_spec) +{ + gcry_md_spec_t *spec; + int i; + + if (!oid) + return NULL; + + if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4)) + oid += 4; + + spec = spec_from_oid (oid); + if (spec && spec->oids) + { + for (i = 0; spec->oids[i].oidstring; i++) + if (!stricmp (oid, spec->oids[i].oidstring)) + { + if (oid_spec) + *oid_spec = spec->oids[i]; + return spec; + } + } + + return NULL; +} + + +/**************** + * Map a string to the digest algo + */ +int +_gcry_md_map_name (const char *string) +{ + gcry_md_spec_t *spec; + + if (!string) + return 0; + + /* If the string starts with a digit (optionally prefixed with + either "OID." or "oid."), we first look into our table of ASN.1 + object identifiers to figure out the algorithm */ + spec = search_oid (string, NULL); + if (spec) + return spec->algo; + + /* Not found, search a matching digest name. */ + spec = spec_from_name (string); + if (spec) + return spec->algo; + + return 0; +} + + +/**************** + * This function simply returns the name of the algorithm or some constant + * string when there is no algo. It will never return NULL. + * Use the macro gcry_md_test_algo() to check whether the algorithm + * is valid. + */ +const char * +_gcry_md_algo_name (int algorithm) +{ + gcry_md_spec_t *spec; + + spec = spec_from_algo (algorithm); + return spec ? spec->name : "?"; +} + + +static gcry_err_code_t +check_digest_algo (int algorithm) +{ + gcry_md_spec_t *spec; + + spec = spec_from_algo (algorithm); + if (spec && !spec->flags.disabled) + return 0; + + return GPG_ERR_DIGEST_ALGO; + +} + + +/**************** + * Open a message digest handle for use with algorithm ALGO. + * More algorithms may be added by md_enable(). The initial algorithm + * may be 0. + */ +static gcry_err_code_t +md_open (gcry_md_hd_t *h, int algo, unsigned int flags) +{ + gcry_err_code_t err = 0; + int secure = !!(flags & GCRY_MD_FLAG_SECURE); + int hmac = !!(flags & GCRY_MD_FLAG_HMAC); + int bufsize = secure ? 512 : 1024; + struct gcry_md_context *ctx; + gcry_md_hd_t hd; + size_t n; + + /* Allocate a memory area to hold the caller visible buffer with it's + * control information and the data required by this module. Set the + * context pointer at the beginning to this area. + * We have to use this strange scheme because we want to hide the + * internal data but have a variable sized buffer. + * + * +---+------+---........------+-------------+ + * !ctx! bctl ! buffer ! private ! + * +---+------+---........------+-------------+ + * ! ^ + * !---------------------------! 
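+ * As a worked example of the layout above (sizes hypothetical): with
+ * bufsize = 1024, sizeof (struct gcry_md_handle) = 32 and
+ * sizeof (PROPERLY_ALIGNED_TYPE) = 8, n = 1056 is already a multiple
+ * of 8, so no padding is needed and the private part starts at
+ * offset 1056.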
+ * + * We have to make sure that private is well aligned. + */ + n = sizeof (struct gcry_md_handle) + bufsize; + n = ((n + sizeof (PROPERLY_ALIGNED_TYPE) - 1) + / sizeof (PROPERLY_ALIGNED_TYPE)) * sizeof (PROPERLY_ALIGNED_TYPE); + + /* Allocate and set the Context pointer to the private data */ + if (secure) + hd = xtrymalloc_secure (n + sizeof (struct gcry_md_context)); + else + hd = xtrymalloc (n + sizeof (struct gcry_md_context)); + + if (! hd) + err = gpg_err_code_from_errno (errno); + + if (! err) + { + hd->ctx = ctx = (void *) ((char *) hd + n); + /* Setup the globally visible data (bctl in the diagram).*/ + hd->bufsize = n - sizeof (struct gcry_md_handle) + 1; + hd->bufpos = 0; + + /* Initialize the private data. */ + memset (hd->ctx, 0, sizeof *hd->ctx); + ctx->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL; + ctx->actual_handle_size = n + sizeof (struct gcry_md_context); + ctx->flags.secure = secure; + ctx->flags.hmac = hmac; + ctx->flags.bugemu1 = !!(flags & GCRY_MD_FLAG_BUGEMU1); + } + + if (! err) + { + /* Hmmm, should we really do that? - yes [-wk] */ + _gcry_fast_random_poll (); + + if (algo) + { + err = md_enable (hd, algo); + if (err) + md_close (hd); + } + } + + if (! err) + *h = hd; + + return err; +} + +/* Create a message digest object for algorithm ALGO. FLAGS may be + given as an bitwise OR of the gcry_md_flags values. ALGO may be + given as 0 if the algorithms to be used are later set using + gcry_md_enable. H is guaranteed to be a valid handle or NULL on + error. */ +gcry_err_code_t +_gcry_md_open (gcry_md_hd_t *h, int algo, unsigned int flags) +{ + gcry_err_code_t rc; + gcry_md_hd_t hd; + + if ((flags & ~(GCRY_MD_FLAG_SECURE + | GCRY_MD_FLAG_HMAC + | GCRY_MD_FLAG_BUGEMU1))) + rc = GPG_ERR_INV_ARG; + else + rc = md_open (&hd, algo, flags); + + *h = rc? NULL : hd; + return rc; +} + + + +static gcry_err_code_t +md_enable (gcry_md_hd_t hd, int algorithm) +{ + struct gcry_md_context *h = hd->ctx; + gcry_md_spec_t *spec; + GcryDigestEntry *entry; + gcry_err_code_t err = 0; + + for (entry = h->list; entry; entry = entry->next) + if (entry->spec->algo == algorithm) + return 0; /* Already enabled */ + + spec = spec_from_algo (algorithm); + if (!spec) + { + log_debug ("md_enable: algorithm %d not available\n", algorithm); + err = GPG_ERR_DIGEST_ALGO; + } + + + if (!err && algorithm == GCRY_MD_MD5 && fips_mode ()) + { + _gcry_inactivate_fips_mode ("MD5 used"); + if (_gcry_enforced_fips_mode () ) + { + /* We should never get to here because we do not register + MD5 in enforced fips mode. But better throw an error. */ + err = GPG_ERR_DIGEST_ALGO; + } + } + + if (!err && h->flags.hmac && spec->read == NULL) + { + /* Expandable output function cannot act as part of HMAC. */ + err = GPG_ERR_DIGEST_ALGO; + } + + if (!err) + { + size_t size = (sizeof (*entry) + + spec->contextsize * (h->flags.hmac? 3 : 1) + - sizeof (entry->context)); + + /* And allocate a new list entry. */ + if (h->flags.secure) + entry = xtrymalloc_secure (size); + else + entry = xtrymalloc (size); + + if (! entry) + err = gpg_err_code_from_errno (errno); + else + { + entry->spec = spec; + entry->next = h->list; + entry->actual_struct_size = size; + h->list = entry; + + /* And init this instance. */ + entry->spec->init (&entry->context.c, + h->flags.bugemu1? 
GCRY_MD_FLAG_BUGEMU1:0); + } + } + + return err; +} + + +gcry_err_code_t +_gcry_md_enable (gcry_md_hd_t hd, int algorithm) +{ + return md_enable (hd, algorithm); +} + + +static gcry_err_code_t +md_copy (gcry_md_hd_t ahd, gcry_md_hd_t *b_hd) +{ + gcry_err_code_t err = 0; + struct gcry_md_context *a = ahd->ctx; + struct gcry_md_context *b; + GcryDigestEntry *ar, *br; + gcry_md_hd_t bhd; + size_t n; + + if (ahd->bufpos) + md_write (ahd, NULL, 0); + + n = (char *) ahd->ctx - (char *) ahd; + if (a->flags.secure) + bhd = xtrymalloc_secure (n + sizeof (struct gcry_md_context)); + else + bhd = xtrymalloc (n + sizeof (struct gcry_md_context)); + + if (!bhd) + { + err = gpg_err_code_from_syserror (); + goto leave; + } + + bhd->ctx = b = (void *) ((char *) bhd + n); + /* No need to copy the buffer due to the write above. */ + gcry_assert (ahd->bufsize == (n - sizeof (struct gcry_md_handle) + 1)); + bhd->bufsize = ahd->bufsize; + bhd->bufpos = 0; + gcry_assert (! ahd->bufpos); + memcpy (b, a, sizeof *a); + b->list = NULL; + b->debug = NULL; + + /* Copy the complete list of algorithms. The copied list is + reversed, but that doesn't matter. */ + for (ar = a->list; ar; ar = ar->next) + { + if (a->flags.secure) + br = xtrymalloc_secure (ar->actual_struct_size); + else + br = xtrymalloc (ar->actual_struct_size); + if (!br) + { + err = gpg_err_code_from_syserror (); + md_close (bhd); + goto leave; + } + + memcpy (br, ar, ar->actual_struct_size); + br->next = b->list; + b->list = br; + } + + if (a->debug) + md_start_debug (bhd, "unknown"); + + *b_hd = bhd; + + leave: + return err; +} + + +gcry_err_code_t +_gcry_md_copy (gcry_md_hd_t *handle, gcry_md_hd_t hd) +{ + gcry_err_code_t rc; + + rc = md_copy (hd, handle); + if (rc) + *handle = NULL; + return rc; +} + + +/* + * Reset all contexts and discard any buffered stuff. This may be used + * instead of a md_close(); md_open(). + */ +void +_gcry_md_reset (gcry_md_hd_t a) +{ + GcryDigestEntry *r; + + /* Note: We allow this even in fips non operational mode. */ + + a->bufpos = a->ctx->flags.finalized = 0; + + if (a->ctx->flags.hmac) + for (r = a->ctx->list; r; r = r->next) + { + memcpy (r->context.c, r->context.c + r->spec->contextsize, + r->spec->contextsize); + } + else + for (r = a->ctx->list; r; r = r->next) + { + memset (r->context.c, 0, r->spec->contextsize); + (*r->spec->init) (&r->context.c, + a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0); + } +} + + +static void +md_close (gcry_md_hd_t a) +{ + GcryDigestEntry *r, *r2; + + if (! a) + return; + if (a->ctx->debug) + md_stop_debug (a); + for (r = a->ctx->list; r; r = r2) + { + r2 = r->next; + wipememory (r, r->actual_struct_size); + xfree (r); + } + + wipememory (a, a->ctx->actual_handle_size); + xfree(a); +} + + +void +_gcry_md_close (gcry_md_hd_t hd) +{ + /* Note: We allow this even in fips non operational mode. */ + md_close (hd); +} + + +static void +md_write (gcry_md_hd_t a, const void *inbuf, size_t inlen) +{ + GcryDigestEntry *r; + + if (a->ctx->debug) + { + if (a->bufpos && fwrite (a->buf, a->bufpos, 1, a->ctx->debug) != 1) + BUG(); + if (inlen && fwrite (inbuf, inlen, 1, a->ctx->debug) != 1) + BUG(); + } + + for (r = a->ctx->list; r; r = r->next) + { + if (a->bufpos) + (*r->spec->write) (&r->context.c, a->buf, a->bufpos); + (*r->spec->write) (&r->context.c, inbuf, inlen); + } + a->bufpos = 0; +} + + +/* Note that this function may be used after finalize and read to keep + on writing to the transform function so to mitigate timing + attacks. 
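+   A hypothetical caller-side sketch of that mitigation (not part of
+   the original sources):
+
+     _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+     digest = _gcry_md_read (hd, 0);
+     _gcry_md_write (hd, dummy, dummylen);
+
+   where the extra write still feeds the compression function but its
+   effect on the digest is ignored, evening out total processing time.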
*/ +void +_gcry_md_write (gcry_md_hd_t hd, const void *inbuf, size_t inlen) +{ + md_write (hd, inbuf, inlen); +} + + +static void +md_final (gcry_md_hd_t a) +{ + GcryDigestEntry *r; + + if (a->ctx->flags.finalized) + return; + + if (a->bufpos) + md_write (a, NULL, 0); + + for (r = a->ctx->list; r; r = r->next) + (*r->spec->final) (&r->context.c); + + a->ctx->flags.finalized = 1; + + if (!a->ctx->flags.hmac) + return; + + for (r = a->ctx->list; r; r = r->next) + { + byte *p; + size_t dlen = r->spec->mdlen; + byte *hash; + gcry_err_code_t err; + + if (r->spec->read == NULL) + continue; + + p = r->spec->read (&r->context.c); + + if (a->ctx->flags.secure) + hash = xtrymalloc_secure (dlen); + else + hash = xtrymalloc (dlen); + if (!hash) + { + err = gpg_err_code_from_errno (errno); + _gcry_fatal_error (err, NULL); + } + + memcpy (hash, p, dlen); + memcpy (r->context.c, r->context.c + r->spec->contextsize * 2, + r->spec->contextsize); + (*r->spec->write) (&r->context.c, hash, dlen); + (*r->spec->final) (&r->context.c); + xfree (hash); + } +} + + +static gcry_err_code_t +md_setkey (gcry_md_hd_t h, const unsigned char *key, size_t keylen) +{ + gcry_err_code_t rc = 0; + GcryDigestEntry *r; + int algo_had_setkey = 0; + + if (!h->ctx->list) + return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled. */ + + if (h->ctx->flags.hmac) + return GPG_ERR_DIGEST_ALGO; /* Tried md_setkey for HMAC md. */ + + for (r = h->ctx->list; r; r = r->next) + { + switch (r->spec->algo) + { + /* TODO? add spec->init_with_key? */ + case GCRY_MD_BLAKE2B_512: + case GCRY_MD_BLAKE2B_384: + case GCRY_MD_BLAKE2B_256: + case GCRY_MD_BLAKE2B_160: + case GCRY_MD_BLAKE2S_256: + case GCRY_MD_BLAKE2S_224: + case GCRY_MD_BLAKE2S_160: + case GCRY_MD_BLAKE2S_128: + algo_had_setkey = 1; + memset (r->context.c, 0, r->spec->contextsize); + rc = _gcry_blake2_init_with_key (r->context.c, + h->ctx->flags.bugemu1 + ? GCRY_MD_FLAG_BUGEMU1:0, + key, keylen, r->spec->algo); + break; + default: + rc = GPG_ERR_DIGEST_ALGO; + break; + } + + if (rc) + break; + } + + if (rc && !algo_had_setkey) + { + /* None of algorithms had setkey implementation, so contexts were not + * modified. Just return error. */ + return rc; + } + else if (rc && algo_had_setkey) + { + /* Some of the contexts have been modified, but got error. Reset + * all contexts. */ + _gcry_md_reset (h); + return rc; + } + + /* Successful md_setkey implies reset. */ + h->bufpos = h->ctx->flags.finalized = 0; + + return 0; +} + + +static gcry_err_code_t +prepare_macpads (gcry_md_hd_t a, const unsigned char *key, size_t keylen) +{ + GcryDigestEntry *r; + + if (!a->ctx->list) + return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled. */ + + if (!a->ctx->flags.hmac) + return GPG_ERR_DIGEST_ALGO; /* Tried prepare_macpads for non-HMAC md. 
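+     For reference, the pads computed below implement RFC 2104:
+       HMAC(K, m) = H((K XOR opad) || H((K XOR ipad) || m))
+     with ipad bytes 0x36 and opad bytes 0x5c repeated to the block
+     size macpad_Bsize, and a key longer than the block size first
+     replaced by its digest.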
*/ + + for (r = a->ctx->list; r; r = r->next) + { + const unsigned char *k; + size_t k_len; + unsigned char *key_allocated = NULL; + int macpad_Bsize; + int i; + + switch (r->spec->algo) + { + /* TODO: add spec->blocksize */ + case GCRY_MD_SHA3_224: + macpad_Bsize = 1152 / 8; + break; + case GCRY_MD_SHA3_256: + macpad_Bsize = 1088 / 8; + break; + case GCRY_MD_SHA3_384: + macpad_Bsize = 832 / 8; + break; + case GCRY_MD_SHA3_512: + macpad_Bsize = 576 / 8; + break; + case GCRY_MD_SHA384: + case GCRY_MD_SHA512: + case GCRY_MD_BLAKE2B_512: + case GCRY_MD_BLAKE2B_384: + case GCRY_MD_BLAKE2B_256: + case GCRY_MD_BLAKE2B_160: + macpad_Bsize = 128; + break; + case GCRY_MD_GOSTR3411_94: + case GCRY_MD_GOSTR3411_CP: + macpad_Bsize = 32; + break; + default: + macpad_Bsize = 64; + break; + } + + if ( keylen > macpad_Bsize ) + { + k = key_allocated = xtrymalloc_secure (r->spec->mdlen); + if (!k) + return gpg_err_code_from_errno (errno); + _gcry_md_hash_buffer (r->spec->algo, key_allocated, key, keylen); + k_len = r->spec->mdlen; + gcry_assert ( k_len <= macpad_Bsize ); + } + else + { + k = key; + k_len = keylen; + } + + (*r->spec->init) (&r->context.c, + a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0); + a->bufpos = 0; + for (i=0; i < k_len; i++ ) + _gcry_md_putc (a, k[i] ^ 0x36); + for (; i < macpad_Bsize; i++ ) + _gcry_md_putc (a, 0x36); + (*r->spec->write) (&r->context.c, a->buf, a->bufpos); + memcpy (r->context.c + r->spec->contextsize, r->context.c, + r->spec->contextsize); + + (*r->spec->init) (&r->context.c, + a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0); + a->bufpos = 0; + for (i=0; i < k_len; i++ ) + _gcry_md_putc (a, k[i] ^ 0x5c); + for (; i < macpad_Bsize; i++ ) + _gcry_md_putc (a, 0x5c); + (*r->spec->write) (&r->context.c, a->buf, a->bufpos); + memcpy (r->context.c + r->spec->contextsize*2, r->context.c, + r->spec->contextsize); + + xfree (key_allocated); + } + + a->bufpos = 0; + return 0; +} + + +gcry_err_code_t +_gcry_md_ctl (gcry_md_hd_t hd, int cmd, void *buffer, size_t buflen) +{ + gcry_err_code_t rc = 0; + + (void)buflen; /* Currently not used. */ + + switch (cmd) + { + case GCRYCTL_FINALIZE: + md_final (hd); + break; + case GCRYCTL_START_DUMP: + md_start_debug (hd, buffer); + break; + case GCRYCTL_STOP_DUMP: + md_stop_debug ( hd ); + break; + default: + rc = GPG_ERR_INV_OP; + } + return rc; +} + + +gcry_err_code_t +_gcry_md_setkey (gcry_md_hd_t hd, const void *key, size_t keylen) +{ + gcry_err_code_t rc; + + if (hd->ctx->flags.hmac) + { + rc = prepare_macpads (hd, key, keylen); + if (!rc) + _gcry_md_reset (hd); + } + else + { + rc = md_setkey (hd, key, keylen); + } + + return rc; +} + + +/* The new debug interface. If SUFFIX is a string it creates an debug + file for the context HD. IF suffix is NULL, the file is closed and + debugging is stopped. */ +void +_gcry_md_debug (gcry_md_hd_t hd, const char *suffix) +{ + if (suffix) + md_start_debug (hd, suffix); + else + md_stop_debug (hd); +} + + +/**************** + * If ALGO is null get the digest for the used algo (which should be + * only one) + */ +static byte * +md_read( gcry_md_hd_t a, int algo ) +{ + GcryDigestEntry *r = a->ctx->list; + + if (! 
algo)
+    {
+      /* Return the first algorithm */
+      if (r)
+        {
+          if (r->next)
+            log_debug ("more than one algorithm in md_read(0)\n");
+          if (r->spec->read)
+            return r->spec->read (&r->context.c);
+        }
+    }
+  else
+    {
+      for (r = a->ctx->list; r; r = r->next)
+        if (r->spec->algo == algo)
+          {
+            if (r->spec->read)
+              return r->spec->read (&r->context.c);
+            break;
+          }
+    }
+
+  if (r && !r->spec->read)
+    _gcry_fatal_error (GPG_ERR_DIGEST_ALGO,
+                       "requested algo has no fixed digest length");
+  else
+    _gcry_fatal_error (GPG_ERR_DIGEST_ALGO, "requested algo not in md context");
+  return NULL;
+}
+
+
+/*
+ * Read out the complete digest; this function implicitly finalizes
+ * the hash.
+ */
+byte *
+_gcry_md_read (gcry_md_hd_t hd, int algo)
+{
+  /* This function is expected to always return a digest, thus we
+     can't return an error which we actually should do in
+     non-operational state.  */
+  _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+  return md_read (hd, algo);
+}
+
+
+/****************
+ * If ALGO is null get the digest for the used algo (which should be
+ * only one)
+ */
+static gcry_err_code_t
+md_extract (gcry_md_hd_t a, int algo, void *out, size_t outlen)
+{
+  GcryDigestEntry *r = a->ctx->list;
+
+  if (!algo)
+    {
+      /* Return the first algorithm */
+      if (r && r->spec->extract)
+        {
+          if (r->next)
+            log_debug ("more than one algorithm in md_extract(0)\n");
+          r->spec->extract (&r->context.c, out, outlen);
+          return 0;
+        }
+    }
+  else
+    {
+      for (r = a->ctx->list; r; r = r->next)
+        if (r->spec->algo == algo && r->spec->extract)
+          {
+            r->spec->extract (&r->context.c, out, outlen);
+            return 0;
+          }
+    }
+
+  return GPG_ERR_DIGEST_ALGO;
+}
+
+
+/*
+ * Expand the output from an XOF class digest; this function implicitly
+ * finalizes the hash.
+ */
+gcry_err_code_t
+_gcry_md_extract (gcry_md_hd_t hd, int algo, void *out, size_t outlen)
+{
+  _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+  return md_extract (hd, algo, out, outlen);
+}
+
+
+/*
+ * Read out an intermediate digest.  Not yet functional.
+ */
+gcry_err_code_t
+_gcry_md_get (gcry_md_hd_t hd, int algo, byte *buffer, int buflen)
+{
+  (void)hd;
+  (void)algo;
+  (void)buffer;
+  (void)buflen;
+
+  /*md_digest ... */
+  fips_signal_error ("unimplemented function called");
+  return GPG_ERR_INTERNAL;
+}
+
+
+/*
+ * Shortcut function to hash a buffer with a given algo. The only
+ * guaranteed supported algorithms are RIPE-MD160 and SHA-1. The
+ * supplied digest buffer must be large enough to store the resulting
+ * hash.  No error is returned, the function will abort on an invalid
+ * algo.  DISABLED_ALGOS are ignored here.  */
+void
+_gcry_md_hash_buffer (int algo, void *digest,
+                      const void *buffer, size_t length)
+{
+  if (0)
+    ;
+#if USE_SHA256
+  else if (algo == GCRY_MD_SHA256)
+    _gcry_sha256_hash_buffer (digest, buffer, length);
+#endif
+#if USE_SHA512
+  else if (algo == GCRY_MD_SHA512)
+    _gcry_sha512_hash_buffer (digest, buffer, length);
+#endif
+#if USE_SHA1
+  else if (algo == GCRY_MD_SHA1)
+    _gcry_sha1_hash_buffer (digest, buffer, length);
+#endif
+#if USE_RMD160
+  else if (algo == GCRY_MD_RMD160 && !fips_mode () )
+    _gcry_rmd160_hash_buffer (digest, buffer, length);
+#endif
+  else
+    {
+      /* For the others we do not have a fast function, so we use the
+         normal functions. */
+      gcry_md_hd_t h;
+      gpg_err_code_t err;
+
+      if (algo == GCRY_MD_MD5 && fips_mode ())
+        {
+          _gcry_inactivate_fips_mode ("MD5 used");
+          if (_gcry_enforced_fips_mode () )
+            {
+              /* We should never get to here because we do not register
+                 MD5 in enforced fips mode.
*/ + _gcry_fips_noreturn (); + } + } + + err = md_open (&h, algo, 0); + if (err) + log_bug ("gcry_md_open failed for algo %d: %s", + algo, gpg_strerror (gcry_error(err))); + md_write (h, (byte *) buffer, length); + md_final (h); + memcpy (digest, md_read (h, algo), md_digest_length (algo)); + md_close (h); + } +} + + +/* Shortcut function to hash multiple buffers with a given algo. In + contrast to gcry_md_hash_buffer, this function returns an error on + invalid arguments or on other problems; disabled algorithms are + _not_ ignored but flagged as an error. + + The data to sign is taken from the array IOV which has IOVCNT items. + + The only supported flag in FLAGS is GCRY_MD_FLAG_HMAC which turns + this function into a HMAC function; the first item in IOV is then + used as the key. + + On success 0 is returned and resulting hash or HMAC is stored at + DIGEST which must have been provided by the caller with an + appropriate length. */ +gpg_err_code_t +_gcry_md_hash_buffers (int algo, unsigned int flags, void *digest, + const gcry_buffer_t *iov, int iovcnt) +{ + int hmac; + + if (!iov || iovcnt < 0) + return GPG_ERR_INV_ARG; + if (flags & ~(GCRY_MD_FLAG_HMAC)) + return GPG_ERR_INV_ARG; + + hmac = !!(flags & GCRY_MD_FLAG_HMAC); + if (hmac && iovcnt < 1) + return GPG_ERR_INV_ARG; + + if (0) + ; +#if USE_SHA256 + else if (algo == GCRY_MD_SHA256 && !hmac) + _gcry_sha256_hash_buffers (digest, iov, iovcnt); +#endif +#if USE_SHA512 + else if (algo == GCRY_MD_SHA512 && !hmac) + _gcry_sha512_hash_buffers (digest, iov, iovcnt); +#endif +#if USE_SHA1 + else if (algo == GCRY_MD_SHA1 && !hmac) + _gcry_sha1_hash_buffers (digest, iov, iovcnt); +#endif + else + { + /* For the others we do not have a fast function, so we use the + normal functions. */ + gcry_md_hd_t h; + gpg_err_code_t rc; + int dlen; + + if (algo == GCRY_MD_MD5 && fips_mode ()) + { + _gcry_inactivate_fips_mode ("MD5 used"); + if (_gcry_enforced_fips_mode () ) + { + /* We should never get to here because we do not register + MD5 in enforced fips mode. */ + _gcry_fips_noreturn (); + } + } + + /* Detect SHAKE128 like algorithms which we can't use because + * our API does not allow for a variable length digest. */ + dlen = md_digest_length (algo); + if (!dlen) + return GPG_ERR_DIGEST_ALGO; + + rc = md_open (&h, algo, (hmac? GCRY_MD_FLAG_HMAC:0)); + if (rc) + return rc; + + if (hmac) + { + rc = _gcry_md_setkey (h, + (const char*)iov[0].data + iov[0].off, + iov[0].len); + if (rc) + { + md_close (h); + return rc; + } + iov++; iovcnt--; + } + for (;iovcnt; iov++, iovcnt--) + md_write (h, (const char*)iov[0].data + iov[0].off, iov[0].len); + md_final (h); + memcpy (digest, md_read (h, algo), dlen); + md_close (h); + } + + return 0; +} + + +static int +md_get_algo (gcry_md_hd_t a) +{ + GcryDigestEntry *r = a->ctx->list; + + if (r && r->next) + { + fips_signal_error ("possible usage error"); + log_error ("WARNING: more than one algorithm in md_get_algo()\n"); + } + return r ? r->spec->algo : 0; +} + + +int +_gcry_md_get_algo (gcry_md_hd_t hd) +{ + return md_get_algo (hd); +} + + +/**************** + * Return the length of the digest + */ +static int +md_digest_length (int algorithm) +{ + gcry_md_spec_t *spec; + + spec = spec_from_algo (algorithm); + return spec? spec->mdlen : 0; +} + + +/**************** + * Return the length of the digest in bytes. + * This function will return 0 in case of errors. 
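+ * For example, _gcry_md_get_algo_dlen (GCRY_MD_SHA256) yields 32 and
+ * _gcry_md_get_algo_dlen (GCRY_MD_SHA512) yields 64, while an XOF such
+ * as SHAKE128 yields 0 because it has no fixed digest length.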
+ */
+unsigned int
+_gcry_md_get_algo_dlen (int algorithm)
+{
+  return md_digest_length (algorithm);
+}
+
+
+/* Hmmm: add a mode to enumerate the OIDs
+ *      to make g10/sig-check.c more portable */
+static const byte *
+md_asn_oid (int algorithm, size_t *asnlen, size_t *mdlen)
+{
+  gcry_md_spec_t *spec;
+  const byte *asnoid = NULL;
+
+  spec = spec_from_algo (algorithm);
+  if (spec)
+    {
+      if (asnlen)
+        *asnlen = spec->asnlen;
+      if (mdlen)
+        *mdlen = spec->mdlen;
+      asnoid = spec->asnoid;
+    }
+  else
+    log_bug ("no ASN.1 OID for md algo %d\n", algorithm);
+
+  return asnoid;
+}
+
+
+/****************
+ * Return information about the given digest algorithm.
+ * WHAT selects the kind of information returned:
+ *  GCRYCTL_TEST_ALGO:
+ *      Returns 0 when the specified algorithm is available for use.
+ *      buffer and nbytes must be zero.
+ *  GCRYCTL_GET_ASNOID:
+ *      Return the ASNOID of the algorithm in buffer.  If buffer is NULL, only
+ *      the required length is returned.
+ *  GCRYCTL_SELFTEST
+ *      Helper for the regression tests - shall not be used by applications.
+ *
+ * Note: Because this function is in most cases used to return an
+ * integer value, we can make it easier for the caller to just look at
+ * the return value.  The caller will in all cases consult the value
+ * and thereby detect whether an error occurred or not (i.e. while checking
+ * the block size)
+ */
+gcry_err_code_t
+_gcry_md_algo_info (int algo, int what, void *buffer, size_t *nbytes)
+{
+  gcry_err_code_t rc;
+
+  switch (what)
+    {
+    case GCRYCTL_TEST_ALGO:
+      if (buffer || nbytes)
+        rc = GPG_ERR_INV_ARG;
+      else
+        rc = check_digest_algo (algo);
+      break;
+
+    case GCRYCTL_GET_ASNOID:
+      /* We need to check that the algo is available because
+         md_asn_oid would otherwise raise an assertion. */
+      rc = check_digest_algo (algo);
+      if (!rc)
+        {
+          const char unsigned *asn;
+          size_t asnlen;
+
+          asn = md_asn_oid (algo, &asnlen, NULL);
+          if (buffer && (*nbytes >= asnlen))
+            {
+              memcpy (buffer, asn, asnlen);
+              *nbytes = asnlen;
+            }
+          else if (!buffer && nbytes)
+            *nbytes = asnlen;
+          else
+            {
+              if (buffer)
+                rc = GPG_ERR_TOO_SHORT;
+              else
+                rc = GPG_ERR_INV_ARG;
+            }
+        }
+      break;
+
+    case GCRYCTL_SELFTEST:
+      /* Helper function for the regression tests.  */
+      rc = gpg_err_code (_gcry_md_selftest (algo, nbytes? (int)*nbytes : 0,
+                                            NULL));
+      break;
+
+    default:
+      rc = GPG_ERR_INV_OP;
+      break;
+    }
+
+  return rc;
+}
+
+
+static void
+md_start_debug ( gcry_md_hd_t md, const char *suffix )
+{
+  static int idx=0;
+  char buf[50];
+
+  if (fips_mode ())
+    return;
+
+  if ( md->ctx->debug )
+    {
+      log_debug("Oops: md debug already started\n");
+      return;
+    }
+  idx++;
+  snprintf (buf, DIM(buf)-1, "dbgmd-%05d.%.10s", idx, suffix );
+  md->ctx->debug = fopen(buf, "w");
+  if ( !md->ctx->debug )
+    log_debug("md debug: can't open %s\n", buf );
+}
+
+
+static void
+md_stop_debug( gcry_md_hd_t md )
+{
+  if ( md->ctx->debug )
+    {
+      if ( md->bufpos )
+        md_write ( md, NULL, 0 );
+      fclose (md->ctx->debug);
+      md->ctx->debug = NULL;
+    }
+
+  {  /* a kludge to pull in the __muldi3 for Solaris */
+    volatile u32 a = (u32)(uintptr_t)md;
+    volatile u64 b = 42;
+    volatile u64 c;
+    c = a * b;
+    (void)c;
+  }
+}
+
+
+
+/*
+ * Return information about the digest handle.
+ *  GCRYCTL_IS_SECURE:
+ *      Returns 1 when the handle works on secured memory
+ *      otherwise 0 is returned.  There is no error return.
+ *  GCRYCTL_IS_ALGO_ENABLED:
+ *     Returns 1 if the algo is enabled for that handle.
+ *     The algo must be passed as the address of an int.
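+ *
+ *  A hypothetical caller, mirroring _gcry_md_is_enabled further below:
+ *
+ *      int algo = GCRY_MD_SHA1;
+ *      size_t n = sizeof algo;
+ *      if (!_gcry_md_info (hd, GCRYCTL_IS_ALGO_ENABLED, &algo, &n) && n)
+ *        the algorithm is enabled on the handle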
+ */ +gcry_err_code_t +_gcry_md_info (gcry_md_hd_t h, int cmd, void *buffer, size_t *nbytes) +{ + gcry_err_code_t rc = 0; + + switch (cmd) + { + case GCRYCTL_IS_SECURE: + *nbytes = h->ctx->flags.secure; + break; + + case GCRYCTL_IS_ALGO_ENABLED: + { + GcryDigestEntry *r; + int algo; + + if ( !buffer || !nbytes || *nbytes != sizeof (int)) + rc = GPG_ERR_INV_ARG; + else + { + algo = *(int*)buffer; + + *nbytes = 0; + for(r=h->ctx->list; r; r = r->next ) { + if (r->spec->algo == algo) + { + *nbytes = 1; + break; + } + } + } + break; + } + + default: + rc = GPG_ERR_INV_OP; + } + + return rc; +} + + +/* Explicitly initialize this module. */ +gcry_err_code_t +_gcry_md_init (void) +{ + if (fips_mode()) + { + /* disable algorithms that are disallowed in fips */ + int idx; + gcry_md_spec_t *spec; + + for (idx = 0; (spec = digest_list[idx]); idx++) + if (!spec->flags.fips) + spec->flags.disabled = 1; + } + + return 0; +} + + +int +_gcry_md_is_secure (gcry_md_hd_t a) +{ + size_t value; + + if (_gcry_md_info (a, GCRYCTL_IS_SECURE, NULL, &value)) + value = 1; /* It seems to be better to assume secure memory on + error. */ + return value; +} + + +int +_gcry_md_is_enabled (gcry_md_hd_t a, int algo) +{ + size_t value; + + value = sizeof algo; + if (_gcry_md_info (a, GCRYCTL_IS_ALGO_ENABLED, &algo, &value)) + value = 0; + return value; +} + + +/* Run the selftests for digest algorithm ALGO with optional reporting + function REPORT. */ +gpg_error_t +_gcry_md_selftest (int algo, int extended, selftest_report_func_t report) +{ + gcry_err_code_t ec = 0; + gcry_md_spec_t *spec; + + spec = spec_from_algo (algo); + if (spec && !spec->flags.disabled && spec->selftest) + ec = spec->selftest (algo, extended, report); + else + { + ec = (spec && spec->selftest) ? GPG_ERR_DIGEST_ALGO + /* */ : GPG_ERR_NOT_IMPLEMENTED; + if (report) + report ("digest", algo, "module", + (spec && !spec->flags.disabled)? + "no selftest available" : + spec? "algorithm disabled" : "algorithm not found"); + } + + return gpg_error (ec); +} diff --git a/libotr/libgcrypt-1.8.7/cipher/md4.c b/libotr/libgcrypt-1.8.7/cipher/md4.c new file mode 100644 index 0000000..afa6382 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/md4.c @@ -0,0 +1,291 @@ +/* md4.c - MD4 Message-Digest Algorithm + * Copyright (C) 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Based on md5.c in libgcrypt, but rewritten to compute md4 checksums + * using a public domain md4 implementation with the following comments: + * + * Modified by Wei Dai from Andrew M. Kuchling's md4.c + * The original code and all modifications are in the public domain. + * + * This is the original introductory comment: + * + * md4.c : MD4 hash algorithm. 
+ * + * Part of the Python Cryptography Toolkit, version 1.1 + * + * Distribute and use freely; there are no restrictions on further + * dissemination and usage except those imposed by the laws of your + * country of residence. + * + */ + +/* MD4 test suite: + * MD4 ("") = 31d6cfe0d16ae931b73c59d7e0c089c0 + * MD4 ("a") = bde52cb31de33e46245e05fbdbd6fb24 + * MD4 ("abc") = a448017aaf21d8525fc10ae87aa6729d + * MD4 ("message digest") = d9130a8164549fe818874806e1c7014b + * MD4 ("abcdefghijklmnopqrstuvwxyz") = d79e1c308aa5bbcdeea8ed63df412da9 + * MD4 ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") = + * 043f8582f241db351ce627e153e7f0e4 + * MD4 ("123456789012345678901234567890123456789012345678901234567890123456 + * 78901234567890") = e33b4ddc9c38f2199c3e7b164fcc0536 + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "cipher.h" + +#include "bithelp.h" +#include "bufhelp.h" +#include "hash-common.h" + + +typedef struct { + gcry_md_block_ctx_t bctx; + u32 A,B,C,D; /* chaining variables */ +} MD4_CONTEXT; + +static unsigned int +transform ( void *c, const unsigned char *data, size_t nblks ); + +static void +md4_init (void *context, unsigned int flags) +{ + MD4_CONTEXT *ctx = context; + + (void)flags; + + ctx->A = 0x67452301; + ctx->B = 0xefcdab89; + ctx->C = 0x98badcfe; + ctx->D = 0x10325476; + + ctx->bctx.nblocks = 0; + ctx->bctx.nblocks_high = 0; + ctx->bctx.count = 0; + ctx->bctx.blocksize = 64; + ctx->bctx.bwrite = transform; +} + +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + + +/**************** + * transform 64 bytes + */ +static unsigned int +transform_blk ( void *c, const unsigned char *data ) +{ + MD4_CONTEXT *ctx = c; + u32 in[16]; + register u32 A = ctx->A; + register u32 B = ctx->B; + register u32 C = ctx->C; + register u32 D = ctx->D; + int i; + + for ( i = 0; i < 16; i++ ) + in[i] = buf_get_le32(data + i * 4); + + /* Round 1. */ +#define function(a,b,c,d,k,s) a=rol(a+F(b,c,d)+in[k],s); + function(A,B,C,D, 0, 3); + function(D,A,B,C, 1, 7); + function(C,D,A,B, 2,11); + function(B,C,D,A, 3,19); + function(A,B,C,D, 4, 3); + function(D,A,B,C, 5, 7); + function(C,D,A,B, 6,11); + function(B,C,D,A, 7,19); + function(A,B,C,D, 8, 3); + function(D,A,B,C, 9, 7); + function(C,D,A,B,10,11); + function(B,C,D,A,11,19); + function(A,B,C,D,12, 3); + function(D,A,B,C,13, 7); + function(C,D,A,B,14,11); + function(B,C,D,A,15,19); + +#undef function + + /* Round 2. */ +#define function(a,b,c,d,k,s) a=rol(a+G(b,c,d)+in[k]+0x5a827999,s); + + function(A,B,C,D, 0, 3); + function(D,A,B,C, 4, 5); + function(C,D,A,B, 8, 9); + function(B,C,D,A,12,13); + function(A,B,C,D, 1, 3); + function(D,A,B,C, 5, 5); + function(C,D,A,B, 9, 9); + function(B,C,D,A,13,13); + function(A,B,C,D, 2, 3); + function(D,A,B,C, 6, 5); + function(C,D,A,B,10, 9); + function(B,C,D,A,14,13); + function(A,B,C,D, 3, 3); + function(D,A,B,C, 7, 5); + function(C,D,A,B,11, 9); + function(B,C,D,A,15,13); + +#undef function + + /* Round 3. 
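+     (Illustrative note: the round 2 and round 3 constants 0x5a827999
+     and 0x6ed9eba1 are floor(2^30 * sqrt(2)) and floor(2^30 * sqrt(3)),
+     per the MD4 specification in RFC 1320.)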
*/ +#define function(a,b,c,d,k,s) a=rol(a+H(b,c,d)+in[k]+0x6ed9eba1,s); + + function(A,B,C,D, 0, 3); + function(D,A,B,C, 8, 9); + function(C,D,A,B, 4,11); + function(B,C,D,A,12,15); + function(A,B,C,D, 2, 3); + function(D,A,B,C,10, 9); + function(C,D,A,B, 6,11); + function(B,C,D,A,14,15); + function(A,B,C,D, 1, 3); + function(D,A,B,C, 9, 9); + function(C,D,A,B, 5,11); + function(B,C,D,A,13,15); + function(A,B,C,D, 3, 3); + function(D,A,B,C,11, 9); + function(C,D,A,B, 7,11); + function(B,C,D,A,15,15); + + + /* Put checksum in context given as argument. */ + ctx->A += A; + ctx->B += B; + ctx->C += C; + ctx->D += D; + + return /*burn_stack*/ 80+6*sizeof(void*); +} + + +static unsigned int +transform ( void *c, const unsigned char *data, size_t nblks ) +{ + unsigned int burn; + + do + { + burn = transform_blk (c, data); + data += 64; + } + while (--nblks); + + return burn; +} + + +/* The routine final terminates the message-digest computation and + * ends with the desired message digest in mdContext->digest[0...15]. + * The handle is prepared for a new MD4 cycle. + * Returns 16 bytes representing the digest. + */ + +static void +md4_final( void *context ) +{ + MD4_CONTEXT *hd = context; + u32 t, th, msb, lsb; + byte *p; + unsigned int burn; + + _gcry_md_block_write(hd, NULL, 0); /* flush */; + + t = hd->bctx.nblocks; + if (sizeof t == sizeof hd->bctx.nblocks) + th = hd->bctx.nblocks_high; + else + th = hd->bctx.nblocks >> 32; + + /* multiply by 64 to make a byte count */ + lsb = t << 6; + msb = (th << 6) | (t >> 26); + /* add the count */ + t = lsb; + if( (lsb += hd->bctx.count) < t ) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 29; + + if( hd->bctx.count < 56 ) /* enough room */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ + while( hd->bctx.count < 56 ) + hd->bctx.buf[hd->bctx.count++] = 0; /* pad */ + } + else /* need one extra block */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ + while( hd->bctx.count < 64 ) + hd->bctx.buf[hd->bctx.count++] = 0; + _gcry_md_block_write(hd, NULL, 0); /* flush */; + memset(hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */ + } + /* append the 64 bit count */ + buf_put_le32(hd->bctx.buf + 56, lsb); + buf_put_le32(hd->bctx.buf + 60, msb); + burn = transform ( hd, hd->bctx.buf, 1 ); + _gcry_burn_stack (burn); + + p = hd->bctx.buf; +#define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0) + X(A); + X(B); + X(C); + X(D); +#undef X + +} + +static byte * +md4_read (void *context) +{ + MD4_CONTEXT *hd = context; + return hd->bctx.buf; +} + +static byte asn[18] = /* Object ID is 1.2.840.113549.2.4 */ + { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48, + 0x86, 0xf7, 0x0d, 0x02, 0x04, 0x05, 0x00, 0x04, 0x10 }; + +static gcry_md_oid_spec_t oid_spec_md4[] = + { + /* iso.member-body.us.rsadsi.digestAlgorithm.md4 */ + { "1.2.840.113549.2.4" }, + { NULL }, + }; + +gcry_md_spec_t _gcry_digest_spec_md4 = + { + GCRY_MD_MD4, {0, 0}, + "MD4", asn, DIM (asn), oid_spec_md4,16, + md4_init, _gcry_md_block_write, md4_final, md4_read, NULL, + sizeof (MD4_CONTEXT) + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/md5.c b/libotr/libgcrypt-1.8.7/cipher/md5.c new file mode 100644 index 0000000..ed942cf --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/md5.c @@ -0,0 +1,317 @@ +/* md5.c - MD5 Message-Digest Algorithm + * Copyright (C) 1995,1996,1998,1999,2001,2002, + * 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * According to the definition of MD5 in RFC 1321 from April 1992. + * NOTE: This is *not* the same file as the one from glibc. + * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + * heavily modified for GnuPG by Werner Koch <wk@gnupg.org> + */ + +/* Test values: + * "" D4 1D 8C D9 8F 00 B2 04 E9 80 09 98 EC F8 42 7E + * "a" 0C C1 75 B9 C0 F1 B6 A8 31 C3 99 E2 69 77 26 61 + * "abc 90 01 50 98 3C D2 4F B0 D6 96 3F 7D 28 E1 7F 72 + * "message digest" F9 6B 69 7D 7C B7 93 8D 52 5A 2F 31 AA F1 61 D0 + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "cipher.h" + +#include "bithelp.h" +#include "bufhelp.h" +#include "hash-common.h" + + +typedef struct { + gcry_md_block_ctx_t bctx; + u32 A,B,C,D; /* chaining variables */ +} MD5_CONTEXT; + +static unsigned int +transform ( void *ctx, const unsigned char *data, size_t datalen ); + +static void +md5_init( void *context, unsigned int flags) +{ + MD5_CONTEXT *ctx = context; + + (void)flags; + + ctx->A = 0x67452301; + ctx->B = 0xefcdab89; + ctx->C = 0x98badcfe; + ctx->D = 0x10325476; + + ctx->bctx.nblocks = 0; + ctx->bctx.nblocks_high = 0; + ctx->bctx.count = 0; + ctx->bctx.blocksize = 64; + ctx->bctx.bwrite = transform; +} + + +/* These are the four functions used in the four steps of the MD5 algorithm + and defined in the RFC 1321. The first function is a little bit optimized + (as found in Colin Plumbs public domain implementation). */ +/* #define FF(b, c, d) ((b & c) | (~b & d)) */ +#define FF(b, c, d) (d ^ (b & (c ^ d))) +#define FG(b, c, d) FF (d, b, c) +#define FH(b, c, d) (b ^ c ^ d) +#define FI(b, c, d) (c ^ (b | ~d)) + + +/**************** + * transform 64 bytes + */ +static unsigned int +transform_blk ( void *c, const unsigned char *data ) +{ + MD5_CONTEXT *ctx = c; + u32 correct_words[16]; + register u32 A = ctx->A; + register u32 B = ctx->B; + register u32 C = ctx->C; + register u32 D = ctx->D; + u32 *cwp = correct_words; + int i; + + for ( i = 0; i < 16; i++ ) + correct_words[i] = buf_get_le32(data + i * 4); + +#define OP(a, b, c, d, s, T) \ + do \ + { \ + a += FF (b, c, d) + (*cwp++) + T; \ + a = rol(a, s); \ + a += b; \ + } \ + while (0) + + /* Before we start, one word about the strange constants. + They are defined in RFC 1321 as + + T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64 + */ + + /* Round 1. 
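+     (Each OP invocation below consumes the next message word via
+     *cwp++, so the sixteen round 1 steps walk correct_words[0..15]
+     in order.)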
*/ + OP (A, B, C, D, 7, 0xd76aa478); + OP (D, A, B, C, 12, 0xe8c7b756); + OP (C, D, A, B, 17, 0x242070db); + OP (B, C, D, A, 22, 0xc1bdceee); + OP (A, B, C, D, 7, 0xf57c0faf); + OP (D, A, B, C, 12, 0x4787c62a); + OP (C, D, A, B, 17, 0xa8304613); + OP (B, C, D, A, 22, 0xfd469501); + OP (A, B, C, D, 7, 0x698098d8); + OP (D, A, B, C, 12, 0x8b44f7af); + OP (C, D, A, B, 17, 0xffff5bb1); + OP (B, C, D, A, 22, 0x895cd7be); + OP (A, B, C, D, 7, 0x6b901122); + OP (D, A, B, C, 12, 0xfd987193); + OP (C, D, A, B, 17, 0xa679438e); + OP (B, C, D, A, 22, 0x49b40821); + +#undef OP +#define OP(f, a, b, c, d, k, s, T) \ + do \ + { \ + a += f (b, c, d) + correct_words[k] + T; \ + a = rol(a, s); \ + a += b; \ + } \ + while (0) + + /* Round 2. */ + OP (FG, A, B, C, D, 1, 5, 0xf61e2562); + OP (FG, D, A, B, C, 6, 9, 0xc040b340); + OP (FG, C, D, A, B, 11, 14, 0x265e5a51); + OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa); + OP (FG, A, B, C, D, 5, 5, 0xd62f105d); + OP (FG, D, A, B, C, 10, 9, 0x02441453); + OP (FG, C, D, A, B, 15, 14, 0xd8a1e681); + OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8); + OP (FG, A, B, C, D, 9, 5, 0x21e1cde6); + OP (FG, D, A, B, C, 14, 9, 0xc33707d6); + OP (FG, C, D, A, B, 3, 14, 0xf4d50d87); + OP (FG, B, C, D, A, 8, 20, 0x455a14ed); + OP (FG, A, B, C, D, 13, 5, 0xa9e3e905); + OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8); + OP (FG, C, D, A, B, 7, 14, 0x676f02d9); + OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a); + + /* Round 3. */ + OP (FH, A, B, C, D, 5, 4, 0xfffa3942); + OP (FH, D, A, B, C, 8, 11, 0x8771f681); + OP (FH, C, D, A, B, 11, 16, 0x6d9d6122); + OP (FH, B, C, D, A, 14, 23, 0xfde5380c); + OP (FH, A, B, C, D, 1, 4, 0xa4beea44); + OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9); + OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60); + OP (FH, B, C, D, A, 10, 23, 0xbebfbc70); + OP (FH, A, B, C, D, 13, 4, 0x289b7ec6); + OP (FH, D, A, B, C, 0, 11, 0xeaa127fa); + OP (FH, C, D, A, B, 3, 16, 0xd4ef3085); + OP (FH, B, C, D, A, 6, 23, 0x04881d05); + OP (FH, A, B, C, D, 9, 4, 0xd9d4d039); + OP (FH, D, A, B, C, 12, 11, 0xe6db99e5); + OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8); + OP (FH, B, C, D, A, 2, 23, 0xc4ac5665); + + /* Round 4. */ + OP (FI, A, B, C, D, 0, 6, 0xf4292244); + OP (FI, D, A, B, C, 7, 10, 0x432aff97); + OP (FI, C, D, A, B, 14, 15, 0xab9423a7); + OP (FI, B, C, D, A, 5, 21, 0xfc93a039); + OP (FI, A, B, C, D, 12, 6, 0x655b59c3); + OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92); + OP (FI, C, D, A, B, 10, 15, 0xffeff47d); + OP (FI, B, C, D, A, 1, 21, 0x85845dd1); + OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f); + OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0); + OP (FI, C, D, A, B, 6, 15, 0xa3014314); + OP (FI, B, C, D, A, 13, 21, 0x4e0811a1); + OP (FI, A, B, C, D, 4, 6, 0xf7537e82); + OP (FI, D, A, B, C, 11, 10, 0xbd3af235); + OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb); + OP (FI, B, C, D, A, 9, 21, 0xeb86d391); + + /* Put checksum in context given as argument. */ + ctx->A += A; + ctx->B += B; + ctx->C += C; + ctx->D += D; + + return /*burn_stack*/ 80+6*sizeof(void*); +} + + +static unsigned int +transform ( void *c, const unsigned char *data, size_t nblks ) +{ + unsigned int burn; + + do + { + burn = transform_blk (c, data); + data += 64; + } + while (--nblks); + + return burn; +} + + +/* The routine final terminates the message-digest computation and + * ends with the desired message digest in mdContext->digest[0...15]. + * The handle is prepared for a new MD5 cycle. + * Returns 16 bytes representing the digest. 
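+ * As a worked padding example (illustrative): for the three byte
+ * message "abc" the buffer count is 3, so one 0x80 byte and 52 zero
+ * bytes are appended, and the little-endian 64-bit bit count 24 fills
+ * bytes 56..63 of the final block.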
+ */ + +static void +md5_final( void *context) +{ + MD5_CONTEXT *hd = context; + u32 t, th, msb, lsb; + byte *p; + unsigned int burn; + + _gcry_md_block_write(hd, NULL, 0); /* flush */; + + t = hd->bctx.nblocks; + if (sizeof t == sizeof hd->bctx.nblocks) + th = hd->bctx.nblocks_high; + else + th = hd->bctx.nblocks >> 32; + + /* multiply by 64 to make a byte count */ + lsb = t << 6; + msb = (th << 6) | (t >> 26); + /* add the count */ + t = lsb; + if( (lsb += hd->bctx.count) < t ) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 29; + + if( hd->bctx.count < 56 ) /* enough room */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ + while( hd->bctx.count < 56 ) + hd->bctx.buf[hd->bctx.count++] = 0; /* pad */ + } + else /* need one extra block */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ + while( hd->bctx.count < 64 ) + hd->bctx.buf[hd->bctx.count++] = 0; + _gcry_md_block_write(hd, NULL, 0); /* flush */; + memset(hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */ + } + /* append the 64 bit count */ + buf_put_le32(hd->bctx.buf + 56, lsb); + buf_put_le32(hd->bctx.buf + 60, msb); + burn = transform ( hd, hd->bctx.buf, 1 ); + _gcry_burn_stack (burn); + + p = hd->bctx.buf; +#define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0) + X(A); + X(B); + X(C); + X(D); +#undef X + +} + +static byte * +md5_read( void *context ) +{ + MD5_CONTEXT *hd = (MD5_CONTEXT *) context; + return hd->bctx.buf; +} + +static byte asn[18] = /* Object ID is 1.2.840.113549.2.5 */ + { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48, + 0x86, 0xf7, 0x0d, 0x02, 0x05, 0x05, 0x00, 0x04, 0x10 }; + +static gcry_md_oid_spec_t oid_spec_md5[] = + { + /* iso.member-body.us.rsadsi.pkcs.pkcs-1.4 (md5WithRSAEncryption) */ + { "1.2.840.113549.1.1.4" }, + /* RSADSI digestAlgorithm MD5 */ + { "1.2.840.113549.2.5" }, + { NULL }, + }; + +gcry_md_spec_t _gcry_digest_spec_md5 = + { + GCRY_MD_MD5, {0, 0}, + "MD5", asn, DIM (asn), oid_spec_md5, 16, + md5_init, _gcry_md_block_write, md5_final, md5_read, NULL, + sizeof (MD5_CONTEXT) + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/poly1305-armv7-neon.S b/libotr/libgcrypt-1.8.7/cipher/poly1305-armv7-neon.S new file mode 100644 index 0000000..b4dc946 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/poly1305-armv7-neon.S @@ -0,0 +1,742 @@ +/* poly1305-armv7-neon.S - ARMv7/NEON implementation of Poly1305 + * + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Based on public domain implementation by Andrew Moon at + * https://github.com/floodyberry/poly1305-opt + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) + +.syntax unified +.fpu neon +.arm + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + +#define UNALIGNED_LDMIA2(ptr, l0, l1) \ + tst ptr, #3; \ + beq 1f; \ + vpush {d0}; \ + vld1.32 {d0}, [ptr]!; \ + vmov l0, s0; \ + vmov l1, s1; \ + vpop {d0}; \ + b 2f; \ + 1: ldmia ptr!, {l0-l1}; \ + 2: ; + +#define UNALIGNED_LDMIA4(ptr, l0, l1, l2, l3) \ + tst ptr, #3; \ + beq 1f; \ + vpush {d0-d1}; \ + vld1.32 {d0-d1}, [ptr]!; \ + vmov l0, s0; \ + vmov l1, s1; \ + vmov l2, s2; \ + vmov l3, s3; \ + vpop {d0-d1}; \ + b 2f; \ + 1: ldmia ptr!, {l0-l3}; \ + 2: ; + +.text + +.p2align 2 +.Lpoly1305_init_constants_neon: +.long 0x3ffff03 +.long 0x3ffc0ff +.long 0x3f03fff +.long 0x00fffff + +.globl _gcry_poly1305_armv7_neon_init_ext +.type _gcry_poly1305_armv7_neon_init_ext,%function; +_gcry_poly1305_armv7_neon_init_ext: +.Lpoly1305_init_ext_neon_local: + stmfd sp!, {r4-r11, lr} + sub sp, sp, #32 + mov r14, #-1 + UNALIGNED_LDMIA4(r1, r2, r3, r4, r5) + GET_DATA_POINTER(r7,.Lpoly1305_init_constants_neon,r8) + mov r6, r2 + mov r8, r2, lsr #26 + mov r9, r3, lsr #20 + mov r10, r4, lsr #14 + mov r11, r5, lsr #8 + orr r8, r8, r3, lsl #6 + orr r9, r9, r4, lsl #12 + orr r10, r10, r5, lsl #18 + ldmia r7, {r2-r5} + and r2, r2, r8 + and r3, r3, r9 + and r4, r4, r10 + and r5, r5, r11 + and r6, r6, 0x3ffffff + stmia r0!, {r2-r6} + eor r8, r8, r8 + str r8, [sp, #24] +.Lpoly1305_init_ext_neon_squareloop: + ldr r8, [sp, #24] + mov r12, #16 + cmp r8, #2 + beq .Lpoly1305_init_ext_neon_donesquaring + cmp r8, #1 + moveq r12, #64 + cmp r14, r12 + bls .Lpoly1305_init_ext_neon_donesquaring + add r8, #1 + str r8, [sp, #24] + mov r6, r6, lsl #1 + mov r2, r2, lsl #1 + umull r7, r8, r3, r3 + umull r9, r10, r6, r4 + umlal r7, r8, r6, r5 + umlal r9, r10, r2, r3 + add r11, r5, r5, lsl #2 + umlal r7, r8, r2, r4 + umlal r9, r10, r5, r11 + str r7, [sp, #16] + str r8, [sp, #20] + mov r2, r2, lsr #1 + mov r5, r5, lsl #1 + str r9, [sp, #8] + str r10, [sp, #12] + umull r7, r8, r2, r2 + umull r9, r10, r6, r2 + add r11, r3, r3, lsl #2 + add r12, r4, r4, lsl #2 + umlal r7, r8, r6, r3 + umlal r9, r10, r5, r11 + umlal r7, r8, r5, r12 + umlal r9, r10, r4, r12 + mov r6, r6, lsr #1 + mov r3, r3, lsl #1 + add r11, r2, r2, lsl #2 + str r7, [sp, #0] + str r8, [sp, #4] + umull r7, r8, r6, r6 + umlal r7, r8, r3, r12 + umlal r7, r8, r5, r11 + and r6, r7, 0x3ffffff + mov r11, r7, lsr #26 + orr r11, r11, r8, lsl #6 + ldr r7, [sp, #0] + ldr r8, [sp, #4] + adds r9, r9, r11 + adc r10, r10, #0 + and r2, r9, 0x3ffffff + mov r11, r9, lsr #26 + orr r11, r11, r10, lsl #6 + ldr r9, [sp, #8] + ldr r10, [sp, #12] + adds r7, r7, r11 + adc r8, r8, #0 + and r3, r7, 0x3ffffff + mov r11, r7, lsr #26 + orr r11, r11, r8, lsl #6 + ldr r7, [sp, #16] + ldr r8, [sp, #20] + adds r9, r9, r11 + adc r10, r10, #0 + and r4, r9, 0x3ffffff + mov r11, r9, lsr #26 + orr r11, r11, r10, lsl #6 + adds r7, r7, r11 + adc r8, r8, #0 + and r5, r7, 0x3ffffff + mov r11, r7, lsr #26 + orr r11, r11, r8, lsl #6 + add r11, r11, r11, lsl #2 + add r6, r6, r11 + mov r11, r6, 
lsr #26 + and r6, r6, 0x3ffffff + add r2, r2, r11 + stmia r0!, {r2-r6} + b .Lpoly1305_init_ext_neon_squareloop +.Lpoly1305_init_ext_neon_donesquaring: + mov r2, #2 + ldr r14, [sp, #24] + sub r14, r2, r14 + mov r3, r14, lsl #4 + add r3, r3, r14, lsl #2 + add r0, r0, r3 + eor r2, r2, r2 + eor r3, r3, r3 + eor r4, r4, r4 + eor r5, r5, r5 + eor r6, r6, r6 + stmia r0!, {r2-r6} + stmia r0!, {r2-r6} + UNALIGNED_LDMIA4(r1, r2, r3, r4, r5) + stmia r0, {r2-r6} + add sp, sp, #32 + ldmfd sp!, {r4-r11, lr} + mov r0, #(9*4+32) + bx lr +.ltorg +.size _gcry_poly1305_armv7_neon_init_ext,.-_gcry_poly1305_armv7_neon_init_ext; + +.globl _gcry_poly1305_armv7_neon_blocks +.type _gcry_poly1305_armv7_neon_blocks,%function; +_gcry_poly1305_armv7_neon_blocks: +.Lpoly1305_blocks_neon_local: + vmov.i32 q0, #0xffffffff + vmov.i32 d4, #1 + vsubw.u32 q0, q0, d4 + vstmdb sp!, {q4,q5,q6,q7} + stmfd sp!, {r4-r11, lr} + mov r8, sp + and sp, sp, #~63 + sub sp, sp, #192 + str r0, [sp, #108] + str r1, [sp, #112] + str r2, [sp, #116] + str r8, [sp, #120] + mov r3, r0 + mov r0, r1 + mov r1, r2 + mov r2, r3 + ldr r8, [r2, #116] + veor d15, d15, d15 + vorr.i32 d15, #(1 << 24) + tst r8, #2 + beq .Lpoly1305_blocks_neon_skip_shift8 + vshr.u64 d15, #32 +.Lpoly1305_blocks_neon_skip_shift8: + tst r8, #4 + beq .Lpoly1305_blocks_neon_skip_shift16 + veor d15, d15, d15 +.Lpoly1305_blocks_neon_skip_shift16: + vst1.64 d15, [sp, :64] + tst r8, #1 + bne .Lpoly1305_blocks_neon_started + vld1.64 {q0-q1}, [r0]! + vswp d1, d2 + vmovn.i64 d21, q0 + vshrn.i64 d22, q0, #26 + vshrn.u64 d24, q1, #14 + vext.8 d0, d0, d2, #4 + vext.8 d1, d1, d3, #4 + vshr.u64 q1, q1, #32 + vshrn.i64 d23, q0, #20 + vshrn.u64 d25, q1, #8 + vand.i32 d21, #0x03ffffff + vand.i32 q11, #0x03ffffff + vand.i32 q12, #0x03ffffff + orr r8, r8, #1 + sub r1, r1, #32 + str r8, [r2, #116] + vorr d25, d25, d15 + b .Lpoly1305_blocks_neon_setupr20 +.Lpoly1305_blocks_neon_started: + add r9, r2, #60 + vldm r9, {d21-d25} +.Lpoly1305_blocks_neon_setupr20: + vmov.i32 d0, #5 + tst r8, #(8|16) + beq .Lpoly1305_blocks_neon_setupr20_simple + tst r8, #(8) + beq .Lpoly1305_blocks_neon_setupr20_r_1 + mov r9, r2 + add r10, r2, #20 + vld1.64 {q9}, [r9]! + vld1.64 {q8}, [r10]! + vld1.64 {d2}, [r9] + vld1.64 {d20}, [r10] + b .Lpoly1305_blocks_neon_setupr20_hard +.Lpoly1305_blocks_neon_setupr20_r_1: + mov r9, r2 + vmov.i32 d2, #1 + vld1.64 {q8}, [r9]! 
+ veor q9, q9, q9 + vshr.u64 d2, d2, #32 + vld1.64 {d20}, [r9] +.Lpoly1305_blocks_neon_setupr20_hard: + vzip.i32 q8, q9 + vzip.i32 d20, d2 + b .Lpoly1305_blocks_neon_setups20 +.Lpoly1305_blocks_neon_setupr20_simple: + add r9, r2, #20 + vld1.64 {d2-d4}, [r9] + vdup.32 d16, d2[0] + vdup.32 d17, d2[1] + vdup.32 d18, d3[0] + vdup.32 d19, d3[1] + vdup.32 d20, d4[0] +.Lpoly1305_blocks_neon_setups20: + vmul.i32 q13, q8, d0[0] + vmov.i64 q15, 0x00000000ffffffff + vmul.i32 q14, q9, d0[0] + vshr.u64 q15, q15, #6 + cmp r1, #64 + blo .Lpoly1305_blocks_neon_try32 + add r9, sp, #16 + add r10, r2, #40 + add r11, sp, #64 + str r1, [sp, #116] + vld1.64 {d10-d12}, [r10] + vmov d14, d12 + vmul.i32 q6, q5, d0[0] +.Lpoly1305_blocks_neon_mainloop: + UNALIGNED_LDMIA4(r0, r2, r3, r4, r5) + vmull.u32 q0, d25, d12[0] + mov r7, r2, lsr #26 + vmlal.u32 q0, d24, d12[1] + mov r8, r3, lsr #20 + ldr r6, [sp, #0] + vmlal.u32 q0, d23, d13[0] + mov r9, r4, lsr #14 + vmlal.u32 q0, d22, d13[1] + orr r6, r6, r5, lsr #8 + vmlal.u32 q0, d21, d14[0] + orr r3, r7, r3, lsl #6 + vmull.u32 q1, d25, d12[1] + orr r4, r8, r4, lsl #12 + orr r5, r9, r5, lsl #18 + vmlal.u32 q1, d24, d13[0] + UNALIGNED_LDMIA4(r0, r7, r8, r9, r10) + vmlal.u32 q1, d23, d13[1] + mov r1, r7, lsr #26 + vmlal.u32 q1, d22, d14[0] + ldr r11, [sp, #4] + mov r12, r8, lsr #20 + vmlal.u32 q1, d21, d10[0] + mov r14, r9, lsr #14 + vmull.u32 q2, d25, d13[0] + orr r11, r11, r10, lsr #8 + orr r8, r1, r8, lsl #6 + vmlal.u32 q2, d24, d13[1] + orr r9, r12, r9, lsl #12 + vmlal.u32 q2, d23, d14[0] + orr r10, r14, r10, lsl #18 + vmlal.u32 q2, d22, d10[0] + mov r12, r3 + and r2, r2, #0x3ffffff + vmlal.u32 q2, d21, d10[1] + mov r14, r5 + vmull.u32 q3, d25, d13[1] + and r3, r7, #0x3ffffff + vmlal.u32 q3, d24, d14[0] + and r5, r8, #0x3ffffff + vmlal.u32 q3, d23, d10[0] + and r7, r9, #0x3ffffff + vmlal.u32 q3, d22, d10[1] + and r8, r14, #0x3ffffff + vmlal.u32 q3, d21, d11[0] + and r9, r10, #0x3ffffff + add r14, sp, #128 + vmull.u32 q4, d25, d14[0] + mov r10, r6 + vmlal.u32 q4, d24, d10[0] + and r6, r4, #0x3ffffff + vmlal.u32 q4, d23, d10[1] + and r4, r12, #0x3ffffff + vmlal.u32 q4, d22, d11[0] + stm r14, {r2-r11} + vmlal.u32 q4, d21, d11[1] + vld1.64 {d21-d24}, [r14, :256]! 
+ vld1.64 {d25}, [r14, :64] + UNALIGNED_LDMIA4(r0, r2, r3, r4, r5) + vmlal.u32 q0, d25, d26 + mov r7, r2, lsr #26 + vmlal.u32 q0, d24, d27 + ldr r6, [sp, #0] + mov r8, r3, lsr #20 + vmlal.u32 q0, d23, d28 + mov r9, r4, lsr #14 + vmlal.u32 q0, d22, d29 + orr r6, r6, r5, lsr #8 + vmlal.u32 q0, d21, d20 + orr r3, r7, r3, lsl #6 + vmlal.u32 q1, d25, d27 + orr r4, r8, r4, lsl #12 + orr r5, r9, r5, lsl #18 + vmlal.u32 q1, d24, d28 + UNALIGNED_LDMIA4(r0, r7, r8, r9, r10) + vmlal.u32 q1, d23, d29 + mov r1, r7, lsr #26 + vmlal.u32 q1, d22, d20 + ldr r11, [sp, #4] + mov r12, r8, lsr #20 + vmlal.u32 q1, d21, d16 + mov r14, r9, lsr #14 + vmlal.u32 q2, d25, d28 + orr r11, r11, r10, lsr #8 + orr r8, r1, r8, lsl #6 + orr r9, r12, r9, lsl #12 + vmlal.u32 q2, d24, d29 + orr r10, r14, r10, lsl #18 + and r2, r2, #0x3ffffff + mov r12, r3 + vmlal.u32 q2, d23, d20 + mov r14, r5 + vmlal.u32 q2, d22, d16 + and r3, r7, #0x3ffffff + vmlal.u32 q2, d21, d17 + and r5, r8, #0x3ffffff + vmlal.u32 q3, d25, d29 + and r7, r9, #0x3ffffff + vmlal.u32 q3, d24, d20 + and r8, r14, #0x3ffffff + vmlal.u32 q3, d23, d16 + and r9, r10, #0x3ffffff + vmlal.u32 q3, d22, d17 + add r14, sp, #128 + vmlal.u32 q3, d21, d18 + mov r10, r6 + vmlal.u32 q4, d25, d20 + vmlal.u32 q4, d24, d16 + and r6, r4, #0x3ffffff + vmlal.u32 q4, d23, d17 + and r4, r12, #0x3ffffff + vmlal.u32 q4, d22, d18 + stm r14, {r2-r11} + vmlal.u32 q4, d21, d19 + vld1.64 {d21-d24}, [r14, :256]! + vld1.64 {d25}, [r14, :64] + vaddw.u32 q0, q0, d21 + vaddw.u32 q1, q1, d22 + vaddw.u32 q2, q2, d23 + vaddw.u32 q3, q3, d24 + vaddw.u32 q4, q4, d25 + vshr.u64 q11, q0, #26 + vand q0, q0, q15 + vadd.i64 q1, q1, q11 + vshr.u64 q12, q3, #26 + vand q3, q3, q15 + vadd.i64 q4, q4, q12 + vshr.u64 q11, q1, #26 + vand q1, q1, q15 + vadd.i64 q2, q2, q11 + vshr.u64 q12, q4, #26 + vand q4, q4, q15 + vadd.i64 q0, q0, q12 + vshl.i64 q12, q12, #2 + ldr r1, [sp, #116] + vadd.i64 q0, q0, q12 + vshr.u64 q11, q2, #26 + vand q2, q2, q15 + vadd.i64 q3, q3, q11 + sub r1, #64 + vshr.u64 q12, q0, #26 + vand q0, q0, q15 + vadd.i64 q1, q1, q12 + cmp r1, #64 + vshr.u64 q11, q3, #26 + vand q3, q3, q15 + vadd.i64 q4, q4, q11 + vmovn.i64 d21, q0 + str r1, [sp, #116] + vmovn.i64 d22, q1 + vmovn.i64 d23, q2 + vmovn.i64 d24, q3 + vmovn.i64 d25, q4 + bhs .Lpoly1305_blocks_neon_mainloop +.Lpoly1305_blocks_neon_try32: + cmp r1, #32 + blo .Lpoly1305_blocks_neon_done + tst r0, r0 + bne .Lpoly1305_blocks_loadm32 + veor q0, q0, q0 + veor q1, q1, q1 + veor q2, q2, q2 + veor q3, q3, q3 + veor q4, q4, q4 + b .Lpoly1305_blocks_continue32 +.Lpoly1305_blocks_loadm32: + vld1.64 {q0-q1}, [r0]! 
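+/* 32-byte tail: the last two blocks were just loaded into q0/q1.  The
+ * vtrn/vshl sequence below splits them into 26-bit limbs, and d15,
+ * prepared at function entry from the state flags, supplies the 2^128
+ * padding bit (cleared or shifted when the final block is short). */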
+ veor q4, q4, q4 + vswp d1, d2 + veor q3, q3, q3 + vtrn.32 q0, q4 + vtrn.32 q1, q3 + vshl.i64 q2, q1, #12 + vshl.i64 q3, q3, #18 + vshl.i64 q1, q4, #6 + vmovl.u32 q4, d15 +.Lpoly1305_blocks_continue32: + vmlal.u32 q0, d25, d26 + vmlal.u32 q0, d24, d27 + vmlal.u32 q0, d23, d28 + vmlal.u32 q0, d22, d29 + vmlal.u32 q0, d21, d20 + vmlal.u32 q1, d25, d27 + vmlal.u32 q1, d24, d28 + vmlal.u32 q1, d23, d29 + vmlal.u32 q1, d22, d20 + vmlal.u32 q1, d21, d16 + vmlal.u32 q2, d25, d28 + vmlal.u32 q2, d24, d29 + vmlal.u32 q2, d23, d20 + vmlal.u32 q2, d22, d16 + vmlal.u32 q2, d21, d17 + vmlal.u32 q3, d25, d29 + vmlal.u32 q3, d24, d20 + vmlal.u32 q3, d23, d16 + vmlal.u32 q3, d22, d17 + vmlal.u32 q3, d21, d18 + vmlal.u32 q4, d25, d20 + vmlal.u32 q4, d24, d16 + vmlal.u32 q4, d23, d17 + vmlal.u32 q4, d22, d18 + vmlal.u32 q4, d21, d19 + vshr.u64 q11, q0, #26 + vand q0, q0, q15 + vadd.i64 q1, q1, q11 + vshr.u64 q12, q3, #26 + vand q3, q3, q15 + vadd.i64 q4, q4, q12 + vshr.u64 q11, q1, #26 + vand q1, q1, q15 + vadd.i64 q2, q2, q11 + vshr.u64 q12, q4, #26 + vand q4, q4, q15 + vadd.i64 q0, q0, q12 + vshl.i64 q12, q12, #2 + vadd.i64 q0, q0, q12 + vshr.u64 q11, q2, #26 + vand q2, q2, q15 + vadd.i64 q3, q3, q11 + vshr.u64 q12, q0, #26 + vand q0, q0, q15 + vadd.i64 q1, q1, q12 + vshr.u64 q11, q3, #26 + vand q3, q3, q15 + vadd.i64 q4, q4, q11 + vmovn.i64 d21, q0 + vmovn.i64 d22, q1 + vmovn.i64 d23, q2 + vmovn.i64 d24, q3 + vmovn.i64 d25, q4 +.Lpoly1305_blocks_neon_done: + tst r0, r0 + beq .Lpoly1305_blocks_neon_final + ldr r2, [sp, #108] + add r2, r2, #60 + vst1.64 {d21}, [r2]! + vst1.64 {d22-d25}, [r2] + b .Lpoly1305_blocks_neon_leave +.Lpoly1305_blocks_neon_final: + vadd.u32 d10, d0, d1 + vadd.u32 d13, d2, d3 + vadd.u32 d11, d4, d5 + ldr r5, [sp, #108] + vadd.u32 d14, d6, d7 + vadd.u32 d12, d8, d9 + vtrn.32 d10, d13 + vtrn.32 d11, d14 + vst1.64 {d10-d12}, [sp] + ldm sp, {r0-r4} + mov r12, r0, lsr #26 + and r0, r0, #0x3ffffff + add r1, r1, r12 + mov r12, r1, lsr #26 + and r1, r1, #0x3ffffff + add r2, r2, r12 + mov r12, r2, lsr #26 + and r2, r2, #0x3ffffff + add r3, r3, r12 + mov r12, r3, lsr #26 + and r3, r3, #0x3ffffff + add r4, r4, r12 + mov r12, r4, lsr #26 + and r4, r4, #0x3ffffff + add r12, r12, r12, lsl #2 + add r0, r0, r12 + mov r12, r0, lsr #26 + and r0, r0, #0x3ffffff + add r1, r1, r12 + mov r12, r1, lsr #26 + and r1, r1, #0x3ffffff + add r2, r2, r12 + mov r12, r2, lsr #26 + and r2, r2, #0x3ffffff + add r3, r3, r12 + mov r12, r3, lsr #26 + and r3, r3, #0x3ffffff + add r4, r4, r12 + mov r12, r4, lsr #26 + and r4, r4, #0x3ffffff + add r12, r12, r12, lsl #2 + add r0, r0, r12 + mov r12, r0, lsr #26 + and r0, r0, #0x3ffffff + add r1, r1, r12 + add r6, r0, #5 + mov r12, r6, lsr #26 + and r6, r6, #0x3ffffff + add r7, r1, r12 + mov r12, r7, lsr #26 + and r7, r7, #0x3ffffff + add r10, r2, r12 + mov r12, r10, lsr #26 + and r10, r10, #0x3ffffff + add r11, r3, r12 + mov r12, #-(1 << 26) + add r12, r12, r11, lsr #26 + and r11, r11, #0x3ffffff + add r14, r4, r12 + mov r12, r14, lsr #31 + sub r12, #1 + and r6, r6, r12 + and r7, r7, r12 + and r10, r10, r12 + and r11, r11, r12 + and r14, r14, r12 + mvn r12, r12 + and r0, r0, r12 + and r1, r1, r12 + and r2, r2, r12 + and r3, r3, r12 + and r4, r4, r12 + orr r0, r0, r6 + orr r1, r1, r7 + orr r2, r2, r10 + orr r3, r3, r11 + orr r4, r4, r14 + orr r0, r0, r1, lsl #26 + lsr r1, r1, #6 + orr r1, r1, r2, lsl #20 + lsr r2, r2, #12 + orr r2, r2, r3, lsl #14 + lsr r3, r3, #18 + orr r3, r3, r4, lsl #8 + add r5, r5, #60 + stm r5, {r0-r3} +.Lpoly1305_blocks_neon_leave: + sub r0, sp, #8 + 
ldr sp, [sp, #120] + ldmfd sp!, {r4-r11, lr} + vldm sp!, {q4-q7} + sub r0, sp, r0 + bx lr +.size _gcry_poly1305_armv7_neon_blocks,.-_gcry_poly1305_armv7_neon_blocks; + +.globl _gcry_poly1305_armv7_neon_finish_ext +.type _gcry_poly1305_armv7_neon_finish_ext,%function; +_gcry_poly1305_armv7_neon_finish_ext: +.Lpoly1305_finish_ext_neon_local: + stmfd sp!, {r4-r11, lr} + sub sp, sp, #32 + mov r5, r0 + mov r6, r1 + mov r7, r2 + mov r8, r3 + ands r7, r7, r7 + beq .Lpoly1305_finish_ext_neon_noremaining + mov r9, sp + veor q0, q0, q0 + veor q1, q1, q1 + vst1.64 {q0-q1}, [sp] + tst r7, #16 + beq .Lpoly1305_finish_ext_neon_skip16 + vld1.u64 {q0}, [r1]! + vst1.64 {q0}, [r9]! +.Lpoly1305_finish_ext_neon_skip16: + tst r7, #8 + beq .Lpoly1305_finish_ext_neon_skip8 + UNALIGNED_LDMIA2(r1, r10, r11) + stmia r9!, {r10-r11} +.Lpoly1305_finish_ext_neon_skip8: + tst r7, #4 + beq .Lpoly1305_finish_ext_neon_skip4 + ldr r10, [r1], #4 + str r10, [r9], #4 +.Lpoly1305_finish_ext_neon_skip4: + tst r7, #2 + beq .Lpoly1305_finish_ext_neon_skip2 + ldrh r10, [r1], #2 + strh r10, [r9], #2 +.Lpoly1305_finish_ext_neon_skip2: + tst r7, #1 + beq .Lpoly1305_finish_ext_neon_skip1 + ldrb r10, [r1], #1 + strb r10, [r9], #1 +.Lpoly1305_finish_ext_neon_skip1: + cmp r7, #16 + beq .Lpoly1305_finish_ext_neon_skipfinalbit + mov r10, #1 + strb r10, [r9] +.Lpoly1305_finish_ext_neon_skipfinalbit: + ldr r10, [r5, #116] + orrhs r10, #2 + orrlo r10, #4 + str r10, [r5, #116] + mov r0, r5 + mov r1, sp + mov r2, #32 + bl .Lpoly1305_blocks_neon_local +.Lpoly1305_finish_ext_neon_noremaining: + ldr r10, [r5, #116] + tst r10, #1 + beq .Lpoly1305_finish_ext_neon_notstarted + cmp r7, #0 + beq .Lpoly1305_finish_ext_neon_user2r + cmp r7, #16 + bls .Lpoly1305_finish_ext_neon_user1 +.Lpoly1305_finish_ext_neon_user2r: + orr r10, r10, #8 + b .Lpoly1305_finish_ext_neon_finalblock +.Lpoly1305_finish_ext_neon_user1: + orr r10, r10, #16 +.Lpoly1305_finish_ext_neon_finalblock: + str r10, [r5, #116] + mov r0, r5 + eor r1, r1, r1 + mov r2, #32 + bl .Lpoly1305_blocks_neon_local +.Lpoly1305_finish_ext_neon_notstarted: + add r0, r5, #60 + add r9, r5, #100 + ldm r0, {r0-r3} + ldm r9, {r9-r12} + adds r0, r0, r9 + adcs r1, r1, r10 + adcs r2, r2, r11 + adcs r3, r3, r12 + stm r8, {r0-r3} + veor q0, q0, q0 + veor q1, q1, q1 + veor q2, q2, q2 + veor q3, q3, q3 + vstmia r5!, {q0-q3} + vstm r5, {q0-q3} + add sp, sp, #32 + ldmfd sp!, {r4-r11, lr} + mov r0, #(9*4+32) + bx lr +.size _gcry_poly1305_armv7_neon_finish_ext,.-_gcry_poly1305_armv7_neon_finish_ext; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/poly1305-avx2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/poly1305-avx2-amd64.S new file mode 100644 index 0000000..9362a5a --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/poly1305-avx2-amd64.S @@ -0,0 +1,962 @@ +/* poly1305-avx2-amd64.S - AMD64/AVX2 implementation of Poly1305 + * + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on public domain implementation by Andrew Moon at + * https://github.com/floodyberry/poly1305-opt + */ + +#include <config.h> + +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(ENABLE_AVX2_SUPPORT) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + + +.text + + +.align 8 +.globl _gcry_poly1305_amd64_avx2_init_ext +ELF(.type _gcry_poly1305_amd64_avx2_init_ext,@function;) +_gcry_poly1305_amd64_avx2_init_ext: +.Lpoly1305_init_ext_avx2_local: + xor %edx, %edx + vzeroupper + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbx + movq %rdx, %rcx + vpxor %ymm0, %ymm0, %ymm0 + movq $-1, %r8 + testq %rcx, %rcx + vmovdqu %ymm0, (%rdi) + vmovdqu %ymm0, 32(%rdi) + vmovdqu %ymm0, 64(%rdi) + vmovdqu %ymm0, 96(%rdi) + vmovdqu %ymm0, 128(%rdi) + movq 8(%rsi), %r9 + cmove %r8, %rcx + movq $0xffc0fffffff, %r8 + movq %r9, %r13 + movq (%rsi), %r10 + andq %r10, %r8 + shrq $44, %r10 + movq %r8, %r14 + shlq $20, %r13 + orq %r13, %r10 + movq $0xfffffc0ffff, %r13 + shrq $24, %r9 + andq %r13, %r10 + movq $0xffffffc0f, %r13 + andq %r13, %r9 + movl %r8d, %r13d + andl $67108863, %r13d + movl %r13d, 164(%rdi) + movq %r10, %r13 + shrq $26, %r14 + shlq $18, %r13 + orq %r13, %r14 + movq %r10, %r13 + shrq $8, %r13 + andl $67108863, %r14d + andl $67108863, %r13d + movl %r14d, 172(%rdi) + movq %r10, %r14 + movl %r13d, 180(%rdi) + movq %r9, %r13 + shrq $34, %r14 + shlq $10, %r13 + orq %r13, %r14 + movq %r9, %r13 + shrq $16, %r13 + andl $67108863, %r14d + movl %r14d, 188(%rdi) + movl %r13d, 196(%rdi) + cmpq $16, %rcx + jbe .Lpoly1305_init_ext_avx2_continue + lea (%r9,%r9,4), %r11 + shlq $2, %r11 + lea (%r10,%r10), %rax + mulq %r11 + movq %rax, %r13 + movq %r8, %rax + movq %rdx, %r14 + mulq %r8 + addq %rax, %r13 + lea (%r8,%r8), %rax + movq %r13, %r12 + adcq %rdx, %r14 + mulq %r10 + shlq $20, %r14 + movq %rax, %r15 + shrq $44, %r12 + movq %r11, %rax + orq %r12, %r14 + movq %rdx, %r12 + mulq %r9 + addq %rax, %r15 + movq %r8, %rax + adcq %rdx, %r12 + addq %r15, %r14 + lea (%r9,%r9), %r15 + movq %r14, %rbx + adcq $0, %r12 + mulq %r15 + shlq $20, %r12 + movq %rdx, %r11 + shrq $44, %rbx + orq %rbx, %r12 + movq %rax, %rbx + movq %r10, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %r11 + addq %rbx, %r12 + movq $0xfffffffffff, %rbx + movq %r12, %r15 + adcq $0, %r11 + andq %rbx, %r13 + shlq $22, %r11 + andq %rbx, %r14 + shrq $42, %r15 + orq %r15, %r11 + lea (%r11,%r11,4), %r11 + addq %r11, %r13 + movq %rbx, %r11 + andq %r13, %r11 + shrq $44, %r13 + movq %r11, %r15 + addq %r13, %r14 + movq $0x3ffffffffff, %r13 + andq %r14, %rbx + andq %r13, %r12 + movq %rbx, %r13 + shrq $26, %r15 + shlq $18, %r13 + orq %r13, %r15 + movq %rbx, %r13 + shrq $44, %r14 + shrq $8, %r13 + addq %r14, %r12 + movl %r11d, %r14d + andl $67108863, %r15d + andl $67108863, %r14d + andl $67108863, %r13d + movl %r14d, 204(%rdi) + movq %rbx, %r14 + movl %r13d, 220(%rdi) + movq %r12, %r13 + shrq $34, %r14 + shlq $10, %r13 + orq %r13, %r14 + movq %r12, %r13 + shrq $16, %r13 + andl $67108863, %r14d + movl %r15d, 212(%rdi) + movl %r14d, 228(%rdi) + movl %r13d, 236(%rdi) + cmpq $32, %rcx + jbe .Lpoly1305_init_ext_avx2_continue + movq %r9, %rax + lea (%rbx,%rbx,4), %r14 + shlq $2, %r14 + mulq %r14 + movq %rdi, -32(%rsp) + lea 
(%r12,%r12,4), %rdi + shlq $2, %rdi + movq %rax, %r14 + movq %r10, %rax + movq %rdx, %r15 + mulq %rdi + movq %rax, %r13 + movq %r11, %rax + movq %rcx, -16(%rsp) + movq %rdx, %rcx + mulq %r8 + addq %rax, %r13 + movq %rdi, %rax + movq %rsi, -24(%rsp) + adcq %rdx, %rcx + addq %r13, %r14 + adcq %rcx, %r15 + movq %r14, %rcx + mulq %r9 + shlq $20, %r15 + movq %rax, %r13 + shrq $44, %rcx + movq %r11, %rax + orq %rcx, %r15 + movq %rdx, %rcx + mulq %r10 + movq %rax, %rsi + movq %rbx, %rax + movq %rdx, %rdi + mulq %r8 + addq %rax, %rsi + movq %r11, %rax + adcq %rdx, %rdi + addq %rsi, %r13 + adcq %rdi, %rcx + addq %r13, %r15 + movq %r15, %rdi + adcq $0, %rcx + mulq %r9 + shlq $20, %rcx + movq %rdx, %rsi + shrq $44, %rdi + orq %rdi, %rcx + movq %rax, %rdi + movq %rbx, %rax + mulq %r10 + movq %rax, %r9 + movq %r8, %rax + movq %rdx, %r10 + movq $0xfffffffffff, %r8 + mulq %r12 + addq %rax, %r9 + adcq %rdx, %r10 + andq %r8, %r14 + addq %r9, %rdi + adcq %r10, %rsi + andq %r8, %r15 + addq %rdi, %rcx + movq $0x3ffffffffff, %rdi + movq %rcx, %r10 + adcq $0, %rsi + andq %rdi, %rcx + shlq $22, %rsi + shrq $42, %r10 + orq %r10, %rsi + movq -32(%rsp), %rdi + lea (%rsi,%rsi,4), %r9 + movq %r8, %rsi + addq %r9, %r14 + andq %r14, %rsi + shrq $44, %r14 + addq %r14, %r15 + andq %r15, %r8 + shrq $44, %r15 + movq %r8, %r14 + addq %r15, %rcx + movl %esi, %r15d + movq %rcx, %r10 + movq %r8, %r9 + shrq $26, %rsi + andl $67108863, %r15d + shlq $18, %r14 + shrq $34, %r8 + orq %r14, %rsi + shlq $10, %r10 + shrq $8, %r9 + orq %r10, %r8 + shrq $16, %rcx + andl $67108863, %esi + movl %esi, 252(%rdi) + andl $67108863, %r9d + movl %ecx, 276(%rdi) + andl $67108863, %r8d + movl %r15d, 244(%rdi) + movl %r9d, 260(%rdi) + movl %r8d, 268(%rdi) + movq -16(%rsp), %rcx + movq -24(%rsp), %rsi +.Lpoly1305_init_ext_avx2_continue: + movl 16(%rsi), %r8d + movl %r8d, 284(%rdi) + movl 20(%rsi), %r9d + movl %r9d, 292(%rdi) + movl 24(%rsi), %r10d + movl %r10d, 300(%rdi) + movl 28(%rsi), %esi + movl %esi, 308(%rdi) + cmpq $48, %rcx + jbe .Lpoly1305_init_ext_avx2_done + lea (%r12,%r12,4), %r9 + shlq $2, %r9 + lea (%rbx,%rbx), %rax + mulq %r9 + movq %rax, %rsi + movq %r11, %rax + movq %rdx, %r8 + mulq %r11 + addq %rax, %rsi + lea (%r11,%r11), %rax + movq %rsi, %r10 + adcq %rdx, %r8 + mulq %rbx + movq %rax, %r13 + movq %r12, %rax + movq %rdx, %rcx + addq %r12, %r12 + mulq %r9 + addq %rax, %r13 + movq %r11, %rax + movq $0xfffffffffff, %r9 + adcq %rdx, %rcx + andq %r9, %rsi + mulq %r12 + shlq $20, %r8 + movq %rax, %r11 + shrq $44, %r10 + movq %rbx, %rax + orq %r10, %r8 + movq %rdx, %r12 + mulq %rbx + addq %r13, %r8 + movq %r8, %r14 + adcq $0, %rcx + andq %r9, %r8 + addq %rax, %r11 + adcq %rdx, %r12 + shlq $20, %rcx + shrq $44, %r14 + orq %r14, %rcx + addq %r11, %rcx + movq %rcx, %rbx + adcq $0, %r12 + shlq $22, %r12 + shrq $42, %rbx + orq %rbx, %r12 + movq %r9, %rbx + lea (%r12,%r12,4), %r15 + addq %r15, %rsi + andq %rsi, %rbx + shrq $44, %rsi + movl %ebx, %r11d + addq %rsi, %r8 + movq $0x3ffffffffff, %rsi + andq %r8, %r9 + andq %rsi, %rcx + shrq $44, %r8 + movq %r9, %rax + addq %r8, %rcx + movq %r9, %r8 + movq %rcx, %r10 + andl $67108863, %r11d + shrq $26, %rbx + shlq $18, %r8 + shrq $34, %r9 + orq %r8, %rbx + shlq $10, %r10 + shrq $8, %rax + orq %r10, %r9 + shrq $16, %rcx + andl $67108863, %ebx + andl $67108863, %eax + andl $67108863, %r9d + movl %r11d, 184(%rdi) + movl %r11d, 176(%rdi) + movl %r11d, 168(%rdi) + movl %r11d, 160(%rdi) + movl %ebx, 216(%rdi) + movl %ebx, 208(%rdi) + movl %ebx, 200(%rdi) + movl %ebx, 192(%rdi) + movl %eax, 248(%rdi) + 
movl %eax, 240(%rdi) + movl %eax, 232(%rdi) + movl %eax, 224(%rdi) + movl %r9d, 280(%rdi) + movl %r9d, 272(%rdi) + movl %r9d, 264(%rdi) + movl %r9d, 256(%rdi) + movl %ecx, 312(%rdi) + movl %ecx, 304(%rdi) + movl %ecx, 296(%rdi) + movl %ecx, 288(%rdi) +.Lpoly1305_init_ext_avx2_done: + movq $0, 320(%rdi) + vzeroall + popq %rbx + popq %r15 + popq %r14 + popq %r13 + popq %r12 + ret +ELF(.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;) + + +.align 8 +.globl _gcry_poly1305_amd64_avx2_blocks +ELF(.type _gcry_poly1305_amd64_avx2_blocks,@function;) +_gcry_poly1305_amd64_avx2_blocks: +.Lpoly1305_blocks_avx2_local: + vzeroupper + pushq %rbp + movq %rsp, %rbp + pushq %rbx + andq $-64, %rsp + subq $200, %rsp + movl $((1<<26)-1), %r8d + movl $(5), %r9d + movl $((1<<24)), %r10d + vmovd %r8d, %xmm0 + vmovd %r9d, %xmm8 + vmovd %r10d, %xmm7 + vpbroadcastq %xmm0, %ymm0 + vpbroadcastq %xmm8, %ymm8 + vpbroadcastq %xmm7, %ymm7 + vmovdqa %ymm7, 168(%rsp) + movq 320(%rdi), %rax + testb $60, %al + je .Lpoly1305_blocks_avx2_9 + vmovdqa 168(%rsp), %ymm7 + vpsrldq $8, %ymm7, %ymm1 + vmovdqa %ymm1, 168(%rsp) + testb $4, %al + je .Lpoly1305_blocks_avx2_10 + vpermq $192, %ymm1, %ymm7 + vmovdqa %ymm7, 168(%rsp) +.Lpoly1305_blocks_avx2_10: + testb $8, %al + je .Lpoly1305_blocks_avx2_11 + vpermq $240, 168(%rsp), %ymm7 + vmovdqa %ymm7, 168(%rsp) +.Lpoly1305_blocks_avx2_11: + testb $16, %al + je .Lpoly1305_blocks_avx2_12 + vpermq $252, 168(%rsp), %ymm6 + vmovdqa %ymm6, 168(%rsp) +.Lpoly1305_blocks_avx2_12: + testb $32, %al + je .Lpoly1305_blocks_avx2_9 + vpxor %xmm6, %xmm6, %xmm6 + vmovdqa %ymm6, 168(%rsp) +.Lpoly1305_blocks_avx2_9: + testb $1, %al + jne .Lpoly1305_blocks_avx2_13 + vmovdqu (%rsi), %ymm3 + vmovdqu 32(%rsi), %ymm1 + vpunpcklqdq %ymm1, %ymm3, %ymm2 + vpunpckhqdq %ymm1, %ymm3, %ymm1 + vpermq $216, %ymm2, %ymm2 + vpermq $216, %ymm1, %ymm1 + vpand %ymm2, %ymm0, %ymm5 + vpsrlq $26, %ymm2, %ymm4 + vpand %ymm4, %ymm0, %ymm4 + vpsllq $12, %ymm1, %ymm3 + vpsrlq $52, %ymm2, %ymm2 + vpor %ymm3, %ymm2, %ymm2 + vpand %ymm2, %ymm0, %ymm3 + vpsrlq $26, %ymm2, %ymm2 + vpand %ymm2, %ymm0, %ymm2 + vpsrlq $40, %ymm1, %ymm1 + vpor 168(%rsp), %ymm1, %ymm1 + addq $64, %rsi + subq $64, %rdx + orq $1, 320(%rdi) + jmp .Lpoly1305_blocks_avx2_14 +.Lpoly1305_blocks_avx2_13: + vmovdqa (%rdi), %ymm5 + vmovdqa 32(%rdi), %ymm4 + vmovdqa 64(%rdi), %ymm3 + vmovdqa 96(%rdi), %ymm2 + vmovdqa 128(%rdi), %ymm1 +.Lpoly1305_blocks_avx2_14: + cmpq $63, %rdx + jbe .Lpoly1305_blocks_avx2_15 + vmovdqa 160(%rdi), %ymm6 + vmovdqa %ymm8, 136(%rsp) + vmovdqa 192(%rdi), %ymm7 + vpmuludq %ymm8, %ymm7, %ymm11 + vmovdqa %ymm11, 104(%rsp) + vmovdqa 224(%rdi), %ymm11 + vmovdqa %ymm11, 72(%rsp) + vpmuludq %ymm11, %ymm8, %ymm11 + vmovdqa %ymm11, 40(%rsp) + vmovdqa 256(%rdi), %ymm11 + vmovdqa %ymm11, 8(%rsp) + vpmuludq %ymm11, %ymm8, %ymm11 + vmovdqa %ymm11, -24(%rsp) + vmovdqa 288(%rdi), %ymm13 + vmovdqa %ymm13, -56(%rsp) + vpmuludq %ymm13, %ymm8, %ymm13 + vmovdqa %ymm13, -88(%rsp) +.Lpoly1305_blocks_avx2_16: + vpmuludq 104(%rsp), %ymm1, %ymm14 + vmovdqa 40(%rsp), %ymm13 + vpmuludq %ymm13, %ymm2, %ymm8 + vpmuludq %ymm13, %ymm1, %ymm13 + vmovdqa -24(%rsp), %ymm9 + vpmuludq %ymm9, %ymm2, %ymm10 + vpmuludq %ymm9, %ymm1, %ymm11 + vpaddq %ymm8, %ymm14, %ymm14 + vpmuludq %ymm9, %ymm3, %ymm8 + vmovdqa -88(%rsp), %ymm12 + vpmuludq %ymm12, %ymm1, %ymm9 + vpaddq %ymm10, %ymm13, %ymm13 + vpmuludq %ymm12, %ymm4, %ymm15 + vmovdqa %ymm12, %ymm10 + vpmuludq %ymm12, %ymm3, %ymm12 + vpaddq %ymm8, %ymm14, %ymm14 + vpmuludq %ymm10, %ymm2, %ymm10 + 
vpmuludq %ymm6, %ymm2, %ymm8 + vpaddq %ymm15, %ymm14, %ymm14 + vpmuludq %ymm6, %ymm1, %ymm1 + vpaddq %ymm12, %ymm13, %ymm13 + vpmuludq %ymm6, %ymm5, %ymm15 + vpaddq %ymm10, %ymm11, %ymm11 + vpmuludq %ymm6, %ymm4, %ymm12 + vpaddq %ymm8, %ymm9, %ymm9 + vpmuludq %ymm6, %ymm3, %ymm10 + vpmuludq %ymm7, %ymm3, %ymm8 + vpaddq %ymm15, %ymm14, %ymm14 + vpmuludq %ymm7, %ymm2, %ymm2 + vpaddq %ymm12, %ymm13, %ymm12 + vpmuludq %ymm7, %ymm5, %ymm15 + vpaddq %ymm10, %ymm11, %ymm10 + vpmuludq %ymm7, %ymm4, %ymm13 + vpaddq %ymm8, %ymm9, %ymm8 + vmovdqa 72(%rsp), %ymm9 + vpmuludq %ymm9, %ymm4, %ymm11 + vpaddq %ymm2, %ymm1, %ymm1 + vpmuludq %ymm9, %ymm3, %ymm3 + vpaddq %ymm15, %ymm12, %ymm12 + vpmuludq %ymm9, %ymm5, %ymm15 + vpaddq %ymm13, %ymm10, %ymm10 + vmovdqa 8(%rsp), %ymm2 + vpmuludq %ymm2, %ymm5, %ymm9 + vpaddq %ymm11, %ymm8, %ymm8 + vpmuludq %ymm2, %ymm4, %ymm4 + vpaddq %ymm3, %ymm1, %ymm1 + vpmuludq -56(%rsp), %ymm5, %ymm5 + vpaddq %ymm15, %ymm10, %ymm10 + vpaddq %ymm9, %ymm8, %ymm8 + vpaddq %ymm4, %ymm1, %ymm1 + vpaddq %ymm5, %ymm1, %ymm5 + vmovdqu (%rsi), %ymm3 + vmovdqu 32(%rsi), %ymm2 + vperm2i128 $32, %ymm2, %ymm3, %ymm1 + vperm2i128 $49, %ymm2, %ymm3, %ymm2 + vpunpckldq %ymm2, %ymm1, %ymm15 + vpunpckhdq %ymm2, %ymm1, %ymm2 + vpxor %xmm4, %xmm4, %xmm4 + vpunpckldq %ymm4, %ymm15, %ymm1 + vpunpckhdq %ymm4, %ymm15, %ymm15 + vpunpckldq %ymm4, %ymm2, %ymm3 + vpunpckhdq %ymm4, %ymm2, %ymm2 + vpsllq $6, %ymm15, %ymm15 + vpsllq $12, %ymm3, %ymm3 + vpsllq $18, %ymm2, %ymm2 + vpaddq %ymm1, %ymm14, %ymm14 + vpaddq %ymm15, %ymm12, %ymm12 + vpaddq %ymm3, %ymm10, %ymm10 + vpaddq %ymm2, %ymm8, %ymm8 + vpaddq 168(%rsp), %ymm5, %ymm5 + addq $64, %rsi + vpsrlq $26, %ymm14, %ymm4 + vpsrlq $26, %ymm8, %ymm2 + vpand %ymm0, %ymm14, %ymm14 + vpand %ymm0, %ymm8, %ymm8 + vpaddq %ymm4, %ymm12, %ymm12 + vpaddq %ymm2, %ymm5, %ymm5 + vpsrlq $26, %ymm12, %ymm3 + vpsrlq $26, %ymm5, %ymm9 + vpand %ymm0, %ymm12, %ymm12 + vpand %ymm0, %ymm5, %ymm11 + vpaddq %ymm3, %ymm10, %ymm3 + vpmuludq 136(%rsp), %ymm9, %ymm9 + vpaddq %ymm9, %ymm14, %ymm14 + vpsrlq $26, %ymm3, %ymm2 + vpsrlq $26, %ymm14, %ymm4 + vpand %ymm0, %ymm3, %ymm3 + vpand %ymm0, %ymm14, %ymm5 + vpaddq %ymm2, %ymm8, %ymm2 + vpaddq %ymm4, %ymm12, %ymm4 + vpsrlq $26, %ymm2, %ymm1 + vpand %ymm0, %ymm2, %ymm2 + vpaddq %ymm1, %ymm11, %ymm1 + subq $64, %rdx + cmpq $63, %rdx + ja .Lpoly1305_blocks_avx2_16 +.Lpoly1305_blocks_avx2_15: + testb $64, 320(%rdi) + jne .Lpoly1305_blocks_avx2_17 + vmovdqa %ymm5, (%rdi) + vmovdqa %ymm4, 32(%rdi) + vmovdqa %ymm3, 64(%rdi) + vmovdqa %ymm2, 96(%rdi) + vmovdqa %ymm1, 128(%rdi) + jmp .Lpoly1305_blocks_avx2_8 +.Lpoly1305_blocks_avx2_17: + vpermq $245, %ymm5, %ymm0 + vpaddq %ymm0, %ymm5, %ymm5 + vpermq $245, %ymm4, %ymm0 + vpaddq %ymm0, %ymm4, %ymm4 + vpermq $245, %ymm3, %ymm0 + vpaddq %ymm0, %ymm3, %ymm3 + vpermq $245, %ymm2, %ymm0 + vpaddq %ymm0, %ymm2, %ymm2 + vpermq $245, %ymm1, %ymm0 + vpaddq %ymm0, %ymm1, %ymm1 + vpermq $170, %ymm5, %ymm0 + vpaddq %ymm0, %ymm5, %ymm5 + vpermq $170, %ymm4, %ymm0 + vpaddq %ymm0, %ymm4, %ymm4 + vpermq $170, %ymm3, %ymm0 + vpaddq %ymm0, %ymm3, %ymm3 + vpermq $170, %ymm2, %ymm0 + vpaddq %ymm0, %ymm2, %ymm2 + vpermq $170, %ymm1, %ymm0 + vpaddq %ymm0, %ymm1, %ymm1 + vmovd %xmm5, %eax + vmovd %xmm4, %edx + movl %eax, %ecx + shrl $26, %ecx + addl %edx, %ecx + movl %ecx, %edx + andl $67108863, %edx + vmovd %xmm3, %esi + shrl $26, %ecx + movl %ecx, %r11d + addl %esi, %r11d + vmovd %xmm2, %ecx + movl %r11d, %r10d + shrl $26, %r10d + addl %ecx, %r10d + movl %r10d, %r9d + andl $67108863, %r9d + vmovd %xmm1, %r8d + 
movl %edx, %esi + salq $26, %rsi + andl $67108863, %eax + orq %rax, %rsi + movabsq $17592186044415, %rax + andq %rax, %rsi + andl $67108863, %r11d + salq $8, %r11 + shrl $18, %edx + movl %edx, %edx + orq %r11, %rdx + movq %r9, %rcx + salq $34, %rcx + orq %rcx, %rdx + andq %rax, %rdx + shrl $26, %r10d + addl %r10d, %r8d + salq $16, %r8 + shrl $10, %r9d + movl %r9d, %r9d + orq %r9, %r8 + movabsq $4398046511103, %r10 + movq %r8, %r9 + andq %r10, %r9 + shrq $42, %r8 + leaq (%r8,%r8,4), %rcx + addq %rcx, %rsi + movq %rsi, %r8 + andq %rax, %r8 + movq %rsi, %rcx + shrq $44, %rcx + addq %rdx, %rcx + movq %rcx, %rsi + andq %rax, %rsi + shrq $44, %rcx + movq %rcx, %rdx + addq %r9, %rdx + andq %rdx, %r10 + shrq $42, %rdx + leaq (%r8,%rdx,4), %rcx + leaq (%rcx,%rdx), %rdx + movq %rdx, %rbx + andq %rax, %rbx + shrq $44, %rdx + movq %rdx, %r11 + addq %rsi, %r11 + leaq 5(%rbx), %r9 + movq %r9, %r8 + shrq $44, %r8 + addq %r11, %r8 + movabsq $-4398046511104, %rsi + addq %r10, %rsi + movq %r8, %rdx + shrq $44, %rdx + addq %rdx, %rsi + movq %rsi, %rdx + shrq $63, %rdx + subq $1, %rdx + movq %rdx, %rcx + notq %rcx + andq %rcx, %rbx + andq %rcx, %r11 + andq %r10, %rcx + andq %rax, %r9 + andq %rdx, %r9 + orq %r9, %rbx + movq %rbx, (%rdi) + andq %r8, %rax + andq %rdx, %rax + orq %rax, %r11 + movq %r11, 8(%rdi) + andq %rsi, %rdx + orq %rcx, %rdx + movq %rdx, 16(%rdi) +.Lpoly1305_blocks_avx2_8: + movq -8(%rbp), %rbx + vzeroall + movq %rbp, %rax + subq %rsp, %rax + leave + addq $8, %rax + ret +ELF(.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;) + + +.align 8 +.globl _gcry_poly1305_amd64_avx2_finish_ext +ELF(.type _gcry_poly1305_amd64_avx2_finish_ext,@function;) +_gcry_poly1305_amd64_avx2_finish_ext: +.Lpoly1305_finish_ext_avx2_local: + vzeroupper + pushq %rbp + movq %rsp, %rbp + pushq %r13 + pushq %r12 + pushq %rbx + andq $-64, %rsp + subq $64, %rsp + movq %rdi, %rbx + movq %rdx, %r13 + movq %rcx, %r12 + testq %rdx, %rdx + je .Lpoly1305_finish_ext_avx2_22 + vpxor %xmm0, %xmm0, %xmm0 + vmovdqa %ymm0, (%rsp) + vmovdqa %ymm0, 32(%rsp) + movq %rsp, %rax + subq %rsp, %rsi + testb $32, %dl + je .Lpoly1305_finish_ext_avx2_23 + vmovdqu (%rsp,%rsi), %ymm0 + vmovdqa %ymm0, (%rsp) + leaq 32(%rsp), %rax +.Lpoly1305_finish_ext_avx2_23: + testb $16, %r13b + je .Lpoly1305_finish_ext_avx2_24 + vmovdqu (%rax,%rsi), %xmm0 + vmovdqa %xmm0, (%rax) + addq $16, %rax +.Lpoly1305_finish_ext_avx2_24: + testb $8, %r13b + je .Lpoly1305_finish_ext_avx2_25 + movq (%rax,%rsi), %rdx + movq %rdx, (%rax) + addq $8, %rax +.Lpoly1305_finish_ext_avx2_25: + testb $4, %r13b + je .Lpoly1305_finish_ext_avx2_26 + movl (%rax,%rsi), %edx + movl %edx, (%rax) + addq $4, %rax +.Lpoly1305_finish_ext_avx2_26: + testb $2, %r13b + je .Lpoly1305_finish_ext_avx2_27 + movzwl (%rax,%rsi), %edx + movw %dx, (%rax) + addq $2, %rax +.Lpoly1305_finish_ext_avx2_27: + testb $1, %r13b + je .Lpoly1305_finish_ext_avx2_28 + movzbl (%rax,%rsi), %edx + movb %dl, (%rax) +.Lpoly1305_finish_ext_avx2_28: + testb $15, %r13b + je .Lpoly1305_finish_ext_avx2_29 + movb $1, (%rsp,%r13) +.Lpoly1305_finish_ext_avx2_29: + cmpq $47, %r13 + jbe .Lpoly1305_finish_ext_avx2_30 + orq $4, 320(%rbx) + jmp .Lpoly1305_finish_ext_avx2_31 +.Lpoly1305_finish_ext_avx2_30: + cmpq $31, %r13 + jbe .Lpoly1305_finish_ext_avx2_32 + orq $8, 320(%rbx) + jmp .Lpoly1305_finish_ext_avx2_31 +.Lpoly1305_finish_ext_avx2_32: + cmpq $15, %r13 + jbe .Lpoly1305_finish_ext_avx2_33 + orq $16, 320(%rbx) + jmp .Lpoly1305_finish_ext_avx2_31 +.Lpoly1305_finish_ext_avx2_33: + orq $32, 320(%rbx) 
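+/* In rough C terms, the flag setup above reads (field name informal):
+ *   if (remaining > 47)      state->flags |= 4;
+ *   else if (remaining > 31) state->flags |= 8;
+ *   else if (remaining > 15) state->flags |= 16;
+ *   else                     state->flags |= 32;
+ * Bits 2-5 tell the blocks routine how to position the 2^128 padding
+ * word for the final partial 64-byte group, while bit 0 marks h as
+ * initialized and bit 6 (set just before the last call) requests the
+ * final carry chain and the reduction mod 2^130 - 5. */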
+.Lpoly1305_finish_ext_avx2_31: + testb $1, 320(%rbx) + je .Lpoly1305_finish_ext_avx2_34 + cmpq $32, %r13 + ja .Lpoly1305_finish_ext_avx2_34 + cmpq $17, %r13 + sbbq %rsi, %rsi + notq %rsi + addq $2, %rsi + cmpq $17, %r13 + sbbq %rax, %rax + movq %rbx, %rdx + addq $23, %rax + leaq (%rbx,%rax,8), %rax + movl $0, %ecx +.Lpoly1305_finish_ext_avx2_37: + movl 244(%rdx), %edi + movl %edi, (%rax) + movl 252(%rdx), %edi + movl %edi, 32(%rax) + movl 260(%rdx), %edi + movl %edi, 64(%rax) + movl 268(%rdx), %edi + movl %edi, 96(%rax) + movl 276(%rdx), %edi + movl %edi, 128(%rax) + addq $1, %rcx + subq $40, %rdx + addq $8, %rax + cmpq %rcx, %rsi + ja .Lpoly1305_finish_ext_avx2_37 +.Lpoly1305_finish_ext_avx2_34: + movl $64, %edx + movq %rsp, %rsi + movq %rbx, %rdi + call .Lpoly1305_blocks_avx2_local +.Lpoly1305_finish_ext_avx2_22: + movq 320(%rbx), %r8 + testb $1, %r8b + je .Lpoly1305_finish_ext_avx2_38 + leaq -1(%r13), %rax + cmpq $47, %rax + ja .Lpoly1305_finish_ext_avx2_46 + cmpq $32, %r13 + ja .Lpoly1305_finish_ext_avx2_47 + cmpq $17, %r13 + sbbq %r9, %r9 + addq $2, %r9 + movl $0, %edi + cmpq $17, %r13 + sbbq %rax, %rax + notq %rax + andl $5, %eax + jmp .Lpoly1305_finish_ext_avx2_39 +.Lpoly1305_finish_ext_avx2_41: + movl (%rdx), %esi + movl %esi, (%rax) + movl 8(%rdx), %esi + movl %esi, 32(%rax) + movl 16(%rdx), %esi + movl %esi, 64(%rax) + movl 24(%rdx), %esi + movl %esi, 96(%rax) + movl 32(%rdx), %esi + movl %esi, 128(%rax) + addq $1, %rcx + subq $40, %rdx + addq $8, %rax + movq %rcx, %rsi + subq %rdi, %rsi + cmpq %rsi, %r9 + ja .Lpoly1305_finish_ext_avx2_41 + cmpq $3, %rcx + ja .Lpoly1305_finish_ext_avx2_42 + leaq 160(%rbx,%rcx,8), %rax +.Lpoly1305_finish_ext_avx2_43: + movl $1, (%rax) + movl $0, 32(%rax) + movl $0, 64(%rax) + movl $0, 96(%rax) + movl $0, 128(%rax) + addq $1, %rcx + addq $8, %rax + cmpq $4, %rcx + jne .Lpoly1305_finish_ext_avx2_43 +.Lpoly1305_finish_ext_avx2_42: + orq $96, %r8 + movq %r8, 320(%rbx) + vpxor %ymm0, %ymm0, %ymm0 + vmovdqa %ymm0, (%rsp) + vmovdqa %ymm0, 32(%rsp) + movl $64, %edx + movq %rsp, %rsi + movq %rbx, %rdi + call .Lpoly1305_blocks_avx2_local +.Lpoly1305_finish_ext_avx2_38: + movq 8(%rbx), %rax + movq %rax, %rdx + salq $44, %rdx + orq (%rbx), %rdx + shrq $20, %rax + movl $24, %edi + shlx %rdi, 16(%rbx), %rcx + orq %rcx, %rax + movl 292(%rbx), %ecx + salq $32, %rcx + movl 284(%rbx), %esi + orq %rsi, %rcx + movl 308(%rbx), %esi + salq $32, %rsi + movl 300(%rbx), %edi + orq %rdi, %rsi + addq %rcx, %rdx + adcq %rsi, %rax + movq %rdx, (%r12) + movq %rax, 8(%r12) + vpxor %xmm0, %xmm0, %xmm0 + vmovdqu %ymm0, (%rbx) + vmovdqu %ymm0, 32(%rbx) + vmovdqu %ymm0, 64(%rbx) + vmovdqu %ymm0, 96(%rbx) + vmovdqu %ymm0, 128(%rbx) + vmovdqu %ymm0, 160(%rbx) + vmovdqu %ymm0, 192(%rbx) + vmovdqu %ymm0, 224(%rbx) + jmp .Lpoly1305_finish_ext_avx2_49 +.Lpoly1305_finish_ext_avx2_46: + movl $3, %r9d + movl $1, %edi + movl $10, %eax + jmp .Lpoly1305_finish_ext_avx2_39 +.Lpoly1305_finish_ext_avx2_47: + movl $3, %r9d + movl $0, %edi + movl $10, %eax +.Lpoly1305_finish_ext_avx2_39: + leaq 164(%rbx,%rax,8), %rdx + leaq 160(%rbx,%rdi,8), %rax + movq %rdi, %rcx + jmp .Lpoly1305_finish_ext_avx2_41 +.Lpoly1305_finish_ext_avx2_49: + movq %rbp, %rax + subq %rsp, %rax + leaq -24(%rbp), %rsp + vzeroall + popq %rbx + popq %r12 + popq %r13 + popq %rbp + addq $(8*5), %rax +ret +ELF(.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;) + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/poly1305-internal.h b/libotr/libgcrypt-1.8.7/cipher/poly1305-internal.h new file mode 
100644 index 0000000..bcbe5df --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/poly1305-internal.h @@ -0,0 +1,167 @@ +/* poly1305-internal.h - Poly1305 internals + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef G10_POLY1305_INTERNAL_H +#define G10_POLY1305_INTERNAL_H + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" + + +#define POLY1305_TAGLEN 16 +#define POLY1305_KEYLEN 32 + + +/* Block-size used in default implementation. */ +#define POLY1305_REF_BLOCKSIZE 16 + +/* State size of default implementation. */ +#define POLY1305_REF_STATESIZE 64 + +/* State alignment for default implementation. */ +#define POLY1305_REF_ALIGNMENT sizeof(void *) + + +#undef POLY1305_SYSV_FUNC_ABI + +/* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */ +#undef POLY1305_USE_SSE2 +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define POLY1305_USE_SSE2 1 +# define POLY1305_SSE2_BLOCKSIZE 32 +# define POLY1305_SSE2_STATESIZE 248 +# define POLY1305_SSE2_ALIGNMENT 16 +# define POLY1305_SYSV_FUNC_ABI 1 +#endif + + +/* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */ +#undef POLY1305_USE_AVX2 +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(ENABLE_AVX2_SUPPORT) +# define POLY1305_USE_AVX2 1 +# define POLY1305_AVX2_BLOCKSIZE 64 +# define POLY1305_AVX2_STATESIZE 328 +# define POLY1305_AVX2_ALIGNMENT 32 +# define POLY1305_SYSV_FUNC_ABI 1 +#endif + + +/* POLY1305_USE_NEON indicates whether to enable ARM NEON assembly code. */ +#undef POLY1305_USE_NEON +#if defined(ENABLE_NEON_SUPPORT) && defined(HAVE_ARM_ARCH_V6) && \ + defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) +# define POLY1305_USE_NEON 1 +# define POLY1305_NEON_BLOCKSIZE 32 +# define POLY1305_NEON_STATESIZE 128 +# define POLY1305_NEON_ALIGNMENT 16 +#endif + + +/* Largest block-size used in any implementation (optimized implementations + * might use block-size multiple of 16). */ +#ifdef POLY1305_USE_AVX2 +# define POLY1305_LARGEST_BLOCKSIZE POLY1305_AVX2_BLOCKSIZE +#elif defined(POLY1305_USE_NEON) +# define POLY1305_LARGEST_BLOCKSIZE POLY1305_NEON_BLOCKSIZE +#elif defined(POLY1305_USE_SSE2) +# define POLY1305_LARGEST_BLOCKSIZE POLY1305_SSE2_BLOCKSIZE +#else +# define POLY1305_LARGEST_BLOCKSIZE POLY1305_REF_BLOCKSIZE +#endif + +/* Largest state-size used in any implementation. 
*/ +#ifdef POLY1305_USE_AVX2 +# define POLY1305_LARGEST_STATESIZE POLY1305_AVX2_STATESIZE +#elif defined(POLY1305_USE_NEON) +# define POLY1305_LARGEST_STATESIZE POLY1305_NEON_STATESIZE +#elif defined(POLY1305_USE_SSE2) +# define POLY1305_LARGEST_STATESIZE POLY1305_SSE2_STATESIZE +#else +# define POLY1305_LARGEST_STATESIZE POLY1305_REF_STATESIZE +#endif + +/* Minimum alignment for state pointer passed to implementations. */ +#ifdef POLY1305_USE_AVX2 +# define POLY1305_STATE_ALIGNMENT POLY1305_AVX2_ALIGNMENT +#elif defined(POLY1305_USE_NEON) +# define POLY1305_STATE_ALIGNMENT POLY1305_NEON_ALIGNMENT +#elif defined(POLY1305_USE_SSE2) +# define POLY1305_STATE_ALIGNMENT POLY1305_SSE2_ALIGNMENT +#else +# define POLY1305_STATE_ALIGNMENT POLY1305_REF_ALIGNMENT +#endif + + +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. */ +#undef OPS_FUNC_ABI +#if defined(POLY1305_SYSV_FUNC_ABI) && \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) +# define OPS_FUNC_ABI __attribute__((sysv_abi)) +#else +# define OPS_FUNC_ABI +#endif + + +typedef struct poly1305_key_s +{ + byte b[POLY1305_KEYLEN]; +} poly1305_key_t; + + +typedef struct poly1305_ops_s +{ + size_t block_size; + void (*init_ext) (void *ctx, const poly1305_key_t * key) OPS_FUNC_ABI; + unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes) OPS_FUNC_ABI; + unsigned int (*finish_ext) (void *ctx, const byte * m, size_t remaining, + byte mac[POLY1305_TAGLEN]) OPS_FUNC_ABI; +} poly1305_ops_t; + + +typedef struct poly1305_context_s +{ + byte state[POLY1305_LARGEST_STATESIZE + POLY1305_STATE_ALIGNMENT]; + byte buffer[POLY1305_LARGEST_BLOCKSIZE]; + const poly1305_ops_t *ops; + unsigned int leftover; +} poly1305_context_t; + + +gcry_err_code_t _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key, + size_t keylen); + +void _gcry_poly1305_finish (poly1305_context_t * ctx, + byte mac[POLY1305_TAGLEN]); + +void _gcry_poly1305_update (poly1305_context_t * ctx, const byte * buf, + size_t buflen); + + +#endif /* G10_POLY1305_INTERNAL_H */ diff --git a/libotr/libgcrypt-1.8.7/cipher/poly1305-sse2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/poly1305-sse2-amd64.S new file mode 100644 index 0000000..219eb07 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/poly1305-sse2-amd64.S @@ -0,0 +1,1043 @@ +/* poly1305-sse2-amd64.S - AMD64/SSE2 implementation of Poly1305 + * + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on public domain implementation by Andrew Moon at + * https://github.com/floodyberry/poly1305-opt + */ + +#include <config.h> + +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) 
__VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + + +.text + + +.align 8 +.globl _gcry_poly1305_amd64_sse2_init_ext +ELF(.type _gcry_poly1305_amd64_sse2_init_ext,@function;) +_gcry_poly1305_amd64_sse2_init_ext: +.Lpoly1305_init_ext_x86_local: + xor %edx, %edx + pushq %r12 + pushq %r13 + pushq %r14 + movq %rdx, %r10 + movq $-1, %rcx + testq %r10, %r10 + pxor %xmm0, %xmm0 + movq $0xfffffc0ffff, %r9 + movdqa %xmm0, (%rdi) + cmove %rcx, %r10 + movdqa %xmm0, 16(%rdi) + movq $0xffc0fffffff, %rcx + movdqa %xmm0, 32(%rdi) + movdqa %xmm0, 48(%rdi) + movdqa %xmm0, 64(%rdi) + movq 8(%rsi), %r11 + movq %r11, %r8 + movq (%rsi), %r12 + andq %r12, %rcx + shrq $44, %r12 + shlq $20, %r8 + shrq $24, %r11 + orq %r8, %r12 + movq $0xffffffc0f, %r8 + andq %r9, %r12 + andq %r8, %r11 + movl %ecx, %r8d + andl $67108863, %r8d + movq %rcx, %r9 + movl %r8d, 84(%rdi) + movq %r12, %r8 + shrq $26, %r9 + shlq $18, %r8 + orq %r8, %r9 + movq %r12, %r8 + shrq $8, %r8 + andl $67108863, %r9d + andl $67108863, %r8d + movl %r9d, 92(%rdi) + movq %r12, %r9 + movl %r8d, 100(%rdi) + movq %r11, %r8 + shrq $34, %r9 + shlq $10, %r8 + orq %r8, %r9 + movq %r11, %r8 + shrq $16, %r8 + andl $67108863, %r9d + movl %r9d, 108(%rdi) + cmpq $16, %r10 + movl %r8d, 116(%rdi) + movl 16(%rsi), %r8d + movl %r8d, 124(%rdi) + movl 20(%rsi), %r8d + movl %r8d, 132(%rdi) + movl 24(%rsi), %r8d + movl %r8d, 140(%rdi) + movl 28(%rsi), %esi + movl %esi, 148(%rdi) + jbe .Lpoly1305_init_ext_sse2_done + lea (%r11,%r11,4), %r14 + shlq $2, %r14 + lea (%r12,%r12), %rax + mulq %r14 + movq %rax, %r13 + movq %rcx, %rax + movq %rdx, %r8 + mulq %rcx + addq %rax, %r13 + lea (%rcx,%rcx), %rax + movq %r13, %r9 + adcq %rdx, %r8 + mulq %r12 + shlq $20, %r8 + movq %rax, %rsi + shrq $44, %r9 + movq %r11, %rax + orq %r9, %r8 + movq %rdx, %r9 + mulq %r14 + addq %rax, %rsi + movq %rcx, %rax + adcq %rdx, %r9 + addq %r11, %r11 + mulq %r11 + addq %rsi, %r8 + movq %rax, %r11 + movq %r12, %rax + movq %rdx, %rcx + adcq $0, %r9 + mulq %r12 + addq %rax, %r11 + movq %r8, %rsi + adcq %rdx, %rcx + shlq $20, %r9 + shrq $44, %rsi + orq %rsi, %r9 + movq $0xfffffffffff, %rsi + addq %r11, %r9 + movq %r9, %r12 + adcq $0, %rcx + andq %rsi, %r13 + shlq $22, %rcx + andq %rsi, %r8 + shrq $42, %r12 + orq %r12, %rcx + movq %rsi, %r12 + lea (%rcx,%rcx,4), %rcx + addq %rcx, %r13 + movq %rsi, %rcx + andq %r13, %rcx + shrq $44, %r13 + movq %rcx, %r14 + addq %r13, %r8 + movq $0x3ffffffffff, %r13 + andq %r8, %r12 + andq %r13, %r9 + shrq $44, %r8 + movq %r12, %r11 + addq %r8, %r9 + movq %r12, %rax + movq %r9, %r13 + movl %ecx, %r8d + shrq $26, %r14 + andl $67108863, %r8d + shlq $18, %r11 + shrq $34, %rax + orq %r11, %r14 + shlq $10, %r13 + movq %r12, %r11 + orq %r13, %rax + movq %r9, %r13 + shrq $8, %r11 + shrq $16, %r13 + andl $67108863, %r14d + andl $67108863, %r11d + andl $67108863, %eax + movl %r8d, 88(%rdi) + cmpq $64, %r10 + movl %r8d, 80(%rdi) + movl %r14d, 104(%rdi) + movl %r14d, 96(%rdi) + movl %r11d, 120(%rdi) + movl %r11d, 112(%rdi) + movl %eax, 136(%rdi) + movl %eax, 128(%rdi) + movl %r13d, 152(%rdi) + movl %r13d, 144(%rdi) + jbe .Lpoly1305_init_ext_sse2_done + lea (%r9,%r9,4), %r14 + shlq $2, %r14 + lea (%r12,%r12), %rax + mulq %r14 + movq %rax, %r8 + movq %rcx, %rax + movq %rdx, %r10 + mulq %rcx + addq %rax, %r8 + lea (%rcx,%rcx), %rax + movq %r8, %r11 + adcq %rdx, %r10 + andq %rsi, %r8 + mulq %r12 + shlq $20, %r10 + movq %rax, %r13 + shrq $44, %r11 + movq %r9, %rax + orq %r11, %r10 + movq %rdx, %r11 + mulq %r14 + addq %rax, %r13 + movq %rcx, %rax + adcq %rdx, %r11 + addq %r9, %r9 + mulq 
%r9 + addq %r13, %r10 + movq %rax, %r9 + movq %r12, %rax + movq %rdx, %rcx + adcq $0, %r11 + mulq %r12 + addq %rax, %r9 + movq %r10, %r13 + adcq %rdx, %rcx + andq %rsi, %r10 + shlq $20, %r11 + shrq $44, %r13 + orq %r13, %r11 + addq %r9, %r11 + movq %rsi, %r9 + movq %r11, %r12 + adcq $0, %rcx + shlq $22, %rcx + shrq $42, %r12 + orq %r12, %rcx + lea (%rcx,%rcx,4), %rcx + addq %rcx, %r8 + andq %r8, %r9 + shrq $44, %r8 + movl %r9d, %eax + addq %r8, %r10 + movq $0x3ffffffffff, %r8 + andq %r10, %rsi + andq %r8, %r11 + shrq $44, %r10 + movq %rsi, %r8 + addq %r10, %r11 + andl $67108863, %eax + shrq $26, %r9 + movq %r11, %r10 + shlq $18, %r8 + shlq $10, %r10 + orq %r8, %r9 + movq %rsi, %r8 + shrq $34, %rsi + andl $67108863, %r9d + shrq $8, %r8 + orq %r10, %rsi + shrq $16, %r11 + andl $67108863, %r8d + andl $67108863, %esi + movl %eax, 168(%rdi) + movl %eax, 160(%rdi) + movl %r9d, 184(%rdi) + movl %r9d, 176(%rdi) + movl %r8d, 200(%rdi) + movl %r8d, 192(%rdi) + movl %esi, 216(%rdi) + movl %esi, 208(%rdi) + movl %r11d, 232(%rdi) + movl %r11d, 224(%rdi) +.Lpoly1305_init_ext_sse2_done: + movq $0, 240(%rdi) + popq %r14 + popq %r13 + popq %r12 + ret +ELF(.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;) + + +.align 8 +.globl _gcry_poly1305_amd64_sse2_finish_ext +ELF(.type _gcry_poly1305_amd64_sse2_finish_ext,@function;) +_gcry_poly1305_amd64_sse2_finish_ext: +.Lpoly1305_finish_ext_x86_local: + pushq %rbp + movq %rsp, %rbp + subq $64, %rsp + andq $~63, %rsp + movq %rdx, 32(%rsp) + movq %rcx, 40(%rsp) + andq %rdx, %rdx + jz .Lpoly1305_finish_x86_no_leftover + pxor %xmm0, %xmm0 + movdqa %xmm0, 0+0(%rsp) + movdqa %xmm0, 16+0(%rsp) + leaq 0(%rsp), %r8 + testq $16, %rdx + jz .Lpoly1305_finish_x86_skip16 + movdqu 0(%rsi), %xmm0 + movdqa %xmm0, 0(%r8) + addq $16, %rsi + addq $16, %r8 +.Lpoly1305_finish_x86_skip16: + testq $8, %rdx + jz .Lpoly1305_finish_x86_skip8 + movq 0(%rsi), %rax + movq %rax, 0(%r8) + addq $8, %rsi + addq $8, %r8 +.Lpoly1305_finish_x86_skip8: + testq $4, %rdx + jz .Lpoly1305_finish_x86_skip4 + movl 0(%rsi), %eax + movl %eax, 0(%r8) + addq $4, %rsi + addq $4, %r8 +.Lpoly1305_finish_x86_skip4: + testq $2, %rdx + jz .Lpoly1305_finish_x86_skip2 + movw 0(%rsi), %ax + movw %ax, 0(%r8) + addq $2, %rsi + addq $2, %r8 +.Lpoly1305_finish_x86_skip2: + testq $1, %rdx + jz .Lpoly1305_finish_x86_skip1 + movb 0(%rsi), %al + movb %al, 0(%r8) + addq $1, %r8 +.Lpoly1305_finish_x86_skip1: + cmpq $16, %rdx + je .Lpoly1305_finish_x86_is16 + movb $1, 0(%r8) +.Lpoly1305_finish_x86_is16: + movq $4, %rax + jae .Lpoly1305_finish_x86_16andover + movq $8, %rax +.Lpoly1305_finish_x86_16andover: + orq %rax, 240(%rdi) + leaq 0(%rsp), %rsi + movq $32, %rdx + callq .Lpoly1305_blocks_x86_local +.Lpoly1305_finish_x86_no_leftover: + testq $1, 240(%rdi) + jz .Lpoly1305_finish_x86_not_started + movq 32(%rsp), %rdx + andq %rdx, %rdx + jz .Lpoly1305_finish_x86_r2r + cmpq $16, %rdx + jg .Lpoly1305_finish_x86_r2r + xorl %r10d, %r10d + movl 84(%rdi), %eax + movl 92(%rdi), %ecx + movl 100(%rdi), %edx + movl 108(%rdi), %r8d + movl 116(%rdi), %r9d + movl %eax, 80(%rdi) + movl $1, 8+80(%rdi) + movl %ecx, 96(%rdi) + movl %r10d, 8+96(%rdi) + movl %edx, 112(%rdi) + movl %r10d, 8+112(%rdi) + movl %r8d, 128(%rdi) + movl %r10d, 8+128(%rdi) + movl %r9d, 144(%rdi) + movl %r10d, 8+144(%rdi) + jmp .Lpoly1305_finish_x86_combine +.Lpoly1305_finish_x86_r2r: + movl 84(%rdi), %eax + movl 92(%rdi), %ecx + movl 100(%rdi), %edx + movl 108(%rdi), %r8d + movl 116(%rdi), %r9d + movl %eax, 8+80(%rdi) + movl %ecx, 8+96(%rdi) + movl 
%edx, 8+112(%rdi) + movl %r8d, 8+128(%rdi) + movl %r9d, 8+144(%rdi) +.Lpoly1305_finish_x86_combine: + xorq %rsi, %rsi + movq $32, %rdx + callq .Lpoly1305_blocks_x86_local +.Lpoly1305_finish_x86_not_started: + movq 0(%rdi), %r8 + movq 8(%rdi), %r9 + movq %r9, %r10 + movq 16(%rdi), %r11 + shlq $44, %r9 + shrq $20, %r10 + shlq $24, %r11 + orq %r9, %r8 + orq %r11, %r10 + pxor %xmm0, %xmm0 + movl 124(%rdi), %eax + movl 132(%rdi), %ecx + movl 140(%rdi), %edx + movl 148(%rdi), %esi + movq 40(%rsp), %r11 + shlq $32, %rcx + shlq $32, %rsi + orq %rcx, %rax + orq %rsi, %rdx + addq %r8, %rax + adcq %r10, %rdx + movq %rax, 0(%r11) + movq %rdx, 8(%r11) + movq %rbp, %rax + subq %rsp, %rax + movq %rbp, %rsp + movdqa %xmm0, 0(%rdi) + movdqa %xmm0, 16(%rdi) + movdqa %xmm0, 32(%rdi) + movdqa %xmm0, 48(%rdi) + movdqa %xmm0, 64(%rdi) + movdqa %xmm0, 80(%rdi) + movdqa %xmm0, 96(%rdi) + movdqa %xmm0, 112(%rdi) + movdqa %xmm0, 128(%rdi) + movdqa %xmm0, 144(%rdi) + movdqa %xmm0, 160(%rdi) + movdqa %xmm0, 176(%rdi) + movdqa %xmm0, 192(%rdi) + movdqa %xmm0, 208(%rdi) + movdqa %xmm0, 224(%rdi) + popq %rbp + addq $8, %rax + ret +ELF(.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;) + + +.align 8 +.globl _gcry_poly1305_amd64_sse2_blocks +ELF(.type _gcry_poly1305_amd64_sse2_blocks,@function;) +_gcry_poly1305_amd64_sse2_blocks: +.Lpoly1305_blocks_x86_local: + pushq %rbp + movq %rsp, %rbp + pushq %rbx + andq $-64, %rsp + subq $328, %rsp + movq 240(%rdi), %rax + movl $(1<<24), %r8d + movl $((1<<26)-1), %r9d + movd %r8, %xmm0 + movd %r9, %xmm5 + pshufd $0x44, %xmm0, %xmm0 + pshufd $0x44, %xmm5, %xmm5 + testb $4, %al + je .Lpoly1305_blocks_x86_3 + psrldq $8, %xmm0 +.Lpoly1305_blocks_x86_3: + testb $8, %al + je .Lpoly1305_blocks_x86_4 + pxor %xmm0, %xmm0 +.Lpoly1305_blocks_x86_4: + movdqa %xmm0, 168(%rsp) + testb $1, %al + jne .Lpoly1305_blocks_x86_5 + movq 16(%rsi), %xmm0 + movdqa %xmm5, %xmm7 + movdqa %xmm5, %xmm10 + movq (%rsi), %xmm6 + orq $1, %rax + subq $32, %rdx + movq 8(%rsi), %xmm1 + punpcklqdq %xmm0, %xmm6 + movq 24(%rsi), %xmm0 + pand %xmm6, %xmm7 + movdqa %xmm6, %xmm9 + psrlq $52, %xmm6 + addq $32, %rsi + punpcklqdq %xmm0, %xmm1 + movdqa %xmm1, %xmm0 + psrlq $26, %xmm9 + psllq $12, %xmm0 + movq %rax, 240(%rdi) + pand %xmm5, %xmm9 + por %xmm0, %xmm6 + psrlq $40, %xmm1 + pand %xmm6, %xmm10 + por 168(%rsp), %xmm1 + psrlq $26, %xmm6 + pand %xmm5, %xmm6 +.Lpoly1305_blocks_x86_6: + movdqa 80(%rdi), %xmm13 + cmpq $63, %rdx + movl $(5), %r8d + movd %r8, %xmm14 + pshufd $0x44, %xmm14, %xmm14 + movdqa 96(%rdi), %xmm15 + movdqa %xmm13, -8(%rsp) + movdqa 112(%rdi), %xmm0 + movdqa %xmm14, 136(%rsp) + movdqa 128(%rdi), %xmm3 + movdqa %xmm15, 312(%rsp) + pmuludq %xmm14, %xmm15 + movdqa 144(%rdi), %xmm13 + movdqa %xmm0, 232(%rsp) + pmuludq %xmm14, %xmm0 + movdqa %xmm3, 152(%rsp) + pmuludq %xmm14, %xmm3 + movdqa %xmm13, 56(%rsp) + pmuludq %xmm14, %xmm13 + movdqa %xmm15, 40(%rsp) + movdqa %xmm0, -24(%rsp) + movdqa %xmm3, -40(%rsp) + movdqa %xmm13, -56(%rsp) + jbe .Lpoly1305_blocks_x86_7 + movdqa 192(%rdi), %xmm15 + leaq 32(%rsi), %rax + movq %rdx, %rcx + movdqa 176(%rdi), %xmm14 + movdqa %xmm15, %xmm2 + movdqa 208(%rdi), %xmm0 + movdqa %xmm15, 216(%rsp) + movdqa %xmm14, 296(%rsp) + movdqa 224(%rdi), %xmm3 + pmuludq 136(%rsp), %xmm14 + movdqa -24(%rsp), %xmm13 + movdqa %xmm14, 8(%rsp) + pmuludq 136(%rsp), %xmm2 + movdqa -40(%rsp), %xmm14 + movdqa %xmm0, 120(%rsp) + pmuludq 136(%rsp), %xmm0 + movdqa %xmm3, 24(%rsp) + movdqa 160(%rdi), %xmm12 + movdqa %xmm0, %xmm8 + movdqa -56(%rsp), %xmm15 + movdqa 
%xmm13, 88(%rsp) + pmuludq 136(%rsp), %xmm3 + movdqa %xmm2, 104(%rsp) + movdqa %xmm0, %xmm13 + movdqa -8(%rsp), %xmm11 + movdqa %xmm3, 280(%rsp) + movdqa %xmm2, %xmm3 + movdqa %xmm0, 200(%rsp) + movdqa %xmm14, 184(%rsp) + movdqa %xmm15, 264(%rsp) + jmp .Lpoly1305_blocks_x86_8 +.p2align 6,,63 +.Lpoly1305_blocks_x86_13: + movdqa 200(%rsp), %xmm13 + movdqa %xmm3, %xmm6 + movdqa 200(%rsp), %xmm8 + movdqa 104(%rsp), %xmm3 +.Lpoly1305_blocks_x86_8: + movdqa 8(%rsp), %xmm4 + pmuludq %xmm6, %xmm3 + subq $64, %rcx + pmuludq %xmm10, %xmm8 + movdqa 104(%rsp), %xmm2 + movdqa 200(%rsp), %xmm0 + pmuludq %xmm1, %xmm4 + movdqa 280(%rsp), %xmm15 + pmuludq %xmm6, %xmm13 + movdqa 280(%rsp), %xmm14 + pmuludq %xmm1, %xmm0 + paddq %xmm3, %xmm4 + pmuludq %xmm1, %xmm2 + movdqa 280(%rsp), %xmm3 + paddq %xmm8, %xmm4 + pmuludq %xmm9, %xmm15 + movdqa 280(%rsp), %xmm8 + pmuludq %xmm10, %xmm14 + pmuludq %xmm6, %xmm8 + paddq %xmm13, %xmm2 + movdqa %xmm6, %xmm13 + pmuludq %xmm1, %xmm3 + paddq %xmm15, %xmm4 + movdqa 296(%rsp), %xmm15 + pmuludq %xmm12, %xmm13 + paddq %xmm14, %xmm2 + movdqa %xmm7, %xmm14 + paddq %xmm8, %xmm0 + pmuludq %xmm12, %xmm14 + movdqa %xmm9, %xmm8 + pmuludq 296(%rsp), %xmm6 + pmuludq %xmm12, %xmm8 + movdqa %xmm6, 248(%rsp) + pmuludq %xmm10, %xmm15 + movq -16(%rax), %xmm6 + paddq %xmm13, %xmm3 + movdqa %xmm10, %xmm13 + paddq %xmm14, %xmm4 + movq -8(%rax), %xmm14 + paddq %xmm8, %xmm2 + movq -32(%rax), %xmm8 + pmuludq %xmm12, %xmm13 + paddq %xmm15, %xmm3 + pmuludq %xmm12, %xmm1 + movdqa 216(%rsp), %xmm15 + pmuludq 216(%rsp), %xmm10 + punpcklqdq %xmm6, %xmm8 + movq -24(%rax), %xmm6 + pmuludq %xmm9, %xmm15 + paddq %xmm13, %xmm0 + movdqa 296(%rsp), %xmm13 + paddq 248(%rsp), %xmm1 + punpcklqdq %xmm14, %xmm6 + movdqa 296(%rsp), %xmm14 + pmuludq %xmm9, %xmm13 + pmuludq 120(%rsp), %xmm9 + movdqa %xmm15, 72(%rsp) + paddq %xmm10, %xmm1 + movdqa 216(%rsp), %xmm15 + pmuludq %xmm7, %xmm14 + movdqa %xmm6, %xmm10 + paddq %xmm9, %xmm1 + pmuludq %xmm7, %xmm15 + paddq %xmm13, %xmm0 + paddq 72(%rsp), %xmm3 + movdqa 120(%rsp), %xmm13 + psllq $12, %xmm10 + paddq %xmm14, %xmm2 + movdqa %xmm5, %xmm14 + pand %xmm8, %xmm14 + pmuludq %xmm7, %xmm13 + paddq %xmm15, %xmm0 + movdqa %xmm14, 248(%rsp) + movdqa %xmm8, %xmm14 + psrlq $52, %xmm8 + movdqu (%rax), %xmm9 + por %xmm10, %xmm8 + pmuludq 24(%rsp), %xmm7 + movdqu 16(%rax), %xmm10 + paddq %xmm13, %xmm3 + pxor %xmm13, %xmm13 + movdqa %xmm9, %xmm15 + paddq %xmm7, %xmm1 + movdqa %xmm6, %xmm7 + movdqa %xmm10, -72(%rsp) + punpckldq %xmm10, %xmm15 + movdqa %xmm15, %xmm10 + punpckldq %xmm13, %xmm10 + punpckhdq -72(%rsp), %xmm9 + psrlq $40, %xmm6 + movdqa %xmm10, 72(%rsp) + movdqa %xmm9, %xmm10 + punpckhdq %xmm13, %xmm9 + psllq $18, %xmm9 + paddq 72(%rsp), %xmm4 + addq $64, %rax + paddq %xmm9, %xmm3 + movdqa 40(%rsp), %xmm9 + cmpq $63, %rcx + punpckhdq %xmm13, %xmm15 + psllq $6, %xmm15 + punpckldq %xmm13, %xmm10 + paddq %xmm15, %xmm2 + psllq $12, %xmm10 + por 168(%rsp), %xmm6 + pmuludq %xmm6, %xmm9 + movdqa 88(%rsp), %xmm15 + paddq %xmm10, %xmm0 + movdqa 88(%rsp), %xmm13 + psrlq $14, %xmm7 + pand %xmm5, %xmm8 + movdqa 184(%rsp), %xmm10 + pand %xmm5, %xmm7 + pmuludq %xmm7, %xmm15 + paddq %xmm9, %xmm4 + pmuludq %xmm6, %xmm13 + movdqa 184(%rsp), %xmm9 + paddq 168(%rsp), %xmm1 + pmuludq %xmm7, %xmm10 + pmuludq %xmm6, %xmm9 + paddq %xmm15, %xmm4 + movdqa 184(%rsp), %xmm15 + paddq %xmm13, %xmm2 + psrlq $26, %xmm14 + movdqa 264(%rsp), %xmm13 + paddq %xmm10, %xmm2 + pmuludq %xmm8, %xmm15 + pand %xmm5, %xmm14 + paddq %xmm9, %xmm0 + pmuludq %xmm6, %xmm13 + movdqa 264(%rsp), %xmm9 + movdqa 
264(%rsp), %xmm10 + pmuludq %xmm11, %xmm6 + pmuludq %xmm8, %xmm9 + paddq %xmm15, %xmm4 + movdqa 264(%rsp), %xmm15 + pmuludq %xmm14, %xmm10 + paddq %xmm13, %xmm3 + movdqa %xmm7, %xmm13 + pmuludq %xmm7, %xmm15 + paddq %xmm6, %xmm1 + movdqa 312(%rsp), %xmm6 + paddq %xmm9, %xmm2 + pmuludq %xmm11, %xmm13 + movdqa 248(%rsp), %xmm9 + paddq %xmm10, %xmm4 + pmuludq %xmm8, %xmm6 + pmuludq 312(%rsp), %xmm7 + paddq %xmm15, %xmm0 + movdqa %xmm9, %xmm10 + movdqa %xmm14, %xmm15 + pmuludq %xmm11, %xmm10 + paddq %xmm13, %xmm3 + movdqa %xmm8, %xmm13 + pmuludq %xmm11, %xmm13 + paddq %xmm6, %xmm3 + paddq %xmm7, %xmm1 + movdqa 232(%rsp), %xmm6 + pmuludq %xmm11, %xmm15 + pmuludq 232(%rsp), %xmm8 + paddq %xmm10, %xmm4 + paddq %xmm8, %xmm1 + movdqa 312(%rsp), %xmm10 + paddq %xmm13, %xmm0 + pmuludq %xmm14, %xmm6 + movdqa 312(%rsp), %xmm13 + pmuludq %xmm9, %xmm10 + paddq %xmm15, %xmm2 + movdqa 232(%rsp), %xmm7 + pmuludq %xmm14, %xmm13 + pmuludq 152(%rsp), %xmm14 + paddq %xmm14, %xmm1 + pmuludq %xmm9, %xmm7 + paddq %xmm6, %xmm3 + paddq %xmm10, %xmm2 + movdqa 152(%rsp), %xmm10 + paddq %xmm13, %xmm0 + pmuludq %xmm9, %xmm10 + paddq %xmm7, %xmm0 + movdqa %xmm4, %xmm7 + psrlq $26, %xmm7 + pmuludq 56(%rsp), %xmm9 + pand %xmm5, %xmm4 + paddq %xmm7, %xmm2 + paddq %xmm9, %xmm1 + paddq %xmm10, %xmm3 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm9 + movdqa %xmm3, %xmm6 + psrlq $26, %xmm7 + pand %xmm5, %xmm3 + psrlq $26, %xmm6 + paddq %xmm7, %xmm0 + pand %xmm5, %xmm9 + paddq %xmm6, %xmm1 + movdqa %xmm0, %xmm10 + movdqa %xmm1, %xmm6 + pand %xmm5, %xmm10 + pand %xmm5, %xmm1 + psrlq $26, %xmm6 + pmuludq 136(%rsp), %xmm6 + paddq %xmm6, %xmm4 + movdqa %xmm0, %xmm6 + psrlq $26, %xmm6 + movdqa %xmm4, %xmm2 + movdqa %xmm4, %xmm7 + paddq %xmm6, %xmm3 + psrlq $26, %xmm2 + pand %xmm5, %xmm7 + movdqa %xmm3, %xmm0 + paddq %xmm2, %xmm9 + pand %xmm5, %xmm3 + psrlq $26, %xmm0 + paddq %xmm0, %xmm1 + ja .Lpoly1305_blocks_x86_13 + leaq -64(%rdx), %rax + movdqa %xmm3, %xmm6 + andl $63, %edx + andq $-64, %rax + leaq 64(%rsi,%rax), %rsi +.Lpoly1305_blocks_x86_7: + cmpq $31, %rdx + jbe .Lpoly1305_blocks_x86_9 + movdqa -24(%rsp), %xmm13 + movdqa %xmm6, %xmm0 + movdqa %xmm6, %xmm3 + movdqa 40(%rsp), %xmm11 + movdqa %xmm1, %xmm12 + testq %rsi, %rsi + movdqa -40(%rsp), %xmm2 + pmuludq %xmm13, %xmm0 + movdqa %xmm1, %xmm8 + pmuludq %xmm1, %xmm11 + movdqa %xmm10, %xmm4 + movdqa %xmm1, %xmm14 + pmuludq %xmm2, %xmm3 + movdqa %xmm6, %xmm15 + pmuludq %xmm1, %xmm13 + movdqa %xmm7, %xmm1 + pmuludq %xmm2, %xmm12 + paddq %xmm0, %xmm11 + movdqa -56(%rsp), %xmm0 + pmuludq %xmm10, %xmm2 + paddq %xmm3, %xmm13 + pmuludq %xmm0, %xmm4 + movdqa %xmm9, %xmm3 + pmuludq %xmm0, %xmm3 + paddq %xmm2, %xmm11 + pmuludq %xmm0, %xmm8 + movdqa %xmm6, %xmm2 + pmuludq %xmm0, %xmm2 + movdqa -8(%rsp), %xmm0 + paddq %xmm4, %xmm13 + movdqa 312(%rsp), %xmm4 + paddq %xmm3, %xmm11 + pmuludq 312(%rsp), %xmm6 + movdqa 312(%rsp), %xmm3 + pmuludq %xmm0, %xmm1 + paddq %xmm2, %xmm12 + pmuludq %xmm0, %xmm15 + movdqa %xmm9, %xmm2 + pmuludq %xmm0, %xmm2 + pmuludq %xmm7, %xmm3 + paddq %xmm1, %xmm11 + movdqa 232(%rsp), %xmm1 + pmuludq %xmm0, %xmm14 + paddq %xmm15, %xmm8 + pmuludq %xmm10, %xmm0 + paddq %xmm2, %xmm13 + movdqa 312(%rsp), %xmm2 + pmuludq %xmm10, %xmm4 + paddq %xmm3, %xmm13 + movdqa 152(%rsp), %xmm3 + pmuludq %xmm9, %xmm2 + paddq %xmm6, %xmm14 + pmuludq 232(%rsp), %xmm10 + paddq %xmm0, %xmm12 + pmuludq %xmm9, %xmm1 + paddq %xmm10, %xmm14 + movdqa 232(%rsp), %xmm0 + pmuludq %xmm7, %xmm3 + paddq %xmm4, %xmm8 + pmuludq 152(%rsp), %xmm9 + paddq %xmm2, %xmm12 + paddq %xmm9, %xmm14 + pmuludq %xmm7, 
%xmm0 + paddq %xmm1, %xmm8 + pmuludq 56(%rsp), %xmm7 + paddq %xmm3, %xmm8 + paddq %xmm7, %xmm14 + paddq %xmm0, %xmm12 + je .Lpoly1305_blocks_x86_10 + movdqu (%rsi), %xmm1 + pxor %xmm0, %xmm0 + paddq 168(%rsp), %xmm14 + movdqu 16(%rsi), %xmm2 + movdqa %xmm1, %xmm3 + punpckldq %xmm2, %xmm3 + punpckhdq %xmm2, %xmm1 + movdqa %xmm3, %xmm4 + movdqa %xmm1, %xmm2 + punpckldq %xmm0, %xmm4 + punpckhdq %xmm0, %xmm3 + punpckhdq %xmm0, %xmm1 + punpckldq %xmm0, %xmm2 + movdqa %xmm2, %xmm0 + psllq $6, %xmm3 + paddq %xmm4, %xmm11 + psllq $12, %xmm0 + paddq %xmm3, %xmm13 + psllq $18, %xmm1 + paddq %xmm0, %xmm12 + paddq %xmm1, %xmm8 +.Lpoly1305_blocks_x86_10: + movdqa %xmm11, %xmm9 + movdqa %xmm8, %xmm1 + movdqa %xmm11, %xmm7 + psrlq $26, %xmm9 + movdqa %xmm8, %xmm6 + pand %xmm5, %xmm7 + paddq %xmm13, %xmm9 + psrlq $26, %xmm1 + pand %xmm5, %xmm6 + movdqa %xmm9, %xmm10 + paddq %xmm14, %xmm1 + pand %xmm5, %xmm9 + psrlq $26, %xmm10 + movdqa %xmm1, %xmm0 + pand %xmm5, %xmm1 + paddq %xmm12, %xmm10 + psrlq $26, %xmm0 + pmuludq 136(%rsp), %xmm0 + movdqa %xmm10, %xmm2 + paddq %xmm0, %xmm7 + psrlq $26, %xmm2 + movdqa %xmm7, %xmm0 + pand %xmm5, %xmm10 + paddq %xmm2, %xmm6 + psrlq $26, %xmm0 + pand %xmm5, %xmm7 + movdqa %xmm6, %xmm2 + paddq %xmm0, %xmm9 + pand %xmm5, %xmm6 + psrlq $26, %xmm2 + paddq %xmm2, %xmm1 +.Lpoly1305_blocks_x86_9: + testq %rsi, %rsi + je .Lpoly1305_blocks_x86_11 + movdqa %xmm7, 0(%rdi) + movdqa %xmm9, 16(%rdi) + movdqa %xmm10, 32(%rdi) + movdqa %xmm6, 48(%rdi) + movdqa %xmm1, 64(%rdi) + movq -8(%rbp), %rbx + leave + ret +.Lpoly1305_blocks_x86_5: + movdqa 0(%rdi), %xmm7 + movdqa 16(%rdi), %xmm9 + movdqa 32(%rdi), %xmm10 + movdqa 48(%rdi), %xmm6 + movdqa 64(%rdi), %xmm1 + jmp .Lpoly1305_blocks_x86_6 +.Lpoly1305_blocks_x86_11: + movdqa %xmm7, %xmm0 + movdqa %xmm9, %xmm2 + movdqa %xmm6, %xmm3 + psrldq $8, %xmm0 + movabsq $4398046511103, %rbx + paddq %xmm0, %xmm7 + psrldq $8, %xmm2 + movdqa %xmm10, %xmm0 + movd %xmm7, %edx + paddq %xmm2, %xmm9 + psrldq $8, %xmm0 + movl %edx, %ecx + movd %xmm9, %eax + paddq %xmm0, %xmm10 + shrl $26, %ecx + psrldq $8, %xmm3 + movdqa %xmm1, %xmm0 + addl %ecx, %eax + movd %xmm10, %ecx + paddq %xmm3, %xmm6 + movl %eax, %r9d + shrl $26, %eax + psrldq $8, %xmm0 + addl %ecx, %eax + movd %xmm6, %ecx + paddq %xmm0, %xmm1 + movl %eax, %esi + andl $67108863, %r9d + movd %xmm1, %r10d + shrl $26, %esi + andl $67108863, %eax + andl $67108863, %edx + addl %ecx, %esi + salq $8, %rax + movl %r9d, %ecx + shrl $18, %r9d + movl %esi, %r8d + shrl $26, %esi + andl $67108863, %r8d + addl %r10d, %esi + orq %r9, %rax + salq $16, %rsi + movq %r8, %r9 + shrl $10, %r8d + salq $26, %rcx + orq %r8, %rsi + salq $34, %r9 + orq %rdx, %rcx + movq %rsi, %r8 + shrq $42, %rsi + movabsq $17592186044415, %rdx + orq %r9, %rax + andq %rbx, %r8 + leaq (%rsi,%rsi,4), %rsi + andq %rdx, %rcx + andq %rdx, %rax + movabsq $-4398046511104, %r10 + addq %rsi, %rcx + movq %rcx, %rsi + shrq $44, %rcx + addq %rcx, %rax + andq %rdx, %rsi + movq %rax, %rcx + shrq $44, %rax + addq %r8, %rax + andq %rdx, %rcx + andq %rax, %rbx + shrq $42, %rax + leaq (%rsi,%rax,4), %rsi + addq %rbx, %r10 + addq %rax, %rsi + movq %rsi, %r8 + shrq $44, %rsi + andq %rdx, %r8 + addq %rcx, %rsi + leaq 5(%r8), %r9 + movq %r9, %r11 + andq %rdx, %r9 + shrq $44, %r11 + addq %rsi, %r11 + movq %r11, %rax + andq %r11, %rdx + shrq $44, %rax + addq %rax, %r10 + movq %r10, %rax + shrq $63, %rax + subq $1, %rax + movq %rax, %rcx + andq %rax, %r9 + andq %rax, %rdx + notq %rcx + andq %r10, %rax + andq %rcx, %r8 + andq %rcx, %rsi + andq %rbx, %rcx + orq %r9, 
%r8 + orq %rdx, %rsi + orq %rax, %rcx + movq %r8, 0(%rdi) + movq %rsi, 8(%rdi) + movq %rcx, 16(%rdi) + movq -8(%rbp), %rbx + movq %rbp, %rax + subq %rsp, %rax + pxor %xmm15, %xmm15 + pxor %xmm7, %xmm7 + pxor %xmm14, %xmm14 + pxor %xmm6, %xmm6 + pxor %xmm13, %xmm13 + pxor %xmm5, %xmm5 + pxor %xmm12, %xmm12 + pxor %xmm4, %xmm4 + leave + addq $8, %rax + pxor %xmm11, %xmm11 + pxor %xmm3, %xmm3 + pxor %xmm10, %xmm10 + pxor %xmm2, %xmm2 + pxor %xmm9, %xmm9 + pxor %xmm1, %xmm1 + pxor %xmm8, %xmm8 + pxor %xmm0, %xmm0 + ret +ELF(.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;) + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/poly1305.c b/libotr/libgcrypt-1.8.7/cipher/poly1305.c new file mode 100644 index 0000000..22255fb --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/poly1305.c @@ -0,0 +1,643 @@ +/* poly1305.c - Poly1305 internals and generic implementation + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* The code is based on public-domain Poly1305 implementation by + * Andrew Moon at + * https://github.com/floodyberry/poly1305-opt + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "poly1305-internal.h" + + +static const char *selftest (void); + + + +#ifdef POLY1305_USE_SSE2 + +void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key) + OPS_FUNC_ABI; +unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m, + size_t remaining, + byte mac[16]) OPS_FUNC_ABI; +unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m, + size_t bytes) OPS_FUNC_ABI; + +static const poly1305_ops_t poly1305_amd64_sse2_ops = { + POLY1305_SSE2_BLOCKSIZE, + _gcry_poly1305_amd64_sse2_init_ext, + _gcry_poly1305_amd64_sse2_blocks, + _gcry_poly1305_amd64_sse2_finish_ext +}; + +#else /* !POLY1305_USE_SSE2 */ + +static OPS_FUNC_ABI void poly1305_init_ext_ref32 +/**/ (void *state, const poly1305_key_t *key); +static OPS_FUNC_ABI unsigned int poly1305_blocks_ref32 +/**/ (void *state, const byte *m, size_t bytes); +static OPS_FUNC_ABI unsigned int poly1305_finish_ext_ref32 +/**/ (void *state, const byte * m, + size_t remaining, byte mac[POLY1305_TAGLEN]); + +static const poly1305_ops_t poly1305_default_ops = { + POLY1305_REF_BLOCKSIZE, + poly1305_init_ext_ref32, + poly1305_blocks_ref32, + poly1305_finish_ext_ref32 +}; + +#endif /* !POLY1305_USE_SSE2 */ + + +#ifdef POLY1305_USE_AVX2 + +void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key) + OPS_FUNC_ABI; +unsigned int _gcry_poly1305_amd64_avx2_finish_ext(void *state, const byte *m, + size_t remaining, + byte mac[16]) OPS_FUNC_ABI; +unsigned int _gcry_poly1305_amd64_avx2_blocks(void *ctx, const byte *m, + 
size_t bytes) OPS_FUNC_ABI; + +static const poly1305_ops_t poly1305_amd64_avx2_ops = { + POLY1305_AVX2_BLOCKSIZE, + _gcry_poly1305_amd64_avx2_init_ext, + _gcry_poly1305_amd64_avx2_blocks, + _gcry_poly1305_amd64_avx2_finish_ext +}; + +#endif + + +#ifdef POLY1305_USE_NEON + +void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key) + OPS_FUNC_ABI; +unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m, + size_t remaining, + byte mac[16]) OPS_FUNC_ABI; +unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m, + size_t bytes) OPS_FUNC_ABI; + +static const poly1305_ops_t poly1305_armv7_neon_ops = { + POLY1305_NEON_BLOCKSIZE, + _gcry_poly1305_armv7_neon_init_ext, + _gcry_poly1305_armv7_neon_blocks, + _gcry_poly1305_armv7_neon_finish_ext +}; + +#endif + + +/* Reference unoptimized poly1305 implementation using 32 bit * 32 bit = 64 bit + * multiplication and 64 bit addition. + */ + +typedef struct poly1305_state_ref32_s +{ + u32 r[5]; + u32 h[5]; + u32 pad[4]; + byte final; +} poly1305_state_ref32_t; + + +#ifndef POLY1305_USE_SSE2 +static OPS_FUNC_ABI void +poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key) +{ + poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state; + + gcry_assert (sizeof (*st) + POLY1305_STATE_ALIGNMENT <= + sizeof (((poly1305_context_t *) 0)->state)); + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + st->r[0] = (buf_get_le32 (&key->b[0])) & 0x3ffffff; + st->r[1] = (buf_get_le32 (&key->b[3]) >> 2) & 0x3ffff03; + st->r[2] = (buf_get_le32 (&key->b[6]) >> 4) & 0x3ffc0ff; + st->r[3] = (buf_get_le32 (&key->b[9]) >> 6) & 0x3f03fff; + st->r[4] = (buf_get_le32 (&key->b[12]) >> 8) & 0x00fffff; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + + /* save pad for later */ + st->pad[0] = buf_get_le32 (&key->b[16]); + st->pad[1] = buf_get_le32 (&key->b[20]); + st->pad[2] = buf_get_le32 (&key->b[24]); + st->pad[3] = buf_get_le32 (&key->b[28]); + + st->final = 0; +} +#endif /* !POLY1305_USE_SSE2 */ + + +#ifndef POLY1305_USE_SSE2 +static OPS_FUNC_ABI unsigned int +poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes) +{ + poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state; + const u32 hibit = (st->final) ? 
0 : (1 << 24); /* 1 << 128 */ + u32 r0, r1, r2, r3, r4; + u32 s1, s2, s3, s4; + u32 h0, h1, h2, h3, h4; + u64 d0, d1, d2, d3, d4; + u32 c; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + r3 = st->r[3]; + r4 = st->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + while (bytes >= POLY1305_REF_BLOCKSIZE) + { + /* h += m[i] */ + h0 += (buf_get_le32 (m + 0)) & 0x3ffffff; + h1 += (buf_get_le32 (m + 3) >> 2) & 0x3ffffff; + h2 += (buf_get_le32 (m + 6) >> 4) & 0x3ffffff; + h3 += (buf_get_le32 (m + 9) >> 6) & 0x3ffffff; + h4 += (buf_get_le32 (m + 12) >> 8) | hibit; + + /* h *= r */ + d0 = + ((u64) h0 * r0) + ((u64) h1 * s4) + + ((u64) h2 * s3) + ((u64) h3 * s2) + ((u64) h4 * s1); + d1 = + ((u64) h0 * r1) + ((u64) h1 * r0) + + ((u64) h2 * s4) + ((u64) h3 * s3) + ((u64) h4 * s2); + d2 = + ((u64) h0 * r2) + ((u64) h1 * r1) + + ((u64) h2 * r0) + ((u64) h3 * s4) + ((u64) h4 * s3); + d3 = + ((u64) h0 * r3) + ((u64) h1 * r2) + + ((u64) h2 * r1) + ((u64) h3 * r0) + ((u64) h4 * s4); + d4 = + ((u64) h0 * r4) + ((u64) h1 * r3) + + ((u64) h2 * r2) + ((u64) h3 * r1) + ((u64) h4 * r0); + + /* (partial) h %= p */ + c = (u32) (d0 >> 26); + h0 = (u32) d0 & 0x3ffffff; + d1 += c; + c = (u32) (d1 >> 26); + h1 = (u32) d1 & 0x3ffffff; + d2 += c; + c = (u32) (d2 >> 26); + h2 = (u32) d2 & 0x3ffffff; + d3 += c; + c = (u32) (d3 >> 26); + h3 = (u32) d3 & 0x3ffffff; + d4 += c; + c = (u32) (d4 >> 26); + h4 = (u32) d4 & 0x3ffffff; + h0 += c * 5; + c = (h0 >> 26); + h0 = h0 & 0x3ffffff; + h1 += c; + + m += POLY1305_REF_BLOCKSIZE; + bytes -= POLY1305_REF_BLOCKSIZE; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; + + return (16 * sizeof (u32) + 5 * sizeof (u64) + 5 * sizeof (void *)); +} +#endif /* !POLY1305_USE_SSE2 */ + + +#ifndef POLY1305_USE_SSE2 +static OPS_FUNC_ABI unsigned int +poly1305_finish_ext_ref32 (void *state, const byte * m, + size_t remaining, byte mac[POLY1305_TAGLEN]) +{ + poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state; + u32 h0, h1, h2, h3, h4, c; + u32 g0, g1, g2, g3, g4; + u64 f; + u32 mask; + unsigned int burn = 0; + + /* process the remaining block */ + if (remaining) + { + byte final[POLY1305_REF_BLOCKSIZE] = { 0 }; + size_t i; + for (i = 0; i < remaining; i++) + final[i] = m[i]; + final[remaining] = 1; + st->final = 1; + burn = poly1305_blocks_ref32 (st, final, POLY1305_REF_BLOCKSIZE); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + c = h1 >> 26; + h1 = h1 & 0x3ffffff; + h2 += c; + c = h2 >> 26; + h2 = h2 & 0x3ffffff; + h3 += c; + c = h3 >> 26; + h3 = h3 & 0x3ffffff; + h4 += c; + c = h4 >> 26; + h4 = h4 & 0x3ffffff; + h0 += c * 5; + c = h0 >> 26; + h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; + c = g0 >> 26; + g0 &= 0x3ffffff; + g1 = h1 + c; + c = g1 >> 26; + g1 &= 0x3ffffff; + g2 = h2 + c; + c = g2 >> 26; + g2 &= 0x3ffffff; + g3 = h3 + c; + c = g3 >> 26; + g3 &= 0x3ffffff; + g4 = h4 + c - (1 << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof (u32) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 
14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (u64) h0 + st->pad[0]; + h0 = (u32) f; + f = (u64) h1 + st->pad[1] + (f >> 32); + h1 = (u32) f; + f = (u64) h2 + st->pad[2] + (f >> 32); + h2 = (u32) f; + f = (u64) h3 + st->pad[3] + (f >> 32); + h3 = (u32) f; + + buf_put_le32 (mac + 0, h0); + buf_put_le32 (mac + 4, h1); + buf_put_le32 (mac + 8, h2); + buf_put_le32 (mac + 12, h3); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->r[3] = 0; + st->r[4] = 0; + st->pad[0] = 0; + st->pad[1] = 0; + st->pad[2] = 0; + st->pad[3] = 0; + + /* burn_stack */ + return (13 * sizeof (u32) + sizeof (u64) + + POLY1305_REF_BLOCKSIZE + 6 * sizeof (void *)) + burn; +} +#endif /* !POLY1305_USE_SSE2*/ + + + + + +static inline void * +poly1305_get_state (poly1305_context_t * ctx) +{ + byte *c = ctx->state; + c += POLY1305_STATE_ALIGNMENT - 1; + c -= (uintptr_t) c & (POLY1305_STATE_ALIGNMENT - 1); + return c; +} + + +static void +poly1305_init (poly1305_context_t * ctx, const poly1305_key_t * key) +{ + void *state = poly1305_get_state (ctx); + + ctx->leftover = 0; + + ctx->ops->init_ext (state, key); +} + + +void +_gcry_poly1305_update (poly1305_context_t * ctx, const byte * m, size_t bytes) +{ + void *state = poly1305_get_state (ctx); + unsigned int burn = 0; + size_t block_size = ctx->ops->block_size; + + /* handle leftover */ + if (ctx->leftover) + { + size_t want = (block_size - ctx->leftover); + if (want > bytes) + want = bytes; + buf_cpy (ctx->buffer + ctx->leftover, m, want); + bytes -= want; + m += want; + ctx->leftover += want; + if (ctx->leftover < block_size) + return; + burn = ctx->ops->blocks (state, ctx->buffer, block_size); + ctx->leftover = 0; + } + + /* process full blocks */ + if (bytes >= block_size) + { + size_t want = (bytes & ~(block_size - 1)); + burn = ctx->ops->blocks (state, m, want); + m += want; + bytes -= want; + } + + /* store leftover */ + if (bytes) + { + buf_cpy (ctx->buffer + ctx->leftover, m, bytes); + ctx->leftover += bytes; + } + + if (burn) + _gcry_burn_stack (burn); +} + + +void +_gcry_poly1305_finish (poly1305_context_t * ctx, byte mac[POLY1305_TAGLEN]) +{ + void *state = poly1305_get_state (ctx); + unsigned int burn; + + burn = ctx->ops->finish_ext (state, ctx->buffer, ctx->leftover, mac); + + _gcry_burn_stack (burn); +} + + +gcry_err_code_t +_gcry_poly1305_init (poly1305_context_t * ctx, const byte * key, + size_t keylen) +{ + static int initialized; + static const char *selftest_failed; + poly1305_key_t keytmp; + unsigned int features = _gcry_get_hw_features (); + + if (!initialized) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("Poly1305 selftest failed (%s)\n", selftest_failed); + } + + if (keylen != POLY1305_KEYLEN) + return GPG_ERR_INV_KEYLEN; + + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + +#ifdef POLY1305_USE_SSE2 + ctx->ops = &poly1305_amd64_sse2_ops; +#else + ctx->ops = &poly1305_default_ops; +#endif + +#ifdef POLY1305_USE_AVX2 + if (features & HWF_INTEL_AVX2) + ctx->ops = &poly1305_amd64_avx2_ops; +#endif +#ifdef POLY1305_USE_NEON + if (features & HWF_ARM_NEON) + ctx->ops = &poly1305_armv7_neon_ops; +#endif + (void)features; + + buf_cpy (keytmp.b, key, POLY1305_KEYLEN); + poly1305_init (ctx, &keytmp); + + wipememory (&keytmp, sizeof (keytmp)); + + return 0; +} + + +static void +poly1305_auth (byte mac[POLY1305_TAGLEN], const byte * m, 
size_t bytes, + const byte * key) +{ + poly1305_context_t ctx; + + memset (&ctx, 0, sizeof (ctx)); + + _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN); + _gcry_poly1305_update (&ctx, m, bytes); + _gcry_poly1305_finish (&ctx, mac); + + wipememory (&ctx, sizeof (ctx)); +} + + +static const char * +selftest (void) +{ + /* example from nacl */ + static const byte nacl_key[POLY1305_KEYLEN] = { + 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91, + 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25, + 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65, + 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80, + }; + + static const byte nacl_msg[131] = { + 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73, + 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce, + 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4, + 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a, + 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b, + 0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72, + 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2, + 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38, + 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a, + 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae, + 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea, + 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda, + 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde, + 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3, + 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6, + 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74, + 0xe3, 0x55, 0xa5 + }; + + static const byte nacl_mac[16] = { + 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5, + 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9 + }; + + /* generates a final value of (2^130 - 2) == 3 */ + static const byte wrap_key[POLY1305_KEYLEN] = { + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + + static const byte wrap_msg[16] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + }; + + static const byte wrap_mac[16] = { + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + + /* mac of the macs of messages of length 0 to 256, where the key and messages + * have all their values set to the length + */ + static const byte total_key[POLY1305_KEYLEN] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + }; + + static const byte total_mac[16] = { + 0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd, + 0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39 + }; + + poly1305_context_t ctx; + poly1305_context_t total_ctx; + byte all_key[POLY1305_KEYLEN]; + byte all_msg[256]; + byte mac[16]; + size_t i, j; + + memset (&ctx, 0, sizeof (ctx)); + memset (&total_ctx, 0, sizeof (total_ctx)); + + memset (mac, 0, sizeof (mac)); + poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key); + if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) + return "Poly1305 test 1 failed."; + + /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so + * make sure everything still works varying between them */ + memset (mac, 0, sizeof (mac)); + _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN); + _gcry_poly1305_update (&ctx, nacl_msg + 0, 32); + _gcry_poly1305_update (&ctx, nacl_msg + 32, 64); + _gcry_poly1305_update (&ctx, nacl_msg + 96, 16); + _gcry_poly1305_update (&ctx, nacl_msg + 112, 8); 
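+  /* The remaining bytes go in progressively smaller chunks to
+   * exercise the leftover buffering in _gcry_poly1305_update.  */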
+ _gcry_poly1305_update (&ctx, nacl_msg + 120, 4); + _gcry_poly1305_update (&ctx, nacl_msg + 124, 2); + _gcry_poly1305_update (&ctx, nacl_msg + 126, 1); + _gcry_poly1305_update (&ctx, nacl_msg + 127, 1); + _gcry_poly1305_update (&ctx, nacl_msg + 128, 1); + _gcry_poly1305_update (&ctx, nacl_msg + 129, 1); + _gcry_poly1305_update (&ctx, nacl_msg + 130, 1); + _gcry_poly1305_finish (&ctx, mac); + if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) + return "Poly1305 test 2 failed."; + + memset (mac, 0, sizeof (mac)); + poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key); + if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0) + return "Poly1305 test 3 failed."; + + _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN); + for (i = 0; i < 256; i++) + { + /* set key and message to 'i,i,i..' */ + for (j = 0; j < sizeof (all_key); j++) + all_key[j] = i; + for (j = 0; j < i; j++) + all_msg[j] = i; + poly1305_auth (mac, all_msg, i, all_key); + _gcry_poly1305_update (&total_ctx, mac, 16); + } + _gcry_poly1305_finish (&total_ctx, mac); + if (memcmp (total_mac, mac, sizeof (total_mac)) != 0) + return "Poly1305 test 4 failed."; + + return NULL; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/primegen.c b/libotr/libgcrypt-1.8.7/cipher/primegen.c new file mode 100644 index 0000000..e24de4d --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/primegen.c @@ -0,0 +1,1878 @@ +/* primegen.c - prime number generator + * Copyright (C) 1998, 2000, 2001, 2002, 2003 + * 2004, 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" + +static gcry_mpi_t gen_prime (unsigned int nbits, int secret, int randomlevel, + int (*extra_check)(void *, gcry_mpi_t), + void *extra_check_arg); +static int check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int rm_rounds, + gcry_prime_check_func_t cb_func, void *cb_arg ); +static int is_prime (gcry_mpi_t n, int steps, unsigned int *count); +static void m_out_of_n( char *array, int m, int n ); + +static void (*progress_cb) (void *,const char*,int,int, int ); +static void *progress_cb_data; + +/* Note: 2 is not included because it can be tested more easily by + looking at bit 0. 
The last entry in this list is marked by a zero */ +static ushort small_prime_numbers[] = { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, + 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, + 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, + 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, + 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, + 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, + 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, + 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, + 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, + 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, + 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, + 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, + 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, + 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, + 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, + 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, + 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, + 1039, 1049, 1051, 1061, 1063, 1069, 1087, 1091, + 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, + 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, + 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, + 1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307, + 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399, + 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, + 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, + 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, + 1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609, + 1613, 1619, 1621, 1627, 1637, 1657, 1663, 1667, + 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, + 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, + 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, + 1873, 1877, 1879, 1889, 1901, 1907, 1913, 1931, + 1933, 1949, 1951, 1973, 1979, 1987, 1993, 1997, + 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053, + 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, + 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161, + 2179, 2203, 2207, 2213, 2221, 2237, 2239, 2243, + 2251, 2267, 2269, 2273, 2281, 2287, 2293, 2297, + 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, + 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, + 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, + 2477, 2503, 2521, 2531, 2539, 2543, 2549, 2551, + 2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633, + 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687, + 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, + 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, + 2797, 2801, 2803, 2819, 2833, 2837, 2843, 2851, + 2857, 2861, 2879, 2887, 2897, 2903, 2909, 2917, + 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, + 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, + 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, + 3163, 3167, 3169, 3181, 3187, 3191, 3203, 3209, + 3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271, + 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331, + 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, + 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467, + 3469, 3491, 3499, 3511, 3517, 3527, 3529, 3533, + 3539, 3541, 3547, 3557, 3559, 3571, 3581, 3583, + 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643, + 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, + 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, + 3793, 3797, 3803, 3821, 3823, 3833, 3847, 3851, + 3853, 3863, 3877, 3881, 3889, 3907, 3911, 3917, + 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989, + 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, + 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111, + 4127, 4129, 4133, 4139, 4153, 4157, 4159, 4177, + 4201, 4211, 4217, 4219, 4229, 4231, 4241, 4243, + 4253, 4259, 4261, 4271, 4273, 
4283, 4289, 4297, + 4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, + 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457, + 4463, 4481, 4483, 4493, 4507, 4513, 4517, 4519, + 4523, 4547, 4549, 4561, 4567, 4583, 4591, 4597, + 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657, + 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, + 4733, 4751, 4759, 4783, 4787, 4789, 4793, 4799, + 4801, 4813, 4817, 4831, 4861, 4871, 4877, 4889, + 4903, 4909, 4919, 4931, 4933, 4937, 4943, 4951, + 4957, 4967, 4969, 4973, 4987, 4993, 4999, + 0 +}; +static int no_of_small_prime_numbers = DIM (small_prime_numbers) - 1; + + + +/* An object and a list to build up a global pool of primes. See + save_pool_prime and get_pool_prime. */ +struct primepool_s +{ + struct primepool_s *next; + gcry_mpi_t prime; /* If this is NULL the entry is not used. */ + unsigned int nbits; + gcry_random_level_t randomlevel; +}; +struct primepool_s *primepool; +/* Mutex used to protect access to the primepool. */ +GPGRT_LOCK_DEFINE (primepool_lock); + + +gcry_err_code_t +_gcry_primegen_init (void) +{ + /* This function was formerly used to initialize the primepool + Mutex. This has been replace by a static initialization. */ + return 0; +} + + +/* Save PRIME which has been generated at RANDOMLEVEL for later + use. Needs to be called while primepool_lock is being hold. Note + that PRIME should be considered released after calling this + function. */ +static void +save_pool_prime (gcry_mpi_t prime, gcry_random_level_t randomlevel) +{ + struct primepool_s *item, *item2; + size_t n; + + for (n=0, item = primepool; item; item = item->next, n++) + if (!item->prime) + break; + if (!item && n > 100) + { + /* Remove some of the entries. Our strategy is removing + the last third from the list. */ + int i; + + for (i=0, item2 = primepool; item2; item2 = item2->next) + { + if (i >= n/3*2) + { + _gcry_mpi_release (item2->prime); + item2->prime = NULL; + if (!item) + item = item2; + } + } + } + if (!item) + { + item = xtrycalloc (1, sizeof *item); + if (!item) + { + /* Out of memory. Silently giving up. */ + _gcry_mpi_release (prime); + return; + } + item->next = primepool; + primepool = item; + } + item->prime = prime; + item->nbits = mpi_get_nbits (prime); + item->randomlevel = randomlevel; +} + + +/* Return a prime for the prime pool or NULL if none has been found. + The prime needs to match NBITS and randomlevel. This function needs + to be called with the primepool_look is being hold. 
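+   (That is: the caller must hold primepool_lock.)  A minimal caller
+   sketch -- this is the same pattern prime_generate_internal below
+   uses, with gen_prime as the fallback when the pool is empty:
+
+     gpgrt_lock_lock (&primepool_lock);
+     prime = get_pool_prime (fbits, poolrandomlevel);
+     gpgrt_lock_unlock (&primepool_lock);
+     if (!prime)
+       prime = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL);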
*/ +static gcry_mpi_t +get_pool_prime (unsigned int nbits, gcry_random_level_t randomlevel) +{ + struct primepool_s *item; + + for (item = primepool; item; item = item->next) + if (item->prime + && item->nbits == nbits && item->randomlevel == randomlevel) + { + gcry_mpi_t prime = item->prime; + item->prime = NULL; + gcry_assert (nbits == mpi_get_nbits (prime)); + return prime; + } + return NULL; +} + + + + + + +void +_gcry_register_primegen_progress ( void (*cb)(void *,const char*,int,int,int), + void *cb_data ) +{ + progress_cb = cb; + progress_cb_data = cb_data; +} + + +static void +progress( int c ) +{ + if ( progress_cb ) + progress_cb ( progress_cb_data, "primegen", c, 0, 0 ); +} + + +/**************** + * Generate a prime number (stored in secure memory) + */ +gcry_mpi_t +_gcry_generate_secret_prime (unsigned int nbits, + gcry_random_level_t random_level, + int (*extra_check)(void*, gcry_mpi_t), + void *extra_check_arg) +{ + gcry_mpi_t prime; + + prime = gen_prime (nbits, 1, random_level, extra_check, extra_check_arg); + progress('\n'); + return prime; +} + + +/* Generate a prime number which may be public, i.e. not allocated in + secure memory. */ +gcry_mpi_t +_gcry_generate_public_prime (unsigned int nbits, + gcry_random_level_t random_level, + int (*extra_check)(void*, gcry_mpi_t), + void *extra_check_arg) +{ + gcry_mpi_t prime; + + prime = gen_prime (nbits, 0, random_level, extra_check, extra_check_arg); + progress('\n'); + return prime; +} + + +/* Core prime generation function. The algorithm used to generate + practically safe primes is due to Lim and Lee as described in the + CRYPTO '97 proceedings (ISBN3540633847) page 260. + + NEED_Q_FACTOR: If true make sure that at least one factor is of + size qbits. This is for example required for DSA. + PRIME_GENERATED: Address of a variable where the resulting prime + number will be stored. + PBITS: Requested size of the prime number. At least 48. + QBITS: One factor of the prime needs to be of this size. May be 0 + if this is not required. See also MODE. + G: If not NULL an MPI which will receive a generator for the prime + for use with Elgamal. + RET_FACTORS: if not NULL, an array with all factors is stored at + that address. + ALL_FACTORS: If set to true all factors of prime-1 are returned. + RANDOMLEVEL: How strong should the random numbers be. + FLAGS: Prime generation bit flags. Currently supported: + GCRY_PRIME_FLAG_SECRET - The prime needs to be kept secret. + CB_FUNC, CB_ARG: Callback to be used for extra checks. + + */ +static gcry_err_code_t +prime_generate_internal (int need_q_factor, + gcry_mpi_t *prime_generated, unsigned int pbits, + unsigned int qbits, gcry_mpi_t g, + gcry_mpi_t **ret_factors, + gcry_random_level_t randomlevel, unsigned int flags, + int all_factors, + gcry_prime_check_func_t cb_func, void *cb_arg) +{ + gcry_err_code_t err = 0; + gcry_mpi_t *factors_new = NULL; /* Factors to return to the + caller. */ + gcry_mpi_t *factors = NULL; /* Current factors. */ + gcry_random_level_t poolrandomlevel; /* Random level used for pool primes. */ + gcry_mpi_t *pool = NULL; /* Pool of primes. */ + int *pool_in_use = NULL; /* Array with currently used POOL elements. */ + unsigned char *perms = NULL; /* Permutations of POOL. */ + gcry_mpi_t q_factor = NULL; /* Used if QBITS is non-zero. */ + unsigned int fbits = 0; /* Length of prime factors. */ + unsigned int n = 0; /* Number of factors. */ + unsigned int m = 0; /* Number of primes in pool. */ + gcry_mpi_t q = NULL; /* First prime factor. 
*/ + gcry_mpi_t prime = NULL; /* Prime candidate. */ + unsigned int nprime = 0; /* Bits of PRIME. */ + unsigned int req_qbits; /* The original QBITS value. */ + gcry_mpi_t val_2; /* For check_prime(). */ + int is_locked = 0; /* Flag to help unlocking the primepool. */ + unsigned int is_secret = (flags & GCRY_PRIME_FLAG_SECRET); + unsigned int count1 = 0, count2 = 0; + unsigned int i = 0, j = 0; + + if (pbits < 48) + return GPG_ERR_INV_ARG; + + /* We won't use too strong a random level for the pooled subprimes. */ + poolrandomlevel = (randomlevel > GCRY_STRONG_RANDOM? + GCRY_STRONG_RANDOM : randomlevel); + + + /* If QBITS is not given, assume a reasonable value. */ + if (!qbits) + qbits = pbits / 3; + + req_qbits = qbits; + + /* Find number of needed prime factors N. */ + for (n = 1; (pbits - qbits - 1) / n >= qbits; n++) + ; + n--; + + val_2 = mpi_alloc_set_ui (2); + + if ((! n) || ((need_q_factor) && (n < 2))) + { + err = GPG_ERR_INV_ARG; + goto leave; + } + + if (need_q_factor) + { + n--; /* Need one factor less because we want a specific Q-FACTOR. */ + fbits = (pbits - 2 * req_qbits -1) / n; + qbits = pbits - req_qbits - n * fbits; + } + else + { + fbits = (pbits - req_qbits -1) / n; + qbits = pbits - n * fbits; + } + + if (DBG_CIPHER) + log_debug ("gen prime: pbits=%u qbits=%u fbits=%u/%u n=%d\n", + pbits, req_qbits, qbits, fbits, n); + + /* Allocate an integer to hold the new prime. */ + prime = mpi_new (pbits); + + /* Generate first prime factor. */ + q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL); + + /* Generate a specific Q-Factor if requested. */ + if (need_q_factor) + q_factor = gen_prime (req_qbits, is_secret, randomlevel, NULL, NULL); + + /* Allocate an array to hold all factors + 2 for later usage. */ + factors = xtrycalloc (n + 2, sizeof (*factors)); + if (!factors) + { + err = gpg_err_code_from_errno (errno); + goto leave; + } + + /* Allocate an array to track pool usage. */ + pool_in_use = xtrymalloc (n * sizeof *pool_in_use); + if (!pool_in_use) + { + err = gpg_err_code_from_errno (errno); + goto leave; + } + for (i=0; i < n; i++) + pool_in_use[i] = -1; + + /* Make a pool of 3n+5 primes (this is an arbitrary value). We + require at least 30 primes for a useful selection process. + + Fixme: We need to research the best formula for sizing the pool. + */ + m = n * 3 + 5; + if (need_q_factor) /* Need some more in this case. */ + m += 5; + if (m < 30) + m = 30; + pool = xtrycalloc (m , sizeof (*pool)); + if (! pool) + { + err = gpg_err_code_from_errno (errno); + goto leave; + } + + /* Permute over the pool of primes until we find a prime of the + requested length. */ + do + { + next_try: + for (i=0; i < n; i++) + pool_in_use[i] = -1; + + if (!perms) + { + /* Allocate new primes. This is done right at the beginning + of the loop and if we have later run out of primes. */ + for (i = 0; i < m; i++) + { + mpi_free (pool[i]); + pool[i] = NULL; + } + + /* Init m_out_of_n(). */ + perms = xtrycalloc (1, m); + if (!perms) + { + err = gpg_err_code_from_errno (errno); + goto leave; + } + + err = gpgrt_lock_lock (&primepool_lock); + if (err) + goto leave; + is_locked = 1; + + for (i = 0; i < n; i++) + { + perms[i] = 1; + /* At a maximum we use strong random for the factors. + This saves us a lot of entropy. Given that Q and + possible Q-factor are also used in the final prime + this should be acceptable. We also don't allocate in + secure memory to save on that scarce resource too. If + Q has been allocated in secure memory, the final + prime will be saved there anyway. 
This is because + our MPI routines take care of that. GnuPG has worked + this way ever since. */ + pool[i] = NULL; + if (is_locked) + { + pool[i] = get_pool_prime (fbits, poolrandomlevel); + if (!pool[i]) + { + err = gpgrt_lock_unlock (&primepool_lock); + if (err) + goto leave; + is_locked = 0; + } + } + if (!pool[i]) + pool[i] = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL); + pool_in_use[i] = i; + factors[i] = pool[i]; + } + + if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock))) + goto leave; + is_locked = 0; + } + else + { + /* Get next permutation. */ + m_out_of_n ( (char*)perms, n, m); + + if ((err = gpgrt_lock_lock (&primepool_lock))) + goto leave; + is_locked = 1; + + for (i = j = 0; (i < m) && (j < n); i++) + if (perms[i]) + { + /* If the subprime has not yet beed generated do it now. */ + if (!pool[i] && is_locked) + { + pool[i] = get_pool_prime (fbits, poolrandomlevel); + if (!pool[i]) + { + if ((err = gpgrt_lock_unlock (&primepool_lock))) + goto leave; + is_locked = 0; + } + } + if (!pool[i]) + pool[i] = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL); + pool_in_use[j] = i; + factors[j++] = pool[i]; + } + + if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock))) + goto leave; + is_locked = 0; + + if (i == n) + { + /* Ran out of permutations: Allocate new primes. */ + xfree (perms); + perms = NULL; + progress ('!'); + goto next_try; + } + } + + /* Generate next prime candidate: + p = 2 * q [ * q_factor] * factor_0 * factor_1 * ... * factor_n + 1. + */ + mpi_set (prime, q); + mpi_mul_ui (prime, prime, 2); + if (need_q_factor) + mpi_mul (prime, prime, q_factor); + for(i = 0; i < n; i++) + mpi_mul (prime, prime, factors[i]); + mpi_add_ui (prime, prime, 1); + nprime = mpi_get_nbits (prime); + + if (nprime < pbits) + { + if (++count1 > 20) + { + count1 = 0; + qbits++; + progress('>'); + mpi_free (q); + q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL); + goto next_try; + } + } + else + count1 = 0; + + if (nprime > pbits) + { + if (++count2 > 20) + { + count2 = 0; + qbits--; + progress('<'); + mpi_free (q); + q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL); + goto next_try; + } + } + else + count2 = 0; + } + while (! ((nprime == pbits) && check_prime (prime, val_2, 5, + cb_func, cb_arg))); + + if (DBG_CIPHER) + { + progress ('\n'); + log_mpidump ("prime ", prime); + log_mpidump ("factor q", q); + if (need_q_factor) + log_mpidump ("factor q0", q_factor); + for (i = 0; i < n; i++) + log_mpidump ("factor pi", factors[i]); + log_debug ("bit sizes: prime=%u, q=%u", + mpi_get_nbits (prime), mpi_get_nbits (q)); + if (need_q_factor) + log_printf (", q0=%u", mpi_get_nbits (q_factor)); + for (i = 0; i < n; i++) + log_printf (", p%d=%u", i, mpi_get_nbits (factors[i])); + log_printf ("\n"); + } + + if (ret_factors) + { + /* Caller wants the factors. */ + factors_new = xtrycalloc (n + 4, sizeof (*factors_new)); + if (! 
factors_new) + { + err = gpg_err_code_from_errno (errno); + goto leave; + } + + if (all_factors) + { + i = 0; + factors_new[i++] = mpi_set_ui (NULL, 2); + factors_new[i++] = mpi_copy (q); + if (need_q_factor) + factors_new[i++] = mpi_copy (q_factor); + for(j=0; j < n; j++) + factors_new[i++] = mpi_copy (factors[j]); + } + else + { + i = 0; + if (need_q_factor) + { + factors_new[i++] = mpi_copy (q_factor); + for (; i <= n; i++) + factors_new[i] = mpi_copy (factors[i]); + } + else + for (; i < n; i++ ) + factors_new[i] = mpi_copy (factors[i]); + } + } + + if (g && need_q_factor) + err = GPG_ERR_NOT_IMPLEMENTED; + else if (g) + { + /* Create a generator (start with 3). */ + gcry_mpi_t tmp = mpi_alloc (mpi_get_nlimbs (prime)); + gcry_mpi_t b = mpi_alloc (mpi_get_nlimbs (prime)); + gcry_mpi_t pmin1 = mpi_alloc (mpi_get_nlimbs (prime)); + + factors[n] = q; + factors[n + 1] = mpi_alloc_set_ui (2); + mpi_sub_ui (pmin1, prime, 1); + mpi_set_ui (g, 2); + do + { + mpi_add_ui (g, g, 1); + if (DBG_CIPHER) + log_printmpi ("checking g", g); + else + progress('^'); + for (i = 0; i < n + 2; i++) + { + mpi_fdiv_q (tmp, pmin1, factors[i]); + /* No mpi_pow(), but it is okay to use this with mod + prime. */ + mpi_powm (b, g, tmp, prime); + if (! mpi_cmp_ui (b, 1)) + break; + } + if (DBG_CIPHER) + progress('\n'); + } + while (i < n + 2); + + mpi_free (factors[n+1]); + mpi_free (tmp); + mpi_free (b); + mpi_free (pmin1); + } + + if (! DBG_CIPHER) + progress ('\n'); + + + leave: + if (pool) + { + is_locked = !gpgrt_lock_lock (&primepool_lock); + for(i = 0; i < m; i++) + { + if (pool[i]) + { + for (j=0; j < n; j++) + if (pool_in_use[j] == i) + break; + if (j == n && is_locked) + { + /* This pooled subprime has not been used. */ + save_pool_prime (pool[i], poolrandomlevel); + } + else + mpi_free (pool[i]); + } + } + if (is_locked) + err = gpgrt_lock_unlock (&primepool_lock); + is_locked = 0; + xfree (pool); + } + xfree (pool_in_use); + if (factors) + xfree (factors); /* Factors are shallow copies. */ + if (perms) + xfree (perms); + + mpi_free (val_2); + mpi_free (q); + mpi_free (q_factor); + + if (! err) + { + *prime_generated = prime; + if (ret_factors) + *ret_factors = factors_new; + } + else + { + if (factors_new) + { + for (i = 0; factors_new[i]; i++) + mpi_free (factors_new[i]); + xfree (factors_new); + } + mpi_free (prime); + } + + return err; +} + + +/* Generate a prime used for discrete logarithm algorithms; i.e. this + prime will be public and no strong random is required. On success + R_PRIME receives a new MPI with the prime. On error R_PRIME is set + to NULL and an error code is returned. If RET_FACTORS is not NULL + it is set to an allocated array of factors on success or to NULL on + error. 
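+   A calling sketch with hypothetical variables and a hypothetical
+   consumer function; MODE 1 requests the DSA-style extra q factor,
+   any other MODE the plain Elgamal form.  Release FACTORS with
+   _gcry_prime_release_factors when done:
+
+     gcry_mpi_t p, g = mpi_new (0);
+     gcry_mpi_t *factors;
+     if (!_gcry_generate_elg_prime (0, 2048, 0, g, &p, &factors))
+       use_prime_and_generator (p, g, factors);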
*/ +gcry_err_code_t +_gcry_generate_elg_prime (int mode, unsigned pbits, unsigned qbits, + gcry_mpi_t g, + gcry_mpi_t *r_prime, gcry_mpi_t **ret_factors) +{ + *r_prime = NULL; + if (ret_factors) + *ret_factors = NULL; + return prime_generate_internal ((mode == 1), r_prime, pbits, qbits, g, + ret_factors, GCRY_WEAK_RANDOM, 0, 0, + NULL, NULL); +} + + +static gcry_mpi_t +gen_prime (unsigned int nbits, int secret, int randomlevel, + int (*extra_check)(void *, gcry_mpi_t), void *extra_check_arg) +{ + gcry_mpi_t prime, ptest, pminus1, val_2, val_3, result; + int i; + unsigned int x, step; + unsigned int count1, count2; + int *mods; + +/* if ( DBG_CIPHER ) */ +/* log_debug ("generate a prime of %u bits ", nbits ); */ + + if (nbits < 16) + log_fatal ("can't generate a prime with less than %d bits\n", 16); + + mods = (secret? xmalloc_secure (no_of_small_prime_numbers * sizeof *mods) + /* */ : xmalloc (no_of_small_prime_numbers * sizeof *mods)); + /* Make nbits fit into gcry_mpi_t implementation. */ + val_2 = mpi_alloc_set_ui( 2 ); + val_3 = mpi_alloc_set_ui( 3); + prime = secret? mpi_snew (nbits): mpi_new (nbits); + result = mpi_alloc_like( prime ); + pminus1= mpi_alloc_like( prime ); + ptest = mpi_alloc_like( prime ); + count1 = count2 = 0; + for (;;) + { /* try forvever */ + int dotcount=0; + + /* generate a random number */ + _gcry_mpi_randomize( prime, nbits, randomlevel ); + + /* Set high order bit to 1, set low order bit to 1. If we are + generating a secret prime we are most probably doing that + for RSA, to make sure that the modulus does have the + requested key size we set the 2 high order bits. */ + mpi_set_highbit (prime, nbits-1); + if (secret) + mpi_set_bit (prime, nbits-2); + mpi_set_bit(prime, 0); + + /* Calculate all remainders. */ + for (i=0; (x = small_prime_numbers[i]); i++ ) + mods[i] = mpi_fdiv_r_ui(NULL, prime, x); + + /* Now try some primes starting with prime. */ + for(step=0; step < 20000; step += 2 ) + { + /* Check against all the small primes we have in mods. */ + count1++; + for (i=0; (x = small_prime_numbers[i]); i++ ) + { + while ( mods[i] + step >= x ) + mods[i] -= x; + if ( !(mods[i] + step) ) + break; + } + if ( x ) + continue; /* Found a multiple of an already known prime. */ + + mpi_add_ui( ptest, prime, step ); + + /* Do a fast Fermat test now. */ + count2++; + mpi_sub_ui( pminus1, ptest, 1); + mpi_powm( result, val_2, pminus1, ptest ); + if ( !mpi_cmp_ui( result, 1 ) ) + { + /* Not composite, perform stronger tests */ + if (is_prime(ptest, 5, &count2 )) + { + if (!mpi_test_bit( ptest, nbits-1-secret )) + { + progress('\n'); + log_debug ("overflow in prime generation\n"); + break; /* Stop loop, continue with a new prime. */ + } + + if (extra_check && extra_check (extra_check_arg, ptest)) + { + /* The extra check told us that this prime is + not of the caller's taste. */ + progress ('/'); + } + else + { + /* Got it. */ + mpi_free(val_2); + mpi_free(val_3); + mpi_free(result); + mpi_free(pminus1); + mpi_free(prime); + xfree(mods); + return ptest; + } + } + } + if (++dotcount == 10 ) + { + progress('.'); + dotcount = 0; + } + } + progress(':'); /* restart with a new random value */ + } +} + +/**************** + * Returns: true if this may be a prime + * RM_ROUNDS gives the number of Rabin-Miller tests to run. + */ +static int +check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int rm_rounds, + gcry_prime_check_func_t cb_func, void *cb_arg) +{ + int i; + unsigned int x; + unsigned int count=0; + + /* Check against small primes. 
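+     (A PRIME that is divisible by a small prime X is itself prime
+     only when it equals X; the mpi_cmp_ui test below returns true
+     exactly in that case.)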
*/ + for (i=0; (x = small_prime_numbers[i]); i++ ) + { + if ( mpi_divisible_ui( prime, x ) ) + return !mpi_cmp_ui (prime, x); + } + + /* A quick Fermat test. */ + { + gcry_mpi_t result = mpi_alloc_like( prime ); + gcry_mpi_t pminus1 = mpi_alloc_like( prime ); + mpi_sub_ui( pminus1, prime, 1); + mpi_powm( result, val_2, pminus1, prime ); + mpi_free( pminus1 ); + if ( mpi_cmp_ui( result, 1 ) ) + { + /* Is composite. */ + mpi_free( result ); + progress('.'); + return 0; + } + mpi_free( result ); + } + + if (!cb_func || cb_func (cb_arg, GCRY_PRIME_CHECK_AT_MAYBE_PRIME, prime)) + { + /* Perform stronger tests. */ + if ( is_prime( prime, rm_rounds, &count ) ) + { + if (!cb_func + || cb_func (cb_arg, GCRY_PRIME_CHECK_AT_GOT_PRIME, prime)) + return 1; /* Probably a prime. */ + } + } + progress('.'); + return 0; +} + + +/* + * Return true if n is probably a prime + */ +static int +is_prime (gcry_mpi_t n, int steps, unsigned int *count) +{ + gcry_mpi_t x = mpi_alloc( mpi_get_nlimbs( n ) ); + gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs( n ) ); + gcry_mpi_t z = mpi_alloc( mpi_get_nlimbs( n ) ); + gcry_mpi_t nminus1 = mpi_alloc( mpi_get_nlimbs( n ) ); + gcry_mpi_t a2 = mpi_alloc_set_ui( 2 ); + gcry_mpi_t q; + unsigned i, j, k; + int rc = 0; + unsigned nbits = mpi_get_nbits( n ); + + if (steps < 5) /* Make sure that we do at least 5 rounds. */ + steps = 5; + + mpi_sub_ui( nminus1, n, 1 ); + + /* Find q and k, so that n = 1 + 2^k * q . */ + q = mpi_copy ( nminus1 ); + k = mpi_trailing_zeros ( q ); + mpi_tdiv_q_2exp (q, q, k); + + for (i=0 ; i < steps; i++ ) + { + ++*count; + if( !i ) + { + mpi_set_ui( x, 2 ); + } + else + { + /* We need to loop to avoid an X with value 0 or 1. */ + do + { + _gcry_mpi_randomize (x, nbits, GCRY_WEAK_RANDOM); + + /* Make sure that the number is smaller than the prime + * and keep the randomness of the high bit. */ + if (mpi_test_bit (x, nbits-2)) + { + mpi_set_highbit (x, nbits-2); /* Clear all higher bits. */ + } + else + { + mpi_set_highbit (x, nbits-2); + mpi_clear_bit (x, nbits-2); + } + } + while (mpi_cmp_ui (x, 1) <= 0); + gcry_assert (mpi_cmp (x, nminus1) < 0); + } + mpi_powm ( y, x, q, n); + if ( mpi_cmp_ui(y, 1) && mpi_cmp( y, nminus1 ) ) + { + for ( j=1; j < k && mpi_cmp( y, nminus1 ); j++ ) + { + mpi_powm(y, y, a2, n); + if( !mpi_cmp_ui( y, 1 ) ) + goto leave; /* Not a prime. */ + } + if (mpi_cmp( y, nminus1 ) ) + goto leave; /* Not a prime. */ + } + progress('+'); + } + rc = 1; /* May be a prime. */ + + leave: + mpi_free( x ); + mpi_free( y ); + mpi_free( z ); + mpi_free( nminus1 ); + mpi_free( q ); + mpi_free( a2 ); + + return rc; +} + + +/* Given ARRAY of size N with M elements set to true produce a + modified array with the next permutation of M elements. Note, that + ARRAY is used in a one-bit-per-byte approach. To detected the last + permutation it is useful to initialize the array with the first M + element set to true and use this test: + m_out_of_n (array, m, n); + for (i = j = 0; i < n && j < m; i++) + if (array[i]) + j++; + if (j == m) + goto ready; + + This code is based on the algorithm 452 from the "Collected + Algorithms From ACM, Volume II" by C. N. Liu and D. T. Tang. +*/ +static void +m_out_of_n ( char *array, int m, int n ) +{ + int i=0, i1=0, j=0, jp=0, j1=0, k1=0, k2=0; + + if( !m || m >= n ) + return; + + /* Need to handle this simple case separately. 
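+     (With M == 1 the single set bit simply moves one position to the
+     right, wrapping around to the start of ARRAY.)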
*/ + if( m == 1 ) + { + for (i=0; i < n; i++ ) + { + if ( array[i] ) + { + array[i++] = 0; + if( i >= n ) + i = 0; + array[i] = 1; + return; + } + } + BUG(); + } + + + for (j=1; j < n; j++ ) + { + if ( array[n-1] == array[n-j-1]) + continue; + j1 = j; + break; + } + + if ( (m & 1) ) + { + /* M is odd. */ + if( array[n-1] ) + { + if( j1 & 1 ) + { + k1 = n - j1; + k2 = k1+2; + if( k2 > n ) + k2 = n; + goto leave; + } + goto scan; + } + k2 = n - j1 - 1; + if( k2 == 0 ) + { + k1 = i; + k2 = n - j1; + } + else if( array[k2] && array[k2-1] ) + k1 = n; + else + k1 = k2 + 1; + } + else + { + /* M is even. */ + if( !array[n-1] ) + { + k1 = n - j1; + k2 = k1 + 1; + goto leave; + } + + if( !(j1 & 1) ) + { + k1 = n - j1; + k2 = k1+2; + if( k2 > n ) + k2 = n; + goto leave; + } + scan: + jp = n - j1 - 1; + for (i=1; i <= jp; i++ ) + { + i1 = jp + 2 - i; + if( array[i1-1] ) + { + if( array[i1-2] ) + { + k1 = i1 - 1; + k2 = n - j1; + } + else + { + k1 = i1 - 1; + k2 = n + 1 - j1; + } + goto leave; + } + } + k1 = 1; + k2 = n + 1 - m; + } + leave: + /* Now complement the two selected bits. */ + array[k1-1] = !array[k1-1]; + array[k2-1] = !array[k2-1]; +} + + +/* Generate a new prime number of PRIME_BITS bits and store it in + PRIME. If FACTOR_BITS is non-zero, one of the prime factors of + (prime - 1) / 2 must be FACTOR_BITS bits long. If FACTORS is + non-zero, allocate a new, NULL-terminated array holding the prime + factors and store it in FACTORS. FLAGS might be used to influence + the prime number generation process. */ +gcry_err_code_t +_gcry_prime_generate (gcry_mpi_t *prime, unsigned int prime_bits, + unsigned int factor_bits, gcry_mpi_t **factors, + gcry_prime_check_func_t cb_func, void *cb_arg, + gcry_random_level_t random_level, + unsigned int flags) +{ + gcry_err_code_t rc = 0; + gcry_mpi_t *factors_generated = NULL; + gcry_mpi_t prime_generated = NULL; + unsigned int mode = 0; + + if (!prime) + return GPG_ERR_INV_ARG; + *prime = NULL; + + if (flags & GCRY_PRIME_FLAG_SPECIAL_FACTOR) + mode = 1; + + /* Generate. */ + rc = prime_generate_internal ((mode==1), &prime_generated, prime_bits, + factor_bits, NULL, + factors? &factors_generated : NULL, + random_level, flags, 1, + cb_func, cb_arg); + + if (!rc && cb_func) + { + /* Additional check. */ + if ( !cb_func (cb_arg, GCRY_PRIME_CHECK_AT_FINISH, prime_generated)) + { + /* Failed, deallocate resources. */ + unsigned int i; + + mpi_free (prime_generated); + if (factors) + { + for (i = 0; factors_generated[i]; i++) + mpi_free (factors_generated[i]); + xfree (factors_generated); + } + rc = GPG_ERR_GENERAL; + } + } + + if (!rc) + { + if (factors) + *factors = factors_generated; + *prime = prime_generated; + } + + return rc; +} + +/* Check whether the number X is prime. */ +gcry_err_code_t +_gcry_prime_check (gcry_mpi_t x, unsigned int flags) +{ + (void)flags; + + switch (mpi_cmp_ui (x, 2)) + { + case 0: return 0; /* 2 is a prime */ + case -1: return GPG_ERR_NO_PRIME; /* Only numbers > 1 are primes. */ + } + + /* We use 64 rounds because the prime we are going to test is not + guaranteed to be a random one. */ + if (check_prime (x, mpi_const (MPI_C_TWO), 64, NULL, NULL)) + return 0; + + return GPG_ERR_NO_PRIME; +} + + +/* Check whether the number X is prime according to FIPS 186-4 table C.2. 
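+   The table keys the number of Rabin-Miller rounds to the prime
+   size; the code below accordingly uses 4 rounds for BITS > 1024 and
+   5 rounds otherwise.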
*/ +gcry_err_code_t +_gcry_fips186_4_prime_check (gcry_mpi_t x, unsigned int bits) +{ + gcry_err_code_t ec = GPG_ERR_NO_ERROR; + + switch (mpi_cmp_ui (x, 2)) + { + case 0: return ec; /* 2 is a prime */ + case -1: return GPG_ERR_NO_PRIME; /* Only numbers > 1 are primes. */ + } + + /* We use 5 or 4 rounds as specified in table C.2 */ + if (! check_prime (x, mpi_const (MPI_C_TWO), bits > 1024 ? 4 : 5, NULL, NULL)) + ec = GPG_ERR_NO_PRIME; + + return ec; +} + + +/* Find a generator for PRIME where the factorization of (prime-1) is + in the NULL terminated array FACTORS. Return the generator as a + newly allocated MPI in R_G. If START_G is not NULL, use this as s + atart for the search. Returns 0 on success.*/ +gcry_err_code_t +_gcry_prime_group_generator (gcry_mpi_t *r_g, + gcry_mpi_t prime, gcry_mpi_t *factors, + gcry_mpi_t start_g) +{ + gcry_mpi_t tmp, b, pmin1, g; + int first, i, n; + + if (!r_g) + return GPG_ERR_INV_ARG; + *r_g = NULL; + if (!factors || !prime) + return GPG_ERR_INV_ARG; + + for (n=0; factors[n]; n++) + ; + if (n < 2) + return GPG_ERR_INV_ARG; + + tmp = mpi_new (0); + b = mpi_new (0); + pmin1 = mpi_new (0); + g = start_g? mpi_copy (start_g) : mpi_set_ui (NULL, 3); + + /* Extra sanity check - usually disabled. */ +/* mpi_set (tmp, factors[0]); */ +/* for(i = 1; i < n; i++) */ +/* mpi_mul (tmp, tmp, factors[i]); */ +/* mpi_add_ui (tmp, tmp, 1); */ +/* if (mpi_cmp (prime, tmp)) */ +/* return gpg_error (GPG_ERR_INV_ARG); */ + + mpi_sub_ui (pmin1, prime, 1); + first = 1; + do + { + if (first) + first = 0; + else + mpi_add_ui (g, g, 1); + + if (DBG_CIPHER) + log_printmpi ("checking g", g); + else + progress('^'); + + for (i = 0; i < n; i++) + { + mpi_fdiv_q (tmp, pmin1, factors[i]); + mpi_powm (b, g, tmp, prime); + if (! mpi_cmp_ui (b, 1)) + break; + } + if (DBG_CIPHER) + progress('\n'); + } + while (i < n); + + _gcry_mpi_release (tmp); + _gcry_mpi_release (b); + _gcry_mpi_release (pmin1); + *r_g = g; + + return 0; +} + +/* Convenience function to release the factors array. */ +void +_gcry_prime_release_factors (gcry_mpi_t *factors) +{ + if (factors) + { + int i; + + for (i=0; factors[i]; i++) + mpi_free (factors[i]); + xfree (factors); + } +} + + + +/* Helper for _gcry_derive_x931_prime. */ +static gcry_mpi_t +find_x931_prime (const gcry_mpi_t pfirst) +{ + gcry_mpi_t val_2 = mpi_alloc_set_ui (2); + gcry_mpi_t prime; + + prime = mpi_copy (pfirst); + /* If P is even add 1. */ + mpi_set_bit (prime, 0); + + /* We use 64 Rabin-Miller rounds which is better and thus + sufficient. We do not have a Lucas test implementation thus we + can't do it in the X9.31 preferred way of running a few + Rabin-Miller followed by one Lucas test. */ + while ( !check_prime (prime, val_2, 64, NULL, NULL) ) + mpi_add_ui (prime, prime, 2); + + mpi_free (val_2); + + return prime; +} + + +/* Generate a prime using the algorithm from X9.31 appendix B.4. + + This function requires that the provided public exponent E is odd. + XP, XP1 and XP2 are the seed values. All values are mandatory. + + On success the prime is returned. If R_P1 or R_P2 are given the + internal values P1 and P2 are saved at these addresses. On error + NULL is returned. */ +gcry_mpi_t +_gcry_derive_x931_prime (const gcry_mpi_t xp, + const gcry_mpi_t xp1, const gcry_mpi_t xp2, + const gcry_mpi_t e, + gcry_mpi_t *r_p1, gcry_mpi_t *r_p2) +{ + gcry_mpi_t p1, p2, p1p2, yp0; + + if (!xp || !xp1 || !xp2) + return NULL; + if (!e || !mpi_test_bit (e, 0)) + return NULL; /* We support only odd values for E. 
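+                   (For an odd prime P, P-1 is even, so an even E
+                   could never pass the gcd (e, p-1) == 1 test in the
+                   search loop below.)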
*/ + + p1 = find_x931_prime (xp1); + p2 = find_x931_prime (xp2); + p1p2 = mpi_alloc_like (xp); + mpi_mul (p1p2, p1, p2); + + { + gcry_mpi_t r1, tmp; + + /* r1 = (p2^{-1} mod p1)p2 - (p1^{-1} mod p2) */ + tmp = mpi_alloc_like (p1); + mpi_invm (tmp, p2, p1); + mpi_mul (tmp, tmp, p2); + r1 = tmp; + + tmp = mpi_alloc_like (p2); + mpi_invm (tmp, p1, p2); + mpi_mul (tmp, tmp, p1); + mpi_sub (r1, r1, tmp); + + /* Fixup a negative value. */ + if (mpi_has_sign (r1)) + mpi_add (r1, r1, p1p2); + + /* yp0 = xp + (r1 - xp mod p1*p2) */ + yp0 = tmp; tmp = NULL; + mpi_subm (yp0, r1, xp, p1p2); + mpi_add (yp0, yp0, xp); + mpi_free (r1); + + /* Fixup a negative value. */ + if (mpi_cmp (yp0, xp) < 0 ) + mpi_add (yp0, yp0, p1p2); + } + + /* yp0 is now the first integer greater than xp with p1 being a + large prime factor of yp0-1 and p2 a large prime factor of yp0+1. */ + + /* Note that the first example from X9.31 (D.1.1) which uses + (Xq1 #1A5CF72EE770DE50CB09ACCEA9#) + (Xq2 #134E4CAA16D2350A21D775C404#) + (Xq #CC1092495D867E64065DEE3E7955F2EBC7D47A2D + 7C9953388F97DDDC3E1CA19C35CA659EDC2FC325 + 6D29C2627479C086A699A49C4C9CEE7EF7BD1B34 + 321DE34A#)))) + returns an yp0 of + #CC1092495D867E64065DEE3E7955F2EBC7D47A2D + 7C9953388F97DDDC3E1CA19C35CA659EDC2FC4E3 + BF20CB896EE37E098A906313271422162CB6C642 + 75C1201F# + and not + #CC1092495D867E64065DEE3E7955F2EBC7D47A2D + 7C9953388F97DDDC3E1CA19C35CA659EDC2FC2E6 + C88FE299D52D78BE405A97E01FD71DD7819ECB91 + FA85A076# + as stated in the standard. This seems to be a bug in X9.31. + */ + + { + gcry_mpi_t val_2 = mpi_alloc_set_ui (2); + gcry_mpi_t gcdtmp = mpi_alloc_like (yp0); + int gcdres; + + mpi_sub_ui (p1p2, p1p2, 1); /* Adjust for loop body. */ + mpi_sub_ui (yp0, yp0, 1); /* Ditto. */ + for (;;) + { + gcdres = mpi_gcd (gcdtmp, e, yp0); + mpi_add_ui (yp0, yp0, 1); + if (!gcdres) + progress ('/'); /* gcd (e, yp0-1) != 1 */ + else if (check_prime (yp0, val_2, 64, NULL, NULL)) + break; /* Found. */ + /* We add p1p2-1 because yp0 is incremented after the gcd test. */ + mpi_add (yp0, yp0, p1p2); + } + mpi_free (gcdtmp); + mpi_free (val_2); + } + + mpi_free (p1p2); + + progress('\n'); + if (r_p1) + *r_p1 = p1; + else + mpi_free (p1); + if (r_p2) + *r_p2 = p2; + else + mpi_free (p2); + return yp0; +} + + + +/* Generate the two prime used for DSA using the algorithm specified + in FIPS 186-2. PBITS is the desired length of the prime P and a + QBITS the length of the prime Q. If SEED is not supplied and + SEEDLEN is 0 the function generates an appropriate SEED. On + success the generated primes are stored at R_Q and R_P, the counter + value is stored at R_COUNTER and the seed actually used for + generation is stored at R_SEED and R_SEEDVALUE. */ +gpg_err_code_t +_gcry_generate_fips186_2_prime (unsigned int pbits, unsigned int qbits, + const void *seed, size_t seedlen, + gcry_mpi_t *r_q, gcry_mpi_t *r_p, + int *r_counter, + void **r_seed, size_t *r_seedlen) +{ + gpg_err_code_t ec; + unsigned char seed_help_buffer[160/8]; /* Used to hold a generated SEED. */ + unsigned char *seed_plus; /* Malloced buffer to hold SEED+x. */ + unsigned char digest[160/8]; /* Helper buffer for SHA-1 digest. */ + gcry_mpi_t val_2 = NULL; /* Helper for the prime test. */ + gcry_mpi_t tmpval = NULL; /* Helper variable. */ + int i; + + unsigned char value_u[160/8]; + int value_n, value_b, value_k; + int counter; + gcry_mpi_t value_w = NULL; + gcry_mpi_t value_x = NULL; + gcry_mpi_t prime_q = NULL; + gcry_mpi_t prime_p = NULL; + + /* FIPS 186-2 allows only for 1024/160 bit. 
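+     (Larger parameter sets are handled by the FIPS 186-3 variant
+     _gcry_generate_fips186_3_prime further below.)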
*/ + if (pbits != 1024 || qbits != 160) + return GPG_ERR_INV_KEYLEN; + + if (!seed && !seedlen) + ; /* No seed value given: We are asked to generate it. */ + else if (!seed || seedlen < qbits/8) + return GPG_ERR_INV_ARG; + + /* Allocate a buffer to later compute SEED+some_increment. */ + seed_plus = xtrymalloc (seedlen < 20? 20:seedlen); + if (!seed_plus) + { + ec = gpg_err_code_from_syserror (); + goto leave; + } + + val_2 = mpi_alloc_set_ui (2); + value_n = (pbits - 1) / qbits; + value_b = (pbits - 1) - value_n * qbits; + value_w = mpi_new (pbits); + value_x = mpi_new (pbits); + + restart: + /* Generate Q. */ + for (;;) + { + /* Step 1: Generate a (new) seed unless one has been supplied. */ + if (!seed) + { + seedlen = sizeof seed_help_buffer; + _gcry_create_nonce (seed_help_buffer, seedlen); + seed = seed_help_buffer; + } + + /* Step 2: U = sha1(seed) ^ sha1((seed+1) mod 2^{qbits}) */ + memcpy (seed_plus, seed, seedlen); + for (i=seedlen-1; i >= 0; i--) + { + seed_plus[i]++; + if (seed_plus[i]) + break; + } + _gcry_md_hash_buffer (GCRY_MD_SHA1, value_u, seed, seedlen); + _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen); + for (i=0; i < sizeof value_u; i++) + value_u[i] ^= digest[i]; + + /* Step 3: Form q from U */ + _gcry_mpi_release (prime_q); prime_q = NULL; + ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG, + value_u, sizeof value_u, NULL); + if (ec) + goto leave; + mpi_set_highbit (prime_q, qbits-1 ); + mpi_set_bit (prime_q, 0); + + /* Step 4: Test whether Q is prime using 64 round of Rabin-Miller. */ + if (check_prime (prime_q, val_2, 64, NULL, NULL)) + break; /* Yes, Q is prime. */ + + /* Step 5. */ + seed = NULL; /* Force a new seed at Step 1. */ + } + + /* Step 6. Note that we do no use an explicit offset but increment + SEED_PLUS accordingly. SEED_PLUS is currently SEED+1. */ + counter = 0; + + /* Generate P. */ + prime_p = mpi_new (pbits); + for (;;) + { + /* Step 7: For k = 0,...n let + V_k = sha1(seed+offset+k) mod 2^{qbits} + Step 8: W = V_0 + V_1*2^160 + + ... + + V_{n-1}*2^{(n-1)*160} + + (V_{n} mod 2^b)*2^{n*160} + */ + mpi_set_ui (value_w, 0); + for (value_k=0; value_k <= value_n; value_k++) + { + /* There is no need to have an explicit offset variable: In + the first round we shall have an offset of 2, this is + achieved by using SEED_PLUS which is already at SEED+1, + thus we just need to increment it once again. The + requirement for the next round is to update offset by N, + which we implictly did at the end of this loop, and then + to add one; this one is the same as in the first round. */ + for (i=seedlen-1; i >= 0; i--) + { + seed_plus[i]++; + if (seed_plus[i]) + break; + } + _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen); + + _gcry_mpi_release (tmpval); tmpval = NULL; + ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG, + digest, sizeof digest, NULL); + if (ec) + goto leave; + if (value_k == value_n) + mpi_clear_highbit (tmpval, value_b); /* (V_n mod 2^b) */ + mpi_lshift (tmpval, tmpval, value_k*qbits); + mpi_add (value_w, value_w, tmpval); + } + + /* Step 8 continued: X = W + 2^{L-1} */ + mpi_set_ui (value_x, 0); + mpi_set_highbit (value_x, pbits-1); + mpi_add (value_x, value_x, value_w); + + /* Step 9: c = X mod 2q, p = X - (c - 1) */ + mpi_mul_2exp (tmpval, prime_q, 1); + mpi_mod (tmpval, value_x, tmpval); + mpi_sub_ui (tmpval, tmpval, 1); + mpi_sub (prime_p, value_x, tmpval); + + /* Step 10: If p < 2^{L-1} skip the primality test. */ + /* Step 11 and 12: Primality test. 
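+         (The size check of Step 10 and the 64-round check_prime call
+         for Steps 11 and 12 are folded into the single condition
+         below.)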
+
+      /* Step 10: If p < 2^{L-1} skip the primality test.  */
+      /* Steps 11 and 12: Primality test.  */
+      if (mpi_get_nbits (prime_p) >= pbits-1
+          && check_prime (prime_p, val_2, 64, NULL, NULL) )
+        break; /* Yes, P is prime; continue with Step 15.  */
+
+      /* Step 13: counter = counter + 1, offset = offset + n + 1.  */
+      counter++;
+
+      /* Step 14: If counter >= 2^12 goto Step 1.  */
+      if (counter >= 4096)
+        goto restart;
+    }
+
+  /* Step 15: Save p, q, counter and seed.  */
+/*   log_debug ("fips186-2 pbits p=%u q=%u counter=%d\n", */
+/*              mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */
+/*   log_printhex ("fips186-2 seed:", seed, seedlen); */
+/*   log_mpidump ("fips186-2 prime p", prime_p); */
+/*   log_mpidump ("fips186-2 prime q", prime_q); */
+  if (r_q)
+    {
+      *r_q = prime_q;
+      prime_q = NULL;
+    }
+  if (r_p)
+    {
+      *r_p = prime_p;
+      prime_p = NULL;
+    }
+  if (r_counter)
+    *r_counter = counter;
+  if (r_seed && r_seedlen)
+    {
+      memcpy (seed_plus, seed, seedlen);
+      *r_seed = seed_plus;
+      seed_plus = NULL;
+      *r_seedlen = seedlen;
+    }
+
+ leave:
+  _gcry_mpi_release (tmpval);
+  _gcry_mpi_release (value_x);
+  _gcry_mpi_release (value_w);
+  _gcry_mpi_release (prime_p);
+  _gcry_mpi_release (prime_q);
+  xfree (seed_plus);
+  _gcry_mpi_release (val_2);
+  return ec;
+}
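+
+/* [Editor's addition -- an illustrative sketch, not part of the
+   original libgcrypt sources.  Applications do not call the
+   generator above directly; they reach it through gcry_pk_genkey
+   with a "use-fips186" flag, which the flag parser in pubkey-util.c
+   below recognizes.  Error handling is trimmed.  ] */
+#include <stdio.h>
+#include <gcrypt.h>
+
+static gcry_sexp_t
+fips_dsa_keygen_demo (void)
+{
+  gcry_sexp_t parms = NULL, key = NULL;
+  gcry_error_t err;
+
+  err = gcry_sexp_build (&parms, NULL,
+                         "(genkey (dsa (nbits 4:2048) (qbits 3:256)"
+                         " (flags use-fips186)))");
+  if (!err)
+    err = gcry_pk_genkey (&key, parms);
+  gcry_sexp_release (parms);
+  if (err)
+    fprintf (stderr, "keygen: %s\n", gcry_strerror (err));
+  return key;   /* (key-data (public-key ...) (private-key ...))  */
+}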
+
+
+/* WARNING: The code below has not yet been tested!
+ *
+ * Generate the two primes used for DSA using the algorithm specified
+ * in FIPS 186-3, A.1.1.2.  PBITS is the desired length of the prime P
+ * and QBITS the length of the prime Q.  If SEED is not supplied and
+ * SEEDLEN is 0 the function generates an appropriate SEED.  On
+ * success the generated primes are stored at R_Q and R_P, the counter
+ * value is stored at R_COUNTER and the seed actually used for
+ * generation is stored at R_SEED and R_SEEDLEN.  The hash algorithm
+ * used is stored at R_HASHALGO.
+ *
+ * Note that this function is very similar to the fips186_2 code.  Due
+ * to the minor differences, different buffer sizes, and for
+ * documentation purposes, we use a separate function.
+ */
+gpg_err_code_t
+_gcry_generate_fips186_3_prime (unsigned int pbits, unsigned int qbits,
+                                const void *seed, size_t seedlen,
+                                gcry_mpi_t *r_q, gcry_mpi_t *r_p,
+                                int *r_counter,
+                                void **r_seed, size_t *r_seedlen,
+                                int *r_hashalgo)
+{
+  gpg_err_code_t ec;
+  unsigned char seed_help_buffer[256/8];  /* Used to hold a generated SEED. */
+  unsigned char *seed_plus;     /* Malloced buffer to hold SEED+x.  */
+  unsigned char digest[256/8];  /* Helper buffer for SHA-2 digest.  */
+  gcry_mpi_t val_2 = NULL;      /* Helper for the prime test.  */
+  gcry_mpi_t tmpval = NULL;     /* Helper variable.  */
+  int hashalgo;                 /* The id of the Approved Hash Function.  */
+  int i;
+
+  unsigned char value_u[256/8];
+  int value_n, value_b, value_j;
+  int counter;
+  gcry_mpi_t value_w = NULL;
+  gcry_mpi_t value_x = NULL;
+  gcry_mpi_t prime_q = NULL;
+  gcry_mpi_t prime_p = NULL;
+
+  gcry_assert (sizeof seed_help_buffer == sizeof digest
+               && sizeof seed_help_buffer == sizeof value_u);
+
+  /* Step 1: Check the requested prime lengths.  */
+  /* Note that due to the size of our buffers QBITS is limited to 256.  */
+  if (pbits == 2048 && qbits == 224)
+    hashalgo = GCRY_MD_SHA224;
+  else if (pbits == 2048 && qbits == 256)
+    hashalgo = GCRY_MD_SHA256;
+  else if (pbits == 3072 && qbits == 256)
+    hashalgo = GCRY_MD_SHA256;
+  else
+    return GPG_ERR_INV_KEYLEN;
+
+  /* Also check that the hash algorithm is available.  */
+  ec = _gcry_md_test_algo (hashalgo);
+  if (ec)
+    return ec;
+  gcry_assert (qbits/8 <= sizeof digest);
+  gcry_assert (_gcry_md_get_algo_dlen (hashalgo) == qbits/8);
+
+
+  /* Step 2: Check seedlen.  */
+  if (!seed && !seedlen)
+    ; /* No seed value given: We are asked to generate it.  */
+  else if (!seed || seedlen < qbits/8)
+    return GPG_ERR_INV_ARG;
+
+  /* Allocate a buffer to later compute SEED+some_increment and a few
+     helper variables.  */
+  seed_plus = xtrymalloc (seedlen < sizeof seed_help_buffer?
+                          sizeof seed_help_buffer : seedlen);
+  if (!seed_plus)
+    {
+      ec = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+  val_2 = mpi_alloc_set_ui (2);
+  value_w = mpi_new (pbits);
+  value_x = mpi_new (pbits);
+
+  /* Step 3: n = \lceil L / outlen \rceil - 1  */
+  value_n = (pbits + qbits - 1) / qbits - 1;
+  /* Step 4: b = L - 1 - (n * outlen)  */
+  value_b = pbits - 1 - (value_n * qbits);
+
+ restart:
+  /* Generate Q.  */
+  for (;;)
+    {
+      /* Step 5: Generate a (new) seed unless one has been supplied.  */
+      if (!seed)
+        {
+          seedlen = qbits/8;
+          gcry_assert (seedlen <= sizeof seed_help_buffer);
+          _gcry_create_nonce (seed_help_buffer, seedlen);
+          seed = seed_help_buffer;
+        }
+
+      /* Step 6: U = hash(seed)  */
+      _gcry_md_hash_buffer (hashalgo, value_u, seed, seedlen);
+
+      /* Step 7: q = 2^{N-1} + U + 1 - (U mod 2)  */
+      if ( !(value_u[qbits/8-1] & 0x01) )
+        {
+          for (i=qbits/8-1; i >= 0; i--)
+            {
+              value_u[i]++;
+              if (value_u[i])
+                break;
+            }
+        }
+      _gcry_mpi_release (prime_q); prime_q = NULL;
+      ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
+                           value_u, qbits/8, NULL);
+      if (ec)
+        goto leave;
+      mpi_set_highbit (prime_q, qbits-1);
+
+      /* Step 8: Test whether Q is prime using 64 rounds of Rabin-Miller.
+                 According to table C.1 this is sufficient for all
+                 supported prime sizes (i.e. up to 3072/256).  */
+      if (check_prime (prime_q, val_2, 64, NULL, NULL))
+        break; /* Yes, Q is prime.  */
+
+      /* Step 9.  */
+      seed = NULL;  /* Force a new seed at Step 5.  */
+    }
+
+  /* Step 11.  Note that we do not use an explicit offset but increment
+     SEED_PLUS accordingly.  */
+  memcpy (seed_plus, seed, seedlen);
+  counter = 0;
+
+  /* Generate P.  */
+  prime_p = mpi_new (pbits);
+  for (;;)
+    {
+      /* Step 11.1: For j = 0,...n let
+                      V_j = hash(seed+offset+j)
+         Step 11.2: W = V_0 + V_1*2^outlen +
+                            ...
+                            + V_{n-1}*2^{(n-1)*outlen}
+                            + (V_{n} mod 2^b)*2^{n*outlen}
+       */
+      mpi_set_ui (value_w, 0);
+      for (value_j=0; value_j <= value_n; value_j++)
+        {
+          /* There is no need to have an explicit offset variable: In
+             the first round we shall have an offset of 1 and a j of
+             0.  This is achieved by incrementing SEED_PLUS here.  For
+             the next round offset is implicitly updated by using
+             SEED_PLUS again.  */
+          for (i=seedlen-1; i >= 0; i--)
+            {
+              seed_plus[i]++;
+              if (seed_plus[i])
+                break;
+            }
+          _gcry_md_hash_buffer (hashalgo, digest, seed_plus, seedlen);
+
+          _gcry_mpi_release (tmpval); tmpval = NULL;
+          ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
+                               digest, qbits/8, NULL);
+          if (ec)
+            goto leave;
+          if (value_j == value_n)
+            mpi_clear_highbit (tmpval, value_b); /* (V_n mod 2^b) */
+          mpi_lshift (tmpval, tmpval, value_j*qbits);
+          mpi_add (value_w, value_w, tmpval);
+        }
+
+      /* Step 11.3: X = W + 2^{L-1}  */
+      mpi_set_ui (value_x, 0);
+      mpi_set_highbit (value_x, pbits-1);
+      mpi_add (value_x, value_x, value_w);
+
+      /* Step 11.4: c = X mod 2q  */
+      mpi_mul_2exp (tmpval, prime_q, 1);
+      mpi_mod (tmpval, value_x, tmpval);
+
+      /* Step 11.5: p = X - (c - 1)  */
+      mpi_sub_ui (tmpval, tmpval, 1);
+      mpi_sub (prime_p, value_x, tmpval);
+
+      /* Step 11.6: If p < 2^{L-1} skip the primality test.  */
+      /* Steps 11.7 and 11.8: Primality test.
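+
+         [Editor's note, not part of the original sources: for the
+         three parameter sets admitted in Step 1, Steps 3 and 4 above
+         give
+            L=2048, outlen=224:  n = 9,  b = 31
+            L=2048, outlen=256:  n = 7,  b = 255
+            L=3072, outlen=256:  n = 11, b = 255
+         so each candidate W above is assembled from n+1 digests.]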
*/ + if (mpi_get_nbits (prime_p) >= pbits-1 + && check_prime (prime_p, val_2, 64, NULL, NULL) ) + break; /* Yes, P is prime, continue with Step 15. */ + + /* Step 11.9: counter = counter + 1, offset = offset + n + 1. + If counter >= 4L goto Step 5. */ + counter++; + if (counter >= 4*pbits) + goto restart; + } + + /* Step 12: Save p, q, counter and seed. */ + /* log_debug ("fips186-3 pbits p=%u q=%u counter=%d\n", */ + /* mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */ + /* log_printhex ("fips186-3 seed", seed, seedlen); */ + /* log_printmpi ("fips186-3 p", prime_p); */ + /* log_printmpi ("fips186-3 q", prime_q); */ + + if (r_q) + { + *r_q = prime_q; + prime_q = NULL; + } + if (r_p) + { + *r_p = prime_p; + prime_p = NULL; + } + if (r_counter) + *r_counter = counter; + if (r_seed && r_seedlen) + { + memcpy (seed_plus, seed, seedlen); + *r_seed = seed_plus; + seed_plus = NULL; + *r_seedlen = seedlen; + } + if (r_hashalgo) + *r_hashalgo = hashalgo; + + leave: + _gcry_mpi_release (tmpval); + _gcry_mpi_release (value_x); + _gcry_mpi_release (value_w); + _gcry_mpi_release (prime_p); + _gcry_mpi_release (prime_q); + xfree (seed_plus); + _gcry_mpi_release (val_2); + return ec; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/pubkey-internal.h b/libotr/libgcrypt-1.8.7/cipher/pubkey-internal.h new file mode 100644 index 0000000..d31e26f --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/pubkey-internal.h @@ -0,0 +1,105 @@ +/* pubkey-internal.h - Internal defs for pubkey.c + * Copyright (C) 2013 g10 code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef GCRY_PUBKEY_INTERNAL_H +#define GCRY_PUBKEY_INTERNAL_H + +/*-- pubkey-util.c --*/ +gpg_err_code_t _gcry_pk_util_parse_flaglist (gcry_sexp_t list, + int *r_flags, + enum pk_encoding *r_encoding); +gpg_err_code_t _gcry_pk_util_get_nbits (gcry_sexp_t list, + unsigned int *r_nbits); +gpg_err_code_t _gcry_pk_util_get_rsa_use_e (gcry_sexp_t list, + unsigned long *r_e); +gpg_err_code_t _gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig, + const char **algo_names, + gcry_sexp_t *r_parms, + int *r_eccflags); +gpg_err_code_t _gcry_pk_util_preparse_encval (gcry_sexp_t sexp, + const char **algo_names, + gcry_sexp_t *r_parms, + struct pk_encoding_ctx *ctx); +void _gcry_pk_util_init_encoding_ctx (struct pk_encoding_ctx *ctx, + enum pk_operation op, + unsigned int nbits); +void _gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx); +gcry_err_code_t _gcry_pk_util_data_to_mpi (gcry_sexp_t input, + gcry_mpi_t *ret_mpi, + struct pk_encoding_ctx *ctx); + + + +/*-- rsa-common.c --*/ +gpg_err_code_t +_gcry_rsa_pkcs1_encode_for_enc (gcry_mpi_t *r_result, unsigned int nbits, + const unsigned char *value, size_t valuelen, + const unsigned char *random_override, + size_t random_override_len); +gpg_err_code_t +_gcry_rsa_pkcs1_decode_for_enc (unsigned char **r_result, size_t *r_resultlen, + unsigned int nbits, gcry_mpi_t value); +gpg_err_code_t +_gcry_rsa_pkcs1_encode_raw_for_sig (gcry_mpi_t *r_result, unsigned int nbits, + const unsigned char *value, size_t valuelen); + +gpg_err_code_t +_gcry_rsa_pkcs1_encode_for_sig (gcry_mpi_t *r_result, unsigned int nbits, + const unsigned char *value, size_t valuelen, + int algo); +gpg_err_code_t +_gcry_rsa_oaep_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo, + const unsigned char *value, size_t valuelen, + const unsigned char *label, size_t labellen, + const void *random_override, size_t random_override_len); +gpg_err_code_t +_gcry_rsa_oaep_decode (unsigned char **r_result, size_t *r_resultlen, + unsigned int nbits, int algo, + gcry_mpi_t value, + const unsigned char *label, size_t labellen); +gpg_err_code_t +_gcry_rsa_pss_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo, + const unsigned char *value, size_t valuelen, int saltlen, + const void *random_override, size_t random_override_len); +gpg_err_code_t +_gcry_rsa_pss_verify (gcry_mpi_t value, gcry_mpi_t encoded, + unsigned int nbits, int algo, size_t saltlen); + + + +/*-- dsa-common.c --*/ +void _gcry_dsa_modify_k (gcry_mpi_t k, gcry_mpi_t q, int qbits); +gcry_mpi_t _gcry_dsa_gen_k (gcry_mpi_t q, int security_level); +gpg_err_code_t _gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k, + gcry_mpi_t dsa_q, gcry_mpi_t dsa_x, + const unsigned char *h1, + unsigned int h1len, + int halgo, + unsigned int extraloops); + +gpg_err_code_t _gcry_dsa_normalize_hash (gcry_mpi_t input, + gcry_mpi_t *out, + unsigned int qbits); + +/*-- ecc.c --*/ +gpg_err_code_t _gcry_pk_ecc_get_sexp (gcry_sexp_t *r_sexp, int mode, + mpi_ec_t ec); + + +#endif /*GCRY_PUBKEY_INTERNAL_H*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/pubkey-util.c b/libotr/libgcrypt-1.8.7/cipher/pubkey-util.c new file mode 100644 index 0000000..c40ef97 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/pubkey-util.c @@ -0,0 +1,1121 @@ +/* pubkey-util.c - Supporting functions for all pubkey modules. + * Copyright (C) 1998, 1999, 2000, 2002, 2003, 2005, + * 2007, 2008, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013, 2015 g10 Code GmbH + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "pubkey-internal.h" + + +/* Callback for the pubkey algorithm code to verify PSS signatures. + OPAQUE is the data provided by the actual caller. The meaning of + TMP depends on the actual algorithm (but there is only RSA); now + for RSA it is the output of running the public key function on the + input. */ +static int +pss_verify_cmp (void *opaque, gcry_mpi_t tmp) +{ + struct pk_encoding_ctx *ctx = opaque; + gcry_mpi_t hash = ctx->verify_arg; + + return _gcry_rsa_pss_verify (hash, tmp, ctx->nbits - 1, + ctx->hash_algo, ctx->saltlen); +} + + +/* Parser for a flag list. On return the encoding is stored at + R_ENCODING and the flags are stored at R_FLAGS. If any of them is + not needed, NULL may be passed. The function returns 0 on success + or an error code. */ +gpg_err_code_t +_gcry_pk_util_parse_flaglist (gcry_sexp_t list, + int *r_flags, enum pk_encoding *r_encoding) +{ + gpg_err_code_t rc = 0; + const char *s; + size_t n; + int i; + int encoding = PUBKEY_ENC_UNKNOWN; + int flags = 0; + int igninvflag = 0; + + for (i = list ? sexp_length (list)-1 : 0; i > 0; i--) + { + s = sexp_nth_data (list, i, &n); + if (!s) + continue; /* Not a data element. */ + + switch (n) + { + case 3: + if (!memcmp (s, "pss", 3) && encoding == PUBKEY_ENC_UNKNOWN) + { + encoding = PUBKEY_ENC_PSS; + flags |= PUBKEY_FLAG_FIXEDLEN; + } + else if (!memcmp (s, "raw", 3) && encoding == PUBKEY_ENC_UNKNOWN) + { + encoding = PUBKEY_ENC_RAW; + flags |= PUBKEY_FLAG_RAW_FLAG; /* Explicitly given. */ + } + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 4: + if (!memcmp (s, "comp", 4)) + flags |= PUBKEY_FLAG_COMP; + else if (!memcmp (s, "oaep", 4) && encoding == PUBKEY_ENC_UNKNOWN) + { + encoding = PUBKEY_ENC_OAEP; + flags |= PUBKEY_FLAG_FIXEDLEN; + } + else if (!memcmp (s, "gost", 4)) + { + encoding = PUBKEY_ENC_RAW; + flags |= PUBKEY_FLAG_GOST; + } + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 5: + if (!memcmp (s, "eddsa", 5)) + { + encoding = PUBKEY_ENC_RAW; + flags |= PUBKEY_FLAG_EDDSA; + flags |= PUBKEY_FLAG_DJB_TWEAK; + } + else if (!memcmp (s, "pkcs1", 5) && encoding == PUBKEY_ENC_UNKNOWN) + { + encoding = PUBKEY_ENC_PKCS1; + flags |= PUBKEY_FLAG_FIXEDLEN; + } + else if (!memcmp (s, "param", 5)) + flags |= PUBKEY_FLAG_PARAM; + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 6: + if (!memcmp (s, "nocomp", 6)) + flags |= PUBKEY_FLAG_NOCOMP; + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 7: + if (!memcmp (s, "rfc6979", 7)) + flags |= PUBKEY_FLAG_RFC6979; + else if (!memcmp (s, "noparam", 7)) + ; /* Ignore - it is the default. 
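+                [Editor's note, not part of the original sources: a
+                list such as (flags rfc6979 noparam) therefore comes
+                back as PUBKEY_FLAG_RFC6979 alone, with "noparam"
+                silently accepted.]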
*/ + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 8: + if (!memcmp (s, "use-x931", 8)) + flags |= PUBKEY_FLAG_USE_X931; + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 9: + if (!memcmp (s, "pkcs1-raw", 9) && encoding == PUBKEY_ENC_UNKNOWN) + { + encoding = PUBKEY_ENC_PKCS1_RAW; + flags |= PUBKEY_FLAG_FIXEDLEN; + } + else if (!memcmp (s, "djb-tweak", 9)) + { + encoding = PUBKEY_ENC_RAW; + flags |= PUBKEY_FLAG_DJB_TWEAK; + } + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 10: + if (!memcmp (s, "igninvflag", 10)) + igninvflag = 1; + else if (!memcmp (s, "no-keytest", 10)) + flags |= PUBKEY_FLAG_NO_KEYTEST; + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 11: + if (!memcmp (s, "no-blinding", 11)) + flags |= PUBKEY_FLAG_NO_BLINDING; + else if (!memcmp (s, "use-fips186", 11)) + flags |= PUBKEY_FLAG_USE_FIPS186; + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + case 13: + if (!memcmp (s, "use-fips186-2", 13)) + flags |= PUBKEY_FLAG_USE_FIPS186_2; + else if (!memcmp (s, "transient-key", 13)) + flags |= PUBKEY_FLAG_TRANSIENT_KEY; + else if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + + default: + if (!igninvflag) + rc = GPG_ERR_INV_FLAG; + break; + } + } + + if (r_flags) + *r_flags = flags; + if (r_encoding) + *r_encoding = encoding; + + return rc; +} + + +static int +get_hash_algo (const char *s, size_t n) +{ + static const struct { const char *name; int algo; } hashnames[] = { + { "sha1", GCRY_MD_SHA1 }, + { "md5", GCRY_MD_MD5 }, + { "sha256", GCRY_MD_SHA256 }, + { "ripemd160", GCRY_MD_RMD160 }, + { "rmd160", GCRY_MD_RMD160 }, + { "sha384", GCRY_MD_SHA384 }, + { "sha512", GCRY_MD_SHA512 }, + { "sha224", GCRY_MD_SHA224 }, + { "md2", GCRY_MD_MD2 }, + { "md4", GCRY_MD_MD4 }, + { "tiger", GCRY_MD_TIGER }, + { "haval", GCRY_MD_HAVAL }, + { "sha3-224", GCRY_MD_SHA3_224 }, + { "sha3-256", GCRY_MD_SHA3_256 }, + { "sha3-384", GCRY_MD_SHA3_384 }, + { "sha3-512", GCRY_MD_SHA3_512 }, + { NULL, 0 } + }; + int algo; + int i; + + for (i=0; hashnames[i].name; i++) + { + if ( strlen (hashnames[i].name) == n + && !memcmp (hashnames[i].name, s, n)) + break; + } + if (hashnames[i].name) + algo = hashnames[i].algo; + else + { + /* In case of not listed or dynamically allocated hash + algorithm we fall back to this somewhat slower + method. Further, it also allows to use OIDs as + algorithm names. */ + char *tmpname; + + tmpname = xtrymalloc (n+1); + if (!tmpname) + algo = 0; /* Out of core - silently give up. */ + else + { + memcpy (tmpname, s, n); + tmpname[n] = 0; + algo = _gcry_md_map_name (tmpname); + xfree (tmpname); + } + } + return algo; +} + + +/* Get the "nbits" parameter from an s-expression of the format: + * + * (algo + * (parameter_name_1 ....) + * .... + * (parameter_name_n ....)) + * + * Example: + * + * (rsa + * (nbits 4:2048)) + * + * On success the value for nbits is stored at R_NBITS. If no nbits + * parameter is found, the function returns success and stores 0 at + * R_NBITS. For parsing errors the function returns an error code and + * stores 0 at R_NBITS. + */ +gpg_err_code_t +_gcry_pk_util_get_nbits (gcry_sexp_t list, unsigned int *r_nbits) +{ + char buf[50]; + const char *s; + size_t n; + + *r_nbits = 0; + + list = sexp_find_token (list, "nbits", 0); + if (!list) + return 0; /* No NBITS found. */ + + s = sexp_nth_data (list, 1, &n); + if (!s || n >= DIM (buf) - 1 ) + { + /* NBITS given without a cdr. 
*/ + sexp_release (list); + return GPG_ERR_INV_OBJ; + } + memcpy (buf, s, n); + buf[n] = 0; + *r_nbits = (unsigned int)strtoul (buf, NULL, 0); + sexp_release (list); + return 0; +} + + +/* Get the optional "rsa-use-e" parameter from an s-expression of the + * format: + * + * (algo + * (parameter_name_1 ....) + * .... + * (parameter_name_n ....)) + * + * Example: + * + * (rsa + * (nbits 4:2048) + * (rsa-use-e 2:41)) + * + * On success the value for nbits is stored at R_E. If no rsa-use-e + * parameter is found, the function returns success and stores 65537 at + * R_E. For parsing errors the function returns an error code and + * stores 0 at R_E. + */ +gpg_err_code_t +_gcry_pk_util_get_rsa_use_e (gcry_sexp_t list, unsigned long *r_e) +{ + char buf[50]; + const char *s; + size_t n; + + *r_e = 0; + + list = sexp_find_token (list, "rsa-use-e", 0); + if (!list) + { + *r_e = 65537; /* Not given, use the value generated by old versions. */ + return 0; + } + + s = sexp_nth_data (list, 1, &n); + if (!s || n >= DIM (buf) - 1 ) + { + /* No value or value too large. */ + sexp_release (list); + return GPG_ERR_INV_OBJ; + } + memcpy (buf, s, n); + buf[n] = 0; + *r_e = strtoul (buf, NULL, 0); + sexp_release (list); + return 0; +} + + +/* Parse a "sig-val" s-expression and store the inner parameter list at + R_PARMS. ALGO_NAMES is used to verify that the algorithm in + "sig-val" is valid. Returns 0 on success and stores a new list at + R_PARMS which must be freed by the caller. On error R_PARMS is set + to NULL and an error code returned. If R_ECCFLAGS is not NULL flag + values are set into it; as of now they are only used with ecc + algorithms. */ +gpg_err_code_t +_gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig, const char **algo_names, + gcry_sexp_t *r_parms, int *r_eccflags) +{ + gpg_err_code_t rc; + gcry_sexp_t l1 = NULL; + gcry_sexp_t l2 = NULL; + char *name = NULL; + int i; + + *r_parms = NULL; + if (r_eccflags) + *r_eccflags = 0; + + /* Extract the signature value. */ + l1 = sexp_find_token (s_sig, "sig-val", 0); + if (!l1) + { + rc = GPG_ERR_INV_OBJ; /* Does not contain a signature value object. */ + goto leave; + } + + l2 = sexp_nth (l1, 1); + if (!l2) + { + rc = GPG_ERR_NO_OBJ; /* No cadr for the sig object. */ + goto leave; + } + name = sexp_nth_string (l2, 0); + if (!name) + { + rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */ + goto leave; + } + else if (!strcmp (name, "flags")) + { + /* Skip a "flags" parameter and look again for the algorithm + name. This is not used but here just for the sake of + consistent S-expressions we need to handle it. */ + sexp_release (l2); + l2 = sexp_nth (l1, 2); + if (!l2) + { + rc = GPG_ERR_INV_OBJ; + goto leave; + } + xfree (name); + name = sexp_nth_string (l2, 0); + if (!name) + { + rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */ + goto leave; + } + } + + for (i=0; algo_names[i]; i++) + if (!stricmp (name, algo_names[i])) + break; + if (!algo_names[i]) + { + rc = GPG_ERR_CONFLICT; /* "sig-val" uses an unexpected algo. */ + goto leave; + } + if (r_eccflags) + { + if (!strcmp (name, "eddsa")) + *r_eccflags = PUBKEY_FLAG_EDDSA; + if (!strcmp (name, "gost")) + *r_eccflags = PUBKEY_FLAG_GOST; + } + + *r_parms = l2; + l2 = NULL; + rc = 0; + + leave: + xfree (name); + sexp_release (l2); + sexp_release (l1); + return rc; +} + + +/* Parse a "enc-val" s-expression and store the inner parameter list + at R_PARMS. ALGO_NAMES is used to verify that the algorithm in + "enc-val" is valid. 
Returns 0 on success and stores a new list at + R_PARMS which must be freed by the caller. On error R_PARMS is set + to NULL and an error code returned. If R_ECCFLAGS is not NULL flag + values are set into it; as of now they are only used with ecc + algorithms. + + (enc-val + [(flags [raw, pkcs1, oaep, no-blinding])] + [(hash-algo <algo>)] + [(label <label>)] + (<algo> + (<param_name1> <mpi>) + ... + (<param_namen> <mpi>))) + + HASH-ALGO and LABEL are specific to OAEP. CTX will be updated with + encoding information. */ +gpg_err_code_t +_gcry_pk_util_preparse_encval (gcry_sexp_t sexp, const char **algo_names, + gcry_sexp_t *r_parms, + struct pk_encoding_ctx *ctx) +{ + gcry_err_code_t rc = 0; + gcry_sexp_t l1 = NULL; + gcry_sexp_t l2 = NULL; + char *name = NULL; + size_t n; + int parsed_flags = 0; + int i; + + *r_parms = NULL; + + /* Check that the first element is valid. */ + l1 = sexp_find_token (sexp, "enc-val" , 0); + if (!l1) + { + rc = GPG_ERR_INV_OBJ; /* Does not contain an encrypted value object. */ + goto leave; + } + + l2 = sexp_nth (l1, 1); + if (!l2) + { + rc = GPG_ERR_NO_OBJ; /* No cadr for the data object. */ + goto leave; + } + + /* Extract identifier of sublist. */ + name = sexp_nth_string (l2, 0); + if (!name) + { + rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */ + goto leave; + } + + if (!strcmp (name, "flags")) + { + const char *s; + + /* There is a flags element - process it. */ + rc = _gcry_pk_util_parse_flaglist (l2, &parsed_flags, &ctx->encoding); + if (rc) + goto leave; + if (ctx->encoding == PUBKEY_ENC_PSS) + { + rc = GPG_ERR_CONFLICT; + goto leave; + } + + /* Get the OAEP parameters HASH-ALGO and LABEL, if any. */ + if (ctx->encoding == PUBKEY_ENC_OAEP) + { + /* Get HASH-ALGO. */ + sexp_release (l2); + l2 = sexp_find_token (l1, "hash-algo", 0); + if (l2) + { + s = sexp_nth_data (l2, 1, &n); + if (!s) + rc = GPG_ERR_NO_OBJ; + else + { + ctx->hash_algo = get_hash_algo (s, n); + if (!ctx->hash_algo) + rc = GPG_ERR_DIGEST_ALGO; + } + if (rc) + goto leave; + } + + /* Get LABEL. */ + sexp_release (l2); + l2 = sexp_find_token (l1, "label", 0); + if (l2) + { + s = sexp_nth_data (l2, 1, &n); + if (!s) + rc = GPG_ERR_NO_OBJ; + else if (n > 0) + { + ctx->label = xtrymalloc (n); + if (!ctx->label) + rc = gpg_err_code_from_syserror (); + else + { + memcpy (ctx->label, s, n); + ctx->labellen = n; + } + } + if (rc) + goto leave; + } + } + + /* Get the next which has the actual data - skip HASH-ALGO and LABEL. */ + for (i = 2; (sexp_release (l2), l2 = sexp_nth (l1, i)); i++) + { + s = sexp_nth_data (l2, 0, &n); + if (!(n == 9 && !memcmp (s, "hash-algo", 9)) + && !(n == 5 && !memcmp (s, "label", 5)) + && !(n == 15 && !memcmp (s, "random-override", 15))) + break; + } + if (!l2) + { + rc = GPG_ERR_NO_OBJ; /* No cadr for the data object. */ + goto leave; + } + + /* Extract sublist identifier. */ + xfree (name); + name = sexp_nth_string (l2, 0); + if (!name) + { + rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */ + goto leave; + } + } + else /* No flags - flag as legacy structure. */ + parsed_flags |= PUBKEY_FLAG_LEGACYRESULT; + + for (i=0; algo_names[i]; i++) + if (!stricmp (name, algo_names[i])) + break; + if (!algo_names[i]) + { + rc = GPG_ERR_CONFLICT; /* "enc-val" uses an unexpected algo. */ + goto leave; + } + + *r_parms = l2; + l2 = NULL; + ctx->flags |= parsed_flags; + rc = 0; + + leave: + xfree (name); + sexp_release (l2); + sexp_release (l1); + return rc; +} + + +/* Initialize an encoding context. 
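+ */
+
+/* [Editor's addition -- an illustrative sketch, not part of the
+   original libgcrypt sources.  This is the shape of an OAEP
+   "enc-val" accepted by the pre-parser above; A stands for the RSA
+   ciphertext MPI and the label value is arbitrary.  ] */
+#include <gcrypt.h>
+
+static gcry_sexp_t
+build_oaep_encval_demo (gcry_mpi_t a)
+{
+  gcry_sexp_t encval = NULL;
+
+  gcry_sexp_build (&encval, NULL,
+                   "(enc-val (flags oaep) (hash-algo sha256)"
+                   " (label 7:example) (rsa (a %m)))", a);
+  return encval;
+}
+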
+/* Initialize an encoding context.  */
+void
+_gcry_pk_util_init_encoding_ctx (struct pk_encoding_ctx *ctx,
+                                 enum pk_operation op,
+                                 unsigned int nbits)
+{
+  ctx->op = op;
+  ctx->nbits = nbits;
+  ctx->encoding = PUBKEY_ENC_UNKNOWN;
+  ctx->flags = 0;
+  if (fips_mode ())
+    {
+      ctx->hash_algo = GCRY_MD_SHA256;
+    }
+  else
+    {
+      ctx->hash_algo = GCRY_MD_SHA1;
+    }
+  ctx->label = NULL;
+  ctx->labellen = 0;
+  ctx->saltlen = 20;
+  ctx->verify_cmp = NULL;
+  ctx->verify_arg = NULL;
+}
+
+/* Free a context initialized by _gcry_pk_util_init_encoding_ctx.  */
+void
+_gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx)
+{
+  xfree (ctx->label);
+}
+
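+
+/* [Editor's addition -- an illustrative sketch, not part of the
+   original libgcrypt sources.  A typical Ed25519 "data" element
+   taking the EdDSA branch of the function below; SK is assumed to be
+   an Ed25519 private key.  ] */
+#include <gcrypt.h>
+
+static gcry_error_t
+eddsa_sign_demo (gcry_sexp_t sk, const void *msg, size_t msglen,
+                 gcry_sexp_t *r_sig)
+{
+  gcry_sexp_t data = NULL;
+  gcry_error_t err;
+
+  err = gcry_sexp_build (&data, NULL,
+                         "(data (flags eddsa) (hash-algo sha512)"
+                         " (value %b))", (int)msglen, msg);
+  if (!err)
+    err = gcry_pk_sign (r_sig, data, sk);
+  gcry_sexp_release (data);
+  return err;
+}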
+
+/* Take the hash value and convert it into an MPI, suitable for
+   passing to the low level functions.  We currently support the
+   old style way of passing just an MPI and the modern interface which
+   allows passing flags so that we can choose between raw and pkcs1
+   padding -- maybe more padding options later.
+
+   (<mpi>)
+   or
+   (data
+    [(flags [raw, direct, pkcs1, oaep, pss, no-blinding, rfc6979, eddsa])]
+    [(hash <algo> <value>)]
+    [(value <text>)]
+    [(hash-algo <algo>)]
+    [(label <label>)]
+    [(salt-length <length>)]
+    [(random-override <data>)]
+   )
+
+   Either the VALUE or the HASH element must be present for use
+   with signatures.  VALUE is used for encryption.
+
+   HASH-ALGO is specific to OAEP and EDDSA.
+
+   LABEL is specific to OAEP.
+
+   SALT-LENGTH is for PSS; it is limited to 16384 bytes.
+
+   RANDOM-OVERRIDE is used to replace random nonces for regression
+   testing.  */
+gcry_err_code_t
+_gcry_pk_util_data_to_mpi (gcry_sexp_t input, gcry_mpi_t *ret_mpi,
+                           struct pk_encoding_ctx *ctx)
+{
+  gcry_err_code_t rc = 0;
+  gcry_sexp_t ldata, lhash, lvalue;
+  size_t n;
+  const char *s;
+  int unknown_flag = 0;
+  int parsed_flags = 0;
+
+  *ret_mpi = NULL;
+  ldata = sexp_find_token (input, "data", 0);
+  if (!ldata)
+    { /* Assume old style.  */
+      *ret_mpi = sexp_nth_mpi (input, 0, 0);
+      return *ret_mpi ? GPG_ERR_NO_ERROR : GPG_ERR_INV_OBJ;
+    }
+
+  /* See whether there is a flags list.  */
+  {
+    gcry_sexp_t lflags = sexp_find_token (ldata, "flags", 0);
+    if (lflags)
+      {
+        if (_gcry_pk_util_parse_flaglist (lflags,
+                                          &parsed_flags, &ctx->encoding))
+          unknown_flag = 1;
+        sexp_release (lflags);
+      }
+  }
+
+  if (ctx->encoding == PUBKEY_ENC_UNKNOWN)
+    ctx->encoding = PUBKEY_ENC_RAW; /* Default to raw.  */
+
+  /* Get HASH or MPI.  */
+  lhash = sexp_find_token (ldata, "hash", 0);
+  lvalue = lhash? NULL : sexp_find_token (ldata, "value", 0);
+
+  if (!(!lhash ^ !lvalue))
+    rc = GPG_ERR_INV_OBJ; /* None or both given.  */
+  else if (unknown_flag)
+    rc = GPG_ERR_INV_FLAG;
+  else if (ctx->encoding == PUBKEY_ENC_RAW
+           && (parsed_flags & PUBKEY_FLAG_EDDSA))
+    {
+      /* Prepare for EdDSA.  */
+      gcry_sexp_t list;
+      void *value;
+      size_t valuelen;
+
+      if (!lvalue)
+        {
+          rc = GPG_ERR_INV_OBJ;
+          goto leave;
+        }
+      /* Get HASH-ALGO.  */
+      list = sexp_find_token (ldata, "hash-algo", 0);
+      if (list)
+        {
+          s = sexp_nth_data (list, 1, &n);
+          if (!s)
+            rc = GPG_ERR_NO_OBJ;
+          else
+            {
+              ctx->hash_algo = get_hash_algo (s, n);
+              if (!ctx->hash_algo)
+                rc = GPG_ERR_DIGEST_ALGO;
+            }
+          sexp_release (list);
+        }
+      else
+        rc = GPG_ERR_INV_OBJ;
+      if (rc)
+        goto leave;
+
+      /* Get VALUE.  */
+      value = sexp_nth_buffer (lvalue, 1, &valuelen);
+      if (!value)
+        {
+          /* We assume that a zero length message is meant by
+             "(value)".  This is commonly used by test vectors.  Note
+             that S-expressions do not allow zero length items.  */
+          valuelen = 0;
+          value = xtrymalloc (1);
+          if (!value)
+            rc = gpg_err_code_from_syserror ();
+        }
+      else if ((valuelen * 8) < valuelen)
+        {
+          xfree (value);
+          rc = GPG_ERR_TOO_LARGE;
+        }
+      if (rc)
+        goto leave;
+
+      /* Note that mpi_set_opaque takes ownership of VALUE.  */
+      *ret_mpi = mpi_set_opaque (NULL, value, valuelen*8);
+    }
+  else if (ctx->encoding == PUBKEY_ENC_RAW && lhash
+           && ((parsed_flags & PUBKEY_FLAG_RAW_FLAG)
+               || (parsed_flags & PUBKEY_FLAG_RFC6979)))
+    {
+      /* Raw encoding along with a hash element.  This is commonly
+         used for DSA.  For better backward error compatibility we
+         allow this only if either the rfc6979 flag has been given or
+         the raw flag was explicitly given.  */
+      if (sexp_length (lhash) != 3)
+        rc = GPG_ERR_INV_OBJ;
+      else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+        rc = GPG_ERR_INV_OBJ;
+      else
+        {
+          void *value;
+          size_t valuelen;
+
+          ctx->hash_algo = get_hash_algo (s, n);
+          if (!ctx->hash_algo)
+            rc = GPG_ERR_DIGEST_ALGO;
+          else if (!(value=sexp_nth_buffer (lhash, 2, &valuelen)))
+            rc = GPG_ERR_INV_OBJ;
+          else if ((valuelen * 8) < valuelen)
+            {
+              xfree (value);
+              rc = GPG_ERR_TOO_LARGE;
+            }
+          else
+            *ret_mpi = mpi_set_opaque (NULL, value, valuelen*8);
+        }
+    }
+  else if (ctx->encoding == PUBKEY_ENC_RAW && lvalue)
+    {
+      /* RFC 6979 may only be used with a hash value and not the
+         MPI based value.  */
+      if (parsed_flags & PUBKEY_FLAG_RFC6979)
+        {
+          rc = GPG_ERR_CONFLICT;
+          goto leave;
+        }
+
+      /* Get the value.  */
+      *ret_mpi = sexp_nth_mpi (lvalue, 1, GCRYMPI_FMT_USG);
+      if (!*ret_mpi)
+        rc = GPG_ERR_INV_OBJ;
+    }
+  else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lvalue
+           && ctx->op == PUBKEY_OP_ENCRYPT)
+    {
+      const void *value;
+      size_t valuelen;
+      gcry_sexp_t list;
+      void *random_override = NULL;
+      size_t random_override_len = 0;
+
+      if ( !(value=sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen )
+        rc = GPG_ERR_INV_OBJ;
+      else
+        {
+          /* Get optional RANDOM-OVERRIDE.
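+             [Editor's note, not part of the original sources: e.g.
+                (data (flags pkcs1) (value 6:secret)
+                      (random-override #4253...#))
+             pins the otherwise random PKCS#1 padding bytes so that
+             known-answer tests become reproducible.]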
*/ + list = sexp_find_token (ldata, "random-override", 0); + if (list) + { + s = sexp_nth_data (list, 1, &n); + if (!s) + rc = GPG_ERR_NO_OBJ; + else if (n > 0) + { + random_override = xtrymalloc (n); + if (!random_override) + rc = gpg_err_code_from_syserror (); + else + { + memcpy (random_override, s, n); + random_override_len = n; + } + } + sexp_release (list); + if (rc) + goto leave; + } + + rc = _gcry_rsa_pkcs1_encode_for_enc (ret_mpi, ctx->nbits, + value, valuelen, + random_override, + random_override_len); + xfree (random_override); + } + } + else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lhash + && (ctx->op == PUBKEY_OP_SIGN || ctx->op == PUBKEY_OP_VERIFY)) + { + if (sexp_length (lhash) != 3) + rc = GPG_ERR_INV_OBJ; + else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n ) + rc = GPG_ERR_INV_OBJ; + else + { + const void * value; + size_t valuelen; + + ctx->hash_algo = get_hash_algo (s, n); + + if (!ctx->hash_algo) + rc = GPG_ERR_DIGEST_ALGO; + else if ( !(value=sexp_nth_data (lhash, 2, &valuelen)) + || !valuelen ) + rc = GPG_ERR_INV_OBJ; + else + rc = _gcry_rsa_pkcs1_encode_for_sig (ret_mpi, ctx->nbits, + value, valuelen, + ctx->hash_algo); + } + } + else if (ctx->encoding == PUBKEY_ENC_PKCS1_RAW && lvalue + && (ctx->op == PUBKEY_OP_SIGN || ctx->op == PUBKEY_OP_VERIFY)) + { + const void * value; + size_t valuelen; + + if (sexp_length (lvalue) != 2) + rc = GPG_ERR_INV_OBJ; + else if ( !(value=sexp_nth_data (lvalue, 1, &valuelen)) + || !valuelen ) + rc = GPG_ERR_INV_OBJ; + else + rc = _gcry_rsa_pkcs1_encode_raw_for_sig (ret_mpi, ctx->nbits, + value, valuelen); + } + else if (ctx->encoding == PUBKEY_ENC_OAEP && lvalue + && ctx->op == PUBKEY_OP_ENCRYPT) + { + const void * value; + size_t valuelen; + + if ( !(value=sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen ) + rc = GPG_ERR_INV_OBJ; + else + { + gcry_sexp_t list; + void *random_override = NULL; + size_t random_override_len = 0; + + /* Get HASH-ALGO. */ + list = sexp_find_token (ldata, "hash-algo", 0); + if (list) + { + s = sexp_nth_data (list, 1, &n); + if (!s) + rc = GPG_ERR_NO_OBJ; + else + { + ctx->hash_algo = get_hash_algo (s, n); + if (!ctx->hash_algo) + rc = GPG_ERR_DIGEST_ALGO; + } + sexp_release (list); + if (rc) + goto leave; + } + + /* Get LABEL. */ + list = sexp_find_token (ldata, "label", 0); + if (list) + { + s = sexp_nth_data (list, 1, &n); + if (!s) + rc = GPG_ERR_NO_OBJ; + else if (n > 0) + { + ctx->label = xtrymalloc (n); + if (!ctx->label) + rc = gpg_err_code_from_syserror (); + else + { + memcpy (ctx->label, s, n); + ctx->labellen = n; + } + } + sexp_release (list); + if (rc) + goto leave; + } + /* Get optional RANDOM-OVERRIDE. 
*/ + list = sexp_find_token (ldata, "random-override", 0); + if (list) + { + s = sexp_nth_data (list, 1, &n); + if (!s) + rc = GPG_ERR_NO_OBJ; + else if (n > 0) + { + random_override = xtrymalloc (n); + if (!random_override) + rc = gpg_err_code_from_syserror (); + else + { + memcpy (random_override, s, n); + random_override_len = n; + } + } + sexp_release (list); + if (rc) + goto leave; + } + + rc = _gcry_rsa_oaep_encode (ret_mpi, ctx->nbits, ctx->hash_algo, + value, valuelen, + ctx->label, ctx->labellen, + random_override, random_override_len); + + xfree (random_override); + } + } + else if (ctx->encoding == PUBKEY_ENC_PSS && lhash + && ctx->op == PUBKEY_OP_SIGN) + { + if (sexp_length (lhash) != 3) + rc = GPG_ERR_INV_OBJ; + else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n ) + rc = GPG_ERR_INV_OBJ; + else + { + const void * value; + size_t valuelen; + void *random_override = NULL; + size_t random_override_len = 0; + + ctx->hash_algo = get_hash_algo (s, n); + + if (!ctx->hash_algo) + rc = GPG_ERR_DIGEST_ALGO; + else if ( !(value=sexp_nth_data (lhash, 2, &valuelen)) + || !valuelen ) + rc = GPG_ERR_INV_OBJ; + else + { + gcry_sexp_t list; + + /* Get SALT-LENGTH. */ + list = sexp_find_token (ldata, "salt-length", 0); + if (list) + { + s = sexp_nth_data (list, 1, &n); + if (!s) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + ctx->saltlen = (unsigned int)strtoul (s, NULL, 10); + sexp_release (list); + } + + /* Get optional RANDOM-OVERRIDE. */ + list = sexp_find_token (ldata, "random-override", 0); + if (list) + { + s = sexp_nth_data (list, 1, &n); + if (!s) + rc = GPG_ERR_NO_OBJ; + else if (n > 0) + { + random_override = xtrymalloc (n); + if (!random_override) + rc = gpg_err_code_from_syserror (); + else + { + memcpy (random_override, s, n); + random_override_len = n; + } + } + sexp_release (list); + if (rc) + goto leave; + } + + /* Encode the data. (NBITS-1 is due to 8.1.1, step 1.) */ + rc = _gcry_rsa_pss_encode (ret_mpi, ctx->nbits - 1, + ctx->hash_algo, + value, valuelen, ctx->saltlen, + random_override, random_override_len); + + xfree (random_override); + } + } + } + else if (ctx->encoding == PUBKEY_ENC_PSS && lhash + && ctx->op == PUBKEY_OP_VERIFY) + { + if (sexp_length (lhash) != 3) + rc = GPG_ERR_INV_OBJ; + else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n ) + rc = GPG_ERR_INV_OBJ; + else + { + ctx->hash_algo = get_hash_algo (s, n); + + if (!ctx->hash_algo) + rc = GPG_ERR_DIGEST_ALGO; + else + { + gcry_sexp_t list; + /* Get SALT-LENGTH. 
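+              [Editor's note, not part of the original sources: e.g.
+                 (data (flags pss) (hash sha256 #..32 octets..#)
+                       (salt-length 2:32))
+              selects a 32-octet salt; without the token the default
+              of 20 from the encoding context applies.]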
*/ + list = sexp_find_token (ldata, "salt-length", 0); + if (list) + { + unsigned long ul; + + s = sexp_nth_data (list, 1, &n); + if (!s) + { + rc = GPG_ERR_NO_OBJ; + sexp_release (list); + goto leave; + } + ul = strtoul (s, NULL, 10); + if (ul > 16384) + { + rc = GPG_ERR_TOO_LARGE; + sexp_release (list); + goto leave; + } + ctx->saltlen = ul; + sexp_release (list); + } + + *ret_mpi = sexp_nth_mpi (lhash, 2, GCRYMPI_FMT_USG); + if (!*ret_mpi) + rc = GPG_ERR_INV_OBJ; + ctx->verify_cmp = pss_verify_cmp; + ctx->verify_arg = *ret_mpi; + } + } + } + else + rc = GPG_ERR_CONFLICT; + + leave: + sexp_release (ldata); + sexp_release (lhash); + sexp_release (lvalue); + + if (!rc) + ctx->flags = parsed_flags; + else + { + xfree (ctx->label); + ctx->label = NULL; + } + + return rc; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/pubkey.c b/libotr/libgcrypt-1.8.7/cipher/pubkey.c new file mode 100644 index 0000000..8ec15fd --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/pubkey.c @@ -0,0 +1,970 @@ +/* pubkey.c - pubkey dispatcher + * Copyright (C) 1998, 1999, 2000, 2002, 2003, 2005, + * 2007, 2008, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "pubkey-internal.h" + + +/* This is the list of the public-key algorithms included in + Libgcrypt. */ +static gcry_pk_spec_t *pubkey_list[] = + { +#if USE_ECC + &_gcry_pubkey_spec_ecc, +#endif +#if USE_RSA + &_gcry_pubkey_spec_rsa, +#endif +#if USE_DSA + &_gcry_pubkey_spec_dsa, +#endif +#if USE_ELGAMAL + &_gcry_pubkey_spec_elg, +#endif + NULL + }; + + +static int +map_algo (int algo) +{ + switch (algo) + { + case GCRY_PK_RSA_E: return GCRY_PK_RSA; + case GCRY_PK_RSA_S: return GCRY_PK_RSA; + case GCRY_PK_ELG_E: return GCRY_PK_ELG; + case GCRY_PK_ECDSA: return GCRY_PK_ECC; + case GCRY_PK_ECDH: return GCRY_PK_ECC; + default: return algo; + } +} + + +/* Return the spec structure for the public key algorithm ALGO. For + an unknown algorithm NULL is returned. */ +static gcry_pk_spec_t * +spec_from_algo (int algo) +{ + int idx; + gcry_pk_spec_t *spec; + + algo = map_algo (algo); + + for (idx = 0; (spec = pubkey_list[idx]); idx++) + if (algo == spec->algo) + return spec; + return NULL; +} + + +/* Return the spec structure for the public key algorithm with NAME. + For an unknown name NULL is returned. 
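+ */
+
+/* [Editor's addition -- an illustrative sketch, not part of the
+   original libgcrypt sources.  The public wrappers further below
+   resolve algorithm names through this table; for instance:  ] */
+#include <stdio.h>
+#include <gcrypt.h>
+
+static void
+name_mapping_demo (void)
+{
+  int algo = gcry_pk_map_name ("openpgp-dsa");        /* an alias of "dsa" */
+  printf ("%d %s\n", algo, gcry_pk_algo_name (algo)); /* e.g. "17 DSA"     */
+}
+
+/* Return the spec structure for the public key algorithm with NAME.
+   For an unknown name NULL is returned.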
*/ +static gcry_pk_spec_t * +spec_from_name (const char *name) +{ + gcry_pk_spec_t *spec; + int idx; + const char **aliases; + + for (idx=0; (spec = pubkey_list[idx]); idx++) + { + if (!stricmp (name, spec->name)) + return spec; + for (aliases = spec->aliases; *aliases; aliases++) + if (!stricmp (name, *aliases)) + return spec; + } + + return NULL; +} + + + +/* Given the s-expression SEXP with the first element be either + * "private-key" or "public-key" return the spec structure for it. We + * look through the list to find a list beginning with "private-key" + * or "public-key" - the first one found is used. If WANT_PRIVATE is + * set the function will only succeed if a private key has been given. + * On success the spec is stored at R_SPEC. On error NULL is stored + * at R_SPEC and an error code returned. If R_PARMS is not NULL and + * the function returns success, the parameter list below + * "private-key" or "public-key" is stored there and the caller must + * call gcry_sexp_release on it. + */ +static gcry_err_code_t +spec_from_sexp (gcry_sexp_t sexp, int want_private, + gcry_pk_spec_t **r_spec, gcry_sexp_t *r_parms) +{ + gcry_sexp_t list, l2; + char *name; + gcry_pk_spec_t *spec; + + *r_spec = NULL; + if (r_parms) + *r_parms = NULL; + + /* Check that the first element is valid. If we are looking for a + public key but a private key was supplied, we allow the use of + the private key anyway. The rationale for this is that the + private key is a superset of the public key. */ + list = sexp_find_token (sexp, want_private? "private-key":"public-key", 0); + if (!list && !want_private) + list = sexp_find_token (sexp, "private-key", 0); + if (!list) + return GPG_ERR_INV_OBJ; /* Does not contain a key object. */ + + l2 = sexp_cadr (list); + sexp_release (list); + list = l2; + name = sexp_nth_string (list, 0); + if (!name) + { + sexp_release ( list ); + return GPG_ERR_INV_OBJ; /* Invalid structure of object. */ + } + spec = spec_from_name (name); + xfree (name); + if (!spec) + { + sexp_release (list); + return GPG_ERR_PUBKEY_ALGO; /* Unknown algorithm. */ + } + *r_spec = spec; + if (r_parms) + *r_parms = list; + else + sexp_release (list); + return 0; +} + + + +/* Disable the use of the algorithm ALGO. This is not thread safe and + should thus be called early. */ +static void +disable_pubkey_algo (int algo) +{ + gcry_pk_spec_t *spec = spec_from_algo (algo); + + if (spec) + spec->flags.disabled = 1; +} + + + +/* + * Map a string to the pubkey algo + */ +int +_gcry_pk_map_name (const char *string) +{ + gcry_pk_spec_t *spec; + + if (!string) + return 0; + spec = spec_from_name (string); + if (!spec) + return 0; + if (spec->flags.disabled) + return 0; + return spec->algo; +} + + +/* Map the public key algorithm whose ID is contained in ALGORITHM to + a string representation of the algorithm name. For unknown + algorithm IDs this functions returns "?". */ +const char * +_gcry_pk_algo_name (int algo) +{ + gcry_pk_spec_t *spec; + + spec = spec_from_algo (algo); + if (spec) + return spec->name; + return "?"; +} + + +/**************** + * A USE of 0 means: don't care. + */ +static gcry_err_code_t +check_pubkey_algo (int algo, unsigned use) +{ + gcry_err_code_t err = 0; + gcry_pk_spec_t *spec; + + spec = spec_from_algo (algo); + if (spec) + { + if (((use & GCRY_PK_USAGE_SIGN) + && (! (spec->use & GCRY_PK_USAGE_SIGN))) + || ((use & GCRY_PK_USAGE_ENCR) + && (! 
(spec->use & GCRY_PK_USAGE_ENCR)))
+        err = GPG_ERR_WRONG_PUBKEY_ALGO;
+    }
+  else
+    err = GPG_ERR_PUBKEY_ALGO;
+
+  return err;
+}
+
+
+/****************
+ * Return the number of public key material numbers.
+ */
+static int
+pubkey_get_npkey (int algo)
+{
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+  return spec? strlen (spec->elements_pkey) : 0;
+}
+
+
+/****************
+ * Return the number of secret key material numbers.
+ */
+static int
+pubkey_get_nskey (int algo)
+{
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+  return spec? strlen (spec->elements_skey) : 0;
+}
+
+
+/****************
+ * Return the number of signature material numbers.
+ */
+static int
+pubkey_get_nsig (int algo)
+{
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+  return spec? strlen (spec->elements_sig) : 0;
+}
+
+/****************
+ * Return the number of encryption material numbers.
+ */
+static int
+pubkey_get_nenc (int algo)
+{
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+  return spec? strlen (spec->elements_enc) : 0;
+}
+
+
+
+/*
+   Do a PK encrypt operation.
+
+   Caller has to provide a public key as the SEXP pkey and data as a
+   SEXP with just one MPI in it.  Alternatively S_DATA might be a
+   complex S-Expression, similar to the one used for signature
+   verification.  This provides a flag which allows handling PKCS#1
+   block type 2 padding.  The function returns a sexp which may be
+   passed to pk_decrypt.
+
+   Returns: 0 or an errorcode.
+
+   s_data = See comment for _gcry_pk_util_data_to_mpi
+   s_pkey = <key-as-defined-in-sexp_to_key>
+   r_ciph = (enc-val
+              (<algo>
+                (<param_name1> <mpi>)
+                ...
+                (<param_namen> <mpi>)
+              ))
+*/
+gcry_err_code_t
+_gcry_pk_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t s_pkey)
+{
+  gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
+
+  *r_ciph = NULL;
+
+  rc = spec_from_sexp (s_pkey, 0, &spec, &keyparms);
+  if (rc)
+    goto leave;
+
+  if (spec->encrypt)
+    rc = spec->encrypt (r_ciph, s_data, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (keyparms);
+  return rc;
+}
+
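+
+/* [Editor's addition -- an illustrative sketch, not part of the
+   original libgcrypt sources.  A caller-side round trip over the
+   encrypt and decrypt dispatchers; PK/SK are assumed to be a
+   matching RSA key pair.  Whether R_PLAIN comes back as (value ...)
+   or as a bare MPI follows the flag rules documented with
+   _gcry_pk_decrypt just below.  ] */
+#include <gcrypt.h>
+
+static gcry_error_t
+pk_roundtrip_demo (gcry_sexp_t pk, gcry_sexp_t sk,
+                   const unsigned char *msg, size_t msglen)
+{
+  gcry_sexp_t data = NULL, ciph = NULL, plain = NULL;
+  gcry_error_t err;
+
+  err = gcry_sexp_build (&data, NULL,
+                         "(data (flags pkcs1) (value %b))",
+                         (int)msglen, msg);
+  if (!err)
+    err = gcry_pk_encrypt (&ciph, data, pk);
+  if (!err)
+    err = gcry_pk_decrypt (&plain, ciph, sk);
+
+  gcry_sexp_release (data);
+  gcry_sexp_release (ciph);
+  gcry_sexp_release (plain);
+  return err;
+}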
+
+/*
+   Do a PK decrypt operation.
+
+   Caller has to provide a secret key as the SEXP skey and data in a
+   format as created by gcry_pk_encrypt.  For historic reasons the
+   function returns simply an MPI as an S-expression part; this is
+   deprecated and the new method should be used which returns a real
+   S-expression; this is selected by adding at least an empty flags
+   list to S_DATA.
+
+   Returns: 0 or an errorcode.
+
+   s_data = (enc-val
+              [(flags [raw, pkcs1, oaep])]
+              (<algo>
+                (<param_name1> <mpi>)
+                ...
+                (<param_namen> <mpi>)
+              ))
+   s_skey = <key-as-defined-in-sexp_to_key>
+   r_plain= Either an incomplete S-expression without the parentheses
+            or if the flags list is used (even if empty) a real S-expression:
+            (value PLAIN).  In raw mode (or no flags given) the returned value
+            is to be interpreted as a signed MPI, thus it may have an extra
+            leading zero octet even if not included in the original data.
+            With pkcs1 or oaep decoding enabled the returned value is a
+            verbatim octet string.
+ */
+gcry_err_code_t
+_gcry_pk_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t s_skey)
+{
+  gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
+
+  *r_plain = NULL;
+
+  rc = spec_from_sexp (s_skey, 1, &spec, &keyparms);
+  if (rc)
+    goto leave;
+
+  if (spec->decrypt)
+    rc = spec->decrypt (r_plain, s_data, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (keyparms);
+  return rc;
+}
+
+
+
+/*
+   Create a signature.
+
+   Caller has to provide a secret key as the SEXP skey and data
+   expressed as a SEXP list hash with only one element which should
+   instantly be available as an MPI.  Alternatively the structure given
+   below may be used for S_HASH; it provides the ability to pass flags
+   to the operation; the flags defined by now are "pkcs1" which does
+   PKCS#1 block type 1 style padding and "pss" for PSS encoding.
+
+   Returns: 0 or an errorcode.
+            In case of 0 the function returns a new SEXP with the
+            signature value; the structure of this signature depends on the
+            other arguments but is always suitable to be passed to
+            gcry_pk_verify.
+
+   s_hash = See comment for _gcry_pk_util_data_to_mpi
+
+   s_skey = <key-as-defined-in-sexp_to_key>
+   r_sig  = (sig-val
+              (<algo>
+                (<param_name1> <mpi>)
+                ...
+                (<param_namen> <mpi>))
+             [(hash algo)])
+
+  Note that (hash algo) in R_SIG is not used.
+*/
+gcry_err_code_t
+_gcry_pk_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_hash, gcry_sexp_t s_skey)
+{
+  gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
+
+  *r_sig = NULL;
+
+  rc = spec_from_sexp (s_skey, 1, &spec, &keyparms);
+  if (rc)
+    goto leave;
+
+  if (spec->sign)
+    rc = spec->sign (r_sig, s_hash, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (keyparms);
+  return rc;
+}
+
+
+/*
+   Verify a signature.
+
+   Caller has to supply the public key PKEY, the signature SIG and the
+   hash value DATA.  The public key has to be a standard public key
+   given as an S-Exp, SIG is an S-Exp as returned from gcry_pk_sign,
+   and DATA must be an S-Exp like the one used for signing.  */
+gcry_err_code_t
+_gcry_pk_verify (gcry_sexp_t s_sig, gcry_sexp_t s_hash, gcry_sexp_t s_pkey)
+{
+  gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
+
+  rc = spec_from_sexp (s_pkey, 0, &spec, &keyparms);
+  if (rc)
+    goto leave;
+
+  if (spec->verify)
+    rc = spec->verify (s_sig, s_hash, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (keyparms);
+  return rc;
+}
+
+
+/*
+   Test a key.
+
+   This may be used either for a public or a secret key to see whether
+   the internal structure is okay.
+
+   Returns: 0 or an errorcode.
+
+   NOTE: We currently support only secret key checking.  */
+gcry_err_code_t
+_gcry_pk_testkey (gcry_sexp_t s_key)
+{
+  gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
+
+  rc = spec_from_sexp (s_key, 1, &spec, &keyparms);
+  if (rc)
+    goto leave;
+
+  if (spec->check_secret_key)
+    rc = spec->check_secret_key (keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (keyparms);
+  return rc;
+}
+
+
+/*
+   Create a public key pair and return it in r_key.
+   How the key is created depends on s_parms:
+   (genkey
+    (algo
+      (parameter_name_1 ....)
+      ....
+      (parameter_name_n ....)
+   ))
+   The key is returned in a format depending on the
+   algorithm.  Both the public and the private key are returned,
+   and optionally some additional information.
+ For elgamal we return this structure: + (key-data + (public-key + (elg + (p <mpi>) + (g <mpi>) + (y <mpi>) + ) + ) + (private-key + (elg + (p <mpi>) + (g <mpi>) + (y <mpi>) + (x <mpi>) + ) + ) + (misc-key-info + (pm1-factors n1 n2 ... nn) + )) + */ +gcry_err_code_t +_gcry_pk_genkey (gcry_sexp_t *r_key, gcry_sexp_t s_parms) +{ + gcry_pk_spec_t *spec = NULL; + gcry_sexp_t list = NULL; + gcry_sexp_t l2 = NULL; + char *name = NULL; + gcry_err_code_t rc; + + *r_key = NULL; + + list = sexp_find_token (s_parms, "genkey", 0); + if (!list) + { + rc = GPG_ERR_INV_OBJ; /* Does not contain genkey data. */ + goto leave; + } + + l2 = sexp_cadr (list); + sexp_release (list); + list = l2; + l2 = NULL; + if (! list) + { + rc = GPG_ERR_NO_OBJ; /* No cdr for the genkey. */ + goto leave; + } + + name = _gcry_sexp_nth_string (list, 0); + if (!name) + { + rc = GPG_ERR_INV_OBJ; /* Algo string missing. */ + goto leave; + } + + spec = spec_from_name (name); + xfree (name); + name = NULL; + if (!spec) + { + rc = GPG_ERR_PUBKEY_ALGO; /* Unknown algorithm. */ + goto leave; + } + + if (spec->generate) + rc = spec->generate (list, r_key); + else + rc = GPG_ERR_NOT_IMPLEMENTED; + + leave: + sexp_release (list); + xfree (name); + sexp_release (l2); + + return rc; +} + + +/* + Get the number of nbits from the public key. + + Hmmm: Should we have really this function or is it better to have a + more general function to retrieve different properties of the key? */ +unsigned int +_gcry_pk_get_nbits (gcry_sexp_t key) +{ + gcry_pk_spec_t *spec; + gcry_sexp_t parms; + unsigned int nbits; + + /* Parsing KEY might be considered too much overhead. For example + for RSA we would only need to look at P and stop parsing right + away. However, with ECC things are more complicate in that only + a curve name might be specified. Thus we need to tear the sexp + apart. */ + + if (spec_from_sexp (key, 0, &spec, &parms)) + return 0; /* Error - 0 is a suitable indication for that. */ + + nbits = spec->get_nbits (parms); + sexp_release (parms); + return nbits; +} + + +/* Return the so called KEYGRIP which is the SHA-1 hash of the public + key parameters expressed in a way depending on the algorithm. + + ARRAY must either be 20 bytes long or NULL; in the latter case a + newly allocated array of that size is returned, otherwise ARRAY or + NULL is returned to indicate an error which is most likely an + unknown algorithm. The function accepts public or secret keys. */ +unsigned char * +_gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char *array) +{ + gcry_sexp_t list = NULL; + gcry_sexp_t l2 = NULL; + gcry_pk_spec_t *spec = NULL; + const char *s; + char *name = NULL; + int idx; + const char *elems; + gcry_md_hd_t md = NULL; + int okay = 0; + + /* Check that the first element is valid. */ + list = sexp_find_token (key, "public-key", 0); + if (! list) + list = sexp_find_token (key, "private-key", 0); + if (! list) + list = sexp_find_token (key, "protected-private-key", 0); + if (! list) + list = sexp_find_token (key, "shadowed-private-key", 0); + if (! list) + return NULL; /* No public- or private-key object. */ + + l2 = sexp_cadr (list); + sexp_release (list); + list = l2; + l2 = NULL; + + name = _gcry_sexp_nth_string (list, 0); + if (!name) + goto fail; /* Invalid structure of object. */ + + spec = spec_from_name (name); + if (!spec) + goto fail; /* Unknown algorithm. */ + + elems = spec->elements_grip; + if (!elems) + goto fail; /* No grip parameter. 
*/ + + if (_gcry_md_open (&md, GCRY_MD_SHA1, 0)) + goto fail; + + if (spec->comp_keygrip) + { + /* Module specific method to compute a keygrip. */ + if (spec->comp_keygrip (md, list)) + goto fail; + } + else + { + /* Generic method to compute a keygrip. */ + for (idx = 0, s = elems; *s; s++, idx++) + { + const char *data; + size_t datalen; + char buf[30]; + + l2 = sexp_find_token (list, s, 1); + if (! l2) + goto fail; + data = sexp_nth_data (l2, 1, &datalen); + if (! data) + goto fail; + + snprintf (buf, sizeof buf, "(1:%c%u:", *s, (unsigned int)datalen); + _gcry_md_write (md, buf, strlen (buf)); + _gcry_md_write (md, data, datalen); + sexp_release (l2); + l2 = NULL; + _gcry_md_write (md, ")", 1); + } + } + + if (!array) + { + array = xtrymalloc (20); + if (! array) + goto fail; + } + + memcpy (array, _gcry_md_read (md, GCRY_MD_SHA1), 20); + okay = 1; + + fail: + xfree (name); + sexp_release (l2); + _gcry_md_close (md); + sexp_release (list); + return okay? array : NULL; +} + + + +const char * +_gcry_pk_get_curve (gcry_sexp_t key, int iterator, unsigned int *r_nbits) +{ + const char *result = NULL; + gcry_pk_spec_t *spec; + gcry_sexp_t keyparms = NULL; + + if (r_nbits) + *r_nbits = 0; + + if (key) + { + iterator = 0; + + if (spec_from_sexp (key, 0, &spec, &keyparms)) + return NULL; + } + else + { + spec = spec_from_name ("ecc"); + if (!spec) + return NULL; + } + + if (spec->get_curve) + result = spec->get_curve (keyparms, iterator, r_nbits); + + sexp_release (keyparms); + return result; +} + + + +gcry_sexp_t +_gcry_pk_get_param (int algo, const char *name) +{ + gcry_sexp_t result = NULL; + gcry_pk_spec_t *spec = NULL; + + algo = map_algo (algo); + + if (algo != GCRY_PK_ECC) + return NULL; + + spec = spec_from_name ("ecc"); + if (spec) + { + if (spec && spec->get_curve_param) + result = spec->get_curve_param (name); + } + return result; +} + + + +gcry_err_code_t +_gcry_pk_ctl (int cmd, void *buffer, size_t buflen) +{ + gcry_err_code_t rc = 0; + + switch (cmd) + { + case GCRYCTL_DISABLE_ALGO: + /* This one expects a buffer pointing to an integer with the + algo number. */ + if ((! buffer) || (buflen != sizeof (int))) + rc = GPG_ERR_INV_ARG; + else + disable_pubkey_algo (*((int *) buffer)); + break; + + default: + rc = GPG_ERR_INV_OP; + } + + return rc; +} + + +/* Return information about the given algorithm + + WHAT selects the kind of information returned: + + GCRYCTL_TEST_ALGO: + Returns 0 when the specified algorithm is available for use. + Buffer must be NULL, nbytes may have the address of a variable + with the required usage of the algorithm. It may be 0 for don't + care or a combination of the GCRY_PK_USAGE_xxx flags; + + GCRYCTL_GET_ALGO_USAGE: + Return the usage flags for the given algo. An invalid algo + returns 0. Disabled algos are ignored here because we + only want to know whether the algo is at all capable of + the usage. + + Note: Because this function is in most cases used to return an + integer value, we can make it easier for the caller to just look at + the return value. The caller will in all cases consult the value + and thereby detecting whether a error occurred or not (i.e. while + checking the block size) */ +gcry_err_code_t +_gcry_pk_algo_info (int algorithm, int what, void *buffer, size_t *nbytes) +{ + gcry_err_code_t rc = 0; + + switch (what) + { + case GCRYCTL_TEST_ALGO: + { + int use = nbytes ? 
+
+
+/* Return information about the given algorithm.
+
+   WHAT selects the kind of information returned:
+
+    GCRYCTL_TEST_ALGO:
+        Returns 0 if the specified algorithm is available for use.
+        BUFFER must be NULL; NBYTES may hold the address of a variable
+        with the required usage of the algorithm.  It may be 0 for
+        "don't care" or a combination of the GCRY_PK_USAGE_xxx flags.
+
+    GCRYCTL_GET_ALGO_USAGE:
+        Return the usage flags for the given algo.  An invalid algo
+        returns 0.  Disabled algos are ignored here because we only
+        want to know whether the algo is at all capable of the usage.
+
+   Note: Because this function is in most cases used to return an
+   integer value, we can make it easier for the caller to just look at
+   the return value.  The caller will in all cases consult the value
+   and thereby detect whether an error occurred or not (i.e. while
+   checking the block size).  */
+gcry_err_code_t
+_gcry_pk_algo_info (int algorithm, int what, void *buffer, size_t *nbytes)
+{
+  gcry_err_code_t rc = 0;
+
+  switch (what)
+    {
+    case GCRYCTL_TEST_ALGO:
+      {
+        int use = nbytes ? *nbytes : 0;
+        if (buffer)
+          rc = GPG_ERR_INV_ARG;
+        else if (check_pubkey_algo (algorithm, use))
+          rc = GPG_ERR_PUBKEY_ALGO;
+        break;
+      }
+
+    case GCRYCTL_GET_ALGO_USAGE:
+      {
+        gcry_pk_spec_t *spec;
+
+        spec = spec_from_algo (algorithm);
+        *nbytes = spec? spec->use : 0;
+        break;
+      }
+
+    case GCRYCTL_GET_ALGO_NPKEY:
+      {
+        /* FIXME?  */
+        int npkey = pubkey_get_npkey (algorithm);
+        *nbytes = npkey;
+        break;
+      }
+    case GCRYCTL_GET_ALGO_NSKEY:
+      {
+        /* FIXME?  */
+        int nskey = pubkey_get_nskey (algorithm);
+        *nbytes = nskey;
+        break;
+      }
+    case GCRYCTL_GET_ALGO_NSIGN:
+      {
+        /* FIXME?  */
+        int nsign = pubkey_get_nsig (algorithm);
+        *nbytes = nsign;
+        break;
+      }
+    case GCRYCTL_GET_ALGO_NENCR:
+      {
+        /* FIXME?  */
+        int nencr = pubkey_get_nenc (algorithm);
+        *nbytes = nencr;
+        break;
+      }
+
+    default:
+      rc = GPG_ERR_INV_OP;
+    }
+
+  return rc;
+}
+
+
+/* Return an S-expression representing the context CTX.  Depending on
+   the state of that context, the S-expression may either be a public
+   key, a private key or any other object used with public key
+   operations.  On success a new S-expression is stored at R_SEXP and
+   0 is returned; on error NULL is stored there and an error code is
+   returned.  MODE is either 0 or one of the GCRY_PK_GET_xxx values.
+
+   As of now it only supports certain ECC operations because a context
+   object is right now only defined for ECC.  Over time this function
+   will be extended to cover more algorithms.  Note also that the name
+   of the function is gcry_pubkey_xxx and not gcry_pk_xxx.  The idea
+   is that we will eventually provide variants of the existing
+   gcry_pk_xxx functions which will take a context parameter.  */
+gcry_err_code_t
+_gcry_pubkey_get_sexp (gcry_sexp_t *r_sexp, int mode, gcry_ctx_t ctx)
+{
+  mpi_ec_t ec;
+
+  if (!r_sexp)
+    return GPG_ERR_INV_VALUE;
+  *r_sexp = NULL;
+  switch (mode)
+    {
+    case 0:
+    case GCRY_PK_GET_PUBKEY:
+    case GCRY_PK_GET_SECKEY:
+      break;
+    default:
+      return GPG_ERR_INV_VALUE;
+    }
+  if (!ctx)
+    return GPG_ERR_NO_CRYPT_CTX;
+
+  ec = _gcry_ctx_find_pointer (ctx, CONTEXT_TYPE_EC);
+  if (ec)
+    return _gcry_pk_ecc_get_sexp (r_sexp, mode, ec);
+
+  return GPG_ERR_WRONG_CRYPT_CTX;
+}
+
+
+
+/* Explicitly initialize this module.  */
+gcry_err_code_t
+_gcry_pk_init (void)
+{
+  if (fips_mode())
+    {
+      /* Disable algorithms that are disallowed in FIPS mode.  */
+      int idx;
+      gcry_pk_spec_t *spec;
+
+      for (idx = 0; (spec = pubkey_list[idx]); idx++)
+        if (!spec->flags.fips)
+          spec->flags.disabled = 1;
+    }
+
+  return 0;
+}
+
+
+/* Run the selftests for pubkey algorithm ALGO with optional reporting
+   function REPORT.  */
+gpg_error_t
+_gcry_pk_selftest (int algo, int extended, selftest_report_func_t report)
+{
+  gcry_err_code_t ec;
+  gcry_pk_spec_t *spec;
+
+  algo = map_algo (algo);
+  spec = spec_from_algo (algo);
+  if (spec && !spec->flags.disabled && spec->selftest)
+    ec = spec->selftest (algo, extended, report);
+  else
+    {
+      ec = GPG_ERR_PUBKEY_ALGO;
+      /* Fixme: We need to change the report function to allow passing
+         of an encryption mode (e.g. pkcs1, ecdsa, or ecdh).  */
+      if (report)
+        report ("pubkey", algo, "module",
+                spec && !spec->flags.disabled?
+                "no selftest available" :
+                spec? "algorithm disabled" :
+                "algorithm not found");
+    }
+
+  return gpg_error (ec);
+}
diff --git a/libotr/libgcrypt-1.8.7/cipher/rfc2268.c b/libotr/libgcrypt-1.8.7/cipher/rfc2268.c
new file mode 100644
index 0000000..aed8cad
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/rfc2268.c
@@ -0,0 +1,375 @@
+/* rfc2268.c - The cipher described in rfc2268; aka Ron's Cipher 2.
+ * Copyright (C) 2003 Nikos Mavroyanopoulos + * Copyright (C) 2004 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/* This implementation was written by Nikos Mavroyanopoulos for GNUTLS + * as a Libgcrypt module (gnutls/lib/x509/rc2.c) and later adapted for + * direct use by Libgcrypt by Werner Koch. This implementation is + * only useful for pkcs#12 decryption. + * + * The implementation here is based on Peter Gutmann's RRC.2 paper. + */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "g10lib.h" +#include "types.h" +#include "cipher.h" + +#define RFC2268_BLOCKSIZE 8 + +typedef struct +{ + u16 S[64]; +} RFC2268_context; + +static const unsigned char rfc2268_sbox[] = { + 217, 120, 249, 196, 25, 221, 181, 237, + 40, 233, 253, 121, 74, 160, 216, 157, + 198, 126, 55, 131, 43, 118, 83, 142, + 98, 76, 100, 136, 68, 139, 251, 162, + 23, 154, 89, 245, 135, 179, 79, 19, + 97, 69, 109, 141, 9, 129, 125, 50, + 189, 143, 64, 235, 134, 183, 123, 11, + 240, 149, 33, 34, 92, 107, 78, 130, + 84, 214, 101, 147, 206, 96, 178, 28, + 115, 86, 192, 20, 167, 140, 241, 220, + 18, 117, 202, 31, 59, 190, 228, 209, + 66, 61, 212, 48, 163, 60, 182, 38, + 111, 191, 14, 218, 70, 105, 7, 87, + 39, 242, 29, 155, 188, 148, 67, 3, + 248, 17, 199, 246, 144, 239, 62, 231, + 6, 195, 213, 47, 200, 102, 30, 215, + 8, 232, 234, 222, 128, 82, 238, 247, + 132, 170, 114, 172, 53, 77, 106, 42, + 150, 26, 210, 113, 90, 21, 73, 116, + 75, 159, 208, 94, 4, 24, 164, 236, + 194, 224, 65, 110, 15, 81, 203, 204, + 36, 145, 175, 80, 161, 244, 112, 57, + 153, 124, 58, 133, 35, 184, 180, 122, + 252, 2, 54, 91, 37, 85, 151, 49, + 45, 93, 250, 152, 227, 138, 146, 174, + 5, 223, 41, 16, 103, 108, 186, 201, + 211, 0, 230, 207, 225, 158, 168, 44, + 99, 22, 1, 63, 88, 226, 137, 169, + 13, 56, 52, 27, 171, 51, 255, 176, + 187, 72, 12, 95, 185, 177, 205, 46, + 197, 243, 219, 71, 229, 165, 156, 119, + 10, 166, 32, 104, 254, 127, 193, 173 +}; + +#define rotl16(x,n) (((x) << ((u16)(n))) | ((x) >> (16 - (u16)(n)))) +#define rotr16(x,n) (((x) >> ((u16)(n))) | ((x) << (16 - (u16)(n)))) + +static const char *selftest (void); + + +static void +do_encrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf) +{ + RFC2268_context *ctx = context; + register int i, j; + u16 word0 = 0, word1 = 0, word2 = 0, word3 = 0; + + word0 = (word0 << 8) | inbuf[1]; + word0 = (word0 << 8) | inbuf[0]; + word1 = (word1 << 8) | inbuf[3]; + word1 = (word1 << 8) | inbuf[2]; + word2 = (word2 << 8) | inbuf[5]; + word2 = (word2 << 8) | inbuf[4]; + word3 = (word3 << 8) | inbuf[7]; + word3 = (word3 << 8) | inbuf[6]; + + for (i = 0; i < 16; i++) + { + j = i * 4; + /* For some reason I cannot combine those steps. 
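+         (Editor's note: each pass of this loop is one RC2 "MIX" round
+         as given in RFC 2268: R[i] = rotl16 (R[i] + K[j++]
+         + (R[i-1] & R[i-2]) + (~R[i-1] & R[i-3]), s[i]) with
+         s = {1,2,3,5} and word indices taken mod 4.  The i == 4 and
+         i == 10 blocks below are the "MASH" rounds, R[i] +=
+         K[R[i-1] & 63], which yields the RFC's 5-mix, mash, 6-mix,
+         mash, 5-mix schedule.)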
*/ + word0 += (word1 & ~word3) + (word2 & word3) + ctx->S[j]; + word0 = rotl16(word0, 1); + + word1 += (word2 & ~word0) + (word3 & word0) + ctx->S[j + 1]; + word1 = rotl16(word1, 2); + + word2 += (word3 & ~word1) + (word0 & word1) + ctx->S[j + 2]; + word2 = rotl16(word2, 3); + + word3 += (word0 & ~word2) + (word1 & word2) + ctx->S[j + 3]; + word3 = rotl16(word3, 5); + + if (i == 4 || i == 10) + { + word0 += ctx->S[word3 & 63]; + word1 += ctx->S[word0 & 63]; + word2 += ctx->S[word1 & 63]; + word3 += ctx->S[word2 & 63]; + } + + } + + outbuf[0] = word0 & 255; + outbuf[1] = word0 >> 8; + outbuf[2] = word1 & 255; + outbuf[3] = word1 >> 8; + outbuf[4] = word2 & 255; + outbuf[5] = word2 >> 8; + outbuf[6] = word3 & 255; + outbuf[7] = word3 >> 8; +} + +static unsigned int +encrypt_block (void *context, unsigned char *outbuf, const unsigned char *inbuf) +{ + do_encrypt (context, outbuf, inbuf); + return /*burn_stack*/ (4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 4); +} + +static void +do_decrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf) +{ + RFC2268_context *ctx = context; + register int i, j; + u16 word0 = 0, word1 = 0, word2 = 0, word3 = 0; + + word0 = (word0 << 8) | inbuf[1]; + word0 = (word0 << 8) | inbuf[0]; + word1 = (word1 << 8) | inbuf[3]; + word1 = (word1 << 8) | inbuf[2]; + word2 = (word2 << 8) | inbuf[5]; + word2 = (word2 << 8) | inbuf[4]; + word3 = (word3 << 8) | inbuf[7]; + word3 = (word3 << 8) | inbuf[6]; + + for (i = 15; i >= 0; i--) + { + j = i * 4; + + word3 = rotr16(word3, 5); + word3 -= (word0 & ~word2) + (word1 & word2) + ctx->S[j + 3]; + + word2 = rotr16(word2, 3); + word2 -= (word3 & ~word1) + (word0 & word1) + ctx->S[j + 2]; + + word1 = rotr16(word1, 2); + word1 -= (word2 & ~word0) + (word3 & word0) + ctx->S[j + 1]; + + word0 = rotr16(word0, 1); + word0 -= (word1 & ~word3) + (word2 & word3) + ctx->S[j]; + + if (i == 5 || i == 11) + { + word3 = word3 - ctx->S[word2 & 63]; + word2 = word2 - ctx->S[word1 & 63]; + word1 = word1 - ctx->S[word0 & 63]; + word0 = word0 - ctx->S[word3 & 63]; + } + + } + + outbuf[0] = word0 & 255; + outbuf[1] = word0 >> 8; + outbuf[2] = word1 & 255; + outbuf[3] = word1 >> 8; + outbuf[4] = word2 & 255; + outbuf[5] = word2 >> 8; + outbuf[6] = word3 & 255; + outbuf[7] = word3 >> 8; +} + +static unsigned int +decrypt_block (void *context, unsigned char *outbuf, const unsigned char *inbuf) +{ + do_decrypt (context, outbuf, inbuf); + return /*burn_stack*/ (4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 4); +} + + +static gpg_err_code_t +setkey_core (void *context, const unsigned char *key, unsigned int keylen, int with_phase2) +{ + static int initialized; + static const char *selftest_failed; + RFC2268_context *ctx = context; + unsigned int i; + unsigned char *S, x; + int len; + int bits = keylen * 8; + + if (!initialized) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("RFC2268 selftest failed (%s).\n", selftest_failed); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + if (keylen < 40 / 8) /* We want at least 40 bits. */ + return GPG_ERR_INV_KEYLEN; + + S = (unsigned char *) ctx->S; + + for (i = 0; i < keylen; i++) + S[i] = key[i]; + + for (i = keylen; i < 128; i++) + S[i] = rfc2268_sbox[(S[i - keylen] + S[i - 1]) & 255]; + + S[0] = rfc2268_sbox[S[0]]; + + /* Phase 2 - reduce effective key size to "bits". This was not + * discussed in Gutmann's paper. I've copied that from the public + * domain code posted in sci.crypt. 
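+   *
+   * (Editor's note: this matches the effective-key-bits step of
+   * RFC 2268 section 2: "len" is T8 = (bits+7)/8, the number of
+   * effective key bytes, and the mask 255 >> (7 & -bits) equals the
+   * RFC's TM = 255 >> (8*T8 - bits), i.e. it strips the excess high
+   * bits of the first effective key byte before the backward mixing
+   * pass below.)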
*/ + if (with_phase2) + { + len = (bits + 7) >> 3; + i = 128 - len; + x = rfc2268_sbox[S[i] & (255 >> (7 & -bits))]; + S[i] = x; + + while (i--) + { + x = rfc2268_sbox[x ^ S[i + len]]; + S[i] = x; + } + } + + /* Make the expanded key, endian independent. */ + for (i = 0; i < 64; i++) + ctx->S[i] = ( (u16) S[i * 2] | (((u16) S[i * 2 + 1]) << 8)); + + return 0; +} + +static gpg_err_code_t +do_setkey (void *context, const unsigned char *key, unsigned int keylen) +{ + return setkey_core (context, key, keylen, 1); +} + +static const char * +selftest (void) +{ + RFC2268_context ctx; + unsigned char scratch[16]; + + /* Test vectors from Peter Gutmann's paper. */ + static unsigned char key_1[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + }; + static unsigned char plaintext_1[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const unsigned char ciphertext_1[] = + { 0x1C, 0x19, 0x8A, 0x83, 0x8D, 0xF0, 0x28, 0xB7 }; + + static unsigned char key_2[] = + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F + }; + static unsigned char plaintext_2[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static unsigned char ciphertext_2[] = + { 0x50, 0xDC, 0x01, 0x62, 0xBD, 0x75, 0x7F, 0x31 }; + + /* This one was checked against libmcrypt's RFC2268. */ + static unsigned char key_3[] = + { 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + }; + static unsigned char plaintext_3[] = + { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static unsigned char ciphertext_3[] = + { 0x8f, 0xd1, 0x03, 0x89, 0x33, 0x6b, 0xf9, 0x5e }; + + + /* First test. */ + setkey_core (&ctx, key_1, sizeof(key_1), 0); + do_encrypt (&ctx, scratch, plaintext_1); + + if (memcmp (scratch, ciphertext_1, sizeof(ciphertext_1))) + return "RFC2268 encryption test 1 failed."; + + setkey_core (&ctx, key_1, sizeof(key_1), 0); + do_decrypt (&ctx, scratch, scratch); + if (memcmp (scratch, plaintext_1, sizeof(plaintext_1))) + return "RFC2268 decryption test 1 failed."; + + /* Second test. */ + setkey_core (&ctx, key_2, sizeof(key_2), 0); + do_encrypt (&ctx, scratch, plaintext_2); + if (memcmp (scratch, ciphertext_2, sizeof(ciphertext_2))) + return "RFC2268 encryption test 2 failed."; + + setkey_core (&ctx, key_2, sizeof(key_2), 0); + do_decrypt (&ctx, scratch, scratch); + if (memcmp (scratch, plaintext_2, sizeof(plaintext_2))) + return "RFC2268 decryption test 2 failed."; + + /* Third test. 
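 */
+
+  /* Editor's illustration -- not part of the original file.  This
+     module is normally reached through the generic cipher API (see
+     the gcry_cipher_spec_t definitions at the end of the file) rather
+     than by calling setkey_core/do_encrypt directly.  A minimal,
+     untested sketch, #if 0'd so it stays out of the build:  */
+#if 0
+  {
+    gcry_cipher_hd_t hd;
+    const unsigned char demokey[5] = { 0x01, 0x02, 0x03, 0x04, 0x05 };
+    unsigned char block[8] = { 0 };
+
+    gcry_cipher_open (&hd, GCRY_CIPHER_RFC2268_40,
+                      GCRY_CIPHER_MODE_ECB, 0);
+    gcry_cipher_setkey (hd, demokey, sizeof demokey); /* 40-bit key */
+    gcry_cipher_encrypt (hd, block, sizeof block, NULL, 0); /* in place */
+    gcry_cipher_close (hd);
+  }
+#endif
+
+  /* The third vector run itself: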
*/ + setkey_core(&ctx, key_3, sizeof(key_3), 0); + do_encrypt(&ctx, scratch, plaintext_3); + + if (memcmp(scratch, ciphertext_3, sizeof(ciphertext_3))) + return "RFC2268 encryption test 3 failed."; + + setkey_core (&ctx, key_3, sizeof(key_3), 0); + do_decrypt (&ctx, scratch, scratch); + if (memcmp(scratch, plaintext_3, sizeof(plaintext_3))) + return "RFC2268 decryption test 3 failed."; + + return NULL; +} + + + +static gcry_cipher_oid_spec_t oids_rfc2268_40[] = + { + /*{ "1.2.840.113549.3.2", GCRY_CIPHER_MODE_CBC },*/ + /* pbeWithSHAAnd40BitRC2_CBC */ + { "1.2.840.113549.1.12.1.6", GCRY_CIPHER_MODE_CBC }, + { NULL } + }; + +static gcry_cipher_oid_spec_t oids_rfc2268_128[] = + { + /* pbeWithSHAAnd128BitRC2_CBC */ + { "1.2.840.113549.1.12.1.5", GCRY_CIPHER_MODE_CBC }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40 = + { + GCRY_CIPHER_RFC2268_40, {0, 0}, + "RFC2268_40", NULL, oids_rfc2268_40, + RFC2268_BLOCKSIZE, 40, sizeof(RFC2268_context), + do_setkey, encrypt_block, decrypt_block + }; + +gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_128 = + { + GCRY_CIPHER_RFC2268_128, {0, 0}, + "RFC2268_128", NULL, oids_rfc2268_128, + RFC2268_BLOCKSIZE, 128, sizeof(RFC2268_context), + do_setkey, encrypt_block, decrypt_block + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-aarch64.S b/libotr/libgcrypt-1.8.7/cipher/rijndael-aarch64.S new file mode 100644 index 0000000..e533bbe --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-aarch64.S @@ -0,0 +1,510 @@ +/* rijndael-aarch64.S - ARMv8/AArch64 assembly implementation of AES cipher + * + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
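 */
+
+/* Editor's note (not in the original file): the code below is the
+   plain table-driven AES.  RTAB points at the 256-entry, 32-bit-wide
+   lookup tables, and RMASK holds the byte mask pre-shifted left by
+   two (0xff << 2), so the "and ..., lsl#2" / "lsr#(n - 2)" address
+   arithmetic produces byte offsets already scaled for the 4-byte
+   table entries; the final round switches to single-byte (ldrb)
+   S-box loads instead.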
+ */ + +#include <config.h> + +#if defined(__AARCH64EL__) +#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS + +.text + +/* register macros */ +#define CTX x0 +#define RDST x1 +#define RSRC x2 +#define NROUNDS w3 +#define RTAB x4 +#define RMASK w5 + +#define RA w8 +#define RB w9 +#define RC w10 +#define RD w11 + +#define RNA w12 +#define RNB w13 +#define RNC w14 +#define RND w15 + +#define RT0 w6 +#define RT1 w7 +#define RT2 w16 +#define xRT0 x6 +#define xRT1 x7 +#define xRT2 x16 + +#define xw8 x8 +#define xw9 x9 +#define xw10 x10 +#define xw11 x11 + +#define xw12 x12 +#define xw13 x13 +#define xw14 x14 +#define xw15 x15 + +/*********************************************************************** + * ARMv8/AArch64 assembly implementation of the AES cipher + ***********************************************************************/ +#define preload_first_key(round, ra) \ + ldr ra, [CTX, #(((round) * 16) + 0 * 4)]; + +#define dummy(round, ra) /* nothing */ + +#define addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldp rna, rnb, [CTX]; \ + ldp rnc, rnd, [CTX, #8]; \ + eor ra, ra, rna; \ + eor rb, rb, rnb; \ + eor rc, rc, rnc; \ + preload_key(1, rna); \ + eor rd, rd, rnd; + +#define do_encround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \ + \ + and RT0, RMASK, ra, lsl#2; \ + ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \ + and RT1, RMASK, ra, lsr#(8 - 2); \ + ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \ + and RT2, RMASK, ra, lsr#(16 - 2); \ + ldr RT0, [RTAB, xRT0]; \ + and ra, RMASK, ra, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, xRT1]; \ + eor rna, rna, RT0; \ + ldr RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rd, lsl#2; \ + ldr ra, [RTAB, x##ra]; \ + \ + eor rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#(8 - 2); \ + eor rnc, rnc, RT2, ror #16; \ + and RT2, RMASK, rd, lsr#(16 - 2); \ + eor rnb, rnb, ra, ror #8; \ + ldr RT0, [RTAB, xRT0]; \ + and rd, RMASK, rd, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, xRT1]; \ + eor rnd, rnd, RT0; \ + ldr RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rc, lsl#2; \ + ldr rd, [RTAB, x##rd]; \ + \ + eor rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 2); \ + eor rnb, rnb, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 2); \ + eor rna, rna, rd, ror #8; \ + ldr RT0, [RTAB, xRT0]; \ + and rc, RMASK, rc, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, xRT1]; \ + eor rnc, rnc, RT0; \ + ldr RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rb, lsl#2; \ + ldr rc, [RTAB, x##rc]; \ + \ + eor rnb, rnb, RT1, ror #24; \ + and RT1, RMASK, rb, lsr#(8 - 2); \ + eor rna, rna, RT2, ror #16; \ + and RT2, RMASK, rb, lsr#(16 - 2); \ + eor rnd, rnd, rc, ror #8; \ + ldr RT0, [RTAB, xRT0]; \ + and rb, RMASK, rb, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, xRT1]; \ + eor rnb, rnb, RT0; \ + ldr RT2, [RTAB, xRT2]; \ + eor rna, rna, RT1, ror #24; \ + ldr rb, [RTAB, x##rb]; \ + \ + eor rnd, rnd, RT2, ror #16; \ + preload_key((next_r) + 1, ra); \ + eor rnc, rnc, rb, ror #8; + +#define do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + and RT0, RMASK, ra, lsl#2; \ + and RT1, RMASK, ra, lsr#(8 - 2); \ + and RT2, RMASK, ra, lsr#(16 - 2); \ + ldrb rna, [RTAB, xRT0]; \ + and ra, RMASK, ra, lsr#(24 - 2); \ + ldrb rnd, [RTAB, xRT1]; \ + and RT0, RMASK, rd, lsl#2; \ + ldrb rnc, [RTAB, xRT2]; \ + ror rnd, rnd, #24; \ + ldrb rnb, [RTAB, x##ra]; \ + and RT1, RMASK, rd, lsr#(8 - 2); \ + ror rnc, rnc, #16; \ + and RT2, RMASK, rd, lsr#(16 - 2); \ + ror rnb, rnb, #8; \ + ldrb RT0, [RTAB, xRT0]; \ + and rd, RMASK, rd, lsr#(24 - 2); \ + ldrb RT1, [RTAB, xRT1]; \ + 
\ + orr rnd, rnd, RT0; \ + ldrb RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rc, lsl#2; \ + ldrb rd, [RTAB, x##rd]; \ + orr rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 2); \ + orr rnb, rnb, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 2); \ + orr rna, rna, rd, ror #8; \ + ldrb RT0, [RTAB, xRT0]; \ + and rc, RMASK, rc, lsr#(24 - 2); \ + ldrb RT1, [RTAB, xRT1]; \ + \ + orr rnc, rnc, RT0; \ + ldrb RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rb, lsl#2; \ + ldrb rc, [RTAB, x##rc]; \ + orr rnb, rnb, RT1, ror #24; \ + and RT1, RMASK, rb, lsr#(8 - 2); \ + orr rna, rna, RT2, ror #16; \ + ldrb RT0, [RTAB, xRT0]; \ + and RT2, RMASK, rb, lsr#(16 - 2); \ + ldrb RT1, [RTAB, xRT1]; \ + orr rnd, rnd, rc, ror #8; \ + ldrb RT2, [RTAB, xRT2]; \ + and rb, RMASK, rb, lsr#(24 - 2); \ + ldrb rb, [RTAB, x##rb]; \ + \ + orr rnb, rnb, RT0; \ + orr rna, rna, RT1, ror #24; \ + orr rnd, rnd, RT2, ror #16; \ + orr rnc, rnc, rb, ror #8; + +#define firstencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); \ + do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); + +#define encround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key); + +#define lastencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + add CTX, CTX, #(((round) + 1) * 16); \ + add RTAB, RTAB, #1; \ + do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); + +.globl _gcry_aes_arm_encrypt_block +.type _gcry_aes_arm_encrypt_block,%function; + +_gcry_aes_arm_encrypt_block: + /* input: + * %x0: keysched, CTX + * %x1: dst + * %x2: src + * %w3: number of rounds.. 10, 12 or 14 + * %x4: encryption table + */ + + /* read input block */ + + /* aligned load */ + ldp RA, RB, [RSRC]; + ldp RC, RD, [RSRC, #8]; +#ifndef __AARCH64EL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif + + mov RMASK, #(0xff<<2); + + firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND); + encround(1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + + cmp NROUNDS, #12; + bge .Lenc_not_128; + + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD); + +.Lenc_done: + + /* store output block */ + + /* aligned store */ +#ifndef __AARCH64EL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif + /* write output block */ + stp RA, RB, [RDST]; + stp RC, RD, [RDST, #8]; + + mov x0, #(0); + ret; + +.ltorg +.Lenc_not_128: + beq .Lenc_192 + + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(13, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + b .Lenc_done; + +.ltorg +.Lenc_192: + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(9, RNA, RNB, 
RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + b .Lenc_done; +.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; + +#define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ + ldr rnb, [CTX, #(((round) * 16) + 1 * 4)]; \ + eor ra, ra, rna; \ + ldr rnc, [CTX, #(((round) * 16) + 2 * 4)]; \ + eor rb, rb, rnb; \ + ldr rnd, [CTX, #(((round) * 16) + 3 * 4)]; \ + eor rc, rc, rnc; \ + preload_first_key((round) - 1, rna); \ + eor rd, rd, rnd; + +#define do_decround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \ + \ + and RT0, RMASK, ra, lsl#2; \ + ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \ + and RT1, RMASK, ra, lsr#(8 - 2); \ + ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \ + and RT2, RMASK, ra, lsr#(16 - 2); \ + ldr RT0, [RTAB, xRT0]; \ + and ra, RMASK, ra, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, xRT1]; \ + eor rna, rna, RT0; \ + ldr RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rb, lsl#2; \ + ldr ra, [RTAB, x##ra]; \ + \ + eor rnb, rnb, RT1, ror #24; \ + and RT1, RMASK, rb, lsr#(8 - 2); \ + eor rnc, rnc, RT2, ror #16; \ + and RT2, RMASK, rb, lsr#(16 - 2); \ + eor rnd, rnd, ra, ror #8; \ + ldr RT0, [RTAB, xRT0]; \ + and rb, RMASK, rb, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, xRT1]; \ + eor rnb, rnb, RT0; \ + ldr RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rc, lsl#2; \ + ldr rb, [RTAB, x##rb]; \ + \ + eor rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 2); \ + eor rnd, rnd, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 2); \ + eor rna, rna, rb, ror #8; \ + ldr RT0, [RTAB, xRT0]; \ + and rc, RMASK, rc, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, xRT1]; \ + eor rnc, rnc, RT0; \ + ldr RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rd, lsl#2; \ + ldr rc, [RTAB, x##rc]; \ + \ + eor rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#(8 - 2); \ + eor rna, rna, RT2, ror #16; \ + and RT2, RMASK, rd, lsr#(16 - 2); \ + eor rnb, rnb, rc, ror #8; \ + ldr RT0, [RTAB, xRT0]; \ + and rd, RMASK, rd, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, xRT1]; \ + eor rnd, rnd, RT0; \ + ldr RT2, [RTAB, xRT2]; \ + eor rna, rna, RT1, ror #24; \ + ldr rd, [RTAB, x##rd]; \ + \ + eor rnb, rnb, RT2, ror #16; \ + preload_key((next_r) - 1, ra); \ + eor rnc, rnc, rd, ror #8; + +#define do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + and RT0, RMASK, ra; \ + and RT1, RMASK, ra, lsr#8; \ + and RT2, RMASK, ra, lsr#16; \ + ldrb rna, [RTAB, xRT0]; \ + lsr ra, ra, #24; \ + ldrb rnb, [RTAB, xRT1]; \ + and RT0, RMASK, rb; \ + ldrb rnc, [RTAB, xRT2]; \ + ror rnb, rnb, #24; \ + ldrb rnd, [RTAB, x##ra]; \ + and RT1, RMASK, rb, lsr#8; \ + ror rnc, rnc, #16; \ + and RT2, RMASK, rb, lsr#16; \ + ror rnd, rnd, #8; \ + ldrb RT0, [RTAB, xRT0]; \ + lsr rb, rb, #24; \ + ldrb RT1, [RTAB, xRT1]; \ + \ + orr rnb, rnb, RT0; \ + ldrb RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rc; \ + ldrb rb, [RTAB, x##rb]; \ + orr rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#8; \ + orr rnd, rnd, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#16; \ + orr rna, rna, rb, ror #8; \ + ldrb RT0, [RTAB, xRT0]; \ + lsr rc, rc, #24; \ + ldrb RT1, [RTAB, xRT1]; \ + \ + orr rnc, rnc, RT0; \ + ldrb RT2, [RTAB, xRT2]; \ + and RT0, RMASK, rd; \ + ldrb rc, [RTAB, x##rc]; \ + orr rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#8; \ + orr rna, rna, RT2, ror #16; \ + ldrb RT0, [RTAB, xRT0]; \ + and RT2, RMASK, rd, lsr#16; \ + ldrb RT1, [RTAB, xRT1]; \ + orr rnb, 
rnb, rc, ror #8; \ + ldrb RT2, [RTAB, xRT2]; \ + lsr rd, rd, #24; \ + ldrb rd, [RTAB, x##rd]; \ + \ + orr rnd, rnd, RT0; \ + orr rna, rna, RT1, ror #24; \ + orr rnb, rnb, RT2, ror #16; \ + orr rnc, rnc, rd, ror #8; + +#define firstdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + addroundkey_dec(((round) + 1), ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); + +#define decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key); + +#define set_last_round_rmask(_, __) \ + mov RMASK, #0xff; + +#define lastdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + add RTAB, RTAB, #(4 * 256); \ + do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); + +.globl _gcry_aes_arm_decrypt_block +.type _gcry_aes_arm_decrypt_block,%function; + +_gcry_aes_arm_decrypt_block: + /* input: + * %x0: keysched, CTX + * %x1: dst + * %x2: src + * %w3: number of rounds.. 10, 12 or 14 + * %x4: decryption table + */ + + /* read input block */ + + /* aligned load */ + ldp RA, RB, [RSRC]; + ldp RC, RD, [RSRC, #8]; +#ifndef __AARCH64EL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif + + mov RMASK, #(0xff << 2); + + cmp NROUNDS, #12; + bge .Ldec_256; + + firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND); +.Ldec_tail: + decround(8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask); + lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + /* store output block */ + + /* aligned store */ +#ifndef __AARCH64EL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif + /* write output block */ + stp RA, RB, [RDST]; + stp RC, RD, [RDST, #8]; + + mov x0, #(0); + ret; + +.ltorg +.Ldec_256: + beq .Ldec_192; + + firstdecround(13, RA, RB, RC, RD, RNA, RNB, RNC, RND); + decround(12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + + b .Ldec_tail; + +.ltorg +.Ldec_192: + firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND); + decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + + b .Ldec_tail; +.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block; + +#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ +#endif /*__AARCH64EL__ */ diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-aesni.c b/libotr/libgcrypt-1.8.7/cipher/rijndael-aesni.c new file mode 100644 index 0000000..735e5cd --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-aesni.c @@ -0,0 +1,1816 @@ +/* AES-NI accelerated AES for Libgcrypt + * Copyright (C) 2000, 2001, 2002, 2003, 2007, + * 2008, 2011, 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> /* for memcmp() */ + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-selftest.h" +#include "rijndael-internal.h" +#include "./cipher-internal.h" + + +#ifdef USE_AESNI + + +#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ +/* Prevent compiler from issuing SSE instructions between asm blocks. */ +# pragma GCC target("no-sse") +#endif + + +typedef struct u128_s +{ + u32 a, b, c, d; +} __attribute__((packed, aligned(1), may_alias)) u128_t; + + +/* Two macros to be called prior and after the use of AESNI + instructions. There should be no external function calls between + the use of these macros. There purpose is to make sure that the + SSE regsiters are cleared and won't reveal any information about + the key or the data. */ +#ifdef __WIN64__ +/* XMM6-XMM15 are callee-saved registers on WIN64. */ +# define aesni_prepare_2_6_variable char win64tmp[16] +# define aesni_prepare() do { } while (0) +# define aesni_prepare_2_6() \ + do { asm volatile ("movdqu %%xmm6, %0\n\t" \ + : "=m" (*win64tmp) \ + : \ + : "memory"); \ + } while (0) +# define aesni_cleanup() \ + do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ + "pxor %%xmm1, %%xmm1\n" :: ); \ + } while (0) +# define aesni_cleanup_2_6() \ + do { asm volatile ("movdqu %0, %%xmm6\n\t" \ + "pxor %%xmm2, %%xmm2\n" \ + "pxor %%xmm3, %%xmm3\n" \ + "pxor %%xmm4, %%xmm4\n" \ + "pxor %%xmm5, %%xmm5\n" \ + : \ + : "m" (*win64tmp) \ + : "memory"); \ + } while (0) +#else +# define aesni_prepare_2_6_variable +# define aesni_prepare() do { } while (0) +# define aesni_prepare_2_6() do { } while (0) +# define aesni_cleanup() \ + do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ + "pxor %%xmm1, %%xmm1\n" :: ); \ + } while (0) +# define aesni_cleanup_2_6() \ + do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \ + "pxor %%xmm3, %%xmm3\n" \ + "pxor %%xmm4, %%xmm4\n" \ + "pxor %%xmm5, %%xmm5\n" \ + "pxor %%xmm6, %%xmm6\n":: ); \ + } while (0) +#endif + +void +_gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key) +{ + aesni_prepare_2_6_variable; + + aesni_prepare(); + aesni_prepare_2_6(); + + if (ctx->rounds < 12) + { + /* 128-bit key */ +#define AESKEYGENASSIST_xmm1_xmm2(imm8) \ + ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t" +#define AESKEY_EXPAND128 \ + "pshufd $0xff, %%xmm2, %%xmm2\n\t" \ + "movdqa %%xmm1, %%xmm3\n\t" \ + "pslldq $4, %%xmm3\n\t" \ + "pxor %%xmm3, %%xmm1\n\t" \ + "pslldq $4, %%xmm3\n\t" \ + "pxor %%xmm3, %%xmm1\n\t" \ + "pslldq $4, %%xmm3\n\t" \ + "pxor %%xmm3, %%xmm2\n\t" \ + "pxor %%xmm2, %%xmm1\n\t" + + asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key */ + "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x01) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x10(%[ksch])\n\t" 
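+                    /* (Editor's note: aeskeygenassist leaves
+                       SubWord/RotWord of the previous round key, xored
+                       with the round constant, in xmm2; the EXPAND128
+                       cascade of pslldq/pxor then folds in the running
+                       prefix xor, implementing the FIPS-197 recurrence
+                       w[i] = w[i-4] ^ temp, where temp is w[i-1] or,
+                       every fourth word, SubWord(RotWord(w[i-1]))
+                       ^ Rcon.) */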
/* ksch[1] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x02) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x04) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x08) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x10) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x50(%[ksch])\n\t" /* ksch[5] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x20) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x40) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x70(%[ksch])\n\t" /* ksch[7] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x80) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x1b) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x36) + AESKEY_EXPAND128 + "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1 */ + : + : [key] "r" (key), [ksch] "r" (ctx->keyschenc) + : "cc", "memory" ); +#undef AESKEYGENASSIST_xmm1_xmm2 +#undef AESKEY_EXPAND128 + } + else if (ctx->rounds == 12) + { + /* 192-bit key */ +#define AESKEYGENASSIST_xmm3_xmm2(imm8) \ + ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t" +#define AESKEY_EXPAND192 \ + "pshufd $0x55, %%xmm2, %%xmm2\n\t" \ + "movdqu %%xmm1, %%xmm4\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm1\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm1\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm1\n\t" \ + "pxor %%xmm2, %%xmm1\n\t" \ + "pshufd $0xff, %%xmm1, %%xmm2\n\t" \ + "movdqu %%xmm3, %%xmm4\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm3\n\t" \ + "pxor %%xmm2, %%xmm3\n\t" + + asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key[0..15] */ + "movq 16(%[key]), %%xmm3\n\t" /* xmm3 := key[16..23] */ + "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ + "movdqa %%xmm3, %%xmm5\n\t" + + AESKEYGENASSIST_xmm3_xmm2(0x01) + AESKEY_EXPAND192 + "shufpd $0, %%xmm1, %%xmm5\n\t" + "movdqa %%xmm5, 0x10(%[ksch])\n\t" /* ksch[1] := xmm5 */ + "movdqa %%xmm1, %%xmm6\n\t" + "shufpd $1, %%xmm3, %%xmm6\n\t" + "movdqa %%xmm6, 0x20(%[ksch])\n\t" /* ksch[2] := xmm6 */ + AESKEYGENASSIST_xmm3_xmm2(0x02) + AESKEY_EXPAND192 + "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1 */ + "movdqa %%xmm3, %%xmm5\n\t" + + AESKEYGENASSIST_xmm3_xmm2(0x04) + AESKEY_EXPAND192 + "shufpd $0, %%xmm1, %%xmm5\n\t" + "movdqa %%xmm5, 0x40(%[ksch])\n\t" /* ksch[4] := xmm5 */ + "movdqa %%xmm1, %%xmm6\n\t" + "shufpd $1, %%xmm3, %%xmm6\n\t" + "movdqa %%xmm6, 0x50(%[ksch])\n\t" /* ksch[5] := xmm6 */ + AESKEYGENASSIST_xmm3_xmm2(0x08) + AESKEY_EXPAND192 + "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ + "movdqa %%xmm3, %%xmm5\n\t" + + AESKEYGENASSIST_xmm3_xmm2(0x10) + AESKEY_EXPAND192 + "shufpd $0, %%xmm1, %%xmm5\n\t" + "movdqa %%xmm5, 0x70(%[ksch])\n\t" /* ksch[7] := xmm5 */ + "movdqa %%xmm1, %%xmm6\n\t" + "shufpd $1, %%xmm3, %%xmm6\n\t" + "movdqa %%xmm6, 0x80(%[ksch])\n\t" /* ksch[8] := xmm6 */ + AESKEYGENASSIST_xmm3_xmm2(0x20) + AESKEY_EXPAND192 + "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1 */ + "movdqa %%xmm3, %%xmm5\n\t" + + AESKEYGENASSIST_xmm3_xmm2(0x40) + AESKEY_EXPAND192 + "shufpd $0, %%xmm1, %%xmm5\n\t" + "movdqa %%xmm5, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm5 */ + "movdqa %%xmm1, %%xmm6\n\t" + "shufpd $1, %%xmm3, %%xmm6\n\t" + "movdqa %%xmm6, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm6 */ + AESKEYGENASSIST_xmm3_xmm2(0x80) + 
AESKEY_EXPAND192 + "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1 */ + : + : [key] "r" (key), [ksch] "r" (ctx->keyschenc) + : "cc", "memory" ); +#undef AESKEYGENASSIST_xmm3_xmm2 +#undef AESKEY_EXPAND192 + } + else if (ctx->rounds > 12) + { + /* 256-bit key */ +#define AESKEYGENASSIST_xmm1_xmm2(imm8) \ + ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t" +#define AESKEYGENASSIST_xmm3_xmm2(imm8) \ + ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t" +#define AESKEY_EXPAND256_A \ + "pshufd $0xff, %%xmm2, %%xmm2\n\t" \ + "movdqa %%xmm1, %%xmm4\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm1\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm1\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm1\n\t" \ + "pxor %%xmm2, %%xmm1\n\t" +#define AESKEY_EXPAND256_B \ + "pshufd $0xaa, %%xmm2, %%xmm2\n\t" \ + "movdqa %%xmm3, %%xmm4\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm3\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm3\n\t" \ + "pslldq $4, %%xmm4\n\t" \ + "pxor %%xmm4, %%xmm3\n\t" \ + "pxor %%xmm2, %%xmm3\n\t" + + asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key[0..15] */ + "movdqu 16(%[key]), %%xmm3\n\t" /* xmm3 := key[16..31] */ + "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ + "movdqa %%xmm3, 0x10(%[ksch])\n\t" /* ksch[1] := xmm3 */ + + AESKEYGENASSIST_xmm3_xmm2(0x01) + AESKEY_EXPAND256_A + "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x00) + AESKEY_EXPAND256_B + "movdqa %%xmm3, 0x30(%[ksch])\n\t" /* ksch[3] := xmm3 */ + + AESKEYGENASSIST_xmm3_xmm2(0x02) + AESKEY_EXPAND256_A + "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x00) + AESKEY_EXPAND256_B + "movdqa %%xmm3, 0x50(%[ksch])\n\t" /* ksch[5] := xmm3 */ + + AESKEYGENASSIST_xmm3_xmm2(0x04) + AESKEY_EXPAND256_A + "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x00) + AESKEY_EXPAND256_B + "movdqa %%xmm3, 0x70(%[ksch])\n\t" /* ksch[7] := xmm3 */ + + AESKEYGENASSIST_xmm3_xmm2(0x08) + AESKEY_EXPAND256_A + "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x00) + AESKEY_EXPAND256_B + "movdqa %%xmm3, 0x90(%[ksch])\n\t" /* ksch[9] := xmm3 */ + + AESKEYGENASSIST_xmm3_xmm2(0x10) + AESKEY_EXPAND256_A + "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x00) + AESKEY_EXPAND256_B + "movdqa %%xmm3, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm3 */ + + AESKEYGENASSIST_xmm3_xmm2(0x20) + AESKEY_EXPAND256_A + "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1 */ + AESKEYGENASSIST_xmm1_xmm2(0x00) + AESKEY_EXPAND256_B + "movdqa %%xmm3, 0xd0(%[ksch])\n\t" /* ksch[13] := xmm3 */ + + AESKEYGENASSIST_xmm3_xmm2(0x40) + AESKEY_EXPAND256_A + "movdqa %%xmm1, 0xe0(%[ksch])\n\t" /* ksch[14] := xmm1 */ + + : + : [key] "r" (key), [ksch] "r" (ctx->keyschenc) + : "cc", "memory" ); +#undef AESKEYGENASSIST_xmm1_xmm2 +#undef AESKEYGENASSIST_xmm3_xmm2 +#undef AESKEY_EXPAND256_A +#undef AESKEY_EXPAND256_B + } + + aesni_cleanup(); + aesni_cleanup_2_6(); +} + + +/* Make a decryption key from an encryption key. */ +void +_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) +{ + /* The AES-NI decrypt instructions use the Equivalent Inverse + Cipher, thus we can't use the the standard decrypt key + preparation. 
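+     Instead, following FIPS-197 section 5.3.5, the decryption
+     schedule is dk[0] = ek[Nr], dk[i] = InvMixColumns (ek[Nr-i]) for
+     0 < i < Nr, and dk[Nr] = ek[0]; the aesimc instruction used below
+     performs the InvMixColumns step.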
*/ + u128_t *ekey = (u128_t *)ctx->keyschenc; + u128_t *dkey = (u128_t *)ctx->keyschdec; + int rr; + int r; + + aesni_prepare(); + +#define DO_AESNI_AESIMC() \ + asm volatile ("movdqa %[ekey], %%xmm1\n\t" \ + /*"aesimc %%xmm1, %%xmm1\n\t"*/ \ + ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t" \ + "movdqa %%xmm1, %[dkey]" \ + : [dkey] "=m" (dkey[r]) \ + : [ekey] "m" (ekey[rr]) \ + : "memory") + + dkey[0] = ekey[ctx->rounds]; + r=1; + rr=ctx->rounds-1; + DO_AESNI_AESIMC(); r++; rr--; /* round 1 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 2 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 3 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 4 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 5 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 6 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 7 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 8 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 9 */ + if (ctx->rounds > 10) + { + DO_AESNI_AESIMC(); r++; rr--; /* round 10 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 11 */ + if (ctx->rounds > 12) + { + DO_AESNI_AESIMC(); r++; rr--; /* round 12 */ + DO_AESNI_AESIMC(); r++; rr--; /* round 13 */ + } + } + + dkey[r] = ekey[0]; + +#undef DO_AESNI_AESIMC + + aesni_cleanup(); +} + + +/* Encrypt one block using the Intel AES-NI instructions. Block is input + * and output through SSE register xmm0. */ +static inline void +do_aesni_enc (const RIJNDAEL_context *ctx) +{ +#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" +#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" + asm volatile ("movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "movdqa 0x10(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x20(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x30(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x40(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x50(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x60(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x70(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x80(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x90(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xa0(%[key]), %%xmm1\n\t" + "cmpl $10, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xb0(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xc0(%[key]), %%xmm1\n\t" + "cmpl $12, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xd0(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xe0(%[key]), %%xmm1\n" + + ".Lenclast%=:\n\t" + aesenclast_xmm1_xmm0 + "\n" + : + : [key] "r" (ctx->keyschenc), + [rounds] "r" (ctx->rounds) + : "cc", "memory"); +#undef aesenc_xmm1_xmm0 +#undef aesenclast_xmm1_xmm0 +} + + +/* Decrypt one block using the Intel AES-NI instructions. Block is input + * and output through SSE register xmm0. 
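 */
+
+/* Editor's illustration -- not part of the original file: the same
+   loop written with the compiler's AES-NI intrinsics (<wmmintrin.h>,
+   compile with -maes).  An untested sketch, #if 0'd out:  */
+#if 0
+#include <wmmintrin.h>
+
+/* RK points to ROUNDS+1 round keys prepared for the Equivalent
+   Inverse Cipher as above.  */
+static __m128i
+aesni_dec_one (const __m128i *rk, int rounds, __m128i block)
+{
+  int i;
+
+  block = _mm_xor_si128 (block, rk[0]);
+  for (i = 1; i < rounds; i++)
+    block = _mm_aesdec_si128 (block, rk[i]);
+  return _mm_aesdeclast_si128 (block, rk[rounds]);
+}
+#endif
+
+/* The hand-coded variant follows, using .byte-encoded instructions so
+   that assemblers without AES-NI support can still build it: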
*/ +static inline void +do_aesni_dec (const RIJNDAEL_context *ctx) +{ +#define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t" +#define aesdeclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t" + asm volatile ("movdqa (%[key]), %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "movdqa 0x10(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x20(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x30(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x40(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x50(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x60(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x70(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x80(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x90(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xa0(%[key]), %%xmm1\n\t" + "cmpl $10, %[rounds]\n\t" + "jz .Ldeclast%=\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xb0(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xc0(%[key]), %%xmm1\n\t" + "cmpl $12, %[rounds]\n\t" + "jz .Ldeclast%=\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xd0(%[key]), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xe0(%[key]), %%xmm1\n" + + ".Ldeclast%=:\n\t" + aesdeclast_xmm1_xmm0 + "\n" + : + : [key] "r" (ctx->keyschdec), + [rounds] "r" (ctx->rounds) + : "cc", "memory"); +#undef aesdec_xmm1_xmm0 +#undef aesdeclast_xmm1_xmm0 +} + + +/* Encrypt four blocks using the Intel AES-NI instructions. Blocks are input + * and output through SSE registers xmm1 to xmm4. */ +static inline void +do_aesni_enc_vec4 (const RIJNDAEL_context *ctx) +{ +#define aesenc_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc8\n\t" +#define aesenc_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd0\n\t" +#define aesenc_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd8\n\t" +#define aesenc_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe0\n\t" +#define aesenclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc8\n\t" +#define aesenclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd0\n\t" +#define aesenclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd8\n\t" +#define aesenclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe0\n\t" + asm volatile ("movdqa (%[key]), %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ + "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ + "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ + "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ + "movdqa 0x10(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0x20(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0x30(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0x40(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0x50(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0x60(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0x70(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0x80(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0x90(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0xa0(%[key]), %%xmm0\n\t" + "cmpl $10, %[rounds]\n\t" + "jz .Ldeclast%=\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0xb0(%[key]), 
%%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0xc0(%[key]), %%xmm0\n\t" + "cmpl $12, %[rounds]\n\t" + "jz .Ldeclast%=\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0xd0(%[key]), %%xmm0\n\t" + aesenc_xmm0_xmm1 + aesenc_xmm0_xmm2 + aesenc_xmm0_xmm3 + aesenc_xmm0_xmm4 + "movdqa 0xe0(%[key]), %%xmm0\n" + + ".Ldeclast%=:\n\t" + aesenclast_xmm0_xmm1 + aesenclast_xmm0_xmm2 + aesenclast_xmm0_xmm3 + aesenclast_xmm0_xmm4 + : /* no output */ + : [key] "r" (ctx->keyschenc), + [rounds] "r" (ctx->rounds) + : "cc", "memory"); +#undef aesenc_xmm0_xmm1 +#undef aesenc_xmm0_xmm2 +#undef aesenc_xmm0_xmm3 +#undef aesenc_xmm0_xmm4 +#undef aesenclast_xmm0_xmm1 +#undef aesenclast_xmm0_xmm2 +#undef aesenclast_xmm0_xmm3 +#undef aesenclast_xmm0_xmm4 +} + + +/* Decrypt four blocks using the Intel AES-NI instructions. Blocks are input + * and output through SSE registers xmm1 to xmm4. */ +static inline void +do_aesni_dec_vec4 (const RIJNDAEL_context *ctx) +{ +#define aesdec_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc8\n\t" +#define aesdec_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd0\n\t" +#define aesdec_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd8\n\t" +#define aesdec_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xde, 0xe0\n\t" +#define aesdeclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc8\n\t" +#define aesdeclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd0\n\t" +#define aesdeclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd8\n\t" +#define aesdeclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xe0\n\t" + asm volatile ("movdqa (%[key]), %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ + "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ + "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ + "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ + "movdqa 0x10(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0x20(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0x30(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0x40(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0x50(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0x60(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0x70(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0x80(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0x90(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0xa0(%[key]), %%xmm0\n\t" + "cmpl $10, %[rounds]\n\t" + "jz .Ldeclast%=\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0xb0(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0xc0(%[key]), %%xmm0\n\t" + "cmpl $12, %[rounds]\n\t" + "jz .Ldeclast%=\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0xd0(%[key]), %%xmm0\n\t" + aesdec_xmm0_xmm1 + aesdec_xmm0_xmm2 + aesdec_xmm0_xmm3 + aesdec_xmm0_xmm4 + "movdqa 0xe0(%[key]), %%xmm0\n" + + ".Ldeclast%=:\n\t" + aesdeclast_xmm0_xmm1 + aesdeclast_xmm0_xmm2 + aesdeclast_xmm0_xmm3 + aesdeclast_xmm0_xmm4 + : /* no 
output */ + : [key] "r" (ctx->keyschdec), + [rounds] "r" (ctx->rounds) + : "cc", "memory"); +#undef aesdec_xmm0_xmm1 +#undef aesdec_xmm0_xmm2 +#undef aesdec_xmm0_xmm3 +#undef aesdec_xmm0_xmm4 +#undef aesdeclast_xmm0_xmm1 +#undef aesdeclast_xmm0_xmm2 +#undef aesdeclast_xmm0_xmm3 +#undef aesdeclast_xmm0_xmm4 +} + + +/* Perform a CTR encryption round using the counter CTR and the input + block A. Write the result to the output block B and update CTR. + CTR needs to be a 16 byte aligned little-endian value. */ +static void +do_aesni_ctr (const RIJNDAEL_context *ctx, + unsigned char *ctr, unsigned char *b, const unsigned char *a) +{ +#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" +#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" + + asm volatile ("movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */ + "pcmpeqd %%xmm1, %%xmm1\n\t" + "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ + + "pshufb %%xmm6, %%xmm5\n\t" + "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ (big endian) */ + + /* detect if 64-bit carry handling is needed */ + "cmpl $0xffffffff, 8(%[ctr])\n\t" + "jne .Lno_carry%=\n\t" + "cmpl $0xffffffff, 12(%[ctr])\n\t" + "jne .Lno_carry%=\n\t" + + "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ + "psubq %%xmm1, %%xmm5\n\t" /* add carry to upper 64bits */ + + ".Lno_carry%=:\n\t" + + "pshufb %%xmm6, %%xmm5\n\t" + "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */ + + "pxor (%[key]), %%xmm0\n\t" /* xmm1 ^= key[0] */ + "movdqa 0x10(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x20(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x30(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x40(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x50(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x60(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x70(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x80(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x90(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xa0(%[key]), %%xmm1\n\t" + "cmpl $10, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xb0(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xc0(%[key]), %%xmm1\n\t" + "cmpl $12, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xd0(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xe0(%[key]), %%xmm1\n" + + ".Lenclast%=:\n\t" + aesenclast_xmm1_xmm0 + "movdqu %[src], %%xmm1\n\t" /* xmm1 := input */ + "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */ + "movdqu %%xmm0, %[dst]" /* Store EncCTR. */ + + : [dst] "=m" (*b) + : [src] "m" (*a), + [ctr] "r" (ctr), + [key] "r" (ctx->keyschenc), + [rounds] "g" (ctx->rounds) + : "cc", "memory"); +#undef aesenc_xmm1_xmm0 +#undef aesenclast_xmm1_xmm0 +} + + +/* Four blocks at a time variant of do_aesni_ctr. 
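+
+   Editor's note: the counter arithmetic below has two paths.  When
+   the low byte of the big-endian counter is <= 0xfb, the four
+   successor counters differ from CTR only in that byte, so a single
+   byte-wise paddb against the constants be(1)..be(4) suffices;
+   otherwise the code byte-swaps to little endian and performs 64-bit
+   adds with explicit carry propagation into the upper counter half.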
*/ +static void +do_aesni_ctr_4 (const RIJNDAEL_context *ctx, + unsigned char *ctr, unsigned char *b, const unsigned char *a) +{ + static const byte bige_addb_const[4][16] __attribute__ ((aligned (16))) = + { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 } + }; + const void *bige_addb = bige_addb_const; +#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" +#define aesenc_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t" +#define aesenc_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t" +#define aesenc_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t" +#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" +#define aesenclast_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t" +#define aesenclast_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t" +#define aesenclast_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t" + + /* Register usage: + esi keyschedule + xmm0 CTR-0 + xmm1 temp / round key + xmm2 CTR-1 + xmm3 CTR-2 + xmm4 CTR-3 + xmm5 copy of *ctr + xmm6 endian swapping mask + */ + + asm volatile (/* detect if 8-bit carry handling is needed */ + "cmpb $0xfb, 15(%[ctr])\n\t" + "ja .Ladd32bit%=\n\t" + + "movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */ + "movdqa 0*16(%[addb]), %%xmm2\n\t" /* xmm2 := be(1) */ + "movdqa 1*16(%[addb]), %%xmm3\n\t" /* xmm3 := be(2) */ + "movdqa 2*16(%[addb]), %%xmm4\n\t" /* xmm4 := be(3) */ + "movdqa 3*16(%[addb]), %%xmm5\n\t" /* xmm5 := be(4) */ + "paddb %%xmm0, %%xmm2\n\t" /* xmm2 := be(1) + CTR (xmm0) */ + "paddb %%xmm0, %%xmm3\n\t" /* xmm3 := be(2) + CTR (xmm0) */ + "paddb %%xmm0, %%xmm4\n\t" /* xmm4 := be(3) + CTR (xmm0) */ + "paddb %%xmm0, %%xmm5\n\t" /* xmm5 := be(4) + CTR (xmm0) */ + "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ + "jmp .Lstore_ctr%=\n\t" + + ".Ladd32bit%=:\n\t" + "movdqa %%xmm5, %%xmm0\n\t" /* xmm0, xmm2 := CTR (xmm5) */ + "movdqa %%xmm0, %%xmm2\n\t" + "pcmpeqd %%xmm1, %%xmm1\n\t" + "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ + + "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := le(xmm2) */ + "psubq %%xmm1, %%xmm2\n\t" /* xmm2++ */ + "movdqa %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */ + "psubq %%xmm1, %%xmm3\n\t" /* xmm3++ */ + "movdqa %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */ + "psubq %%xmm1, %%xmm4\n\t" /* xmm4++ */ + "movdqa %%xmm4, %%xmm5\n\t" /* xmm5 := xmm4 */ + "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ */ + + /* detect if 64-bit carry handling is needed */ + "cmpl $0xffffffff, 8(%[ctr])\n\t" + "jne .Lno_carry%=\n\t" + "movl 12(%[ctr]), %%esi\n\t" + "bswapl %%esi\n\t" + "cmpl $0xfffffffc, %%esi\n\t" + "jb .Lno_carry%=\n\t" /* no carry */ + + "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ + "je .Lcarry_xmm5%=\n\t" /* esi == 0xfffffffc */ + "cmpl $0xfffffffe, %%esi\n\t" + "jb .Lcarry_xmm4%=\n\t" /* esi == 0xfffffffd */ + "je .Lcarry_xmm3%=\n\t" /* esi == 0xfffffffe */ + /* esi == 0xffffffff */ + + "psubq %%xmm1, %%xmm2\n\t" + ".Lcarry_xmm3%=:\n\t" + "psubq %%xmm1, %%xmm3\n\t" + ".Lcarry_xmm4%=:\n\t" + "psubq %%xmm1, %%xmm4\n\t" + ".Lcarry_xmm5%=:\n\t" + "psubq %%xmm1, %%xmm5\n\t" + + ".Lno_carry%=:\n\t" + "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ + + "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := be(xmm2) */ + "pshufb %%xmm6, %%xmm3\n\t" /* xmm3 := be(xmm3) */ + "pshufb %%xmm6, %%xmm4\n\t" /* xmm4 := be(xmm4) */ + "pshufb %%xmm6, %%xmm5\n\t" /* xmm5 := be(xmm5) */ + + ".Lstore_ctr%=:\n\t" + "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). 
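+                 (The stored value is CTR+4, the starting counter for
+                 the next four-block chunk.)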
*/ + : + : [ctr] "r" (ctr), + [key] "r" (ctx->keyschenc), + [addb] "r" (bige_addb) + : "%esi", "cc", "memory"); + + asm volatile ("pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */ + "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */ + "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */ + "movdqa 0x10(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x20(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x30(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x40(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x50(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x60(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x70(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x80(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x90(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xa0(%[key]), %%xmm1\n\t" + "cmpl $10, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xb0(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xc0(%[key]), %%xmm1\n\t" + "cmpl $12, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xd0(%[key]), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xe0(%[key]), %%xmm1\n" + + ".Lenclast%=:\n\t" + aesenclast_xmm1_xmm0 + aesenclast_xmm1_xmm2 + aesenclast_xmm1_xmm3 + aesenclast_xmm1_xmm4 + : + : [key] "r" (ctx->keyschenc), + [rounds] "r" (ctx->rounds) + : "cc", "memory"); + + asm volatile ("movdqu (%[src]), %%xmm1\n\t" /* Get block 1. */ + "pxor %%xmm1, %%xmm0\n\t" /* EncCTR-1 ^= input */ + "movdqu %%xmm0, (%[dst])\n\t" /* Store block 1 */ + + "movdqu 16(%[src]), %%xmm1\n\t" /* Get block 2. */ + "pxor %%xmm1, %%xmm2\n\t" /* EncCTR-2 ^= input */ + "movdqu %%xmm2, 16(%[dst])\n\t" /* Store block 2. */ + + "movdqu 32(%[src]), %%xmm1\n\t" /* Get block 3. */ + "pxor %%xmm1, %%xmm3\n\t" /* EncCTR-3 ^= input */ + "movdqu %%xmm3, 32(%[dst])\n\t" /* Store block 3. */ + + "movdqu 48(%[src]), %%xmm1\n\t" /* Get block 4. */ + "pxor %%xmm1, %%xmm4\n\t" /* EncCTR-4 ^= input */ + "movdqu %%xmm4, 48(%[dst])" /* Store block 4. 
*/ + : + : [src] "r" (a), + [dst] "r" (b) + : "memory"); +#undef aesenc_xmm1_xmm0 +#undef aesenc_xmm1_xmm2 +#undef aesenc_xmm1_xmm3 +#undef aesenc_xmm1_xmm4 +#undef aesenclast_xmm1_xmm0 +#undef aesenclast_xmm1_xmm2 +#undef aesenclast_xmm1_xmm3 +#undef aesenclast_xmm1_xmm4 +} + + +unsigned int +_gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, + const unsigned char *src) +{ + aesni_prepare (); + asm volatile ("movdqu %[src], %%xmm0\n\t" + : + : [src] "m" (*src) + : "memory" ); + do_aesni_enc (ctx); + asm volatile ("movdqu %%xmm0, %[dst]\n\t" + : [dst] "=m" (*dst) + : + : "memory" ); + aesni_cleanup (); + return 0; +} + + +void +_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + aesni_prepare (); + + asm volatile ("movdqu %[iv], %%xmm0\n\t" + : /* No output */ + : [iv] "m" (*iv) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + do_aesni_enc (ctx); + + asm volatile ("movdqu %[inbuf], %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : [inbuf] "m" (*inbuf) + : "memory" ); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm0, %[iv]\n\t" + : [iv] "=m" (*iv) + : + : "memory" ); + + aesni_cleanup (); +} + + +void +_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks, int cbc_mac) +{ + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6(); + + asm volatile ("movdqu %[iv], %%xmm5\n\t" + : /* No output */ + : [iv] "m" (*iv) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + asm volatile ("movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : /* No output */ + : [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("movdqa %%xmm0, %%xmm5\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + if (!cbc_mac) + outbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm5, %[iv]\n\t" + : [iv] "=m" (*iv) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +void +_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *ctr, + size_t nblocks) +{ + static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6(); + + asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */ + "movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */ + : /* No output */ + : [mask] "m" (*be_mask), + [ctr] "m" (*ctr) + : "memory"); + + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf); + outbuf += 4*BLOCKSIZE; + inbuf += 4*BLOCKSIZE; + } + for ( ;nblocks; nblocks-- ) + { + do_aesni_ctr (ctx, ctr, outbuf, inbuf); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +unsigned int +_gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, + const unsigned char *src) +{ + aesni_prepare (); + asm volatile ("movdqu %[src], %%xmm0\n\t" + : + : [src] "m" (*src) + : "memory" ); + do_aesni_dec (ctx); + asm volatile ("movdqu %%xmm0, %[dst]\n\t" + : [dst] "=m" (*dst) + : + : "memory" ); + aesni_cleanup (); + return 0; +} + + +void +_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + 
size_t nblocks) +{ + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6(); + + asm volatile ("movdqu %[iv], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*iv) + : "memory" ); + + /* CFB decryption can be parallelized */ + for ( ;nblocks >= 4; nblocks -= 4) + { + asm volatile + ("movdqu %%xmm6, %%xmm1\n\t" /* load input blocks */ + "movdqu 0*16(%[inbuf]), %%xmm2\n\t" + "movdqu 1*16(%[inbuf]), %%xmm3\n\t" + "movdqu 2*16(%[inbuf]), %%xmm4\n\t" + + "movdqu 3*16(%[inbuf]), %%xmm6\n\t" /* update IV */ + : /* No output */ + : [inbuf] "r" (inbuf) + : "memory"); + + do_aesni_enc_vec4 (ctx); + + asm volatile + ("movdqu 0*16(%[inbuf]), %%xmm5\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm1, 0*16(%[outbuf])\n\t" + + "movdqu 1*16(%[inbuf]), %%xmm5\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm2, 1*16(%[outbuf])\n\t" + + "movdqu 2*16(%[inbuf]), %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm3, 2*16(%[outbuf])\n\t" + + "movdqu 3*16(%[inbuf]), %%xmm5\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm4, 3*16(%[outbuf])\n\t" + + : /* No output */ + : [inbuf] "r" (inbuf), + [outbuf] "r" (outbuf) + : "memory"); + + outbuf += 4*BLOCKSIZE; + inbuf += 4*BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm6, %%xmm0\n\t" ::: "cc"); + + for ( ;nblocks; nblocks-- ) + { + do_aesni_enc (ctx); + + asm volatile ("movdqa %%xmm0, %%xmm6\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "movdqu %%xmm6, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : [inbuf] "m" (*inbuf) + : "memory" ); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm0, %[iv]\n\t" + : [iv] "=m" (*iv) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +void +_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6(); + + asm volatile + ("movdqu %[iv], %%xmm5\n\t" /* use xmm5 as fast IV storage */ + : /* No output */ + : [iv] "m" (*iv) + : "memory"); + + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + asm volatile + ("movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */ + "movdqu 1*16(%[inbuf]), %%xmm2\n\t" + "movdqu 2*16(%[inbuf]), %%xmm3\n\t" + "movdqu 3*16(%[inbuf]), %%xmm4\n\t" + : /* No output */ + : [inbuf] "r" (inbuf) + : "memory"); + + do_aesni_dec_vec4 (ctx); + + asm volatile + ("pxor %%xmm5, %%xmm1\n\t" /* xor IV with output */ + "movdqu 0*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ + "movdqu %%xmm1, 0*16(%[outbuf])\n\t" + + "pxor %%xmm5, %%xmm2\n\t" /* xor IV with output */ + "movdqu 1*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ + "movdqu %%xmm2, 1*16(%[outbuf])\n\t" + + "pxor %%xmm5, %%xmm3\n\t" /* xor IV with output */ + "movdqu 2*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ + "movdqu %%xmm3, 2*16(%[outbuf])\n\t" + + "pxor %%xmm5, %%xmm4\n\t" /* xor IV with output */ + "movdqu 3*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ + "movdqu %%xmm4, 3*16(%[outbuf])\n\t" + + : /* No output */ + : [inbuf] "r" (inbuf), + [outbuf] "r" (outbuf) + : "memory"); + + outbuf += 4*BLOCKSIZE; + inbuf += 4*BLOCKSIZE; + } + + for ( ;nblocks; nblocks-- ) + { + asm volatile + ("movdqu %[inbuf], %%xmm0\n\t" + "movdqa %%xmm0, %%xmm2\n\t" /* use xmm2 as savebuf */ + : /* No output */ + : [inbuf] "m" (*inbuf) + : "memory"); + + /* uses only xmm0 and xmm1 */ + do_aesni_dec (ctx); + + asm volatile + ("pxor %%xmm5, %%xmm0\n\t" /* xor IV with output */ + "movdqu %%xmm0, %[outbuf]\n\t" + "movdqu %%xmm2, %%xmm5\n\t" /* store savebuf as 
new IV */ + : [outbuf] "=m" (*outbuf) + : + : "memory"); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile + ("movdqu %%xmm5, %[iv]\n\t" /* store IV */ + : /* No output */ + : [iv] "m" (*iv) + : "memory"); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +static void +aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + u64 n = c->u_mode.ocb.data_nblocks; + const unsigned char *l; + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6 (); + + /* Preload Offset and Checksum */ + asm volatile ("movdqu %[iv], %%xmm5\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_iv.iv), + [ctr] "m" (*c->u_ctr.ctr) + : "memory" ); + + + for ( ;nblocks && n % 4; nblocks-- ) + { + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("pxor %%xmm5, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + n += 4; + l = ocb_get_l(c, n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + asm volatile ("movdqu %[l0], %%xmm0\n\t" + "movdqu %[inbuf0], %%xmm1\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm1, %%xmm6\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm5, %[outbuf0]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + : [l0] "m" (*c->u_mode.ocb.L[0]), + [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l1], %%xmm0\n\t" + "movdqu %[inbuf1], %%xmm2\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm2, %%xmm6\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm5, %[outbuf1]\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [l1] "m" (*c->u_mode.ocb.L[1]), + [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l2], %%xmm0\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm3, %%xmm6\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm5, %[outbuf2]\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) + : [l2] "m" (*c->u_mode.ocb.L[0]), + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l3], %%xmm0\n\t" + "movdqu %[inbuf3], %%xmm4\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm4\n\t" + : + : [l3] "m" (*l), + [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) + : "memory" ); + + do_aesni_enc_vec4 (ctx); + + asm volatile ("movdqu %[outbuf0],%%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %%xmm1, %[outbuf0]\n\t" + "movdqu %[outbuf1],%%xmm0\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "movdqu %%xmm2, %[outbuf1]\n\t" + "movdqu %[outbuf2],%%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + "movdqu %%xmm3, %[outbuf2]\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm4, %[outbuf3]\n\t" + : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)), + [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)), + [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)), + [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) + : 
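+ /* No input */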
+ : "memory" ); + + outbuf += 4*BLOCKSIZE; + inbuf += 4*BLOCKSIZE; + } + + for ( ;nblocks; nblocks-- ) + { + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("pxor %%xmm5, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + c->u_mode.ocb.data_nblocks = n; + asm volatile ("movdqu %%xmm5, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_iv.iv), + [ctr] "=m" (*c->u_ctr.ctr) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +static void +aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + u64 n = c->u_mode.ocb.data_nblocks; + const unsigned char *l; + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6 (); + + /* Preload Offset and Checksum */ + asm volatile ("movdqu %[iv], %%xmm5\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_iv.iv), + [ctr] "m" (*c->u_ctr.ctr) + : "memory" ); + + for ( ;nblocks && n % 4; nblocks-- ) + { + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_dec (ctx); + + asm volatile ("pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + n += 4; + l = ocb_get_l(c, n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + asm volatile ("movdqu %[l0], %%xmm0\n\t" + "movdqu %[inbuf0], %%xmm1\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm5, %[outbuf0]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + : [l0] "m" (*c->u_mode.ocb.L[0]), + [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l1], %%xmm0\n\t" + "movdqu %[inbuf1], %%xmm2\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm5, %[outbuf1]\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [l1] "m" (*c->u_mode.ocb.L[1]), + [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l2], %%xmm0\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm5, %[outbuf2]\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) + : [l2] "m" (*c->u_mode.ocb.L[0]), + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l3], %%xmm0\n\t" + "movdqu %[inbuf3], %%xmm4\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm4\n\t" + : + : [l3] "m" (*l), + [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) + : "memory" ); + + do_aesni_dec_vec4 (ctx); + 
+ asm volatile ("movdqu %[outbuf0],%%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %%xmm1, %[outbuf0]\n\t" + "movdqu %[outbuf1],%%xmm0\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "movdqu %%xmm2, %[outbuf1]\n\t" + "movdqu %[outbuf2],%%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + "movdqu %%xmm3, %[outbuf2]\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm4, %[outbuf3]\n\t" + "pxor %%xmm1, %%xmm6\n\t" + "pxor %%xmm2, %%xmm6\n\t" + "pxor %%xmm3, %%xmm6\n\t" + "pxor %%xmm4, %%xmm6\n\t" + : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)), + [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)), + [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)), + [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) + : + : "memory" ); + + outbuf += 4*BLOCKSIZE; + inbuf += 4*BLOCKSIZE; + } + + for ( ;nblocks; nblocks-- ) + { + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_dec (ctx); + + asm volatile ("pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + c->u_mode.ocb.data_nblocks = n; + asm volatile ("movdqu %%xmm5, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_iv.iv), + [ctr] "=m" (*c->u_ctr.ctr) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +void +_gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + if (encrypt) + aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); + else + aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); +} + + +void +_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + u64 n = c->u_mode.ocb.aad_nblocks; + const unsigned char *l; + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6 (); + + /* Preload Offset and Sum */ + asm volatile ("movdqu %[iv], %%xmm5\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_mode.ocb.aad_offset), + [ctr] "m" (*c->u_mode.ocb.aad_sum) + : "memory" ); + + for ( ;nblocks && n % 4; nblocks-- ) + { + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[abuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [abuf] "m" (*abuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("pxor %%xmm0, %%xmm6\n\t" + : + : + : "memory" ); + + abuf += BLOCKSIZE; + } + + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + n += 4; + l = ocb_get_l(c, n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + asm volatile ("movdqu %[l0], %%xmm0\n\t" + "movdqu %[abuf0], %%xmm1\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm1\n\t" + : + : [l0] "m" (*c->u_mode.ocb.L[0]), + [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l1], %%xmm0\n\t" + "movdqu %[abuf1], %%xmm2\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm2\n\t" + : + : [l1] "m" (*c->u_mode.ocb.L[1]), + [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile 
("movdqu %[l2], %%xmm0\n\t" + "movdqu %[abuf2], %%xmm3\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + : + : [l2] "m" (*c->u_mode.ocb.L[0]), + [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l3], %%xmm0\n\t" + "movdqu %[abuf3], %%xmm4\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm4\n\t" + : + : [l3] "m" (*l), + [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) + : "memory" ); + + do_aesni_enc_vec4 (ctx); + + asm volatile ("pxor %%xmm1, %%xmm6\n\t" + "pxor %%xmm2, %%xmm6\n\t" + "pxor %%xmm3, %%xmm6\n\t" + "pxor %%xmm4, %%xmm6\n\t" + : + : + : "memory" ); + + abuf += 4*BLOCKSIZE; + } + + for ( ;nblocks; nblocks-- ) + { + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[abuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [abuf] "m" (*abuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("pxor %%xmm0, %%xmm6\n\t" + : + : + : "memory" ); + + abuf += BLOCKSIZE; + } + + c->u_mode.ocb.aad_nblocks = n; + asm volatile ("movdqu %%xmm5, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_mode.ocb.aad_offset), + [ctr] "=m" (*c->u_mode.ocb.aad_sum) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +#endif /* USE_AESNI */ diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-amd64.S b/libotr/libgcrypt-1.8.7/cipher/rijndael-amd64.S new file mode 100644 index 0000000..b149e94 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-amd64.S @@ -0,0 +1,453 @@ +/* rinjdael-amd64.S - AMD64 assembly implementation of AES cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_AES) + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +.text + +/* table macros */ +#define E0 (0) +#define Es0 (1) +#define Esize 4 +#define Essize 4 + +#define D0 (0) +#define Ds0 (4 * 256) +#define Dsize 4 +#define Dssize 1 + +/* register macros */ +#define CTX %rdi +#define RTAB %r12 + +#define RA %rax +#define RB %rbx +#define RC %rcx +#define RD %rdx + +#define RAd %eax +#define RBd %ebx +#define RCd %ecx +#define RDd %edx + +#define RAbl %al +#define RBbl %bl +#define RCbl %cl +#define RDbl %dl + +#define RAbh %ah +#define RBbh %bh +#define RCbh %ch +#define RDbh %dh + +#define RNA %r8 +#define RNB %r9 +#define RNC %r10 +#define RND %r11 + +#define RNAd %r8d +#define RNBd %r9d +#define RNCd %r10d +#define RNDd %r11d + +#define RT0 %rbp +#define RT1 %rsi + +#define RT0d %ebp +#define RT1d %esi + +/* helper macros */ +#define do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \ + movzbl source ## bl, t0 ## d; \ + movzbl source ## bh, t1 ## d; \ + op ## l table1(RTAB,t0,tablemul), dest1 ## d; \ + op ## l table2(RTAB,t1,tablemul), dest2 ## d; + +#define do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \ + movzbl source ## bl, t0 ## d; \ + movzbl source ## bh, t1 ## d; \ + shrl $(shf), source ## d; \ + op ## l table1(RTAB,t0,tablemul), dest1 ## d; \ + op ## l table2(RTAB,t1,tablemul), dest2 ## d; + +#define last_do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \ + movzbl source ## bl, t0 ## d; \ + movzbl source ## bh, t1 ## d; \ + movzbl table1(RTAB,t0,tablemul), t0 ## d; \ + movzbl table2(RTAB,t1,tablemul), t1 ## d; \ + op ## l t0 ## d, dest1 ## d; \ + op ## l t1 ## d, dest2 ## d; + +#define last_do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \ + movzbl source ## bl, t0 ## d; \ + movzbl source ## bh, t1 ## d; \ + shrl $(shf), source ## d; \ + movzbl table1(RTAB,t0,tablemul), t0 ## d; \ + movzbl table2(RTAB,t1,tablemul), t1 ## d; \ + op ## l t0 ## d, dest1 ## d; \ + op ## l t1 ## d, dest2 ## d; + +/*********************************************************************** + * AMD64 assembly implementation of the AES cipher + ***********************************************************************/ +#define addroundkey(round, ra, rb, rc, rd) \ + xorl (((round) * 16) + 0 * 4)(CTX), ra ## d; \ + xorl (((round) * 16) + 1 * 4)(CTX), rb ## d; \ + xorl (((round) * 16) + 2 * 4)(CTX), rc ## d; \ + xorl (((round) * 16) + 3 * 4)(CTX), rd ## d; + +#define do_encround(next_r) \ + do16bit_shr(16, mov, RA, Esize, E0, RNA, E0, RND, RT0, RT1); \ + do16bit( mov, RA, Esize, E0, RNC, E0, RNB, RT0, RT1); \ + movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \ + roll $8, RNDd; \ + xorl RNAd, RAd; \ + roll $8, RNCd; \ + roll $8, RNBd; \ + roll $8, RAd; \ + \ + do16bit_shr(16, xor, RD, Esize, E0, RND, E0, RNC, RT0, RT1); \ + do16bit( xor, RD, Esize, E0, RNB, E0, RA, RT0, RT1); \ + movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \ + roll $8, RNCd; \ + xorl RNDd, RDd; \ + roll $8, RNBd; \ + roll $8, RAd; \ + roll $8, RDd; \ + \ + do16bit_shr(16, xor, RC, Esize, E0, RNC, E0, RNB, RT0, RT1); \ + do16bit( xor, RC, Esize, E0, RA, E0, RD, RT0, RT1); \ + movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \ + roll $8, RNBd; \ + xorl RNCd, RCd; \ + roll $8, RAd; \ + roll $8, RDd; \ + roll $8, RCd; \ + \ + do16bit_shr(16, xor, RB, Esize, E0, RNB, E0, RA, RT0, RT1); \ + do16bit( xor, RB, Esize, E0, RD, E0, RC, RT0, RT1); \ + movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \ + roll $8, RAd; \ + xorl RNBd, RBd; \ + roll $16, RDd; \ + roll $24, RCd; + +#define do_lastencround(next_r) \ + do16bit_shr(16, 
movzb, RA, Essize, Es0, RNA, Es0, RND, RT0, RT1); \ + do16bit( movzb, RA, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \ + movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \ + roll $8, RNDd; \ + xorl RNAd, RAd; \ + roll $8, RNCd; \ + roll $8, RNBd; \ + roll $8, RAd; \ + \ + last_do16bit_shr(16, xor, RD, Essize, Es0, RND, Es0, RNC, RT0, RT1); \ + last_do16bit( xor, RD, Essize, Es0, RNB, Es0, RA, RT0, RT1); \ + movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \ + roll $8, RNCd; \ + xorl RNDd, RDd; \ + roll $8, RNBd; \ + roll $8, RAd; \ + roll $8, RDd; \ + \ + last_do16bit_shr(16, xor, RC, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \ + last_do16bit( xor, RC, Essize, Es0, RA, Es0, RD, RT0, RT1); \ + movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \ + roll $8, RNBd; \ + xorl RNCd, RCd; \ + roll $8, RAd; \ + roll $8, RDd; \ + roll $8, RCd; \ + \ + last_do16bit_shr(16, xor, RB, Essize, Es0, RNB, Es0, RA, RT0, RT1); \ + last_do16bit( xor, RB, Essize, Es0, RD, Es0, RC, RT0, RT1); \ + movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \ + roll $8, RAd; \ + xorl RNBd, RBd; \ + roll $16, RDd; \ + roll $24, RCd; + +#define firstencround(round) \ + addroundkey(round, RA, RB, RC, RD); \ + do_encround((round) + 1); + +#define encround(round) \ + do_encround((round) + 1); + +#define lastencround(round) \ + do_lastencround((round) + 1); + +.align 8 +.globl _gcry_aes_amd64_encrypt_block +ELF(.type _gcry_aes_amd64_encrypt_block,@function;) + +_gcry_aes_amd64_encrypt_block: + /* input: + * %rdi: keysched, CTX + * %rsi: dst + * %rdx: src + * %ecx: number of rounds.. 10, 12 or 14 + * %r8: encryption tables + */ + subq $(5 * 8), %rsp; + movq %rsi, (0 * 8)(%rsp); + movl %ecx, (1 * 8)(%rsp); + movq %rbp, (2 * 8)(%rsp); + movq %rbx, (3 * 8)(%rsp); + movq %r12, (4 * 8)(%rsp); + + leaq (%r8), RTAB; + + /* read input block */ + movl 0 * 4(%rdx), RAd; + movl 1 * 4(%rdx), RBd; + movl 2 * 4(%rdx), RCd; + movl 3 * 4(%rdx), RDd; + + firstencround(0); + encround(1); + encround(2); + encround(3); + encround(4); + encround(5); + encround(6); + encround(7); + encround(8); + cmpl $12, (1 * 8)(%rsp); + jnb .Lenc_not_128; + lastencround(9); + +.align 4 +.Lenc_done: + /* write output block */ + movq (0 * 8)(%rsp), %rsi; + movl RAd, 0 * 4(%rsi); + movl RBd, 1 * 4(%rsi); + movl RCd, 2 * 4(%rsi); + movl RDd, 3 * 4(%rsi); + + movq (4 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %rbp; + addq $(5 * 8), %rsp; + + movl $(6 * 8), %eax; + ret; + +.align 4 +.Lenc_not_128: + je .Lenc_192 + + encround(9); + encround(10); + encround(11); + encround(12); + lastencround(13); + + jmp .Lenc_done; + +.align 4 +.Lenc_192: + encround(9); + encround(10); + lastencround(11); + + jmp .Lenc_done; +ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;) + +#define do_decround(next_r) \ + do16bit_shr(16, mov, RA, Dsize, D0, RNA, D0, RNB, RT0, RT1); \ + do16bit( mov, RA, Dsize, D0, RNC, D0, RND, RT0, RT1); \ + movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \ + roll $8, RNBd; \ + xorl RNAd, RAd; \ + roll $8, RNCd; \ + roll $8, RNDd; \ + roll $8, RAd; \ + \ + do16bit_shr(16, xor, RB, Dsize, D0, RNB, D0, RNC, RT0, RT1); \ + do16bit( xor, RB, Dsize, D0, RND, D0, RA, RT0, RT1); \ + movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \ + roll $8, RNCd; \ + xorl RNBd, RBd; \ + roll $8, RNDd; \ + roll $8, RAd; \ + roll $8, RBd; \ + \ + do16bit_shr(16, xor, RC, Dsize, D0, RNC, D0, RND, RT0, RT1); \ + do16bit( xor, RC, Dsize, D0, RA, D0, RB, RT0, RT1); \ + movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \ + roll $8, RNDd; \ + xorl RNCd, RCd; \ + roll $8, RAd; \ + roll $8, RBd; \ + roll 
$8, RCd; \ + \ + do16bit_shr(16, xor, RD, Dsize, D0, RND, D0, RA, RT0, RT1); \ + do16bit( xor, RD, Dsize, D0, RB, D0, RC, RT0, RT1); \ + movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \ + roll $8, RAd; \ + xorl RNDd, RDd; \ + roll $16, RBd; \ + roll $24, RCd; + +#define do_lastdecround(next_r) \ + do16bit_shr(16, movzb, RA, Dssize, Ds0, RNA, Ds0, RNB, RT0, RT1); \ + do16bit( movzb, RA, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \ + movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \ + roll $8, RNBd; \ + xorl RNAd, RAd; \ + roll $8, RNCd; \ + roll $8, RNDd; \ + roll $8, RAd; \ + \ + last_do16bit_shr(16, xor, RB, Dssize, Ds0, RNB, Ds0, RNC, RT0, RT1); \ + last_do16bit( xor, RB, Dssize, Ds0, RND, Ds0, RA, RT0, RT1); \ + movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \ + roll $8, RNCd; \ + xorl RNBd, RBd; \ + roll $8, RNDd; \ + roll $8, RAd; \ + roll $8, RBd; \ + \ + last_do16bit_shr(16, xor, RC, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \ + last_do16bit( xor, RC, Dssize, Ds0, RA, Ds0, RB, RT0, RT1); \ + movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \ + roll $8, RNDd; \ + xorl RNCd, RCd; \ + roll $8, RAd; \ + roll $8, RBd; \ + roll $8, RCd; \ + \ + last_do16bit_shr(16, xor, RD, Dssize, Ds0, RND, Ds0, RA, RT0, RT1); \ + last_do16bit( xor, RD, Dssize, Ds0, RB, Ds0, RC, RT0, RT1); \ + movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \ + roll $8, RAd; \ + xorl RNDd, RDd; \ + roll $16, RBd; \ + roll $24, RCd; + +#define firstdecround(round) \ + addroundkey((round + 1), RA, RB, RC, RD); \ + do_decround(round); + +#define decround(round) \ + do_decround(round); + +#define lastdecround(round) \ + do_lastdecround(round); + +.align 8 +.globl _gcry_aes_amd64_decrypt_block +ELF(.type _gcry_aes_amd64_decrypt_block,@function;) + +_gcry_aes_amd64_decrypt_block: + /* input: + * %rdi: keysched, CTX + * %rsi: dst + * %rdx: src + * %ecx: number of rounds.. 10, 12 or 14 + * %r8: decryption tables + */ + subq $(5 * 8), %rsp; + movq %rsi, (0 * 8)(%rsp); + movl %ecx, (1 * 8)(%rsp); + movq %rbp, (2 * 8)(%rsp); + movq %rbx, (3 * 8)(%rsp); + movq %r12, (4 * 8)(%rsp); + + leaq (%r8), RTAB; + + /* read input block */ + movl 0 * 4(%rdx), RAd; + movl 1 * 4(%rdx), RBd; + movl 2 * 4(%rdx), RCd; + movl 3 * 4(%rdx), RDd; + + cmpl $12, (1 * 8)(%rsp); + jnb .Ldec_256; + + firstdecround(9); +.align 4 +.Ldec_tail: + decround(8); + decround(7); + decround(6); + decround(5); + decround(4); + decround(3); + decround(2); + decround(1); + lastdecround(0); + + /* write output block */ + movq (0 * 8)(%rsp), %rsi; + movl RAd, 0 * 4(%rsi); + movl RBd, 1 * 4(%rsi); + movl RCd, 2 * 4(%rsi); + movl RDd, 3 * 4(%rsi); + + movq (4 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %rbp; + addq $(5 * 8), %rsp; + + movl $(6 * 8), %eax; + ret; + +.align 4 +.Ldec_256: + je .Ldec_192; + + firstdecround(13); + decround(12); + decround(11); + decround(10); + decround(9); + + jmp .Ldec_tail; + +.align 4 +.Ldec_192: + firstdecround(11); + decround(10); + decround(9); + + jmp .Ldec_tail; +ELF(.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;) + +#endif /*USE_AES*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-arm.S b/libotr/libgcrypt-1.8.7/cipher/rijndael-arm.S new file mode 100644 index 0000000..e680c81 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-arm.S @@ -0,0 +1,581 @@ +/* rijndael-arm.S - ARM assembly implementation of AES cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__ARMEL__) +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +.text + +.syntax unified +.arm + +/* register macros */ +#define CTX %r0 +#define RTAB %lr +#define RMASK %ip + +#define RA %r4 +#define RB %r5 +#define RC %r6 +#define RD %r7 + +#define RNA %r8 +#define RNB %r9 +#define RNC %r10 +#define RND %r11 + +#define RT0 %r1 +#define RT1 %r2 +#define RT2 %r3 + +/* helper macros */ +#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 0)]; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 3)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 0)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 1)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 2)]; \ + strb rtmp0, [rdst, #((offs) + 3)]; + +/*********************************************************************** + * ARM assembly implementation of the AES cipher + ***********************************************************************/ +#define preload_first_key(round, ra) \ + ldr ra, [CTX, #(((round) * 16) + 0 * 4)]; + +#define dummy(round, ra) /* nothing */ + +#define addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldm CTX, {rna, rnb, rnc, rnd}; \ + eor ra, rna; \ + eor rb, rnb; \ + eor rc, rnc; \ + preload_key(1, rna); \ + eor rd, rnd; + +#define do_encround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \ + \ + and RT0, RMASK, ra, lsl#2; \ + ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \ + and RT1, RMASK, ra, lsr#(8 - 2); \ + ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \ + and RT2, RMASK, ra, lsr#(16 - 2); \ + ldr RT0, [RTAB, RT0]; \ + and ra, RMASK, ra, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rna, rna, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rd, lsl#2; \ + ldr ra, [RTAB, ra]; \ + \ + eor rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#(8 - 2); \ + eor rnc, rnc, RT2, ror #16; \ + and RT2, RMASK, rd, lsr#(16 - 2); \ + eor rnb, rnb, ra, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rd, RMASK, rd, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnd, rnd, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rc, lsl#2; \ + ldr rd, [RTAB, rd]; \ + \ + eor rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 2); \ + eor rnb, rnb, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 2); \ + eor rna, rna, rd, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rc, RMASK, rc, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnc, rnc, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rb, lsl#2; \ + ldr rc, [RTAB, rc]; \ + \ + eor rnb, rnb, RT1, ror #24; \ + and RT1, 
RMASK, rb, lsr#(8 - 2); \ + eor rna, rna, RT2, ror #16; \ + and RT2, RMASK, rb, lsr#(16 - 2); \ + eor rnd, rnd, rc, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rb, RMASK, rb, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnb, rnb, RT0; \ + ldr RT2, [RTAB, RT2]; \ + eor rna, rna, RT1, ror #24; \ + ldr rb, [RTAB, rb]; \ + \ + eor rnd, rnd, RT2, ror #16; \ + preload_key((next_r) + 1, ra); \ + eor rnc, rnc, rb, ror #8; + +#define do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + and RT0, RMASK, ra, lsl#2; \ + and RT1, RMASK, ra, lsr#(8 - 2); \ + and RT2, RMASK, ra, lsr#(16 - 2); \ + ldrb rna, [RTAB, RT0]; \ + and ra, RMASK, ra, lsr#(24 - 2); \ + ldrb rnd, [RTAB, RT1]; \ + and RT0, RMASK, rd, lsl#2; \ + ldrb rnc, [RTAB, RT2]; \ + mov rnd, rnd, ror #24; \ + ldrb rnb, [RTAB, ra]; \ + and RT1, RMASK, rd, lsr#(8 - 2); \ + mov rnc, rnc, ror #16; \ + and RT2, RMASK, rd, lsr#(16 - 2); \ + mov rnb, rnb, ror #8; \ + ldrb RT0, [RTAB, RT0]; \ + and rd, RMASK, rd, lsr#(24 - 2); \ + ldrb RT1, [RTAB, RT1]; \ + \ + orr rnd, rnd, RT0; \ + ldrb RT2, [RTAB, RT2]; \ + and RT0, RMASK, rc, lsl#2; \ + ldrb rd, [RTAB, rd]; \ + orr rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 2); \ + orr rnb, rnb, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 2); \ + orr rna, rna, rd, ror #8; \ + ldrb RT0, [RTAB, RT0]; \ + and rc, RMASK, rc, lsr#(24 - 2); \ + ldrb RT1, [RTAB, RT1]; \ + \ + orr rnc, rnc, RT0; \ + ldrb RT2, [RTAB, RT2]; \ + and RT0, RMASK, rb, lsl#2; \ + ldrb rc, [RTAB, rc]; \ + orr rnb, rnb, RT1, ror #24; \ + and RT1, RMASK, rb, lsr#(8 - 2); \ + orr rna, rna, RT2, ror #16; \ + ldrb RT0, [RTAB, RT0]; \ + and RT2, RMASK, rb, lsr#(16 - 2); \ + ldrb RT1, [RTAB, RT1]; \ + orr rnd, rnd, rc, ror #8; \ + ldrb RT2, [RTAB, RT2]; \ + and rb, RMASK, rb, lsr#(24 - 2); \ + ldrb rb, [RTAB, rb]; \ + \ + orr rnb, rnb, RT0; \ + orr rna, rna, RT1, ror #24; \ + orr rnd, rnd, RT2, ror #16; \ + orr rnc, rnc, rb, ror #8; + +#define firstencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); \ + do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); + +#define encround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key); + +#define lastencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + add CTX, #(((round) + 1) * 16); \ + add RTAB, #1; \ + do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); + +.align 3 +.globl _gcry_aes_arm_encrypt_block +.type _gcry_aes_arm_encrypt_block,%function; + +_gcry_aes_arm_encrypt_block: + /* input: + * %r0: keysched, CTX + * %r1: dst + * %r2: src + * %r3: number of rounds.. 
10, 12 or 14 + * %st+0: encryption table + */ + push {%r4-%r11, %ip, %lr}; + + /* read input block */ + + /* test if src is unaligned */ + tst %r2, #3; + beq 1f; + + /* unaligned load */ + ldr_unaligned_le(RA, %r2, 0, RNA); + ldr_unaligned_le(RB, %r2, 4, RNB); + ldr_unaligned_le(RC, %r2, 8, RNA); + ldr_unaligned_le(RD, %r2, 12, RNB); + b 2f; +.ltorg +1: + /* aligned load */ + ldm %r2, {RA, RB, RC, RD}; +#ifndef __ARMEL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif +2: + ldr RTAB, [%sp, #40]; + sub %sp, #16; + + str %r1, [%sp, #4]; /* dst */ + mov RMASK, #0xff; + str %r3, [%sp, #8]; /* nrounds */ + mov RMASK, RMASK, lsl#2; /* byte mask */ + + firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND); + encround(1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + + ldr RT0, [%sp, #8]; /* nrounds */ + cmp RT0, #12; + bge .Lenc_not_128; + + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD); + +.Lenc_done: + ldr RT0, [%sp, #4]; /* dst */ + add %sp, #16; + + /* store output block */ + + /* test if dst is unaligned */ + tst RT0, #3; + beq 1f; + + /* unaligned store */ + str_unaligned_le(RA, RT0, 0, RNA, RNB); + str_unaligned_le(RB, RT0, 4, RNA, RNB); + str_unaligned_le(RC, RT0, 8, RNA, RNB); + str_unaligned_le(RD, RT0, 12, RNA, RNB); + b 2f; +.ltorg +1: + /* aligned store */ +#ifndef __ARMEL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif + /* write output block */ + stm RT0, {RA, RB, RC, RD}; +2: + + mov r0, #(10 * 4); + pop {%r4-%r11, %ip, %pc}; + +.ltorg +.Lenc_not_128: + beq .Lenc_192 + + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(13, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + b .Lenc_done; + +.ltorg +.Lenc_192: + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + b .Lenc_done; +.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; + +#define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ + ldr rnb, [CTX, #(((round) * 16) + 1 * 4)]; \ + eor ra, rna; \ + ldr rnc, [CTX, #(((round) * 16) + 2 * 4)]; \ + eor rb, rnb; \ + ldr rnd, [CTX, #(((round) * 16) + 3 * 4)]; \ + eor rc, rnc; \ + preload_first_key((round) - 1, rna); \ + eor rd, rnd; + +#define do_decround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \ + \ + and RT0, RMASK, ra, lsl#2; \ + ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \ + and RT1, RMASK, ra, lsr#(8 - 2); \ + ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \ + and RT2, RMASK, ra, lsr#(16 - 2); \ + ldr RT0, [RTAB, RT0]; \ + and ra, RMASK, 
ra, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rna, rna, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rb, lsl#2; \ + ldr ra, [RTAB, ra]; \ + \ + eor rnb, rnb, RT1, ror #24; \ + and RT1, RMASK, rb, lsr#(8 - 2); \ + eor rnc, rnc, RT2, ror #16; \ + and RT2, RMASK, rb, lsr#(16 - 2); \ + eor rnd, rnd, ra, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rb, RMASK, rb, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnb, rnb, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rc, lsl#2; \ + ldr rb, [RTAB, rb]; \ + \ + eor rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 2); \ + eor rnd, rnd, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 2); \ + eor rna, rna, rb, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rc, RMASK, rc, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnc, rnc, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rd, lsl#2; \ + ldr rc, [RTAB, rc]; \ + \ + eor rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#(8 - 2); \ + eor rna, rna, RT2, ror #16; \ + and RT2, RMASK, rd, lsr#(16 - 2); \ + eor rnb, rnb, rc, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rd, RMASK, rd, lsr#(24 - 2); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnd, rnd, RT0; \ + ldr RT2, [RTAB, RT2]; \ + eor rna, rna, RT1, ror #24; \ + ldr rd, [RTAB, rd]; \ + \ + eor rnb, rnb, RT2, ror #16; \ + preload_key((next_r) - 1, ra); \ + eor rnc, rnc, rd, ror #8; + +#define do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + and RT0, RMASK, ra; \ + and RT1, RMASK, ra, lsr#8; \ + and RT2, RMASK, ra, lsr#16; \ + ldrb rna, [RTAB, RT0]; \ + mov ra, ra, lsr#24; \ + ldrb rnb, [RTAB, RT1]; \ + and RT0, RMASK, rb; \ + ldrb rnc, [RTAB, RT2]; \ + mov rnb, rnb, ror #24; \ + ldrb rnd, [RTAB, ra]; \ + and RT1, RMASK, rb, lsr#8; \ + mov rnc, rnc, ror #16; \ + and RT2, RMASK, rb, lsr#16; \ + mov rnd, rnd, ror #8; \ + ldrb RT0, [RTAB, RT0]; \ + mov rb, rb, lsr#24; \ + ldrb RT1, [RTAB, RT1]; \ + \ + orr rnb, rnb, RT0; \ + ldrb RT2, [RTAB, RT2]; \ + and RT0, RMASK, rc; \ + ldrb rb, [RTAB, rb]; \ + orr rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#8; \ + orr rnd, rnd, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#16; \ + orr rna, rna, rb, ror #8; \ + ldrb RT0, [RTAB, RT0]; \ + mov rc, rc, lsr#24; \ + ldrb RT1, [RTAB, RT1]; \ + \ + orr rnc, rnc, RT0; \ + ldrb RT2, [RTAB, RT2]; \ + and RT0, RMASK, rd; \ + ldrb rc, [RTAB, rc]; \ + orr rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#8; \ + orr rna, rna, RT2, ror #16; \ + ldrb RT0, [RTAB, RT0]; \ + and RT2, RMASK, rd, lsr#16; \ + ldrb RT1, [RTAB, RT1]; \ + orr rnb, rnb, rc, ror #8; \ + ldrb RT2, [RTAB, RT2]; \ + mov rd, rd, lsr#24; \ + ldrb rd, [RTAB, rd]; \ + \ + orr rnd, rnd, RT0; \ + orr rna, rna, RT1, ror #24; \ + orr rnb, rnb, RT2, ror #16; \ + orr rnc, rnc, rd, ror #8; + +#define firstdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + addroundkey_dec(((round) + 1), ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); + +#define decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key); + +#define set_last_round_rmask(_, __) \ + mov RMASK, #0xff; + +#define lastdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + add RTAB, #(4 * 256); \ + do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); + +.align 3 +.globl _gcry_aes_arm_decrypt_block +.type _gcry_aes_arm_decrypt_block,%function; + +_gcry_aes_arm_decrypt_block: + /* input: + * %r0: keysched, CTX + * %r1: dst + 
* %r2: src + * %r3: number of rounds.. 10, 12 or 14 + * %st+0: decryption table + */ + push {%r4-%r11, %ip, %lr}; + + /* read input block */ + + /* test if src is unaligned */ + tst %r2, #3; + beq 1f; + + /* unaligned load */ + ldr_unaligned_le(RA, %r2, 0, RNA); + ldr_unaligned_le(RB, %r2, 4, RNB); + ldr_unaligned_le(RC, %r2, 8, RNA); + ldr_unaligned_le(RD, %r2, 12, RNB); + b 2f; +.ltorg +1: + /* aligned load */ + ldm %r2, {RA, RB, RC, RD}; +#ifndef __ARMEL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif +2: + ldr RTAB, [%sp, #40]; + sub %sp, #16; + + mov RMASK, #0xff; + str %r1, [%sp, #4]; /* dst */ + mov RMASK, RMASK, lsl#2; /* byte mask */ + + cmp %r3, #12; + bge .Ldec_256; + + firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND); +.Ldec_tail: + decround(8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask); + lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + ldr RT0, [%sp, #4]; /* dst */ + add %sp, #16; + + /* store output block */ + + /* test if dst is unaligned */ + tst RT0, #3; + beq 1f; + + /* unaligned store */ + str_unaligned_le(RA, RT0, 0, RNA, RNB); + str_unaligned_le(RB, RT0, 4, RNA, RNB); + str_unaligned_le(RC, RT0, 8, RNA, RNB); + str_unaligned_le(RD, RT0, 12, RNA, RNB); + b 2f; +.ltorg +1: + /* aligned store */ +#ifndef __ARMEL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif + /* write output block */ + stm RT0, {RA, RB, RC, RD}; +2: + mov r0, #(10 * 4); + pop {%r4-%r11, %ip, %pc}; + +.ltorg +.Ldec_256: + beq .Ldec_192; + + firstdecround(13, RA, RB, RC, RD, RNA, RNB, RNC, RND); + decround(12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + + b .Ldec_tail; + +.ltorg +.Ldec_192: + firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND); + decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + + b .Ldec_tail; +.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; + +#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/ +#endif /*__ARMEL__ */ diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-aarch32-ce.S b/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-aarch32-ce.S new file mode 100644 index 0000000..5c8fa3c --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-aarch32-ce.S @@ -0,0 +1,1556 @@ +/* rijndael-armv8-aarch32-ce.S - ARMv8/CE accelerated AES + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) + +.syntax unified +.arch armv8-a +.fpu crypto-neon-fp-armv8 +.arm + +.text + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +/* AES macros */ + +#define aes_preload_keys(keysched, rekeysched) \ + vldmia keysched!, {q5-q7}; \ + mov rekeysched, keysched; \ + vldmialo keysched!, {q8-q15}; /* 128-bit */ \ + addeq keysched, #(2*16); \ + vldmiaeq keysched!, {q10-q15}; /* 192-bit */ \ + addhi keysched, #(4*16); \ + vldmiahi keysched!, {q12-q15}; /* 256-bit */ \ + +#define do_aes_one128(ed, mcimc, qo, qb) \ + aes##ed.8 qb, q5; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q6; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q7; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q8; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q9; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q10; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q11; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q12; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q13; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q14; \ + veor qo, qb, q15; + +#define do_aes_one128re(ed, mcimc, qo, qb, keysched, rekeysched) \ + vldm rekeysched, {q8-q9}; \ + do_aes_one128(ed, mcimc, qo, qb); + +#define do_aes_one192(ed, mcimc, qo, qb, keysched, rekeysched) \ + vldm rekeysched!, {q8}; \ + aes##ed.8 qb, q5; \ + aes##mcimc.8 qb, qb; \ + vldm rekeysched, {q9}; \ + aes##ed.8 qb, q6; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q7; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q8; \ + aes##mcimc.8 qb, qb; \ + vldmia keysched!, {q8}; \ + aes##ed.8 qb, q9; \ + aes##mcimc.8 qb, qb; \ + sub rekeysched, #(1*16); \ + aes##ed.8 qb, q10; \ + aes##mcimc.8 qb, qb; \ + vldm keysched, {q9}; \ + aes##ed.8 qb, q11; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q12; \ + aes##mcimc.8 qb, qb; \ + sub keysched, #16; \ + aes##ed.8 qb, q13; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q14; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q15; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q8; \ + veor qo, qb, q9; \ + +#define do_aes_one256(ed, mcimc, qo, qb, keysched, rekeysched) \ + vldmia rekeysched!, {q8}; \ + aes##ed.8 qb, q5; \ + aes##mcimc.8 qb, qb; \ + vldmia rekeysched!, {q9}; \ + aes##ed.8 qb, q6; \ + aes##mcimc.8 qb, qb; \ + vldmia rekeysched!, {q10}; \ + aes##ed.8 qb, q7; \ + aes##mcimc.8 qb, qb; \ + vldm rekeysched, {q11}; \ + aes##ed.8 qb, q8; \ + aes##mcimc.8 qb, qb; \ + vldmia keysched!, {q8}; \ + aes##ed.8 qb, q9; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q10; \ + aes##mcimc.8 qb, qb; \ + vldmia keysched!, {q9}; \ + aes##ed.8 qb, q11; \ + aes##mcimc.8 qb, qb; \ + sub rekeysched, #(3*16); \ + aes##ed.8 qb, q12; \ + aes##mcimc.8 qb, qb; \ + vldmia keysched!, {q10}; \ + aes##ed.8 qb, q13; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q14; \ + aes##mcimc.8 qb, qb; \ + vldm keysched, 
{q11}; \ + aes##ed.8 qb, q15; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q8; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q9; \ + aes##mcimc.8 qb, qb; \ + aes##ed.8 qb, q10; \ + veor qo, qb, q11; \ + sub keysched, #(3*16); \ + +#define aes_round_4(ed, mcimc, b0, b1, b2, b3, key) \ + aes##ed.8 b0, key; \ + aes##mcimc.8 b0, b0; \ + aes##ed.8 b1, key; \ + aes##mcimc.8 b1, b1; \ + aes##ed.8 b2, key; \ + aes##mcimc.8 b2, b2; \ + aes##ed.8 b3, key; \ + aes##mcimc.8 b3, b3; + +#define do_aes_4_128(ed, mcimc, b0, b1, b2, b3) \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \ + aes##ed.8 b0, q14; \ + veor b0, b0, q15; \ + aes##ed.8 b1, q14; \ + veor b1, b1, q15; \ + aes##ed.8 b2, q14; \ + veor b2, b2, q15; \ + aes##ed.8 b3, q14; \ + veor b3, b3, q15; + +#define do_aes_4_128re(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \ + vldm rekeysched, {q8-q9}; \ + do_aes_4_128(ed, mcimc, b0, b1, b2, b3); + +#define do_aes_4_192(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \ + vldm rekeysched!, {q8}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \ + vldm rekeysched, {q9}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \ + vldmia keysched!, {q8}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \ + sub rekeysched, #(1*16); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \ + vldm keysched, {q9}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \ + sub keysched, #16; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q14); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q15); \ + aes##ed.8 b0, q8; \ + veor b0, b0, q9; \ + aes##ed.8 b1, q8; \ + veor b1, b1, q9; \ + aes##ed.8 b2, q8; \ + veor b2, b2, q9; \ + aes##ed.8 b3, q8; \ + veor b3, b3, q9; + +#define do_aes_4_256(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \ + vldmia rekeysched!, {q8}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \ + vldmia rekeysched!, {q9}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \ + vldmia rekeysched!, {q10}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \ + vldm rekeysched, {q11}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \ + vldmia keysched!, {q8}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \ + vldmia keysched!, {q9}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \ + sub rekeysched, #(3*16); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \ + vldmia keysched!, {q10}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q14); \ + vldm keysched, {q11}; \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q15); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \ + sub keysched, #(3*16); \ + aes##ed.8 b0, q10; \ + veor b0, b0, q11; \ + aes##ed.8 b1, q10; \ + veor b1, b1, q11; \ + aes##ed.8 b2, q10; \ + veor b2, b2, q11; \ + aes##ed.8 b3, q10; \ + veor b3, b3, q11; + + +/* Other functional macros */ + +#define CLEAR_REG(reg) veor reg, reg; + + +/* + * unsigned int _gcry_aes_enc_armv8_ce(void *keysched, byte *dst, + * 
const byte *src, + * unsigned int nrounds); + */ +.align 3 +.globl _gcry_aes_enc_armv8_ce +.type _gcry_aes_enc_armv8_ce,%function; +_gcry_aes_enc_armv8_ce: + /* input: + * r0: keysched + * r1: dst + * r2: src + * r3: nrounds + */ + + vldmia r0!, {q1-q3} /* load 3 round keys */ + + cmp r3, #12 + + vld1.8 {q0}, [r2] + + bhi .Lenc1_256 + beq .Lenc1_192 + +.Lenc1_128: + +.Lenc1_tail: + vldmia r0, {q8-q15} /* load 8 round keys */ + + aese.8 q0, q1 + aesmc.8 q0, q0 + CLEAR_REG(q1) + + aese.8 q0, q2 + aesmc.8 q0, q0 + CLEAR_REG(q2) + + aese.8 q0, q3 + aesmc.8 q0, q0 + CLEAR_REG(q3) + + aese.8 q0, q8 + aesmc.8 q0, q0 + CLEAR_REG(q8) + + aese.8 q0, q9 + aesmc.8 q0, q0 + CLEAR_REG(q9) + + aese.8 q0, q10 + aesmc.8 q0, q0 + CLEAR_REG(q10) + + aese.8 q0, q11 + aesmc.8 q0, q0 + CLEAR_REG(q11) + + aese.8 q0, q12 + aesmc.8 q0, q0 + CLEAR_REG(q12) + + aese.8 q0, q13 + aesmc.8 q0, q0 + CLEAR_REG(q13) + + aese.8 q0, q14 + veor q0, q15 + CLEAR_REG(q14) + CLEAR_REG(q15) + + vst1.8 {q0}, [r1] + CLEAR_REG(q0) + + mov r0, #0 + bx lr + +.Lenc1_192: + aese.8 q0, q1 + aesmc.8 q0, q0 + vmov q1, q3 + + aese.8 q0, q2 + aesmc.8 q0, q0 + vldm r0!, {q2-q3} /* load 3 round keys */ + + b .Lenc1_tail + +.Lenc1_256: + vldm r0!, {q15} /* load 1 round key */ + aese.8 q0, q1 + aesmc.8 q0, q0 + + aese.8 q0, q2 + aesmc.8 q0, q0 + + aese.8 q0, q3 + aesmc.8 q0, q0 + vldm r0!, {q1-q3} /* load 3 round keys */ + + aese.8 q0, q15 + aesmc.8 q0, q0 + + b .Lenc1_tail +.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; + + +/* + * unsigned int _gcry_aes_dec_armv8_ce(void *keysched, byte *dst, + * const byte *src, + * unsigned int nrounds); + */ +.align 3 +.globl _gcry_aes_dec_armv8_ce +.type _gcry_aes_dec_armv8_ce,%function; +_gcry_aes_dec_armv8_ce: + /* input: + * r0: keysched + * r1: dst + * r2: src + * r3: nrounds + */ + + vldmia r0!, {q1-q3} /* load 3 round keys */ + + cmp r3, #12 + + vld1.8 {q0}, [r2] + + bhi .Ldec1_256 + beq .Ldec1_192 + +.Ldec1_128: + +.Ldec1_tail: + vldmia r0, {q8-q15} /* load 8 round keys */ + + aesd.8 q0, q1 + aesimc.8 q0, q0 + CLEAR_REG(q1) + + aesd.8 q0, q2 + aesimc.8 q0, q0 + CLEAR_REG(q2) + + aesd.8 q0, q3 + aesimc.8 q0, q0 + CLEAR_REG(q3) + + aesd.8 q0, q8 + aesimc.8 q0, q0 + CLEAR_REG(q8) + + aesd.8 q0, q9 + aesimc.8 q0, q0 + CLEAR_REG(q9) + + aesd.8 q0, q10 + aesimc.8 q0, q0 + CLEAR_REG(q10) + + aesd.8 q0, q11 + aesimc.8 q0, q0 + CLEAR_REG(q11) + + aesd.8 q0, q12 + aesimc.8 q0, q0 + CLEAR_REG(q12) + + aesd.8 q0, q13 + aesimc.8 q0, q0 + CLEAR_REG(q13) + + aesd.8 q0, q14 + veor q0, q15 + CLEAR_REG(q14) + CLEAR_REG(q15) + + vst1.8 {q0}, [r1] + CLEAR_REG(q0) + + mov r0, #0 + bx lr + +.Ldec1_192: + aesd.8 q0, q1 + aesimc.8 q0, q0 + vmov q1, q3 + + aesd.8 q0, q2 + aesimc.8 q0, q0 + vldm r0!, {q2-q3} /* load 3 round keys */ + + b .Ldec1_tail + +.Ldec1_256: + vldm r0!, {q15} /* load 1 round key */ + aesd.8 q0, q1 + aesimc.8 q0, q0 + + aesd.8 q0, q2 + aesimc.8 q0, q0 + + aesd.8 q0, q3 + aesimc.8 q0, q0 + vldm r0!, {q1-q3} /* load 3 round keys */ + + aesd.8 q0, q15 + aesimc.8 q0, q0 + + b .Ldec1_tail +.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; + + +/* + * void _gcry_aes_cbc_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, size_t nblocks, + * int cbc_mac, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_cbc_enc_armv8_ce +.type _gcry_aes_cbc_enc_armv8_ce,%function; +_gcry_aes_cbc_enc_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * %st+0: nblocks => r4 + * %st+4: cbc_mac => r5 + * %st+8: 
nrounds => r6 + */ + + push {r4-r6,lr} /* 4*4 = 16b */ + ldr r4, [sp, #(16+0)] + ldr r5, [sp, #(16+4)] + cmp r4, #0 + ldr r6, [sp, #(16+8)] + beq .Lcbc_enc_skip + cmp r5, #0 + vpush {q4-q7} + moveq r5, #16 + movne r5, #0 + + cmp r6, #12 + vld1.8 {q1}, [r3] /* load IV */ + + aes_preload_keys(r0, lr); + + beq .Lcbc_enc_loop192 + bhi .Lcbc_enc_loop256 + +#define CBC_ENC(bits, ...) \ + .Lcbc_enc_loop##bits: \ + vld1.8 {q0}, [r2]!; /* load plaintext */ \ + veor q1, q0, q1; \ + subs r4, r4, #1; \ + \ + do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \ + \ + vst1.8 {q1}, [r1], r5; /* store ciphertext */ \ + \ + bne .Lcbc_enc_loop##bits; \ + b .Lcbc_enc_done; + + CBC_ENC(128) + CBC_ENC(192, r0, lr) + CBC_ENC(256, r0, lr) + +#undef CBC_ENC + +.Lcbc_enc_done: + vst1.8 {q1}, [r3] /* store IV */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + vpop {q4-q7} + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + +.Lcbc_enc_skip: + pop {r4-r6,pc} +.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; + + +/* + * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_cbc_dec_armv8_ce +.type _gcry_aes_cbc_dec_armv8_ce,%function; +_gcry_aes_cbc_dec_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * %st+0: nblocks => r4 + * %st+4: nrounds => r5 + */ + + push {r4-r6,lr} /* 4*4 = 16b */ + ldr r4, [sp, #(16+0)] + ldr r5, [sp, #(16+4)] + cmp r4, #0 + beq .Lcbc_dec_skip + vpush {q4-q7} + + cmp r5, #12 + vld1.8 {q0}, [r3] /* load IV */ + + aes_preload_keys(r0, r6); + + beq .Lcbc_dec_entry_192 + bhi .Lcbc_dec_entry_256 + +#define CBC_DEC(bits, ...) 
\ + .Lcbc_dec_entry_##bits: \ + cmp r4, #4; \ + blo .Lcbc_dec_loop_##bits; \ + \ + .Lcbc_dec_loop4_##bits: \ + \ + vld1.8 {q1-q2}, [r2]!; /* load ciphertext */ \ + sub r4, r4, #4; \ + vld1.8 {q3-q4}, [r2]; /* load ciphertext */ \ + cmp r4, #4; \ + sub r2, #32; \ + \ + do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \ + \ + veor q1, q1, q0; \ + vld1.8 {q0}, [r2]!; /* load next IV */ \ + veor q2, q2, q0; \ + vld1.8 {q0}, [r2]!; /* load next IV */ \ + vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \ + veor q3, q3, q0; \ + vld1.8 {q0}, [r2]!; /* load next IV */ \ + veor q4, q4, q0; \ + vld1.8 {q0}, [r2]!; /* load next IV */ \ + vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \ + \ + bhs .Lcbc_dec_loop4_##bits; \ + cmp r4, #0; \ + beq .Lcbc_dec_done; \ + \ + .Lcbc_dec_loop_##bits: \ + vld1.8 {q1}, [r2]!; /* load ciphertext */ \ + subs r4, r4, #1; \ + vmov q2, q1; \ + \ + do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__); \ + \ + veor q1, q1, q0; \ + vmov q0, q2; \ + vst1.8 {q1}, [r1]!; /* store plaintext */ \ + \ + bne .Lcbc_dec_loop_##bits; \ + b .Lcbc_dec_done; + + CBC_DEC(128) + CBC_DEC(192, r0, r6) + CBC_DEC(256, r0, r6) + +#undef CBC_DEC + +.Lcbc_dec_done: + vst1.8 {q0}, [r3] /* store IV */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + vpop {q4-q7} + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + +.Lcbc_dec_skip: + pop {r4-r6,pc} +.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; + + +/* + * void _gcry_aes_cfb_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_cfb_enc_armv8_ce +.type _gcry_aes_cfb_enc_armv8_ce,%function; +_gcry_aes_cfb_enc_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * %st+0: nblocks => r4 + * %st+4: nrounds => r5 + */ + + push {r4-r6,lr} /* 4*4 = 16b */ + ldr r4, [sp, #(16+0)] + ldr r5, [sp, #(16+4)] + cmp r4, #0 + beq .Lcfb_enc_skip + vpush {q4-q7} + + cmp r5, #12 + vld1.8 {q0}, [r3] /* load IV */ + + aes_preload_keys(r0, r6); + + beq .Lcfb_enc_entry_192 + bhi .Lcfb_enc_entry_256 + +#define CFB_ENC(bits, ...) 
\ + .Lcfb_enc_entry_##bits: \ + .Lcfb_enc_loop_##bits: \ + vld1.8 {q1}, [r2]!; /* load plaintext */ \ + subs r4, r4, #1; \ + \ + do_aes_one##bits(e, mc, q0, q0, ##__VA_ARGS__); \ + \ + veor q0, q1, q0; \ + vst1.8 {q0}, [r1]!; /* store ciphertext */ \ + \ + bne .Lcfb_enc_loop_##bits; \ + b .Lcfb_enc_done; + + CFB_ENC(128) + CFB_ENC(192, r0, r6) + CFB_ENC(256, r0, r6) + +#undef CFB_ENC + +.Lcfb_enc_done: + vst1.8 {q0}, [r3] /* store IV */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + vpop {q4-q7} + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + +.Lcfb_enc_skip: + pop {r4-r6,pc} +.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; + + +/* + * void _gcry_aes_cfb_dec_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_cfb_dec_armv8_ce +.type _gcry_aes_cfb_dec_armv8_ce,%function; +_gcry_aes_cfb_dec_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * %st+0: nblocks => r4 + * %st+4: nrounds => r5 + */ + + push {r4-r6,lr} /* 4*4 = 16b */ + ldr r4, [sp, #(16+0)] + ldr r5, [sp, #(16+4)] + cmp r4, #0 + beq .Lcfb_dec_skip + vpush {q4-q7} + + cmp r5, #12 + vld1.8 {q0}, [r3] /* load IV */ + + aes_preload_keys(r0, r6); + + beq .Lcfb_dec_entry_192 + bhi .Lcfb_dec_entry_256 + +#define CFB_DEC(bits, ...) \ + .Lcfb_dec_entry_##bits: \ + cmp r4, #4; \ + blo .Lcfb_dec_loop_##bits; \ + \ + .Lcfb_dec_loop4_##bits: \ + \ + vld1.8 {q2-q3}, [r2]!; /* load ciphertext */ \ + vmov q1, q0; \ + sub r4, r4, #4; \ + vld1.8 {q4}, [r2]; /* load ciphertext */ \ + sub r2, #32; \ + cmp r4, #4; \ + \ + do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \ + \ + vld1.8 {q0}, [r2]!; /* load ciphertext */ \ + veor q1, q1, q0; \ + vld1.8 {q0}, [r2]!; /* load ciphertext */ \ + veor q2, q2, q0; \ + vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \ + vld1.8 {q0}, [r2]!; \ + veor q3, q3, q0; \ + vld1.8 {q0}, [r2]!; /* load next IV / ciphertext */ \ + veor q4, q4, q0; \ + vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \ + \ + bhs .Lcfb_dec_loop4_##bits; \ + cmp r4, #0; \ + beq .Lcfb_dec_done; \ + \ + .Lcfb_dec_loop_##bits: \ + \ + vld1.8 {q1}, [r2]!; /* load ciphertext */ \ + \ + subs r4, r4, #1; \ + \ + do_aes_one##bits(e, mc, q0, q0, ##__VA_ARGS__); \ + \ + veor q2, q1, q0; \ + vmov q0, q1; \ + vst1.8 {q2}, [r1]!; /* store plaintext */ \ + \ + bne .Lcfb_dec_loop_##bits; \ + b .Lcfb_dec_done; + + CFB_DEC(128) + CFB_DEC(192, r0, r6) + CFB_DEC(256, r0, r6) + +#undef CFB_DEC + +.Lcfb_dec_done: + vst1.8 {q0}, [r3] /* store IV */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + vpop {q4-q7} + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + +.Lcfb_dec_skip: + pop {r4-r6,pc} +.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; + + +/* + * void _gcry_aes_ctr_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_ctr_enc_armv8_ce +.type _gcry_aes_ctr_enc_armv8_ce,%function; +_gcry_aes_ctr_enc_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * %st+0: nblocks => r4 + * %st+4: nrounds => r5 + */ + + vpush {q4-q7} + push {r4-r12,lr} /* 4*16 + 4*10 = 104b */ + ldr r4, [sp, #(104+0)] + ldr r5, [sp, #(104+4)] + cmp r4, #0 + beq .Lctr_enc_skip + + cmp r5, #12 
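+ /* Descriptive note (added): the IV holds a 128-bit big-endian
+ * counter; a byte-swapped copy is kept in r7-r10 so it can be
+ * stepped with plain integer adds, with the carry-out path
+ * handled in .Lctr_overflow_one below. */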
+ ldm r3, {r7-r10} + vld1.8 {q0}, [r3] /* load IV */ + rev r7, r7 + rev r8, r8 + rev r9, r9 + rev r10, r10 + + aes_preload_keys(r0, r6); + + beq .Lctr_enc_entry_192 + bhi .Lctr_enc_entry_256 + +#define CTR_ENC(bits, ...) \ + .Lctr_enc_entry_##bits: \ + cmp r4, #4; \ + blo .Lctr_enc_loop_##bits; \ + \ + .Lctr_enc_loop4_##bits: \ + cmp r10, #0xfffffffc; \ + sub r4, r4, #4; \ + blo .Lctr_enc_loop4_##bits##_nocarry; \ + cmp r9, #0xffffffff; \ + bne .Lctr_enc_loop4_##bits##_nocarry; \ + \ + adds r10, #1; \ + vmov q1, q0; \ + blcs .Lctr_overflow_one; \ + rev r11, r10; \ + vmov.32 d1[1], r11; \ + \ + adds r10, #1; \ + vmov q2, q0; \ + blcs .Lctr_overflow_one; \ + rev r11, r10; \ + vmov.32 d1[1], r11; \ + \ + adds r10, #1; \ + vmov q3, q0; \ + blcs .Lctr_overflow_one; \ + rev r11, r10; \ + vmov.32 d1[1], r11; \ + \ + adds r10, #1; \ + vmov q4, q0; \ + blcs .Lctr_overflow_one; \ + rev r11, r10; \ + vmov.32 d1[1], r11; \ + \ + b .Lctr_enc_loop4_##bits##_store_ctr; \ + \ + .Lctr_enc_loop4_##bits##_nocarry: \ + \ + veor q2, q2; \ + vrev64.8 q1, q0; \ + vceq.u32 d5, d5; \ + vadd.u64 q3, q2, q2; \ + vadd.u64 q4, q3, q2; \ + vadd.u64 q0, q3, q3; \ + vsub.u64 q2, q1, q2; \ + vsub.u64 q3, q1, q3; \ + vsub.u64 q4, q1, q4; \ + vsub.u64 q0, q1, q0; \ + vrev64.8 q1, q1; \ + vrev64.8 q2, q2; \ + vrev64.8 q3, q3; \ + vrev64.8 q0, q0; \ + vrev64.8 q4, q4; \ + add r10, #4; \ + \ + .Lctr_enc_loop4_##bits##_store_ctr: \ + \ + vst1.8 {q0}, [r3]; \ + cmp r4, #4; \ + vld1.8 {q0}, [r2]!; /* load ciphertext */ \ + \ + do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \ + \ + veor q1, q1, q0; \ + vld1.8 {q0}, [r2]!; /* load ciphertext */ \ + vst1.8 {q1}, [r1]!; /* store plaintext */ \ + vld1.8 {q1}, [r2]!; /* load ciphertext */ \ + veor q2, q2, q0; \ + veor q3, q3, q1; \ + vld1.8 {q0}, [r2]!; /* load ciphertext */ \ + vst1.8 {q2}, [r1]!; /* store plaintext */ \ + veor q4, q4, q0; \ + vld1.8 {q0}, [r3]; /* reload IV */ \ + vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \ + \ + bhs .Lctr_enc_loop4_##bits; \ + cmp r4, #0; \ + beq .Lctr_enc_done; \ + \ + .Lctr_enc_loop_##bits: \ + \ + adds r10, #1; \ + vmov q1, q0; \ + blcs .Lctr_overflow_one; \ + rev r11, r10; \ + subs r4, r4, #1; \ + vld1.8 {q2}, [r2]!; /* load ciphertext */ \ + vmov.32 d1[1], r11; \ + \ + do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \ + \ + veor q1, q2, q1; \ + vst1.8 {q1}, [r1]!; /* store plaintext */ \ + \ + bne .Lctr_enc_loop_##bits; \ + b .Lctr_enc_done; + + CTR_ENC(128) + CTR_ENC(192, r0, r6) + CTR_ENC(256, r0, r6) + +#undef CTR_ENC + +.Lctr_enc_done: + vst1.8 {q0}, [r3] /* store IV */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + +.Lctr_enc_skip: + pop {r4-r12,lr} + vpop {q4-q7} + bx lr + +.Lctr_overflow_one: + adcs r9, #0 + adcs r8, #0 + adc r7, #0 + rev r11, r9 + rev r12, r8 + vmov.32 d1[0], r11 + rev r11, r7 + vmov.32 d0[1], r12 + vmov.32 d0[0], r11 + bx lr +.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; + + +/* + * void _gcry_aes_ocb_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *offset, + * unsigned char *checksum, + * unsigned char *L_table, + * size_t nblocks, + * unsigned int nrounds, + * unsigned int blkn); + */ + +.align 3 +.globl _gcry_aes_ocb_enc_armv8_ce +.type _gcry_aes_ocb_enc_armv8_ce,%function; +_gcry_aes_ocb_enc_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: offset + * %st+0: 
checksum => r4 + * %st+4: Ls => r5 + * %st+8: nblocks => r6 (0 < nblocks <= 32) + * %st+12: nrounds => r7 + * %st+16: blkn => lr + */ + + vpush {q4-q7} + push {r4-r12,lr} /* 4*16 + 4*10 = 104b */ + ldr r7, [sp, #(104+12)] + ldr r4, [sp, #(104+0)] + ldr r5, [sp, #(104+4)] + ldr r6, [sp, #(104+8)] + ldr lr, [sp, #(104+16)] + + cmp r7, #12 + vld1.8 {q0}, [r3] /* load offset */ + + aes_preload_keys(r0, r12); + + beq .Locb_enc_entry_192 + bhi .Locb_enc_entry_256 + +#define OCB_ENC(bits, ...) \ + .Locb_enc_entry_##bits: \ + cmp r6, #4; \ + add lr, #1; \ + blo .Locb_enc_loop_##bits; \ + \ + .Locb_enc_loop4_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* Checksum_i = Checksum_{i-1} xor P_i */ \ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \ + \ + add r9, lr, #1; \ + add r10, lr, #2; \ + add r11, lr, #3; \ + rbit r8, lr; \ + add lr, lr, #4; \ + rbit r9, r9; \ + rbit r10, r10; \ + rbit r11, r11; \ + clz r8, r8; /* ntz(i+0) */ \ + clz r9, r9; /* ntz(i+1) */ \ + clz r10, r10; /* ntz(i+2) */ \ + clz r11, r11; /* ntz(i+3) */ \ + add r8, r5, r8, lsl #4; \ + add r9, r5, r9, lsl #4; \ + add r10, r5, r10, lsl #4; \ + add r11, r5, r11, lsl #4; \ + \ + sub r6, #4; \ + \ + vld1.8 {q9}, [r8]; /* load L_{ntz(i+0)} */ \ + vld1.8 {q1-q2}, [r2]!; /* load P_i+<0-1> */ \ + vld1.8 {q8}, [r4]; /* load Checksum_{i-1} */ \ + veor q0, q0, q9; /* Offset_i+0 */ \ + vld1.8 {q9}, [r9]; /* load L_{ntz(i+1)} */ \ + veor q8, q8, q1; /* Checksum_i+0 */ \ + veor q1, q1, q0; /* P_i+0 xor Offset_i+0 */\ + vld1.8 {q3-q4}, [r2]!; /* load P_i+<2-3> */ \ + vst1.8 {q0}, [r1]!; /* store Offset_i+0 */\ + veor q0, q0, q9; /* Offset_i+1 */ \ + vld1.8 {q9}, [r10]; /* load L_{ntz(i+2)} */ \ + veor q8, q8, q2; /* Checksum_i+1 */ \ + veor q2, q2, q0; /* P_i+1 xor Offset_i+1 */\ + vst1.8 {q0}, [r1]!; /* store Offset_i+1 */\ + veor q0, q0, q9; /* Offset_i+2 */ \ + vld1.8 {q9}, [r11]; /* load L_{ntz(i+3)} */ \ + veor q8, q8, q3; /* Checksum_i+2 */ \ + veor q3, q3, q0; /* P_i+2 xor Offset_i+2 */\ + vst1.8 {q0}, [r1]!; /* store Offset_i+2 */\ + veor q0, q0, q9; /* Offset_i+3 */ \ + veor q8, q8, q4; /* Checksum_i+3 */ \ + veor q4, q4, q0; /* P_i+3 xor Offset_i+3 */\ + vst1.8 {q0}, [r1]; /* store Offset_i+3 */\ + sub r1, #(3*16); \ + vst1.8 {q8}, [r4]; /* store Checksum_i+3 */\ + \ + cmp r6, #4; \ + \ + do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \ + \ + mov r8, r1; \ + vld1.8 {q8-q9}, [r1]!; \ + veor q1, q1, q8; \ + veor q2, q2, q9; \ + vld1.8 {q8-q9}, [r1]!; \ + vst1.8 {q1-q2}, [r8]!; \ + veor q3, q3, q8; \ + veor q4, q4, q9; \ + vst1.8 {q3-q4}, [r8]; \ + \ + bhs .Locb_enc_loop4_##bits; \ + cmp r6, #0; \ + beq .Locb_enc_done; \ + \ + .Locb_enc_loop_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* Checksum_i = Checksum_{i-1} xor P_i */ \ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \ + \ + rbit r8, lr; \ + add lr, #1; \ + clz r8, r8; /* ntz(i) */ \ + add r8, r5, r8, lsl #4; \ + \ + vld1.8 {q1}, [r2]!; /* load plaintext */ \ + vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \ + vld1.8 {q3}, [r4]; /* load checksum */ \ + subs r6, #1; \ + veor q0, q0, q2; \ + veor q3, q3, q1; \ + veor q1, q1, q0; \ + vst1.8 {q3}, [r4]; /* store checksum */ \ + \ + do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \ + \ + veor q1, q1, q0; \ + vst1.8 {q1}, [r1]!; /* store ciphertext */ \ + \ + bne .Locb_enc_loop_##bits; \ + b .Locb_enc_done; + + OCB_ENC(128re, r0, r12) + OCB_ENC(192, r0, r12) + OCB_ENC(256, r0, r12) + +#undef OCB_ENC + +.Locb_enc_done: + vst1.8 {q0}, [r3] /* store offset */ + + CLEAR_REG(q0) 
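+ /* scrub round keys and block data from the clobbered vector
+ * registers (q4-q7 are restored from the stack below) */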
+ CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + + pop {r4-r12,lr} + vpop {q4-q7} + bx lr +.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; + + +/* + * void _gcry_aes_ocb_dec_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *offset, + * unsigned char *checksum, + * unsigned char *L_table, + * size_t nblocks, + * unsigned int nrounds, + * unsigned int blkn); + */ + +.align 3 +.globl _gcry_aes_ocb_dec_armv8_ce +.type _gcry_aes_ocb_dec_armv8_ce,%function; +_gcry_aes_ocb_dec_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: offset + * %st+0: checksum => r4 + * %st+4: Ls => r5 + * %st+8: nblocks => r6 (0 < nblocks <= 32) + * %st+12: nrounds => r7 + * %st+16: blkn => lr + */ + + vpush {q4-q7} + push {r4-r12,lr} /* 4*16 + 4*10 = 104b */ + ldr r7, [sp, #(104+12)] + ldr r4, [sp, #(104+0)] + ldr r5, [sp, #(104+4)] + ldr r6, [sp, #(104+8)] + ldr lr, [sp, #(104+16)] + + cmp r7, #12 + vld1.8 {q0}, [r3] /* load offset */ + + aes_preload_keys(r0, r12); + + beq .Locb_dec_entry_192 + bhi .Locb_dec_entry_256 + +#define OCB_DEC(bits, ...) \ + .Locb_dec_entry_##bits: \ + cmp r6, #4; \ + add lr, #1; \ + blo .Locb_dec_loop_##bits; \ + \ + .Locb_dec_loop4_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \ + /* Checksum_i = Checksum_{i-1} xor P_i */ \ + \ + add r9, lr, #1; \ + add r10, lr, #2; \ + add r11, lr, #3; \ + rbit r8, lr; \ + add lr, lr, #4; \ + rbit r9, r9; \ + rbit r10, r10; \ + rbit r11, r11; \ + clz r8, r8; /* ntz(i+0) */ \ + clz r9, r9; /* ntz(i+1) */ \ + clz r10, r10; /* ntz(i+2) */ \ + clz r11, r11; /* ntz(i+3) */ \ + add r8, r5, r8, lsl #4; \ + add r9, r5, r9, lsl #4; \ + add r10, r5, r10, lsl #4; \ + add r11, r5, r11, lsl #4; \ + \ + sub r6, #4; \ + \ + vld1.8 {q9}, [r8]; /* load L_{ntz(i+0)} */ \ + vld1.8 {q1-q2}, [r2]!; /* load P_i+<0-1> */ \ + veor q0, q0, q9; /* Offset_i+0 */ \ + vld1.8 {q9}, [r9]; /* load L_{ntz(i+1)} */ \ + veor q1, q1, q0; /* P_i+0 xor Offset_i+0 */\ + vld1.8 {q3-q4}, [r2]!; /* load P_i+<2-3> */ \ + vst1.8 {q0}, [r1]!; /* store Offset_i+0 */\ + veor q0, q0, q9; /* Offset_i+1 */ \ + vld1.8 {q9}, [r10]; /* load L_{ntz(i+2)} */ \ + veor q2, q2, q0; /* P_i+1 xor Offset_i+1 */\ + vst1.8 {q0}, [r1]!; /* store Offset_i+1 */\ + veor q0, q0, q9; /* Offset_i+2 */ \ + vld1.8 {q9}, [r11]; /* load L_{ntz(i+3)} */ \ + veor q3, q3, q0; /* P_i+2 xor Offset_i+2 */\ + vst1.8 {q0}, [r1]!; /* store Offset_i+2 */\ + veor q0, q0, q9; /* Offset_i+3 */ \ + veor q4, q4, q0; /* P_i+3 xor Offset_i+3 */\ + vst1.8 {q0}, [r1]; /* store Offset_i+3 */\ + sub r1, #(3*16); \ + \ + cmp r6, #4; \ + \ + do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \ + \ + mov r8, r1; \ + vld1.8 {q8-q9}, [r1]!; \ + veor q1, q1, q8; \ + veor q2, q2, q9; \ + vld1.8 {q8-q9}, [r1]!; \ + vst1.8 {q1-q2}, [r8]!; \ + veor q1, q1, q2; \ + vld1.8 {q2}, [r4]; /* load Checksum_{i-1} */ \ + veor q3, q3, q8; \ + veor q1, q1, q3; \ + veor q4, q4, q9; \ + veor q1, q1, q4; \ + vst1.8 {q3-q4}, [r8]; \ + veor q2, q2, q1; \ + vst1.8 {q2}, [r4]; /* store Checksum_i+3 */ \ + \ + bhs .Locb_dec_loop4_##bits; \ + cmp r6, #0; \ + beq .Locb_dec_done; \ + \ + .Locb_dec_loop_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \ + /* Checksum_i = Checksum_{i-1} xor P_i */ \ + \ + rbit r8, lr; \ 
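+ /* rbit+clz computes ntz(i) for the L_table index */ \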
+ add lr, #1; \ + clz r8, r8; /* ntz(i) */ \ + add r8, r5, r8, lsl #4; \ + \ + vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \ + vld1.8 {q1}, [r2]!; /* load ciphertext */ \ + subs r6, #1; \ + veor q0, q0, q2; \ + veor q1, q1, q0; \ + \ + do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__) \ + \ + vld1.8 {q2}, [r4]; /* load checksum */ \ + veor q1, q1, q0; \ + vst1.8 {q1}, [r1]!; /* store plaintext */ \ + veor q2, q2, q1; \ + vst1.8 {q2}, [r4]; /* store checksum */ \ + \ + bne .Locb_dec_loop_##bits; \ + b .Locb_dec_done; + + OCB_DEC(128re, r0, r12) + OCB_DEC(192, r0, r12) + OCB_DEC(256, r0, r12) + +#undef OCB_DEC + +.Locb_dec_done: + vst1.8 {q0}, [r3] /* store offset */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + + pop {r4-r12,lr} + vpop {q4-q7} + bx lr +.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; + + +/* + * void _gcry_aes_ocb_auth_armv8_ce (const void *keysched, + * const unsigned char *abuf, + * unsigned char *offset, + * unsigned char *checksum, + * unsigned char *L_table, + * size_t nblocks, + * unsigned int nrounds, + * unsigned int blkn); + */ + +.align 3 +.globl _gcry_aes_ocb_auth_armv8_ce +.type _gcry_aes_ocb_auth_armv8_ce,%function; +_gcry_aes_ocb_auth_armv8_ce: + /* input: + * r0: keysched + * r1: abuf + * r2: offset + * r3: checksum + * %st+0: Ls => r5 + * %st+4: nblocks => r6 (0 < nblocks <= 32) + * %st+8: nrounds => r7 + * %st+12: blkn => lr + */ + + vpush {q4-q7} + push {r4-r12,lr} /* 4*16 + 4*10 = 104b */ + ldr r7, [sp, #(104+8)] + ldr r5, [sp, #(104+0)] + ldr r6, [sp, #(104+4)] + ldr lr, [sp, #(104+12)] + + cmp r7, #12 + vld1.8 {q0}, [r2] /* load offset */ + + aes_preload_keys(r0, r12); + + beq .Locb_auth_entry_192 + bhi .Locb_auth_entry_256 + +#define OCB_AUTH(bits, ...) 
\ + .Locb_auth_entry_##bits: \ + cmp r6, #4; \ + add lr, #1; \ + blo .Locb_auth_loop_##bits; \ + \ + .Locb_auth_loop4_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \ + \ + add r9, lr, #1; \ + add r10, lr, #2; \ + add r11, lr, #3; \ + rbit r8, lr; \ + add lr, lr, #4; \ + rbit r9, r9; \ + rbit r10, r10; \ + rbit r11, r11; \ + clz r8, r8; /* ntz(i+0) */ \ + clz r9, r9; /* ntz(i+1) */ \ + clz r10, r10; /* ntz(i+2) */ \ + clz r11, r11; /* ntz(i+3) */ \ + add r8, r5, r8, lsl #4; \ + add r9, r5, r9, lsl #4; \ + add r10, r5, r10, lsl #4; \ + add r11, r5, r11, lsl #4; \ + \ + sub r6, #4; \ + \ + vld1.8 {q9}, [r8]; /* load L_{ntz(i+0)} */ \ + vld1.8 {q1-q2}, [r1]!; /* load A_i+<0-1> */ \ + veor q0, q0, q9; /* Offset_i+0 */ \ + vld1.8 {q9}, [r9]; /* load L_{ntz(i+1)} */ \ + veor q1, q1, q0; /* A_i+0 xor Offset_i+0 */\ + vld1.8 {q3-q4}, [r1]!; /* load A_i+<2-3> */ \ + veor q0, q0, q9; /* Offset_i+1 */ \ + vld1.8 {q9}, [r10]; /* load L_{ntz(i+2)} */ \ + veor q2, q2, q0; /* A_i+1 xor Offset_i+1 */\ + veor q0, q0, q9; /* Offset_i+2 */ \ + vld1.8 {q9}, [r11]; /* load L_{ntz(i+3)} */ \ + veor q3, q3, q0; /* A_i+2 xor Offset_i+2 */\ + veor q0, q0, q9; /* Offset_i+3 */ \ + veor q4, q4, q0; /* A_i+3 xor Offset_i+3 */\ + \ + cmp r6, #4; \ + \ + do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \ + \ + veor q1, q1, q2; \ + veor q3, q3, q4; \ + vld1.8 {q2}, [r3]; \ + veor q1, q1, q3; \ + veor q2, q2, q1; \ + vst1.8 {q2}, [r3]; \ + \ + bhs .Locb_auth_loop4_##bits; \ + cmp r6, #0; \ + beq .Locb_auth_done; \ + \ + .Locb_auth_loop_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \ + \ + rbit r8, lr; \ + add lr, #1; \ + clz r8, r8; /* ntz(i) */ \ + add r8, r5, r8, lsl #4; \ + \ + vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \ + vld1.8 {q1}, [r1]!; /* load aadtext */ \ + subs r6, #1; \ + veor q0, q0, q2; \ + vld1.8 {q2}, [r3]; /* load checksum */ \ + veor q1, q1, q0; \ + \ + do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__) \ + \ + veor q2, q2, q1; \ + vst1.8 {q2}, [r3]; /* store checksum */ \ + \ + bne .Locb_auth_loop_##bits; \ + b .Locb_auth_done; + + OCB_AUTH(128re, r0, r12) + OCB_AUTH(192, r0, r12) + OCB_AUTH(256, r0, r12) + +#undef OCB_AUTH + +.Locb_auth_done: + vst1.8 {q0}, [r2] /* store offset */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + + pop {r4-r12,lr} + vpop {q4-q7} + bx lr +.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; + + +/* + * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b); + */ +.align 3 +.globl _gcry_aes_sbox4_armv8_ce +.type _gcry_aes_sbox4_armv8_ce,%function; +_gcry_aes_sbox4_armv8_ce: + /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in + * Cryptology — CT-RSA 2015" for details. 
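+ * In short: aese with an all-zero round key reduces to SubBytes +
+ * ShiftRows. The block is pre-filled with 0x52, whose S-box value
+ * is 0x00, so after aese only the four input bytes map to non-zero
+ * values and the veor/vpadd fold below gathers them back into a
+ * single 32-bit word.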
+ */ + vmov.i8 q0, #0x52 + vmov.i8 q1, #0 + vmov s0, r0 + aese.8 q0, q1 + veor d0, d1 + vpadd.i32 d0, d0, d1 + vmov r0, s0 + CLEAR_REG(q0) + bx lr +.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; + + +/* + * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src); + */ +.align 3 +.globl _gcry_aes_invmixcol_armv8_ce +.type _gcry_aes_invmixcol_armv8_ce,%function; +_gcry_aes_invmixcol_armv8_ce: + vld1.8 {q0}, [r1] + aesimc.8 q0, q0 + vst1.8 {q0}, [r0] + CLEAR_REG(q0) + bx lr +.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-aarch64-ce.S b/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-aarch64-ce.S new file mode 100644 index 0000000..708ef34 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-aarch64-ce.S @@ -0,0 +1,1314 @@ +/* rijndael-armv8-aarch64-ce.S - ARMv8/CE accelerated AES + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) + +.cpu generic+simd+crypto + +.text + + +#define GET_DATA_POINTER(reg, name) \ + adrp reg, :got:name ; \ + ldr reg, [reg, #:got_lo12:name] ; + + +/* Register macros */ + +#define vk0 v17 +#define vk1 v18 +#define vk2 v19 +#define vk3 v20 +#define vk4 v21 +#define vk5 v22 +#define vk6 v23 +#define vk7 v24 +#define vk8 v25 +#define vk9 v26 +#define vk10 v27 +#define vk11 v28 +#define vk12 v29 +#define vk13 v30 +#define vk14 v31 + + +/* AES macros */ + +#define aes_preload_keys(keysched, nrounds) \ + cmp nrounds, #12; \ + ld1 {vk0.16b-vk3.16b}, [keysched], #64; \ + ld1 {vk4.16b-vk7.16b}, [keysched], #64; \ + ld1 {vk8.16b-vk10.16b}, [keysched], #48; \ + b.lo 1f; \ + ld1 {vk11.16b-vk12.16b}, [keysched], #32; \ + b.eq 1f; \ + ld1 {vk13.16b-vk14.16b}, [keysched]; \ +1: ; + +#define do_aes_one128(ed, mcimc, vo, vb) \ + aes##ed vb.16b, vk0.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk1.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk2.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk3.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk4.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk5.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk6.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk7.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk8.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk9.16b; \ + eor vo.16b, vb.16b, vk10.16b; + +#define do_aes_one192(ed, mcimc, vo, vb) \ + aes##ed vb.16b, vk0.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk1.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk2.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk3.16b; \ + aes##mcimc vb.16b, 
vb.16b; \ + aes##ed vb.16b, vk4.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk5.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk6.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk7.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk8.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk9.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk10.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk11.16b; \ + eor vo.16b, vb.16b, vk12.16b; + +#define do_aes_one256(ed, mcimc, vo, vb) \ + aes##ed vb.16b, vk0.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk1.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk2.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk3.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk4.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk5.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk6.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk7.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk8.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk9.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk10.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk11.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk12.16b; \ + aes##mcimc vb.16b, vb.16b; \ + aes##ed vb.16b, vk13.16b; \ + eor vo.16b, vb.16b, vk14.16b; + +#define aes_round_4(ed, mcimc, b0, b1, b2, b3, key) \ + aes##ed b0.16b, key.16b; \ + aes##mcimc b0.16b, b0.16b; \ + aes##ed b1.16b, key.16b; \ + aes##mcimc b1.16b, b1.16b; \ + aes##ed b2.16b, key.16b; \ + aes##mcimc b2.16b, b2.16b; \ + aes##ed b3.16b, key.16b; \ + aes##mcimc b3.16b, b3.16b; + +#define aes_lastround_4(ed, b0, b1, b2, b3, key1, key2) \ + aes##ed b0.16b, key1.16b; \ + eor b0.16b, b0.16b, key2.16b; \ + aes##ed b1.16b, key1.16b; \ + eor b1.16b, b1.16b, key2.16b; \ + aes##ed b2.16b, key1.16b; \ + eor b2.16b, b2.16b, key2.16b; \ + aes##ed b3.16b, key1.16b; \ + eor b3.16b, b3.16b, key2.16b; + +#define do_aes_4_128(ed, mcimc, b0, b1, b2, b3) \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \ + aes_lastround_4(ed, b0, b1, b2, b3, vk9, vk10); + +#define do_aes_4_192(ed, mcimc, b0, b1, b2, b3) \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk9); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk10); \ + aes_lastround_4(ed, b0, b1, b2, b3, vk11, vk12); + +#define do_aes_4_256(ed, mcimc, b0, b1, b2, b3) \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \ + aes_round_4(ed, mcimc, 
b0, b1, b2, b3, vk6); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk9); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk10); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk11); \ + aes_round_4(ed, mcimc, b0, b1, b2, b3, vk12); \ + aes_lastround_4(ed, b0, b1, b2, b3, vk13, vk14); + + +/* Other functional macros */ + +#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; + +#define aes_clear_keys(nrounds) \ + cmp nrounds, #12; \ + CLEAR_REG(vk0); \ + CLEAR_REG(vk1); \ + CLEAR_REG(vk2); \ + CLEAR_REG(vk3); \ + CLEAR_REG(vk4); \ + CLEAR_REG(vk5); \ + CLEAR_REG(vk6); \ + CLEAR_REG(vk7); \ + CLEAR_REG(vk9); \ + CLEAR_REG(vk8); \ + CLEAR_REG(vk10); \ + b.lo 1f; \ + CLEAR_REG(vk11); \ + CLEAR_REG(vk12); \ + b.eq 1f; \ + CLEAR_REG(vk13); \ + CLEAR_REG(vk14); \ +1: ; + + +/* + * unsigned int _gcry_aes_enc_armv8_ce(void *keysched, byte *dst, + * const byte *src, + * unsigned int nrounds); + */ +.align 3 +.globl _gcry_aes_enc_armv8_ce +.type _gcry_aes_enc_armv8_ce,%function; +_gcry_aes_enc_armv8_ce: + /* input: + * x0: keysched + * x1: dst + * x2: src + * w3: nrounds + */ + + aes_preload_keys(x0, w3); + + ld1 {v0.16b}, [x2] + + b.hi .Lenc1_256 + b.eq .Lenc1_192 + +.Lenc1_128: + do_aes_one128(e, mc, v0, v0); + +.Lenc1_tail: + CLEAR_REG(vk0) + CLEAR_REG(vk1) + CLEAR_REG(vk2) + CLEAR_REG(vk3) + CLEAR_REG(vk4) + CLEAR_REG(vk5) + CLEAR_REG(vk6) + CLEAR_REG(vk7) + CLEAR_REG(vk8) + CLEAR_REG(vk9) + CLEAR_REG(vk10) + st1 {v0.16b}, [x1] + CLEAR_REG(v0) + + mov x0, #0 + ret + +.Lenc1_192: + do_aes_one192(e, mc, v0, v0); + + CLEAR_REG(vk11) + CLEAR_REG(vk12) + b .Lenc1_tail + +.Lenc1_256: + do_aes_one256(e, mc, v0, v0); + + CLEAR_REG(vk11) + CLEAR_REG(vk12) + CLEAR_REG(vk13) + CLEAR_REG(vk14) + b .Lenc1_tail +.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; + + +/* + * unsigned int _gcry_aes_dec_armv8_ce(void *keysched, byte *dst, + * const byte *src, + * unsigned int nrounds); + */ +.align 3 +.globl _gcry_aes_dec_armv8_ce +.type _gcry_aes_dec_armv8_ce,%function; +_gcry_aes_dec_armv8_ce: + /* input: + * x0: keysched + * x1: dst + * x2: src + * w3: nrounds + */ + + aes_preload_keys(x0, w3); + + ld1 {v0.16b}, [x2] + + b.hi .Ldec1_256 + b.eq .Ldec1_192 + +.Ldec1_128: + do_aes_one128(d, imc, v0, v0); + +.Ldec1_tail: + CLEAR_REG(vk0) + CLEAR_REG(vk1) + CLEAR_REG(vk2) + CLEAR_REG(vk3) + CLEAR_REG(vk4) + CLEAR_REG(vk5) + CLEAR_REG(vk6) + CLEAR_REG(vk7) + CLEAR_REG(vk8) + CLEAR_REG(vk9) + CLEAR_REG(vk10) + st1 {v0.16b}, [x1] + CLEAR_REG(v0) + + mov x0, #0 + ret + +.Ldec1_192: + do_aes_one192(d, imc, v0, v0); + + CLEAR_REG(vk11) + CLEAR_REG(vk12) + b .Ldec1_tail + +.Ldec1_256: + do_aes_one256(d, imc, v0, v0); + + CLEAR_REG(vk11) + CLEAR_REG(vk12) + CLEAR_REG(vk13) + CLEAR_REG(vk14) + b .Ldec1_tail +.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; + + +/* + * void _gcry_aes_cbc_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, size_t nblocks, + * int cbc_mac, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_cbc_enc_armv8_ce +.type _gcry_aes_cbc_enc_armv8_ce,%function; +_gcry_aes_cbc_enc_armv8_ce: + /* input: + * x0: keysched + * x1: outbuf + * x2: inbuf + * x3: iv + * x4: nblocks + * w5: cbc_mac + * w6: nrounds + */ + + cbz x4, .Lcbc_enc_skip + + cmp w5, #0 + ld1 {v1.16b}, [x3] /* load IV */ + cset x5, eq + + aes_preload_keys(x0, w6); + lsl x5, x5, #4 + + b.eq .Lcbc_enc_loop192 + b.hi .Lcbc_enc_loop256 + +#define CBC_ENC(bits) \ + 
.Lcbc_enc_loop##bits: \ + ld1 {v0.16b}, [x2], #16; /* load plaintext */ \ + eor v1.16b, v0.16b, v1.16b; \ + sub x4, x4, #1; \ + \ + do_aes_one##bits(e, mc, v1, v1); \ + \ + st1 {v1.16b}, [x1], x5; /* store ciphertext */ \ + \ + cbnz x4, .Lcbc_enc_loop##bits; \ + b .Lcbc_enc_done; + + CBC_ENC(128) + CBC_ENC(192) + CBC_ENC(256) + +#undef CBC_ENC + +.Lcbc_enc_done: + aes_clear_keys(w6) + + st1 {v1.16b}, [x3] /* store IV */ + + CLEAR_REG(v1) + CLEAR_REG(v0) + +.Lcbc_enc_skip: + ret +.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; + +/* + * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_cbc_dec_armv8_ce +.type _gcry_aes_cbc_dec_armv8_ce,%function; +_gcry_aes_cbc_dec_armv8_ce: + /* input: + * x0: keysched + * x1: outbuf + * x2: inbuf + * x3: iv + * x4: nblocks + * w5: nrounds + */ + + cbz x4, .Lcbc_dec_skip + + ld1 {v0.16b}, [x3] /* load IV */ + + aes_preload_keys(x0, w5); + + b.eq .Lcbc_dec_entry_192 + b.hi .Lcbc_dec_entry_256 + +#define CBC_DEC(bits) \ + .Lcbc_dec_entry_##bits: \ + cmp x4, #4; \ + b.lo .Lcbc_dec_loop_##bits; \ + \ + .Lcbc_dec_loop4_##bits: \ + \ + ld1 {v1.16b-v4.16b}, [x2], #64; /* load ciphertext */ \ + sub x4, x4, #4; \ + mov v5.16b, v1.16b; \ + mov v6.16b, v2.16b; \ + mov v7.16b, v3.16b; \ + mov v16.16b, v4.16b; \ + cmp x4, #4; \ + \ + do_aes_4_##bits(d, imc, v1, v2, v3, v4); \ + \ + eor v1.16b, v1.16b, v0.16b; \ + eor v2.16b, v2.16b, v5.16b; \ + st1 {v1.16b-v2.16b}, [x1], #32; /* store plaintext */ \ + eor v3.16b, v3.16b, v6.16b; \ + eor v4.16b, v4.16b, v7.16b; \ + mov v0.16b, v16.16b; /* next IV */ \ + st1 {v3.16b-v4.16b}, [x1], #32; /* store plaintext */ \ + \ + b.hs .Lcbc_dec_loop4_##bits; \ + CLEAR_REG(v3); \ + CLEAR_REG(v4); \ + CLEAR_REG(v5); \ + CLEAR_REG(v6); \ + CLEAR_REG(v7); \ + CLEAR_REG(v16); \ + cbz x4, .Lcbc_dec_done; \ + \ + .Lcbc_dec_loop_##bits: \ + ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \ + sub x4, x4, #1; \ + mov v2.16b, v1.16b; \ + \ + do_aes_one##bits(d, imc, v1, v1); \ + \ + eor v1.16b, v1.16b, v0.16b; \ + mov v0.16b, v2.16b; \ + st1 {v1.16b}, [x1], #16; /* store plaintext */ \ + \ + cbnz x4, .Lcbc_dec_loop_##bits; \ + b .Lcbc_dec_done; + + CBC_DEC(128) + CBC_DEC(192) + CBC_DEC(256) + +#undef CBC_DEC + +.Lcbc_dec_done: + aes_clear_keys(w5) + + st1 {v0.16b}, [x3] /* store IV */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + CLEAR_REG(v2) + +.Lcbc_dec_skip: + ret +.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; + + +/* + * void _gcry_aes_ctr_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_ctr_enc_armv8_ce +.type _gcry_aes_ctr_enc_armv8_ce,%function; +_gcry_aes_ctr_enc_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * x4: nblocks + * w5: nrounds + */ + + cbz x4, .Lctr_enc_skip + + mov x6, #1 + movi v16.16b, #0 + mov v16.D[1], x6 + + /* load IV */ + ldp x9, x10, [x3] + ld1 {v0.16b}, [x3] + rev x9, x9 + rev x10, x10 + + aes_preload_keys(x0, w5); + + b.eq .Lctr_enc_entry_192 + b.hi .Lctr_enc_entry_256 + +#define CTR_ENC(bits) \ + .Lctr_enc_entry_##bits: \ + cmp x4, #4; \ + b.lo .Lctr_enc_loop_##bits; \ + \ + .Lctr_enc_loop4_##bits: \ + cmp x10, #0xfffffffffffffffc; \ + sub x4, x4, #4; \ + b.lo .Lctr_enc_loop4_##bits##_nocarry; \ + \ + adds x10, x10, #1; \ + mov v1.16b, v0.16b; \ + adc x9, x9, xzr; \ + mov v2.D[1], x10; \ + mov 
v2.D[0], x9; \ + \ + adds x10, x10, #1; \ + rev64 v2.16b, v2.16b; \ + adc x9, x9, xzr; \ + mov v3.D[1], x10; \ + mov v3.D[0], x9; \ + \ + adds x10, x10, #1; \ + rev64 v3.16b, v3.16b; \ + adc x9, x9, xzr; \ + mov v4.D[1], x10; \ + mov v4.D[0], x9; \ + \ + adds x10, x10, #1; \ + rev64 v4.16b, v4.16b; \ + adc x9, x9, xzr; \ + mov v0.D[1], x10; \ + mov v0.D[0], x9; \ + rev64 v0.16b, v0.16b; \ + \ + b .Lctr_enc_loop4_##bits##_store_ctr; \ + \ + .Lctr_enc_loop4_##bits##_nocarry: \ + \ + add v3.2d, v16.2d, v16.2d; /* 2 */ \ + rev64 v6.16b, v0.16b; \ + add x10, x10, #4; \ + add v4.2d, v3.2d, v16.2d; /* 3 */ \ + add v0.2d, v3.2d, v3.2d; /* 4 */ \ + rev64 v1.16b, v6.16b; \ + add v2.2d, v6.2d, v16.2d; \ + add v3.2d, v6.2d, v3.2d; \ + add v4.2d, v6.2d, v4.2d; \ + add v0.2d, v6.2d, v0.2d; \ + rev64 v2.16b, v2.16b; \ + rev64 v3.16b, v3.16b; \ + rev64 v0.16b, v0.16b; \ + rev64 v4.16b, v4.16b; \ + \ + .Lctr_enc_loop4_##bits##_store_ctr: \ + \ + st1 {v0.16b}, [x3]; \ + cmp x4, #4; \ + ld1 {v5.16b-v7.16b}, [x2], #48; /* preload ciphertext */ \ + \ + do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ + \ + eor v1.16b, v1.16b, v5.16b; \ + ld1 {v5.16b}, [x2], #16; /* load ciphertext */ \ + eor v2.16b, v2.16b, v6.16b; \ + eor v3.16b, v3.16b, v7.16b; \ + eor v4.16b, v4.16b, v5.16b; \ + st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \ + \ + b.hs .Lctr_enc_loop4_##bits; \ + CLEAR_REG(v3); \ + CLEAR_REG(v4); \ + CLEAR_REG(v5); \ + CLEAR_REG(v6); \ + CLEAR_REG(v7); \ + cbz x4, .Lctr_enc_done; \ + \ + .Lctr_enc_loop_##bits: \ + \ + adds x10, x10, #1; \ + mov v1.16b, v0.16b; \ + adc x9, x9, xzr; \ + mov v0.D[1], x10; \ + mov v0.D[0], x9; \ + sub x4, x4, #1; \ + ld1 {v2.16b}, [x2], #16; /* load ciphertext */ \ + rev64 v0.16b, v0.16b; \ + \ + do_aes_one##bits(e, mc, v1, v1); \ + \ + eor v1.16b, v2.16b, v1.16b; \ + st1 {v1.16b}, [x1], #16; /* store plaintext */ \ + \ + cbnz x4, .Lctr_enc_loop_##bits; \ + b .Lctr_enc_done; + + CTR_ENC(128) + CTR_ENC(192) + CTR_ENC(256) + +#undef CTR_ENC + +.Lctr_enc_done: + aes_clear_keys(w5) + + st1 {v0.16b}, [x3] /* store IV */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + CLEAR_REG(v2) + +.Lctr_enc_skip: + ret + +.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; + + +/* + * void _gcry_aes_cfb_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_cfb_enc_armv8_ce +.type _gcry_aes_cfb_enc_armv8_ce,%function; +_gcry_aes_cfb_enc_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * x4: nblocks + * w5: nrounds + */ + + cbz x4, .Lcfb_enc_skip + + /* load IV */ + ld1 {v0.16b}, [x3] + + aes_preload_keys(x0, w5); + + b.eq .Lcfb_enc_entry_192 + b.hi .Lcfb_enc_entry_256 + +#define CFB_ENC(bits) \ + .Lcfb_enc_entry_##bits: \ + .Lcfb_enc_loop_##bits: \ + ld1 {v1.16b}, [x2], #16; /* load plaintext */ \ + sub x4, x4, #1; \ + \ + do_aes_one##bits(e, mc, v0, v0); \ + \ + eor v0.16b, v1.16b, v0.16b; \ + st1 {v0.16b}, [x1], #16; /* store ciphertext */ \ + \ + cbnz x4, .Lcfb_enc_loop_##bits; \ + b .Lcfb_enc_done; + + CFB_ENC(128) + CFB_ENC(192) + CFB_ENC(256) + +#undef CFB_ENC + +.Lcfb_enc_done: + aes_clear_keys(w5) + + st1 {v0.16b}, [x3] /* store IV */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + +.Lcfb_enc_skip: + ret +.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; + + +/* + * void _gcry_aes_cfb_dec_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, unsigned int nrounds); + */ + 
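+/* Note: unlike CFB encryption, CFB decryption has no feedback
+ * dependency between blocks; each keystream block is E_K of an
+ * already-known ciphertext block, which is what permits the
+ * 4-way parallel loop below. */
+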
+.align 3 +.globl _gcry_aes_cfb_dec_armv8_ce +.type _gcry_aes_cfb_dec_armv8_ce,%function; +_gcry_aes_cfb_dec_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * x4: nblocks + * w5: nrounds + */ + + cbz x4, .Lcfb_dec_skip + + /* load IV */ + ld1 {v0.16b}, [x3] + + aes_preload_keys(x0, w5); + + b.eq .Lcfb_dec_entry_192 + b.hi .Lcfb_dec_entry_256 + +#define CFB_DEC(bits) \ + .Lcfb_dec_entry_##bits: \ + cmp x4, #4; \ + b.lo .Lcfb_dec_loop_##bits; \ + \ + .Lcfb_dec_loop4_##bits: \ + \ + ld1 {v2.16b-v4.16b}, [x2], #48; /* load ciphertext */ \ + mov v1.16b, v0.16b; \ + sub x4, x4, #4; \ + cmp x4, #4; \ + mov v5.16b, v2.16b; \ + mov v6.16b, v3.16b; \ + mov v7.16b, v4.16b; \ + ld1 {v0.16b}, [x2], #16; /* load next IV / ciphertext */ \ + \ + do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ + \ + eor v1.16b, v1.16b, v5.16b; \ + eor v2.16b, v2.16b, v6.16b; \ + eor v3.16b, v3.16b, v7.16b; \ + eor v4.16b, v4.16b, v0.16b; \ + st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \ + \ + b.hs .Lcfb_dec_loop4_##bits; \ + CLEAR_REG(v3); \ + CLEAR_REG(v4); \ + CLEAR_REG(v5); \ + CLEAR_REG(v6); \ + CLEAR_REG(v7); \ + cbz x4, .Lcfb_dec_done; \ + \ + .Lcfb_dec_loop_##bits: \ + \ + ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \ + \ + sub x4, x4, #1; \ + \ + do_aes_one##bits(e, mc, v0, v0); \ + \ + eor v2.16b, v1.16b, v0.16b; \ + mov v0.16b, v1.16b; \ + st1 {v2.16b}, [x1], #16; /* store plaintext */ \ + \ + cbnz x4, .Lcfb_dec_loop_##bits; \ + b .Lcfb_dec_done; + + CFB_DEC(128) + CFB_DEC(192) + CFB_DEC(256) + +#undef CFB_DEC + +.Lcfb_dec_done: + aes_clear_keys(w5) + + st1 {v0.16b}, [x3] /* store IV */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + CLEAR_REG(v2) + +.Lcfb_dec_skip: + ret +.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; + + +/* + * void _gcry_aes_ocb_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *offset, + * unsigned char *checksum, + * unsigned char *L_table, + * size_t nblocks, + * unsigned int nrounds, + * unsigned int blkn); + */ + +.align 3 +.globl _gcry_aes_ocb_enc_armv8_ce +.type _gcry_aes_ocb_enc_armv8_ce,%function; +_gcry_aes_ocb_enc_armv8_ce: + /* input: + * x0: keysched + * x1: outbuf + * x2: inbuf + * x3: offset + * x4: checksum + * x5: Ltable + * x6: nblocks (0 < nblocks <= 32) + * w7: nrounds + * %st+0: blkn => w12 + */ + + ldr w12, [sp] + ld1 {v0.16b}, [x3] /* load offset */ + ld1 {v16.16b}, [x4] /* load checksum */ + + aes_preload_keys(x0, w7); + + b.eq .Locb_enc_entry_192 + b.hi .Locb_enc_entry_256 + +#define OCB_ENC(bits, ...) 
\ + .Locb_enc_entry_##bits: \ + cmp x6, #4; \ + add x12, x12, #1; \ + b.lo .Locb_enc_loop_##bits; \ + \ + .Locb_enc_loop4_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* Checksum_i = Checksum_{i-1} xor P_i */ \ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \ + \ + add w9, w12, #1; \ + add w10, w12, #2; \ + add w11, w12, #3; \ + rbit w8, w12; \ + add w12, w12, #4; \ + rbit w9, w9; \ + rbit w10, w10; \ + rbit w11, w11; \ + clz w8, w8; /* ntz(i+0) */ \ + clz w9, w9; /* ntz(i+1) */ \ + clz w10, w10; /* ntz(i+2) */ \ + clz w11, w11; /* ntz(i+3) */ \ + add x8, x5, x8, lsl #4; \ + ld1 {v1.16b-v4.16b}, [x2], #64; /* load P_i+<0-3> */ \ + add x9, x5, x9, lsl #4; \ + add x10, x5, x10, lsl #4; \ + add x11, x5, x11, lsl #4; \ + \ + sub x6, x6, #4; \ + \ + ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \ + eor v16.16b, v16.16b, v1.16b; /* Checksum_i+0 */ \ + ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \ + eor v16.16b, v16.16b, v2.16b; /* Checksum_i+1 */ \ + ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \ + eor v16.16b, v16.16b, v3.16b; /* Checksum_i+2 */ \ + eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \ + ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \ + eor v16.16b, v16.16b, v4.16b; /* Checksum_i+3 */ \ + eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \ + eor v1.16b, v1.16b, v5.16b; /* P_i+0 xor Offset_i+0 */ \ + eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \ + eor v2.16b, v2.16b, v6.16b; /* P_i+1 xor Offset_i+1 */ \ + eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \ + cmp x6, #4; \ + eor v3.16b, v3.16b, v7.16b; /* P_i+2 xor Offset_i+2 */ \ + eor v4.16b, v4.16b, v0.16b; /* P_i+3 xor Offset_i+3 */ \ + \ + do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ + \ + eor v1.16b, v1.16b, v5.16b; /* xor Offset_i+0 */ \ + eor v2.16b, v2.16b, v6.16b; /* xor Offset_i+1 */ \ + eor v3.16b, v3.16b, v7.16b; /* xor Offset_i+2 */ \ + eor v4.16b, v4.16b, v0.16b; /* xor Offset_i+3 */ \ + st1 {v1.16b-v4.16b}, [x1], #64; \ + \ + b.hs .Locb_enc_loop4_##bits; \ + CLEAR_REG(v3); \ + CLEAR_REG(v4); \ + CLEAR_REG(v5); \ + CLEAR_REG(v6); \ + CLEAR_REG(v7); \ + cbz x6, .Locb_enc_done; \ + \ + .Locb_enc_loop_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* Checksum_i = Checksum_{i-1} xor P_i */ \ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \ + \ + rbit x8, x12; \ + add x12, x12, #1; \ + clz x8, x8; /* ntz(i) */ \ + add x8, x5, x8, lsl #4; \ + \ + ld1 {v1.16b}, [x2], #16; /* load plaintext */ \ + ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \ + sub x6, x6, #1; \ + eor v0.16b, v0.16b, v2.16b; \ + eor v16.16b, v16.16b, v1.16b; \ + eor v1.16b, v1.16b, v0.16b; \ + \ + do_aes_one##bits(e, mc, v1, v1); \ + \ + eor v1.16b, v1.16b, v0.16b; \ + st1 {v1.16b}, [x1], #16; /* store ciphertext */ \ + \ + cbnz x6, .Locb_enc_loop_##bits; \ + b .Locb_enc_done; + + OCB_ENC(128) + OCB_ENC(192) + OCB_ENC(256) + +#undef OCB_ENC + +.Locb_enc_done: + aes_clear_keys(w7) + + st1 {v16.16b}, [x4] /* store checksum */ + st1 {v0.16b}, [x3] /* store offset */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + CLEAR_REG(v2) + CLEAR_REG(v16) + + ret +.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; + + +/* + * void _gcry_aes_ocb_dec_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *offset, + * unsigned char *checksum, + * unsigned char *L_table, + * size_t nblocks, + * unsigned int nrounds, + * unsigned int blkn); + */ + +.align 3 +.globl _gcry_aes_ocb_dec_armv8_ce +.type _gcry_aes_ocb_dec_armv8_ce,%function; +_gcry_aes_ocb_dec_armv8_ce: + /* input: + * x0: 
keysched + * x1: outbuf + * x2: inbuf + * x3: offset + * x4: checksum + * x5: Ltable + * x6: nblocks (0 < nblocks <= 32) + * w7: nrounds + * %st+0: blkn => w12 + */ + + ldr w12, [sp] + ld1 {v0.16b}, [x3] /* load offset */ + ld1 {v16.16b}, [x4] /* load checksum */ + + aes_preload_keys(x0, w7); + + b.eq .Locb_dec_entry_192 + b.hi .Locb_dec_entry_256 + +#define OCB_DEC(bits) \ + .Locb_dec_entry_##bits: \ + cmp x6, #4; \ + add w12, w12, #1; \ + b.lo .Locb_dec_loop_##bits; \ + \ + .Locb_dec_loop4_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \ + /* Checksum_i = Checksum_{i-1} xor P_i */ \ + \ + add w9, w12, #1; \ + add w10, w12, #2; \ + add w11, w12, #3; \ + rbit w8, w12; \ + add w12, w12, #4; \ + rbit w9, w9; \ + rbit w10, w10; \ + rbit w11, w11; \ + clz w8, w8; /* ntz(i+0) */ \ + clz w9, w9; /* ntz(i+1) */ \ + clz w10, w10; /* ntz(i+2) */ \ + clz w11, w11; /* ntz(i+3) */ \ + add x8, x5, x8, lsl #4; \ + ld1 {v1.16b-v4.16b}, [x2], #64; /* load C_i+<0-3> */ \ + add x9, x5, x9, lsl #4; \ + add x10, x5, x10, lsl #4; \ + add x11, x5, x11, lsl #4; \ + \ + sub x6, x6, #4; \ + \ + ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \ + ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \ + ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \ + eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \ + ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \ + eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \ + eor v1.16b, v1.16b, v5.16b; /* C_i+0 xor Offset_i+0 */ \ + eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \ + eor v2.16b, v2.16b, v6.16b; /* C_i+1 xor Offset_i+1 */ \ + eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \ + cmp x6, #4; \ + eor v3.16b, v3.16b, v7.16b; /* C_i+2 xor Offset_i+2 */ \ + eor v4.16b, v4.16b, v0.16b; /* C_i+3 xor Offset_i+3 */ \ + \ + do_aes_4_##bits(d, imc, v1, v2, v3, v4); \ + \ + eor v1.16b, v1.16b, v5.16b; /* xor Offset_i+0 */ \ + eor v2.16b, v2.16b, v6.16b; /* xor Offset_i+1 */ \ + eor v16.16b, v16.16b, v1.16b; /* Checksum_i+0 */ \ + eor v3.16b, v3.16b, v7.16b; /* xor Offset_i+2 */ \ + eor v16.16b, v16.16b, v2.16b; /* Checksum_i+1 */ \ + eor v4.16b, v4.16b, v0.16b; /* xor Offset_i+3 */ \ + eor v16.16b, v16.16b, v3.16b; /* Checksum_i+2 */ \ + eor v16.16b, v16.16b, v4.16b; /* Checksum_i+3 */ \ + st1 {v1.16b-v4.16b}, [x1], #64; \ + \ + b.hs .Locb_dec_loop4_##bits; \ + CLEAR_REG(v3); \ + CLEAR_REG(v4); \ + CLEAR_REG(v5); \ + CLEAR_REG(v6); \ + CLEAR_REG(v7); \ + cbz x6, .Locb_dec_done; \ + \ + .Locb_dec_loop_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \ + /* Checksum_i = Checksum_{i-1} xor P_i */ \ + \ + rbit w8, w12; \ + add w12, w12, #1; \ + clz w8, w8; /* ntz(i) */ \ + add x8, x5, x8, lsl #4; \ + \ + ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \ + ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \ + sub x6, x6, #1; \ + eor v0.16b, v0.16b, v2.16b; \ + eor v1.16b, v1.16b, v0.16b; \ + \ + do_aes_one##bits(d, imc, v1, v1) \ + \ + eor v1.16b, v1.16b, v0.16b; \ + st1 {v1.16b}, [x1], #16; /* store plaintext */ \ + eor v16.16b, v16.16b, v1.16b; \ + \ + cbnz x6, .Locb_dec_loop_##bits; \ + b .Locb_dec_done; + + OCB_DEC(128) + OCB_DEC(192) + OCB_DEC(256) + +#undef OCB_DEC + +.Locb_dec_done: + aes_clear_keys(w7) + + st1 {v16.16b}, [x4] /* store checksum */ + st1 {v0.16b}, [x3] /* store offset */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + CLEAR_REG(v2) + CLEAR_REG(v16) + + ret +.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; + + +/* + * void _gcry_aes_ocb_auth_armv8_ce (const 
void *keysched, + * const unsigned char *abuf, + * unsigned char *offset, + * unsigned char *checksum, + * unsigned char *L_table, + * size_t nblocks, + * unsigned int nrounds, + * unsigned int blkn); + */ + +.align 3 +.globl _gcry_aes_ocb_auth_armv8_ce +.type _gcry_aes_ocb_auth_armv8_ce,%function; +_gcry_aes_ocb_auth_armv8_ce: + /* input: + * x0: keysched + * x1: abuf + * x2: offset => x3 + * x3: checksum => x4 + * x4: Ltable => x5 + * x5: nblocks => x6 (0 < nblocks <= 32) + * w6: nrounds => w7 + * w7: blkn => w12 + */ + mov x12, x7 + mov x7, x6 + mov x6, x5 + mov x5, x4 + mov x4, x3 + mov x3, x2 + + aes_preload_keys(x0, w7); + + ld1 {v0.16b}, [x3] /* load offset */ + ld1 {v16.16b}, [x4] /* load checksum */ + + beq .Locb_auth_entry_192 + bhi .Locb_auth_entry_256 + +#define OCB_AUTH(bits) \ + .Locb_auth_entry_##bits: \ + cmp x6, #4; \ + add w12, w12, #1; \ + b.lo .Locb_auth_loop_##bits; \ + \ + .Locb_auth_loop4_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \ + \ + add w9, w12, #1; \ + add w10, w12, #2; \ + add w11, w12, #3; \ + rbit w8, w12; \ + add w12, w12, #4; \ + rbit w9, w9; \ + rbit w10, w10; \ + rbit w11, w11; \ + clz w8, w8; /* ntz(i+0) */ \ + clz w9, w9; /* ntz(i+1) */ \ + clz w10, w10; /* ntz(i+2) */ \ + clz w11, w11; /* ntz(i+3) */ \ + add x8, x5, x8, lsl #4; \ + ld1 {v1.16b-v4.16b}, [x1], #64; /* load A_i+<0-3> */ \ + add x9, x5, x9, lsl #4; \ + add x10, x5, x10, lsl #4; \ + add x11, x5, x11, lsl #4; \ + \ + sub x6, x6, #4; \ + \ + ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \ + ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \ + ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \ + eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \ + ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \ + eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \ + eor v1.16b, v1.16b, v5.16b; /* A_i+0 xor Offset_i+0 */ \ + eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \ + eor v2.16b, v2.16b, v6.16b; /* A_i+1 xor Offset_i+1 */ \ + eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \ + cmp x6, #4; \ + eor v3.16b, v3.16b, v7.16b; /* A_i+2 xor Offset_i+2 */ \ + eor v4.16b, v4.16b, v0.16b; /* A_i+3 xor Offset_i+3 */ \ + \ + do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ + \ + eor v1.16b, v1.16b, v2.16b; \ + eor v16.16b, v16.16b, v3.16b; \ + eor v1.16b, v1.16b, v4.16b; \ + eor v16.16b, v16.16b, v1.16b; \ + \ + b.hs .Locb_auth_loop4_##bits; \ + CLEAR_REG(v3); \ + CLEAR_REG(v4); \ + CLEAR_REG(v5); \ + CLEAR_REG(v6); \ + CLEAR_REG(v7); \ + cbz x6, .Locb_auth_done; \ + \ + .Locb_auth_loop_##bits: \ + \ + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \ + \ + rbit w8, w12; \ + add w12, w12, #1; \ + clz w8, w8; /* ntz(i) */ \ + add x8, x5, x8, lsl #4; \ + \ + ld1 {v1.16b}, [x1], #16; /* load aadtext */ \ + ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \ + sub x6, x6, #1; \ + eor v0.16b, v0.16b, v2.16b; \ + eor v1.16b, v1.16b, v0.16b; \ + \ + do_aes_one##bits(e, mc, v1, v1) \ + \ + eor v16.16b, v16.16b, v1.16b; \ + \ + cbnz x6, .Locb_auth_loop_##bits; \ + b .Locb_auth_done; + + OCB_AUTH(128) + OCB_AUTH(192) + OCB_AUTH(256) + +#undef OCB_AUTH + +.Locb_auth_done: + aes_clear_keys(w7) + + st1 {v16.16b}, [x4] /* store checksum */ + st1 {v0.16b}, [x3] /* store offset */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + CLEAR_REG(v2) + CLEAR_REG(v16) + + ret +.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; + + +/* + * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b); + */ +.align 3 +.globl _gcry_aes_sbox4_armv8_ce +.type 
_gcry_aes_sbox4_armv8_ce,%function; +_gcry_aes_sbox4_armv8_ce: + /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in + * Cryptology — CT-RSA 2015" for details. + */ + movi v0.16b, #0x52 + movi v1.16b, #0 + mov v0.S[0], w0 + aese v0.16b, v1.16b + addv s0, v0.4s + mov w0, v0.S[0] + CLEAR_REG(v0) + ret +.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; + + +/* + * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src); + */ +.align 3 +.globl _gcry_aes_invmixcol_armv8_ce +.type _gcry_aes_invmixcol_armv8_ce,%function; +_gcry_aes_invmixcol_armv8_ce: + ld1 {v0.16b}, [x1] + aesimc v0.16b, v0.16b + st1 {v0.16b}, [x0] + CLEAR_REG(v0) + ret +.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-ce.c b/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-ce.c new file mode 100644 index 0000000..334cf68 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-armv8-ce.c @@ -0,0 +1,364 @@ +/* ARMv8 Crypto Extension AES for Libgcrypt + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> /* for memcmp() */ + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-selftest.h" +#include "rijndael-internal.h" +#include "./cipher-internal.h" + + +#ifdef USE_ARM_CE + + +typedef struct u128_s { u32 a, b, c, d; } u128_t; + +extern u32 _gcry_aes_sbox4_armv8_ce(u32 in4b); +extern void _gcry_aes_invmixcol_armv8_ce(u128_t *dst, const u128_t *src); + +extern unsigned int _gcry_aes_enc_armv8_ce(const void *keysched, byte *dst, + const byte *src, + unsigned int nrounds); +extern unsigned int _gcry_aes_dec_armv8_ce(const void *keysched, byte *dst, + const byte *src, + unsigned int nrounds); + +extern void _gcry_aes_cbc_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + int cbc_mac, unsigned int nrounds); +extern void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + unsigned int nrounds); + +extern void _gcry_aes_cfb_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + unsigned int nrounds); +extern void _gcry_aes_cfb_dec_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + unsigned int nrounds); + +extern void _gcry_aes_ctr_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + unsigned int nrounds); + +extern void _gcry_aes_ocb_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *offset, + unsigned char *checksum, + unsigned char *L_table, + size_t nblocks, + unsigned int nrounds, + unsigned int blkn); +extern void _gcry_aes_ocb_dec_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *offset, + unsigned char *checksum, + unsigned char *L_table, + size_t nblocks, + unsigned int nrounds, + unsigned int blkn); +extern void _gcry_aes_ocb_auth_armv8_ce (const void *keysched, + const unsigned char *abuf, + unsigned char *offset, + unsigned char *checksum, + unsigned char *L_table, + size_t nblocks, + unsigned int nrounds, + unsigned int blkn); + +typedef void (*ocb_crypt_fn_t) (const void *keysched, unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *offset, unsigned char *checksum, + unsigned char *L_table, size_t nblocks, + unsigned int nrounds, unsigned int blkn); + +void +_gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key) +{ + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte data[MAXKC][4]; + u32 data32[MAXKC]; + } tkk[2]; + unsigned int rounds = ctx->rounds; + int KC = rounds - 6; + unsigned int keylen = KC * 4; + unsigned int i, r, t; + byte rcon = 1; + int j; +#define k tkk[0].data +#define k_u32 tkk[0].data32 +#define tk tkk[1].data +#define tk_u32 tkk[1].data32 +#define W (ctx->keyschenc) +#define W_u32 (ctx->keyschenc32) + + for (i = 0; i < keylen; i++) + { + k[i >> 2][i & 3] = key[i]; + } + + for (j = KC-1; j >= 0; j--) + { + tk_u32[j] = k_u32[j]; + } + r = 0; + t = 0; + /* Copy values into round key array. 
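+ * W receives rounds + 1 round keys of four 32-bit words each; r and
+ * t carry the round/word fill position across expansion iterations.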
*/ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + W_u32[r][t] = le_bswap32(tk_u32[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + + while (r < rounds + 1) + { + tk_u32[0] ^= _gcry_aes_sbox4_armv8_ce(rol(tk_u32[KC - 1], 24)) ^ rcon; + + if (KC != 8) + { + for (j = 1; j < KC; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + } + else + { + for (j = 1; j < KC/2; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + + tk_u32[KC/2] ^= _gcry_aes_sbox4_armv8_ce(tk_u32[KC/2 - 1]); + + for (j = KC/2 + 1; j < KC; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + } + + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + W_u32[r][t] = le_bswap32(tk_u32[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + + rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b); + } + +#undef W +#undef tk +#undef k +#undef W_u32 +#undef tk_u32 +#undef k_u32 + wipememory(&tkk, sizeof(tkk)); +} + +/* Make a decryption key from an encryption key. */ +void +_gcry_aes_armv8_ce_prepare_decryption (RIJNDAEL_context *ctx) +{ + u128_t *ekey = (u128_t *)(void *)ctx->keyschenc; + u128_t *dkey = (u128_t *)(void *)ctx->keyschdec; + int rounds = ctx->rounds; + int rr; + int r; + +#define DO_AESIMC() _gcry_aes_invmixcol_armv8_ce(&dkey[r], &ekey[rr]) + + dkey[0] = ekey[rounds]; + r = 1; + rr = rounds-1; + DO_AESIMC(); r++; rr--; /* round 1 */ + DO_AESIMC(); r++; rr--; /* round 2 */ + DO_AESIMC(); r++; rr--; /* round 3 */ + DO_AESIMC(); r++; rr--; /* round 4 */ + DO_AESIMC(); r++; rr--; /* round 5 */ + DO_AESIMC(); r++; rr--; /* round 6 */ + DO_AESIMC(); r++; rr--; /* round 7 */ + DO_AESIMC(); r++; rr--; /* round 8 */ + DO_AESIMC(); r++; rr--; /* round 9 */ + if (rounds >= 12) + { + if (rounds > 12) + { + DO_AESIMC(); r++; rr--; /* round 10 */ + DO_AESIMC(); r++; rr--; /* round 11 */ + } + + DO_AESIMC(); r++; rr--; /* round 12 / 10 */ + DO_AESIMC(); r++; rr--; /* round 13 / 11 */ + } + + dkey[r] = ekey[0]; + +#undef DO_AESIMC +} + +unsigned int +_gcry_aes_armv8_ce_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, + const unsigned char *src) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + return _gcry_aes_enc_armv8_ce(keysched, dst, src, nrounds); +} + +unsigned int +_gcry_aes_armv8_ce_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, + const unsigned char *src) +{ + const void *keysched = ctx->keyschdec32; + unsigned int nrounds = ctx->rounds; + + return _gcry_aes_dec_armv8_ce(keysched, dst, src, nrounds); +} + +void +_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks, int cbc_mac) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_cbc_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, cbc_mac, + nrounds); +} + +void +_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + const void *keysched = ctx->keyschdec32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_cbc_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); +} + +void +_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_cfb_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); +} + +void 
+_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_cfb_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); +} + +void +_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_ctr_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); +} + +void +_gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32; + ocb_crypt_fn_t crypt_fn = encrypt ? _gcry_aes_ocb_enc_armv8_ce + : _gcry_aes_ocb_dec_armv8_ce; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int nrounds = ctx->rounds; + u64 blkn = c->u_mode.ocb.data_nblocks; + + c->u_mode.ocb.data_nblocks = blkn + nblocks; + + crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, + c->u_mode.ocb.L[0], nblocks, nrounds, (unsigned int)blkn); +} + +void +_gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, + size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + const void *keysched = ctx->keyschenc32; + const unsigned char *abuf = abuf_arg; + unsigned int nrounds = ctx->rounds; + u64 blkn = c->u_mode.ocb.aad_nblocks; + + c->u_mode.ocb.aad_nblocks = blkn + nblocks; + + _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, c->u_mode.ocb.L[0], + nblocks, nrounds, (unsigned int)blkn); +} + +#endif /* USE_ARM_CE */ diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-internal.h b/libotr/libgcrypt-1.8.7/cipher/rijndael-internal.h new file mode 100644 index 0000000..160fb8c --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-internal.h @@ -0,0 +1,166 @@ +/* Rijndael (AES) for GnuPG + * Copyright (C) 2000, 2001, 2002, 2003, 2007, + * 2008, 2011, 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef G10_RIJNDAEL_INTERNAL_H +#define G10_RIJNDAEL_INTERNAL_H + +#include "types.h" /* for byte and u32 typedefs */ + + +#define MAXKC (256/32) +#define MAXROUNDS 14 +#define BLOCKSIZE (128/8) + + +/* Helper macro to force alignment to 16 bytes. */ +#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED +# define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) +#else +# define ATTR_ALIGNED_16 +#endif + + +/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. 
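Each USE_* macro below gates one accelerated implementation on
+ the compiler and platform checks recorded by configure in config.h.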
*/
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_SSSE3 indicates whether to use SSSE3 code. */
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+# define USE_ARM_ASM 1
+# endif
+#endif
+#if defined(__AARCH64EL__)
+# ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+# define USE_ARM_ASM 1
+# endif
+#endif
+
+/* USE_PADLOCK indicates whether to compile the padlock specific
+ code. */
+#undef USE_PADLOCK
+#ifdef ENABLE_PADLOCK_SUPPORT
+# ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# if (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)
+# define USE_PADLOCK 1
+# endif
+# endif
+#endif /*ENABLE_PADLOCK_SUPPORT*/
+
+/* USE_AESNI indicates whether to compile with Intel AES-NI code. We
+ need the vector-size attribute which seems to be available since
+ gcc 3. However, to be on the safe side we require at least gcc 4. */
+#undef USE_AESNI
+#ifdef ENABLE_AESNI_SUPPORT
+# if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+# if __GNUC__ >= 4
+# define USE_AESNI 1
+# endif
+# endif
+#endif /* ENABLE_AESNI_SUPPORT */
+
+/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
+ * code. */
+#undef USE_ARM_CE
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+# define USE_ARM_CE 1
+# elif defined(__AARCH64EL__) \
+ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+# define USE_ARM_CE 1
+# endif
+#endif /* ENABLE_ARM_CRYPTO_SUPPORT */
+
+struct RIJNDAEL_context_s;
+
+typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx,
+ unsigned char *bx,
+ const unsigned char *ax);
+typedef void (*rijndael_prefetchfn_t)(void);
+
+/* Our context object. */
+typedef struct RIJNDAEL_context_s
+{
+ /* The first fields are the keyschedule arrays. This is so that
+ they are aligned on a 16 byte boundary if using gcc. This
+ alignment is required for the AES-NI code and a good idea in any
+ case. The alignment is guaranteed due to the way cipher.c
+ allocates the space for the context. The PROPERLY_ALIGNED_TYPE
+ hack is used to force a minimal alignment if not using gcc or if
+ the alignment requirement is higher than 16 bytes. */
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ byte keyschedule[MAXROUNDS+1][4][4];
+ u32 keyschedule32[MAXROUNDS+1][4];
+#ifdef USE_PADLOCK
+ /* The key as passed to the padlock engine. It is only used if
+ the padlock engine is used (USE_PADLOCK, below). */
+ unsigned char padlock_key[16] __attribute__ ((aligned (16)));
+#endif /*USE_PADLOCK*/
+ } u1;
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ byte keyschedule[MAXROUNDS+1][4][4];
+ u32 keyschedule32[MAXROUNDS+1][4];
+ } u2;
+ int rounds; /* Key-length-dependent number of rounds. */
+ unsigned int decryption_prepared:1; /* The decryption key schedule is available. */
+#ifdef USE_PADLOCK
+ unsigned int use_padlock:1; /* Padlock shall be used.
*/
+#endif /*USE_PADLOCK*/
+#ifdef USE_AESNI
+ unsigned int use_aesni:1; /* AES-NI shall be used. */
+#endif /*USE_AESNI*/
+#ifdef USE_SSSE3
+ unsigned int use_ssse3:1; /* SSSE3 shall be used. */
+#endif /*USE_SSSE3*/
+#ifdef USE_ARM_CE
+ unsigned int use_arm_ce:1; /* ARMv8 CE shall be used. */
+#endif /*USE_ARM_CE*/
+ rijndael_cryptfn_t encrypt_fn;
+ rijndael_cryptfn_t decrypt_fn;
+ rijndael_prefetchfn_t prefetch_enc_fn;
+ rijndael_prefetchfn_t prefetch_dec_fn;
+} RIJNDAEL_context ATTR_ALIGNED_16;
+
+/* Macros defining aliases for the keyschedules. */
+#define keyschenc u1.keyschedule
+#define keyschenc32 u1.keyschedule32
+#define keyschdec u2.keyschedule
+#define keyschdec32 u2.keyschedule32
+#define padlockkey u1.padlock_key
+
+#endif /* G10_RIJNDAEL_INTERNAL_H */
diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-padlock.c b/libotr/libgcrypt-1.8.7/cipher/rijndael-padlock.c
new file mode 100644
index 0000000..234751b
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-padlock.c
@@ -0,0 +1,104 @@
+/* Padlock accelerated AES for Libgcrypt
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ * 2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+
+#ifdef USE_PADLOCK
+
+/* Encrypt or decrypt one block using the padlock engine. A and B may
+ be the same. */
+static unsigned int
+do_padlock (const RIJNDAEL_context *ctx, unsigned char *bx,
+ const unsigned char *ax, int decrypt_flag)
+{
+ /* BX and AX are not necessarily correctly aligned. Thus we need to
+ copy them here. */
+ unsigned char a[16] __attribute__ ((aligned (16)));
+ unsigned char b[16] __attribute__ ((aligned (16)));
+ unsigned int cword[4] __attribute__ ((aligned (16)));
+ unsigned char *pa = a;
+ unsigned char *pb = b;
+ int blocks;
+
+ /* The control word fields are:
+ 127:12 11:10 9 8 7 6 5 4 3:0
+ RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND */
+ cword[0] = (ctx->rounds & 15); /* (The mask is just a safeguard.) */
+ cword[1] = 0;
+ cword[2] = 0;
+ cword[3] = 0;
+ if (decrypt_flag)
+ cword[0] |= 0x00000200;
+
+ memcpy (a, ax, 16);
+
+ blocks = 1; /* Init counter for just one block. */
+#ifdef __x86_64__
+ asm volatile
+ ("pushfq\n\t" /* Force key reload. */
+ "popfq\n\t"
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+ : "+S" (pa), "+D" (pb), "+c" (blocks)
+ : "d" (cword), "b" (ctx->padlockkey)
+ : "cc", "memory"
+ );
+#else
+ asm volatile
+ ("pushfl\n\t" /* Force key reload. */
+ "popfl\n\t"
+ "xchg %4, %%ebx\n\t" /* Load key. */
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB.
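The operand list below binds %ecx to the block count, %edx to the
+ control word and %esi/%edi to source/destination; unlike the 64-bit
+ variant above, the key pointer travels in a spare register and is
+ swapped into %ebx around the instruction, because %ebx is the PIC
+ register here.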
*/
+ "xchg %4, %%ebx\n" /* Restore GOT register. */
+ : "+S" (pa), "+D" (pb), "+c" (blocks)
+ : "d" (cword), "r" (ctx->padlockkey)
+ : "cc", "memory"
+ );
+#endif
+
+ memcpy (bx, b, 16);
+
+ return (48 + 15 /* possible padding for alignment */);
+}
+
+unsigned int
+_gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx,
+ unsigned char *bx, const unsigned char *ax)
+{
+ return do_padlock(ctx, bx, ax, 0);
+}
+
+unsigned int
+_gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx,
+ unsigned char *bx, const unsigned char *ax)
+{
+ return do_padlock(ctx, bx, ax, 1);
+}
+
+#endif /* USE_PADLOCK */
diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-ssse3-amd64-asm.S b/libotr/libgcrypt-1.8.7/cipher/rijndael-ssse3-amd64-asm.S
new file mode 100644
index 0000000..3ae55e8
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-ssse3-amd64-asm.S
@@ -0,0 +1,853 @@
+/* SSSE3 vector permutation AES for Libgcrypt
+ * Copyright (C) 2014-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * The code is based on the public domain library libvpaes version 0.5
+ * available at http://crypto.stanford.edu/vpaes/ and which carries
+ * this notice:
+ *
+ * libvpaes: constant-time SSSE3 AES encryption and decryption.
+ * version 0.5
+ *
+ * By Mike Hamburg, Stanford University, 2009. Public domain.
+ * I wrote essentially all of this code. I did not write the test
+ * vectors; they are the NIST known answer tests. I hereby release all
+ * the code and documentation here that I wrote into the public domain.
+ *
+ * This is an implementation of AES following my paper,
+ * "Accelerating AES with Vector Permute Instructions"
+ * CHES 2009; http://shiftleft.org/papers/vector_aes/
+ */
+
+#if defined(__x86_64__)
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ELF(...)
+#else
+# define ELF(...)
__VA_ARGS__ +#endif + +.text + +## +## _gcry_aes_ssse3_enc_preload +## +ELF(.type _gcry_aes_ssse3_enc_preload,@function) +.globl _gcry_aes_ssse3_enc_preload +_gcry_aes_ssse3_enc_preload: + lea .Laes_consts(%rip), %rax + movdqa (%rax), %xmm9 # 0F + movdqa .Lk_inv (%rax), %xmm10 # inv + movdqa .Lk_inv+16(%rax), %xmm11 # inva + movdqa .Lk_sb1 (%rax), %xmm13 # sb1u + movdqa .Lk_sb1+16(%rax), %xmm12 # sb1t + movdqa .Lk_sb2 (%rax), %xmm15 # sb2u + movdqa .Lk_sb2+16(%rax), %xmm14 # sb2t + ret +ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload) + +## +## _gcry_aes_ssse3_dec_preload +## +ELF(.type _gcry_aes_ssse3_dec_preload,@function) +.globl _gcry_aes_ssse3_dec_preload +_gcry_aes_ssse3_dec_preload: + lea .Laes_consts(%rip), %rax + movdqa (%rax), %xmm9 # 0F + movdqa .Lk_inv (%rax), %xmm10 # inv + movdqa .Lk_inv+16(%rax), %xmm11 # inva + movdqa .Lk_dsb9 (%rax), %xmm13 # sb9u + movdqa .Lk_dsb9+16(%rax), %xmm12 # sb9t + movdqa .Lk_dsbd (%rax), %xmm15 # sbdu + movdqa .Lk_dsbb (%rax), %xmm14 # sbbu + movdqa .Lk_dsbe (%rax), %xmm8 # sbeu + ret +ELF(.size _gcry_aes_ssse3_dec_preload,.-_gcry_aes_ssse3_dec_preload) + +## +## Constant-time SSSE3 AES core implementation. +## +## By Mike Hamburg (Stanford University), 2009 +## Public domain. +## + +## +## _aes_encrypt_core +## +## AES-encrypt %xmm0. +## +## Inputs: +## %xmm0 = input +## %xmm9-%xmm15 as in .Laes_preheat +## (%rdx) = scheduled keys +## %rax = nrounds - 1 +## +## Output in %xmm0 +## Clobbers %xmm1-%xmm4, %r9, %r11, %rax, %rcx +## Preserves %xmm6 - %xmm7 so you get some local vectors +## +## +.align 16 +ELF(.type _gcry_aes_ssse3_encrypt_core,@function) +.globl _gcry_aes_ssse3_encrypt_core +_gcry_aes_ssse3_encrypt_core: +_aes_encrypt_core: + lea .Laes_consts(%rip), %rcx + leaq .Lk_mc_backward(%rcx), %rdi + mov $16, %rsi + movdqa .Lk_ipt (%rcx), %xmm2 # iptlo + movdqa %xmm9, %xmm1 + pandn %xmm0, %xmm1 + psrld $4, %xmm1 + pand %xmm9, %xmm0 + pshufb %xmm0, %xmm2 + movdqa .Lk_ipt+16(%rcx), %xmm0 # ipthi + pshufb %xmm1, %xmm0 + pxor (%rdx),%xmm2 + pxor %xmm2, %xmm0 + add $16, %rdx + jmp .Laes_entry + +.align 8 +.Laes_loop: + # middle of middle round + movdqa %xmm13, %xmm4 # 4 : sb1u + pshufb %xmm2, %xmm4 # 4 = sb1u + pxor (%rdx), %xmm4 # 4 = sb1u + k + movdqa %xmm12, %xmm0 # 0 : sb1t + pshufb %xmm3, %xmm0 # 0 = sb1t + pxor %xmm4, %xmm0 # 0 = A + movdqa %xmm15, %xmm4 # 4 : sb2u + pshufb %xmm2, %xmm4 # 4 = sb2u + movdqa .Lk_mc_forward-.Lk_mc_backward(%rsi,%rdi), %xmm1 + movdqa %xmm14, %xmm2 # 2 : sb2t + pshufb %xmm3, %xmm2 # 2 = sb2t + pxor %xmm4, %xmm2 # 2 = 2A + movdqa %xmm0, %xmm3 # 3 = A + pshufb %xmm1, %xmm0 # 0 = B + pxor %xmm2, %xmm0 # 0 = 2A+B + pshufb (%rsi,%rdi), %xmm3 # 3 = D + lea 16(%esi),%esi # next mc + pxor %xmm0, %xmm3 # 3 = 2A+B+D + lea 16(%rdx),%rdx # next key + pshufb %xmm1, %xmm0 # 0 = 2B+C + pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D + and $48, %rsi # ... 
mod 4 + dec %rax # nr-- + +.Laes_entry: + # top of round + movdqa %xmm9, %xmm1 # 1 : i + pandn %xmm0, %xmm1 # 1 = i<<4 + psrld $4, %xmm1 # 1 = i + pand %xmm9, %xmm0 # 0 = k + movdqa %xmm11, %xmm2 # 2 : a/k + pshufb %xmm0, %xmm2 # 2 = a/k + pxor %xmm1, %xmm0 # 0 = j + movdqa %xmm10, %xmm3 # 3 : 1/i + pshufb %xmm1, %xmm3 # 3 = 1/i + pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k + movdqa %xmm10, %xmm4 # 4 : 1/j + pshufb %xmm0, %xmm4 # 4 = 1/j + pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k + movdqa %xmm10, %xmm2 # 2 : 1/iak + pshufb %xmm3, %xmm2 # 2 = 1/iak + pxor %xmm0, %xmm2 # 2 = io + movdqa %xmm10, %xmm3 # 3 : 1/jak + pshufb %xmm4, %xmm3 # 3 = 1/jak + pxor %xmm1, %xmm3 # 3 = jo + jnz .Laes_loop + + # middle of last round + movdqa .Lk_sbo(%rcx), %xmm4 # 3 : sbou + pshufb %xmm2, %xmm4 # 4 = sbou + pxor (%rdx), %xmm4 # 4 = sb1u + k + movdqa .Lk_sbo+16(%rcx), %xmm0 # 0 : sbot + pshufb %xmm3, %xmm0 # 0 = sb1t + pxor %xmm4, %xmm0 # 0 = A + pshufb .Lk_sr(%rsi,%rcx), %xmm0 + ret +ELF(.size _aes_encrypt_core,.-_aes_encrypt_core) + +## +## Decryption core +## +## Same API as encryption core. +## +.align 16 +.globl _gcry_aes_ssse3_decrypt_core +ELF(.type _gcry_aes_ssse3_decrypt_core,@function) +_gcry_aes_ssse3_decrypt_core: +_aes_decrypt_core: + lea .Laes_consts(%rip), %rcx + movl %eax, %esi + shll $4, %esi + xorl $48, %esi + andl $48, %esi + movdqa .Lk_dipt (%rcx), %xmm2 # iptlo + movdqa %xmm9, %xmm1 + pandn %xmm0, %xmm1 + psrld $4, %xmm1 + pand %xmm9, %xmm0 + pshufb %xmm0, %xmm2 + movdqa .Lk_dipt+16(%rcx), %xmm0 # ipthi + pshufb %xmm1, %xmm0 + pxor (%rdx), %xmm2 + pxor %xmm2, %xmm0 + movdqa .Lk_mc_forward+48(%rcx), %xmm5 + lea 16(%rdx), %rdx + neg %rax + jmp .Laes_dec_entry + +.align 16 +.Laes_dec_loop: +## +## Inverse mix columns +## + movdqa %xmm13, %xmm4 # 4 : sb9u + pshufb %xmm2, %xmm4 # 4 = sb9u + pxor (%rdx), %xmm4 + movdqa %xmm12, %xmm0 # 0 : sb9t + pshufb %xmm3, %xmm0 # 0 = sb9t + movdqa .Lk_dsbd+16(%rcx),%xmm1 # 1 : sbdt + pxor %xmm4, %xmm0 # 0 = ch + lea 16(%rdx), %rdx # next round key + + pshufb %xmm5, %xmm0 # MC ch + movdqa %xmm15, %xmm4 # 4 : sbdu + pshufb %xmm2, %xmm4 # 4 = sbdu + pxor %xmm0, %xmm4 # 4 = ch + pshufb %xmm3, %xmm1 # 1 = sbdt + pxor %xmm4, %xmm1 # 1 = ch + + pshufb %xmm5, %xmm1 # MC ch + movdqa %xmm14, %xmm4 # 4 : sbbu + pshufb %xmm2, %xmm4 # 4 = sbbu + inc %rax # nr-- + pxor %xmm1, %xmm4 # 4 = ch + movdqa .Lk_dsbb+16(%rcx),%xmm0 # 0 : sbbt + pshufb %xmm3, %xmm0 # 0 = sbbt + pxor %xmm4, %xmm0 # 0 = ch + + pshufb %xmm5, %xmm0 # MC ch + movdqa %xmm8, %xmm4 # 4 : sbeu + pshufb %xmm2, %xmm4 # 4 = sbeu + pshufd $0x93, %xmm5, %xmm5 + pxor %xmm0, %xmm4 # 4 = ch + movdqa .Lk_dsbe+16(%rcx),%xmm0 # 0 : sbet + pshufb %xmm3, %xmm0 # 0 = sbet + pxor %xmm4, %xmm0 # 0 = ch + +.Laes_dec_entry: + # top of round + movdqa %xmm9, %xmm1 # 1 : i + pandn %xmm0, %xmm1 # 1 = i<<4 + psrld $4, %xmm1 # 1 = i + pand %xmm9, %xmm0 # 0 = k + movdqa %xmm11, %xmm2 # 2 : a/k + pshufb %xmm0, %xmm2 # 2 = a/k + pxor %xmm1, %xmm0 # 0 = j + movdqa %xmm10, %xmm3 # 3 : 1/i + pshufb %xmm1, %xmm3 # 3 = 1/i + pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k + movdqa %xmm10, %xmm4 # 4 : 1/j + pshufb %xmm0, %xmm4 # 4 = 1/j + pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k + movdqa %xmm10, %xmm2 # 2 : 1/iak + pshufb %xmm3, %xmm2 # 2 = 1/iak + pxor %xmm0, %xmm2 # 2 = io + movdqa %xmm10, %xmm3 # 3 : 1/jak + pshufb %xmm4, %xmm3 # 3 = 1/jak + pxor %xmm1, %xmm3 # 3 = jo + jnz .Laes_dec_loop + + # middle of last round + movdqa .Lk_dsbo(%rcx), %xmm4 # 3 : sbou + pshufb %xmm2, %xmm4 # 4 = sbou + pxor (%rdx), %xmm4 # 4 = sb1u + k + movdqa 
.Lk_dsbo+16(%rcx), %xmm0 # 0 : sbot
+ pshufb %xmm3, %xmm0 # 0 = sb1t
+ pxor %xmm4, %xmm0 # 0 = A
+ pshufb .Lk_sr(%rsi,%rcx), %xmm0
+ ret
+ELF(.size _aes_decrypt_core,.-_aes_decrypt_core)
+
+########################################################
+## ##
+## AES key schedule ##
+## ##
+########################################################
+
+.align 16
+.globl _gcry_aes_ssse3_schedule_core
+ELF(.type _gcry_aes_ssse3_schedule_core,@function)
+_gcry_aes_ssse3_schedule_core:
+_aes_schedule_core:
+ # rdi = key
+ # rsi = size in bits
+ # rdx = buffer
+ # rcx = direction. 0=encrypt, 1=decrypt
+
+ # load the tables
+ lea .Laes_consts(%rip), %r10
+ movdqa (%r10), %xmm9 # 0F
+ movdqa .Lk_inv (%r10), %xmm10 # inv
+ movdqa .Lk_inv+16(%r10), %xmm11 # inva
+ movdqa .Lk_sb1 (%r10), %xmm13 # sb1u
+ movdqa .Lk_sb1+16(%r10), %xmm12 # sb1t
+ movdqa .Lk_sb2 (%r10), %xmm15 # sb2u
+ movdqa .Lk_sb2+16(%r10), %xmm14 # sb2t
+
+ movdqa .Lk_rcon(%r10), %xmm8 # load rcon
+ movdqu (%rdi), %xmm0 # load key (unaligned)
+
+ # input transform
+ movdqu %xmm0, %xmm3
+ lea .Lk_ipt(%r10), %r11
+ call .Laes_schedule_transform
+ movdqu %xmm0, %xmm7
+
+ test %rcx, %rcx
+ jnz .Laes_schedule_am_decrypting
+
+ # encrypting, output zeroth round key after transform
+ movdqa %xmm0, (%rdx)
+ jmp .Laes_schedule_go
+
+.Laes_schedule_am_decrypting:
+ # decrypting, output zeroth round key after shiftrows
+ pshufb .Lk_sr(%r8,%r10),%xmm3
+ movdqa %xmm3, (%rdx)
+ xor $48, %r8
+
+.Laes_schedule_go:
+ cmp $192, %rsi
+ je .Laes_schedule_192
+ cmp $256, %rsi
+ je .Laes_schedule_256
+ # 128: fall through
+
+##
+## .Laes_schedule_128
+##
+## 128-bit specific part of key schedule.
+##
+## This schedule is really simple, because all its parts
+## are accomplished by the subroutines.
+##
+.Laes_schedule_128:
+ mov $10, %rsi
+
+.Laes_schedule_128_L:
+ call .Laes_schedule_round
+ dec %rsi
+ jz .Laes_schedule_mangle_last
+ call .Laes_schedule_mangle # write output
+ jmp .Laes_schedule_128_L
+
+##
+## .Laes_schedule_192
+##
+## 192-bit specific part of key schedule.
+##
+## The main body of this schedule is the same as the 128-bit
+## schedule, but with more smearing. The long, high side is
+## stored in %xmm7 as before, and the short, low side is in
+## the high bits of %xmm6.
+##
+## This schedule is somewhat nastier, however, because each
+## round produces 192 bits of key material, or 1.5 round keys.
+## Therefore, on each cycle we do 2 rounds and produce 3 round
+## keys.
+##
+.Laes_schedule_192:
+ movdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
+ call .Laes_schedule_transform # input transform
+ pshufd $0x0E, %xmm0, %xmm6
+ pslldq $8, %xmm6 # clobber low side with zeros
+ mov $4, %rsi
+
+.Laes_schedule_192_L:
+ call .Laes_schedule_round
+ palignr $8,%xmm6,%xmm0
+ call .Laes_schedule_mangle # save key n
+ call .Laes_schedule_192_smear
+ call .Laes_schedule_mangle # save key n+1
+ call .Laes_schedule_round
+ dec %rsi
+ jz .Laes_schedule_mangle_last
+ call .Laes_schedule_mangle # save key n+2
+ call .Laes_schedule_192_smear
+ jmp .Laes_schedule_192_L
+
+##
+## .Laes_schedule_192_smear
+##
+## Smear the short, low side in the 192-bit key schedule.
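+## (The xor chain below is the linear part of the 192-bit expansion;
+## it stretches the 64-bit leftover of a cycle into the words of the
+## next round keys.)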
+## +## Inputs: +## %xmm7: high side, b a x y +## %xmm6: low side, d c 0 0 +## %xmm13: 0 +## +## Outputs: +## %xmm6: b+c+d b+c 0 0 +## %xmm0: b+c+d b+c b a +## +.Laes_schedule_192_smear: + pshufd $0x80, %xmm6, %xmm0 # d c 0 0 -> c 0 0 0 + pxor %xmm0, %xmm6 # -> c+d c 0 0 + pshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + pxor %xmm6, %xmm0 # -> b+c+d b+c b a + pshufd $0x0E, %xmm0, %xmm6 + pslldq $8, %xmm6 # clobber low side with zeros + ret + +## +## .Laes_schedule_256 +## +## 256-bit specific part of key schedule. +## +## The structure here is very similar to the 128-bit +## schedule, but with an additional 'low side' in +## %xmm6. The low side's rounds are the same as the +## high side's, except no rcon and no rotation. +## +.Laes_schedule_256: + movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) + call .Laes_schedule_transform # input transform + mov $7, %rsi + +.Laes_schedule_256_L: + call .Laes_schedule_mangle # output low result + movdqa %xmm0, %xmm6 # save cur_lo in xmm6 + + # high round + call .Laes_schedule_round + dec %rsi + jz .Laes_schedule_mangle_last + call .Laes_schedule_mangle + + # low round. swap xmm7 and xmm6 + pshufd $0xFF, %xmm0, %xmm0 + movdqa %xmm7, %xmm5 + movdqa %xmm6, %xmm7 + call .Laes_schedule_low_round + movdqa %xmm5, %xmm7 + + jmp .Laes_schedule_256_L + +## +## .Laes_schedule_round +## +## Runs one main round of the key schedule on %xmm0, %xmm7 +## +## Specifically, runs subbytes on the high dword of %xmm0 +## then rotates it by one byte and xors into the low dword of +## %xmm7. +## +## Adds rcon from low byte of %xmm8, then rotates %xmm8 for +## next rcon. +## +## Smears the dwords of %xmm7 by xoring the low into the +## second low, result into third, result into highest. +## +## Returns results in %xmm7 = %xmm0. +## Clobbers %xmm1-%xmm4, %r11. +## +.Laes_schedule_round: + # extract rcon from xmm8 + pxor %xmm1, %xmm1 + palignr $15, %xmm8, %xmm1 + palignr $15, %xmm8, %xmm8 + pxor %xmm1, %xmm7 + + # rotate + pshufd $0xFF, %xmm0, %xmm0 + palignr $1, %xmm0, %xmm0 + + # fall through... + + # low round: same as high round, but no rotation and no rcon. +.Laes_schedule_low_round: + # smear xmm7 + movdqa %xmm7, %xmm1 + pslldq $4, %xmm7 + pxor %xmm1, %xmm7 + movdqa %xmm7, %xmm1 + pslldq $8, %xmm7 + pxor %xmm1, %xmm7 + pxor .Lk_s63(%r10), %xmm7 + + # subbytes + movdqa %xmm9, %xmm1 + pandn %xmm0, %xmm1 + psrld $4, %xmm1 # 1 = i + pand %xmm9, %xmm0 # 0 = k + movdqa %xmm11, %xmm2 # 2 : a/k + pshufb %xmm0, %xmm2 # 2 = a/k + pxor %xmm1, %xmm0 # 0 = j + movdqa %xmm10, %xmm3 # 3 : 1/i + pshufb %xmm1, %xmm3 # 3 = 1/i + pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k + movdqa %xmm10, %xmm4 # 4 : 1/j + pshufb %xmm0, %xmm4 # 4 = 1/j + pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k + movdqa %xmm10, %xmm2 # 2 : 1/iak + pshufb %xmm3, %xmm2 # 2 = 1/iak + pxor %xmm0, %xmm2 # 2 = io + movdqa %xmm10, %xmm3 # 3 : 1/jak + pshufb %xmm4, %xmm3 # 3 = 1/jak + pxor %xmm1, %xmm3 # 3 = jo + movdqa .Lk_sb1(%r10), %xmm4 # 4 : sbou + pshufb %xmm2, %xmm4 # 4 = sbou + movdqa .Lk_sb1+16(%r10), %xmm0 # 0 : sbot + pshufb %xmm3, %xmm0 # 0 = sb1t + pxor %xmm4, %xmm0 # 0 = sbox output + + # add in smeared stuff + pxor %xmm7, %xmm0 + movdqa %xmm0, %xmm7 + ret + +## +## .Laes_schedule_transform +## +## Linear-transform %xmm0 according to tables at (%r11) +## +## Requires that %xmm9 = 0x0F0F... 
as in preheat +## Output in %xmm0 +## Clobbers %xmm1, %xmm2 +## +.Laes_schedule_transform: + movdqa %xmm9, %xmm1 + pandn %xmm0, %xmm1 + psrld $4, %xmm1 + pand %xmm9, %xmm0 + movdqa (%r11), %xmm2 # lo + pshufb %xmm0, %xmm2 + movdqa 16(%r11), %xmm0 # hi + pshufb %xmm1, %xmm0 + pxor %xmm2, %xmm0 + ret + +## +## .Laes_schedule_mangle +## +## Mangle xmm0 from (basis-transformed) standard version +## to our version. +## +## On encrypt, +## xor with 0x63 +## multiply by circulant 0,1,1,1 +## apply shiftrows transform +## +## On decrypt, +## xor with 0x63 +## multiply by 'inverse mixcolumns' circulant E,B,D,9 +## deskew +## apply shiftrows transform +## +## +## Writes out to (%rdx), and increments or decrements it +## Keeps track of round number mod 4 in %r8 +## Preserves xmm0 +## Clobbers xmm1-xmm5 +## +.Laes_schedule_mangle: + movdqa %xmm0, %xmm4 # save xmm0 for later + movdqa .Lk_mc_forward(%r10),%xmm5 + test %rcx, %rcx + jnz .Laes_schedule_mangle_dec + + # encrypting + add $16, %rdx + pxor .Lk_s63(%r10),%xmm4 + pshufb %xmm5, %xmm4 + movdqa %xmm4, %xmm3 + pshufb %xmm5, %xmm4 + pxor %xmm4, %xmm3 + pshufb %xmm5, %xmm4 + pxor %xmm4, %xmm3 + + jmp .Laes_schedule_mangle_both + +.Laes_schedule_mangle_dec: + lea .Lk_dks_1(%r10), %r11 # first table: *9 + call .Laes_schedule_transform + movdqa %xmm0, %xmm3 + pshufb %xmm5, %xmm3 + + add $32, %r11 # next table: *B + call .Laes_schedule_transform + pxor %xmm0, %xmm3 + pshufb %xmm5, %xmm3 + + add $32, %r11 # next table: *D + call .Laes_schedule_transform + pxor %xmm0, %xmm3 + pshufb %xmm5, %xmm3 + + add $32, %r11 # next table: *E + call .Laes_schedule_transform + pxor %xmm0, %xmm3 + pshufb %xmm5, %xmm3 + + movdqa %xmm4, %xmm0 # restore %xmm0 + add $-16, %rdx + +.Laes_schedule_mangle_both: + pshufb .Lk_sr(%r8,%r10),%xmm3 + add $-16, %r8 + and $48, %r8 + movdqa %xmm3, (%rdx) + ret + +## +## .Laes_schedule_mangle_last +## +## Mangler for last round of key schedule +## Mangles %xmm0 +## when encrypting, outputs out(%xmm0) ^ 63 +## when decrypting, outputs unskew(%xmm0) +## +## Always called right before return... 
jumps to cleanup and exits +## +.Laes_schedule_mangle_last: + # schedule last round key from xmm0 + lea .Lk_deskew(%r10),%r11 # prepare to deskew + test %rcx, %rcx + jnz .Laes_schedule_mangle_last_dec + + # encrypting + pshufb .Lk_sr(%r8,%r10),%xmm0 # output permute + lea .Lk_opt(%r10), %r11 # prepare to output transform + add $32, %rdx + +.Laes_schedule_mangle_last_dec: + add $-16, %rdx + pxor .Lk_s63(%r10), %xmm0 + call .Laes_schedule_transform # output transform + movdqa %xmm0, (%rdx) # save last key + + #_aes_cleanup + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + pxor %xmm4, %xmm4 + pxor %xmm5, %xmm5 + pxor %xmm6, %xmm6 + pxor %xmm7, %xmm7 + pxor %xmm8, %xmm8 + ret +ELF(.size _aes_schedule_core,.-_aes_schedule_core) + +######################################################## +## ## +## Constants ## +## ## +######################################################## + +.align 16 +ELF(.type _aes_consts,@object) +.Laes_consts: +_aes_consts: + # s0F + .Lk_s0F = .-.Laes_consts + .quad 0x0F0F0F0F0F0F0F0F + .quad 0x0F0F0F0F0F0F0F0F + + # input transform (lo, hi) + .Lk_ipt = .-.Laes_consts + .quad 0xC2B2E8985A2A7000 + .quad 0xCABAE09052227808 + .quad 0x4C01307D317C4D00 + .quad 0xCD80B1FCB0FDCC81 + + # inv, inva + .Lk_inv = .-.Laes_consts + .quad 0x0E05060F0D080180 + .quad 0x040703090A0B0C02 + .quad 0x01040A060F0B0780 + .quad 0x030D0E0C02050809 + + # sb1u, sb1t + .Lk_sb1 = .-.Laes_consts + .quad 0xB19BE18FCB503E00 + .quad 0xA5DF7A6E142AF544 + .quad 0x3618D415FAE22300 + .quad 0x3BF7CCC10D2ED9EF + + + # sb2u, sb2t + .Lk_sb2 = .-.Laes_consts + .quad 0xE27A93C60B712400 + .quad 0x5EB7E955BC982FCD + .quad 0x69EB88400AE12900 + .quad 0xC2A163C8AB82234A + + # sbou, sbot + .Lk_sbo = .-.Laes_consts + .quad 0xD0D26D176FBDC700 + .quad 0x15AABF7AC502A878 + .quad 0xCFE474A55FBB6A00 + .quad 0x8E1E90D1412B35FA + + # mc_forward + .Lk_mc_forward = .-.Laes_consts + .quad 0x0407060500030201 + .quad 0x0C0F0E0D080B0A09 + .quad 0x080B0A0904070605 + .quad 0x000302010C0F0E0D + .quad 0x0C0F0E0D080B0A09 + .quad 0x0407060500030201 + .quad 0x000302010C0F0E0D + .quad 0x080B0A0904070605 + + # mc_backward + .Lk_mc_backward = .-.Laes_consts + .quad 0x0605040702010003 + .quad 0x0E0D0C0F0A09080B + .quad 0x020100030E0D0C0F + .quad 0x0A09080B06050407 + .quad 0x0E0D0C0F0A09080B + .quad 0x0605040702010003 + .quad 0x0A09080B06050407 + .quad 0x020100030E0D0C0F + + # sr + .Lk_sr = .-.Laes_consts + .quad 0x0706050403020100 + .quad 0x0F0E0D0C0B0A0908 + .quad 0x030E09040F0A0500 + .quad 0x0B06010C07020D08 + .quad 0x0F060D040B020900 + .quad 0x070E050C030A0108 + .quad 0x0B0E0104070A0D00 + .quad 0x0306090C0F020508 + + # rcon + .Lk_rcon = .-.Laes_consts + .quad 0x1F8391B9AF9DEEB6 + .quad 0x702A98084D7C7D81 + + # s63: all equal to 0x63 transformed + .Lk_s63 = .-.Laes_consts + .quad 0x5B5B5B5B5B5B5B5B + .quad 0x5B5B5B5B5B5B5B5B + + # output transform + .Lk_opt = .-.Laes_consts + .quad 0xFF9F4929D6B66000 + .quad 0xF7974121DEBE6808 + .quad 0x01EDBD5150BCEC00 + .quad 0xE10D5DB1B05C0CE0 + + # deskew tables: inverts the sbox's 'skew' + .Lk_deskew = .-.Laes_consts + .quad 0x07E4A34047A4E300 + .quad 0x1DFEB95A5DBEF91A + .quad 0x5F36B5DC83EA6900 + .quad 0x2841C2ABF49D1E77 + +## +## Decryption stuff +## Key schedule constants +## + # decryption key schedule: x -> invskew x*9 + .Lk_dks_1 = .-.Laes_consts + .quad 0xB6116FC87ED9A700 + .quad 0x4AED933482255BFC + .quad 0x4576516227143300 + .quad 0x8BB89FACE9DAFDCE + + # decryption key schedule: invskew x*9 -> invskew x*D + .Lk_dks_2 = .-.Laes_consts + .quad 0x27438FEBCCA86400 + 
.quad 0x4622EE8AADC90561 + .quad 0x815C13CE4F92DD00 + .quad 0x73AEE13CBD602FF2 + + # decryption key schedule: invskew x*D -> invskew x*B + .Lk_dks_3 = .-.Laes_consts + .quad 0x03C4C50201C6C700 + .quad 0xF83F3EF9FA3D3CFB + .quad 0xEE1921D638CFF700 + .quad 0xA5526A9D7384BC4B + + # decryption key schedule: invskew x*B -> invskew x*E + 0x63 + .Lk_dks_4 = .-.Laes_consts + .quad 0xE3C390B053732000 + .quad 0xA080D3F310306343 + .quad 0xA0CA214B036982E8 + .quad 0x2F45AEC48CE60D67 + +## +## Decryption stuff +## Round function constants +## + # decryption input transform + .Lk_dipt = .-.Laes_consts + .quad 0x0F505B040B545F00 + .quad 0x154A411E114E451A + .quad 0x86E383E660056500 + .quad 0x12771772F491F194 + + # decryption sbox output *9*u, *9*t + .Lk_dsb9 = .-.Laes_consts + .quad 0x851C03539A86D600 + .quad 0xCAD51F504F994CC9 + .quad 0xC03B1789ECD74900 + .quad 0x725E2C9EB2FBA565 + + # decryption sbox output *D*u, *D*t + .Lk_dsbd = .-.Laes_consts + .quad 0x7D57CCDFE6B1A200 + .quad 0xF56E9B13882A4439 + .quad 0x3CE2FAF724C6CB00 + .quad 0x2931180D15DEEFD3 + + # decryption sbox output *B*u, *B*t + .Lk_dsbb = .-.Laes_consts + .quad 0xD022649296B44200 + .quad 0x602646F6B0F2D404 + .quad 0xC19498A6CD596700 + .quad 0xF3FF0C3E3255AA6B + + # decryption sbox output *E*u, *E*t + .Lk_dsbe = .-.Laes_consts + .quad 0x46F2929626D4D000 + .quad 0x2242600464B4F6B0 + .quad 0x0C55A6CDFFAAC100 + .quad 0x9467F36B98593E32 + + # decryption sbox final output + .Lk_dsbo = .-.Laes_consts + .quad 0x1387EA537EF94000 + .quad 0xC7AA6DB9D4943E2D + .quad 0x12D7560F93441D00 + .quad 0xCA4B8159D8C58E9C +ELF(.size _aes_consts,.-_aes_consts) + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-ssse3-amd64.c b/libotr/libgcrypt-1.8.7/cipher/rijndael-ssse3-amd64.c new file mode 100644 index 0000000..da5339e --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-ssse3-amd64.c @@ -0,0 +1,751 @@ +/* SSSE3 vector permutation AES for Libgcrypt + * Copyright (C) 2014-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * + * The code is based on the public domain library libvpaes version 0.5 + * available at http://crypto.stanford.edu/vpaes/ and which carries + * this notice: + * + * libvpaes: constant-time SSSE3 AES encryption and decryption. + * version 0.5 + * + * By Mike Hamburg, Stanford University, 2009. Public domain. + * I wrote essentially all of this code. I did not write the test + * vectors; they are the NIST known answer tests. I hereby release all + * the code and documentation here that I wrote into the public domain. 
+ *
+ * This is an implementation of AES following my paper,
+ * "Accelerating AES with Vector Permute Instructions"
+ * CHES 2009; http://shiftleft.org/papers/vector_aes/
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_SSSE3
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+# pragma GCC target("no-sse")
+#endif
+
+
+/* Assembly functions in rijndael-ssse3-amd64-asm.S. Note that these
+ have a custom calling convention and need to be called from assembly
+ blocks, not directly. */
+extern void _gcry_aes_ssse3_enc_preload(void);
+extern void _gcry_aes_ssse3_dec_preload(void);
+extern void _gcry_aes_ssse3_schedule_core(void);
+extern void _gcry_aes_ssse3_encrypt_core(void);
+extern void _gcry_aes_ssse3_decrypt_core(void);
+
+
+
+/* Two macros to be called prior to and after the use of SSSE3
+ instructions. There should be no external function calls between
+ the use of these macros. Their purpose is to make sure that the
+ SSE registers are cleared and won't reveal any information about
+ the key or the data. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define SSSE3_STATE_SIZE (16 * 10)
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define vpaes_ssse3_prepare() \
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" \
+ "movdqu %%xmm7, 1*16(%0)\n\t" \
+ "movdqu %%xmm8, 2*16(%0)\n\t" \
+ "movdqu %%xmm9, 3*16(%0)\n\t" \
+ "movdqu %%xmm10, 4*16(%0)\n\t" \
+ "movdqu %%xmm11, 5*16(%0)\n\t" \
+ "movdqu %%xmm12, 6*16(%0)\n\t" \
+ "movdqu %%xmm13, 7*16(%0)\n\t" \
+ "movdqu %%xmm14, 8*16(%0)\n\t" \
+ "movdqu %%xmm15, 9*16(%0)\n\t" \
+ : \
+ : "r" (ssse3_state) \
+ : "memory" )
+# define vpaes_ssse3_cleanup() \
+ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \
+ "pxor %%xmm1, %%xmm1 \n\t" \
+ "pxor %%xmm2, %%xmm2 \n\t" \
+ "pxor %%xmm3, %%xmm3 \n\t" \
+ "pxor %%xmm4, %%xmm4 \n\t" \
+ "pxor %%xmm5, %%xmm5 \n\t" \
+ "movdqu 0*16(%0), %%xmm6 \n\t" \
+ "movdqu 1*16(%0), %%xmm7 \n\t" \
+ "movdqu 2*16(%0), %%xmm8 \n\t" \
+ "movdqu 3*16(%0), %%xmm9 \n\t" \
+ "movdqu 4*16(%0), %%xmm10 \n\t" \
+ "movdqu 5*16(%0), %%xmm11 \n\t" \
+ "movdqu 6*16(%0), %%xmm12 \n\t" \
+ "movdqu 7*16(%0), %%xmm13 \n\t" \
+ "movdqu 8*16(%0), %%xmm14 \n\t" \
+ "movdqu 9*16(%0), %%xmm15 \n\t" \
+ : \
+ : "r" (ssse3_state) \
+ : "memory" )
+# define PUSH_STACK_PTR
+# define POP_STACK_PTR
+#else
+# define SSSE3_STATE_SIZE 1
+# define vpaes_ssse3_prepare() (void)ssse3_state
+# define vpaes_ssse3_cleanup() \
+ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \
+ "pxor %%xmm1, %%xmm1 \n\t" \
+ "pxor %%xmm2, %%xmm2 \n\t" \
+ "pxor %%xmm3, %%xmm3 \n\t" \
+ "pxor %%xmm4, %%xmm4 \n\t" \
+ "pxor %%xmm5, %%xmm5 \n\t" \
+ "pxor %%xmm6, %%xmm6 \n\t" \
+ "pxor %%xmm7, %%xmm7 \n\t" \
+ "pxor %%xmm8, %%xmm8 \n\t" \
+ ::: "memory" )
+/* Old GCC versions use the red-zone of the AMD64 SYSV ABI and the stack
+ * pointer is not properly adjusted for the assembly block. Therefore the
+ * stack pointer needs to be manually corrected.
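The 128 bytes reserved by PUSH_STACK_PTR and POP_STACK_PTR below
+ * match the red-zone size defined by that ABI, so stepping %rsp past
+ * the red-zone keeps the callee's pushes off any live data there.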
*/ +# define PUSH_STACK_PTR "subq $128, %%rsp;\n\t" +# define POP_STACK_PTR "addq $128, %%rsp;\n\t" +#endif + +#define vpaes_ssse3_prepare_enc() \ + vpaes_ssse3_prepare(); \ + asm volatile (PUSH_STACK_PTR \ + "callq *%q[core] \n\t" \ + POP_STACK_PTR \ + : \ + : [core] "r" (_gcry_aes_ssse3_enc_preload) \ + : "rax", "cc", "memory" ) + +#define vpaes_ssse3_prepare_dec() \ + vpaes_ssse3_prepare(); \ + asm volatile (PUSH_STACK_PTR \ + "callq *%q[core] \n\t" \ + POP_STACK_PTR \ + : \ + : [core] "r" (_gcry_aes_ssse3_dec_preload) \ + : "rax", "cc", "memory" ) + + + +void +_gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) +{ + unsigned int keybits = (ctx->rounds - 10) * 32 + 128; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare(); + + asm volatile ("leaq %q[key], %%rdi" "\n\t" + "movl %[bits], %%esi" "\n\t" + "leaq %[buf], %%rdx" "\n\t" + "movl %[dir], %%ecx" "\n\t" + "movl %[rotoffs], %%r8d" "\n\t" + PUSH_STACK_PTR + "callq *%q[core]" "\n\t" + POP_STACK_PTR + : + : [core] "r" (&_gcry_aes_ssse3_schedule_core), + [key] "m" (*key), + [bits] "g" (keybits), + [buf] "m" (ctx->keyschenc32[0][0]), + [dir] "g" (0), + [rotoffs] "g" (48) + : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi", + "cc", "memory"); + + /* Save key for setting up decryption. */ + if (keybits > 192) + asm volatile ("movdqu (%[src]), %%xmm0\n\t" + "movdqu 16(%[src]), %%xmm1\n\t" + "movdqu %%xmm0, (%[dst])\n\t" + "movdqu %%xmm1, 16(%[dst])\n\t" + : /* No output */ + : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key) + : "memory" ); + else if (keybits == 192) + asm volatile ("movdqu (%[src]), %%xmm0\n\t" + "movq 16(%[src]), %%xmm1\n\t" + "movdqu %%xmm0, (%[dst])\n\t" + "movq %%xmm1, 16(%[dst])\n\t" + : /* No output */ + : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key) + : "memory" ); + else + asm volatile ("movdqu (%[src]), %%xmm0\n\t" + "movdqu %%xmm0, (%[dst])\n\t" + : /* No output */ + : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key) + : "memory" ); + + vpaes_ssse3_cleanup(); +} + + +/* Make a decryption key from an encryption key. */ +void +_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx) +{ + unsigned int keybits = (ctx->rounds - 10) * 32 + 128; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare(); + + asm volatile ("leaq %q[key], %%rdi" "\n\t" + "movl %[bits], %%esi" "\n\t" + "leaq %[buf], %%rdx" "\n\t" + "movl %[dir], %%ecx" "\n\t" + "movl %[rotoffs], %%r8d" "\n\t" + PUSH_STACK_PTR + "callq *%q[core]" "\n\t" + POP_STACK_PTR + : + : [core] "r" (_gcry_aes_ssse3_schedule_core), + [key] "m" (ctx->keyschdec32[0][0]), + [bits] "g" (keybits), + [buf] "m" (ctx->keyschdec32[ctx->rounds][0]), + [dir] "g" (1), + [rotoffs] "g" ((keybits == 192) ? 0 : 32) + : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi", + "cc", "memory"); + + vpaes_ssse3_cleanup(); +} + + +/* Encrypt one block using the Intel SSSE3 instructions. Block is input +* and output through SSE register xmm0. */ +static inline void +do_vpaes_ssse3_enc (const RIJNDAEL_context *ctx, unsigned int nrounds) +{ + unsigned int middle_rounds = nrounds - 1; + const void *keysched = ctx->keyschenc32; + + asm volatile (PUSH_STACK_PTR + "callq *%q[core]" "\n\t" + POP_STACK_PTR + : "+a" (middle_rounds), "+d" (keysched) + : [core] "r" (_gcry_aes_ssse3_encrypt_core) + : "rcx", "rsi", "rdi", "cc", "memory"); +} + + +/* Decrypt one block using the Intel SSSE3 instructions. Block is input +* and output through SSE register xmm0. 
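The asm cores use a custom calling convention: %rax carries
+* nrounds - 1 and %rdx points at the key schedule, which is what the
+* "+a" and "+d" operands below set up.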
*/ +static inline void +do_vpaes_ssse3_dec (const RIJNDAEL_context *ctx, unsigned int nrounds) +{ + unsigned int middle_rounds = nrounds - 1; + const void *keysched = ctx->keyschdec32; + + asm volatile (PUSH_STACK_PTR + "callq *%q[core]" "\n\t" + POP_STACK_PTR + : "+a" (middle_rounds), "+d" (keysched) + : [core] "r" (_gcry_aes_ssse3_decrypt_core) + : "rcx", "rsi", "cc", "memory"); +} + + +unsigned int +_gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, + const unsigned char *src) +{ + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_enc (); + asm volatile ("movdqu %[src], %%xmm0\n\t" + : + : [src] "m" (*src) + : "memory" ); + do_vpaes_ssse3_enc (ctx, nrounds); + asm volatile ("movdqu %%xmm0, %[dst]\n\t" + : [dst] "=m" (*dst) + : + : "memory" ); + vpaes_ssse3_cleanup (); + return 0; +} + + +void +_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_enc (); + + asm volatile ("movdqu %[iv], %%xmm0\n\t" + : /* No output */ + : [iv] "m" (*iv) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + do_vpaes_ssse3_enc (ctx, nrounds); + + asm volatile ("movdqu %[inbuf], %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : [inbuf] "m" (*inbuf) + : "memory" ); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm0, %[iv]\n\t" + : [iv] "=m" (*iv) + : + : "memory" ); + + vpaes_ssse3_cleanup (); +} + + +void +_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks, int cbc_mac) +{ + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_enc (); + + asm volatile ("movdqu %[iv], %%xmm7\n\t" + : /* No output */ + : [iv] "m" (*iv) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + asm volatile ("movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm7, %%xmm0\n\t" + : /* No output */ + : [inbuf] "m" (*inbuf) + : "memory" ); + + do_vpaes_ssse3_enc (ctx, nrounds); + + asm volatile ("movdqa %%xmm0, %%xmm7\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + if (!cbc_mac) + outbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm7, %[iv]\n\t" + : [iv] "=m" (*iv) + : + : "memory" ); + + vpaes_ssse3_cleanup (); +} + + +void +_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *ctr, + size_t nblocks) +{ + static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + u64 ctrlow; + + vpaes_ssse3_prepare_enc (); + + asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */ + "movdqa (%[ctr]), %%xmm7\n\t" /* Preload CTR */ + "movq 8(%[ctr]), %q[ctrlow]\n\t" + "bswapq %q[ctrlow]\n\t" + : [ctrlow] "=r" (ctrlow) + : [mask] "m" (*be_mask), + [ctr] "r" (ctr) + : "memory", "cc"); + + for ( ;nblocks; nblocks-- ) + { + asm volatile ("movdqa %%xmm7, %%xmm0\n\t" /* xmm0 := CTR (xmm7) */ + "pcmpeqd %%xmm1, %%xmm1\n\t" + "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ + + "pshufb %%xmm6, %%xmm7\n\t" + "psubq %%xmm1, %%xmm7\n\t" /* xmm7++ (big endian) */ + + /* detect if 64-bit carry handling is needed */ + "incq 
%q[ctrlow]\n\t" + "jnz .Lno_carry%=\n\t" + + "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ + "psubq %%xmm1, %%xmm7\n\t" /* add carry to upper 64bits */ + + ".Lno_carry%=:\n\t" + + "pshufb %%xmm6, %%xmm7\n\t" + : [ctrlow] "+r" (ctrlow) + : + : "cc", "memory"); + + do_vpaes_ssse3_enc (ctx, nrounds); + + asm volatile ("movdqu %[src], %%xmm1\n\t" /* xmm1 := input */ + "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */ + "movdqu %%xmm0, %[dst]" /* Store EncCTR. */ + : [dst] "=m" (*outbuf) + : [src] "m" (*inbuf) + : "memory"); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm7, %[ctr]\n\t" /* Update CTR (mem). */ + : [ctr] "=m" (*ctr) + : + : "memory" ); + + vpaes_ssse3_cleanup (); +} + + +unsigned int +_gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, + const unsigned char *src) +{ + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_dec (); + asm volatile ("movdqu %[src], %%xmm0\n\t" + : + : [src] "m" (*src) + : "memory" ); + do_vpaes_ssse3_dec (ctx, nrounds); + asm volatile ("movdqu %%xmm0, %[dst]\n\t" + : [dst] "=m" (*dst) + : + : "memory" ); + vpaes_ssse3_cleanup (); + return 0; +} + + +void +_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_enc (); + + asm volatile ("movdqu %[iv], %%xmm0\n\t" + : /* No output */ + : [iv] "m" (*iv) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + do_vpaes_ssse3_enc (ctx, nrounds); + + asm volatile ("movdqa %%xmm0, %%xmm6\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "movdqu %%xmm6, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : [inbuf] "m" (*inbuf) + : "memory" ); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm0, %[iv]\n\t" + : [iv] "=m" (*iv) + : + : "memory" ); + + vpaes_ssse3_cleanup (); +} + + +void +_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_dec (); + + asm volatile ("movdqu %[iv], %%xmm7\n\t" /* use xmm7 as fast IV storage */ + : /* No output */ + : [iv] "m" (*iv) + : "memory"); + + for ( ;nblocks; nblocks-- ) + { + asm volatile ("movdqu %[inbuf], %%xmm0\n\t" + "movdqa %%xmm0, %%xmm6\n\t" /* use xmm6 as savebuf */ + : /* No output */ + : [inbuf] "m" (*inbuf) + : "memory"); + + do_vpaes_ssse3_dec (ctx, nrounds); + + asm volatile ("pxor %%xmm7, %%xmm0\n\t" /* xor IV with output */ + "movdqu %%xmm0, %[outbuf]\n\t" + "movdqu %%xmm6, %%xmm7\n\t" /* store savebuf as new IV */ + : [outbuf] "=m" (*outbuf) + : + : "memory"); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm7, %[iv]\n\t" /* store IV */ + : /* No output */ + : [iv] "m" (*iv) + : "memory"); + + vpaes_ssse3_cleanup (); +} + + +static void +ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + u64 n = c->u_mode.ocb.data_nblocks; + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_enc (); + + /* Preload Offset and Checksum */ + asm volatile ("movdqu %[iv], %%xmm7\n\t" + "movdqu %[ctr], %%xmm6\n\t" + 
: /* No output */ + : [iv] "m" (*c->u_iv.iv), + [ctr] "m" (*c->u_ctr.ctr) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm7\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "pxor %%xmm7, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_vpaes_ssse3_enc (ctx, nrounds); + + asm volatile ("pxor %%xmm7, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + c->u_mode.ocb.data_nblocks = n; + asm volatile ("movdqu %%xmm7, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_iv.iv), + [ctr] "=m" (*c->u_ctr.ctr) + : + : "memory" ); + + vpaes_ssse3_cleanup (); +} + +static void +ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + u64 n = c->u_mode.ocb.data_nblocks; + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_dec (); + + /* Preload Offset and Checksum */ + asm volatile ("movdqu %[iv], %%xmm7\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_iv.iv), + [ctr] "m" (*c->u_ctr.ctr) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm7\n\t" + "pxor %%xmm7, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_vpaes_ssse3_dec (ctx, nrounds); + + asm volatile ("pxor %%xmm7, %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + c->u_mode.ocb.data_nblocks = n; + asm volatile ("movdqu %%xmm7, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_iv.iv), + [ctr] "=m" (*c->u_ctr.ctr) + : + : "memory" ); + + vpaes_ssse3_cleanup (); +} + + +void +_gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + if (encrypt) + ssse3_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); + else + ssse3_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); +} + + +void +_gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + u64 n = c->u_mode.ocb.aad_nblocks; + unsigned int nrounds = ctx->rounds; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_enc (); + + /* Preload Offset and Sum */ + asm volatile ("movdqu %[iv], %%xmm7\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_mode.ocb.aad_offset), + [ctr] "m" (*c->u_mode.ocb.aad_sum) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = ocb_get_l(c, ++n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[abuf], %%xmm0\n\t" + 
"pxor %%xmm1, %%xmm7\n\t" + "pxor %%xmm7, %%xmm0\n\t" + : + : [l] "m" (*l), + [abuf] "m" (*abuf) + : "memory" ); + + do_vpaes_ssse3_enc (ctx, nrounds); + + asm volatile ("pxor %%xmm0, %%xmm6\n\t" + : + : + : "memory" ); + + abuf += BLOCKSIZE; + } + + c->u_mode.ocb.aad_nblocks = n; + asm volatile ("movdqu %%xmm7, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_mode.ocb.aad_offset), + [ctr] "=m" (*c->u_mode.ocb.aad_sum) + : + : "memory" ); + + vpaes_ssse3_cleanup (); +} + +#endif /* USE_SSSE3 */ diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael-tables.h b/libotr/libgcrypt-1.8.7/cipher/rijndael-tables.h new file mode 100644 index 0000000..8359470 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael-tables.h @@ -0,0 +1,208 @@ +/* rijndael-tables.h - Rijndael (AES) for GnuPG, + * Copyright (C) 2000, 2001, 2002, 2003, 2007, + * 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* To keep the actual implementation at a readable size we use this + include file to define the tables. */ + +static const u32 encT[256] = + { + 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, + 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, + 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, + 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, + 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, + 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, + 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, + 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b, + 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, + 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, + 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, + 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, + 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, + 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f, + 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, + 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, + 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, + 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, + 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, + 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413, + 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, + 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, + 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, + 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, + 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, + 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511, + 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, + 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, + 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, + 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, + 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, + 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf, + 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, + 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, + 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, + 0xac6464c8, 0xe75d5dba, 0x2b191932, 
0x957373e6, + 0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, + 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b, + 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, + 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, + 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, + 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, + 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, + 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2, + 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, + 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, + 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, + 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, + 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, + 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697, + 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, + 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, + 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, + 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, + 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, + 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27, + 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, + 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, + 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, + 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, + 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, + 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0, + 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, + 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c + }; + +static const struct +{ + u32 T[256]; + byte inv_sbox[256]; +} dec_tables = + { + { + 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, + 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, + 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, + 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, + 0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, + 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, + 0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, + 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e, + 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, + 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, + 0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, + 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, + 0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, + 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566, + 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, + 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, + 0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, + 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, + 0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, + 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd, + 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, + 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, + 0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, + 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, + 0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, + 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c, + 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, + 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, + 0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, + 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, + 0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, + 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14, + 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, + 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, + 0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, + 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, + 0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, + 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177, + 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, + 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, + 0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, + 
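+    /* Inverse counterpart of encT: each entry packs
+       {0e,09,0d,0b} * inv_sbox[x], so decT[sbox[x]] gives
+       InvMixColumns of a raw byte -- the identity that
+       prepare_decryption in rijndael.c uses to derive the
+       decryption key schedule.  */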
0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, + 0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, + 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382, + 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, + 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, + 0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, + 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, + 0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, + 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235, + 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, + 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, + 0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, + 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, + 0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, + 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d, + 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, + 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, + 0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, + 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, + 0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, + 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff, + 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, + 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0 + }, + { + 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, + 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, + 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, + 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, + 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, + 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, + 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, + 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, + 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, + 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, + 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, + 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, + 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, + 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, + 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, + 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, + 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, + 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, + 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, + 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, + 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, + 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, + 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, + 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, + 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, + 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, + 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, + 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, + 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, + 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, + 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, + 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d + } + }; + +#define decT dec_tables.T +#define inv_sbox dec_tables.inv_sbox + +static const u32 rcon[30] = + { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, + 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, + 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91 + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/rijndael.c b/libotr/libgcrypt-1.8.7/cipher/rijndael.c new file mode 100644 index 0000000..8637195 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rijndael.c @@ -0,0 +1,2022 @@ +/* Rijndael (AES) for GnuPG + * Copyright (C) 2000, 2001, 2002, 2003, 2007, + * 2008, 2011, 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + ******************************************************************* + * The code here is based on the optimized implementation taken from + * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000, + * which carries this notice: + *------------------------------------------ + * rijndael-alg-fst.c v2.3 April '2000 + * + * Optimised ANSI C code + * + * authors: v1.0: Antoon Bosselaers + * v2.0: Vincent Rijmen + * v2.3: Paulo Barreto + * + * This code is placed in the public domain. + *------------------------------------------ + * + * The SP800-38a document is available at: + * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf + * + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> /* for memcmp() */ + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-selftest.h" +#include "rijndael-internal.h" +#include "./cipher-internal.h" + + +#ifdef USE_AMD64_ASM +/* AMD64 assembly implementations of AES */ +extern unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched_enc, + unsigned char *out, + const unsigned char *in, + int rounds, + const void *encT); + +extern unsigned int _gcry_aes_amd64_decrypt_block(const void *keysched_dec, + unsigned char *out, + const unsigned char *in, + int rounds, + const void *decT); +#endif /*USE_AMD64_ASM*/ + +#ifdef USE_AESNI +/* AES-NI (AMD64 & i386) accelerated implementations of AES */ +extern void _gcry_aes_aesni_do_setkey(RIJNDAEL_context *ctx, const byte *key); +extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx); + +extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern void _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + int cbc_mac); +extern void _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *ctr, size_t nblocks); +extern void _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); +#endif + +#ifdef USE_SSSE3 +/* SSSE3 (AMD64) vector permutation implementation of AES */ +extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key); +extern void 
_gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx); + +extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern void _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + int cbc_mac); +extern void _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *ctr, size_t nblocks); +extern void _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern void _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); +#endif + +#ifdef USE_PADLOCK +extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx, + unsigned char *bx, + const unsigned char *ax); +extern unsigned int _gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx, + unsigned char *bx, + const unsigned char *ax); +#endif + +#ifdef USE_ARM_ASM +/* ARM assembly implementations of AES */ +extern unsigned int _gcry_aes_arm_encrypt_block(const void *keysched_enc, + unsigned char *out, + const unsigned char *in, + int rounds, + const void *encT); + +extern unsigned int _gcry_aes_arm_decrypt_block(const void *keysched_dec, + unsigned char *out, + const unsigned char *in, + int rounds, + const void *decT); +#endif /*USE_ARM_ASM*/ + +#ifdef USE_ARM_CE +/* ARMv8 Crypto Extension implementations of AES */ +extern void _gcry_aes_armv8_ce_setkey(RIJNDAEL_context *ctx, const byte *key); +extern void _gcry_aes_armv8_ce_prepare_decryption(RIJNDAEL_context *ctx); + +extern unsigned int _gcry_aes_armv8_ce_encrypt(const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); + +extern void _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_armv8_ce_cbc_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + int cbc_mac); +extern void _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *ctr, size_t nblocks); +extern void _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern void _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, + const void *abuf_arg, size_t 
nblocks); +#endif /*USE_ARM_ASM*/ + +static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, + const unsigned char *ax); +static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, + const unsigned char *ax); + + + +/* All the numbers. */ +#include "rijndael-tables.h" + + + + +/* Function prototypes. */ +static const char *selftest(void); + + + +/* Prefetching for encryption/decryption tables. */ +static void prefetch_table(const volatile byte *tab, size_t len) +{ + size_t i; + + for (i = 0; i < len; i += 8 * 32) + { + (void)tab[i + 0 * 32]; + (void)tab[i + 1 * 32]; + (void)tab[i + 2 * 32]; + (void)tab[i + 3 * 32]; + (void)tab[i + 4 * 32]; + (void)tab[i + 5 * 32]; + (void)tab[i + 6 * 32]; + (void)tab[i + 7 * 32]; + } + + (void)tab[len - 1]; +} + +static void prefetch_enc(void) +{ + prefetch_table((const void *)encT, sizeof(encT)); +} + +static void prefetch_dec(void) +{ + prefetch_table((const void *)&dec_tables, sizeof(dec_tables)); +} + + + +/* Perform the key setup. */ +static gcry_err_code_t +do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) +{ + static int initialized = 0; + static const char *selftest_failed = 0; + int rounds; + int i,j, r, t, rconpointer = 0; + int KC; +#if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ + || defined(USE_ARM_CE) + unsigned int hwfeatures; +#endif + + /* The on-the-fly self tests are only run in non-fips mode. In fips + mode explicit self-tests are required. Actually the on-the-fly + self-tests are not fully thread-safe and it might happen that a + failed self-test won't get noticed in another thread. + + FIXME: We might want to have a central registry of succeeded + self-tests. */ + if (!fips_mode () && !initialized) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("%s\n", selftest_failed ); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + if( keylen == 128/8 ) + { + rounds = 10; + KC = 4; + } + else if ( keylen == 192/8 ) + { + rounds = 12; + KC = 6; + } + else if ( keylen == 256/8 ) + { + rounds = 14; + KC = 8; + } + else + return GPG_ERR_INV_KEYLEN; + + ctx->rounds = rounds; + +#if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ + || defined(USE_ARM_CE) + hwfeatures = _gcry_get_hw_features (); +#endif + + ctx->decryption_prepared = 0; +#ifdef USE_PADLOCK + ctx->use_padlock = 0; +#endif +#ifdef USE_AESNI + ctx->use_aesni = 0; +#endif +#ifdef USE_SSSE3 + ctx->use_ssse3 = 0; +#endif +#ifdef USE_ARM_CE + ctx->use_arm_ce = 0; +#endif + + if (0) + { + ; + } +#ifdef USE_AESNI + else if (hwfeatures & HWF_INTEL_AESNI) + { + ctx->encrypt_fn = _gcry_aes_aesni_encrypt; + ctx->decrypt_fn = _gcry_aes_aesni_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_aesni = 1; + } +#endif +#ifdef USE_PADLOCK + else if (hwfeatures & HWF_PADLOCK_AES && keylen == 128/8) + { + ctx->encrypt_fn = _gcry_aes_padlock_encrypt; + ctx->decrypt_fn = _gcry_aes_padlock_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_padlock = 1; + memcpy (ctx->padlockkey, key, keylen); + } +#endif +#ifdef USE_SSSE3 + else if (hwfeatures & HWF_INTEL_SSSE3) + { + ctx->encrypt_fn = _gcry_aes_ssse3_encrypt; + ctx->decrypt_fn = _gcry_aes_ssse3_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_ssse3 = 1; + } +#endif +#ifdef USE_ARM_CE + else if (hwfeatures & HWF_ARM_AES) + { + ctx->encrypt_fn = _gcry_aes_armv8_ce_encrypt; + ctx->decrypt_fn = 
_gcry_aes_armv8_ce_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_arm_ce = 1; + } +#endif + else + { + ctx->encrypt_fn = do_encrypt; + ctx->decrypt_fn = do_decrypt; + ctx->prefetch_enc_fn = prefetch_enc; + ctx->prefetch_dec_fn = prefetch_dec; + } + + /* NB: We don't yet support Padlock hardware key generation. */ + + if (0) + { + ; + } +#ifdef USE_AESNI + else if (ctx->use_aesni) + _gcry_aes_aesni_do_setkey (ctx, key); +#endif +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + _gcry_aes_ssse3_do_setkey (ctx, key); +#endif +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + _gcry_aes_armv8_ce_setkey (ctx, key); +#endif + else + { + const byte *sbox = ((const byte *)encT) + 1; + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte data[MAXKC][4]; + u32 data32[MAXKC]; + } tkk[2]; +#define k tkk[0].data +#define k_u32 tkk[0].data32 +#define tk tkk[1].data +#define tk_u32 tkk[1].data32 +#define W (ctx->keyschenc) +#define W_u32 (ctx->keyschenc32) + + prefetch_enc(); + + for (i = 0; i < keylen; i++) + { + k[i >> 2][i & 3] = key[i]; + } + + for (j = KC-1; j >= 0; j--) + { + tk_u32[j] = k_u32[j]; + } + r = 0; + t = 0; + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + W_u32[r][t] = le_bswap32(tk_u32[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + + while (r < rounds + 1) + { + /* While not enough round key material calculated calculate + new values. */ + tk[0][0] ^= sbox[tk[KC-1][1] * 4]; + tk[0][1] ^= sbox[tk[KC-1][2] * 4]; + tk[0][2] ^= sbox[tk[KC-1][3] * 4]; + tk[0][3] ^= sbox[tk[KC-1][0] * 4]; + tk[0][0] ^= rcon[rconpointer++]; + + if (KC != 8) + { + for (j = 1; j < KC; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + } + else + { + for (j = 1; j < KC/2; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + tk[KC/2][0] ^= sbox[tk[KC/2 - 1][0] * 4]; + tk[KC/2][1] ^= sbox[tk[KC/2 - 1][1] * 4]; + tk[KC/2][2] ^= sbox[tk[KC/2 - 1][2] * 4]; + tk[KC/2][3] ^= sbox[tk[KC/2 - 1][3] * 4]; + for (j = KC/2 + 1; j < KC; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + } + + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + W_u32[r][t] = le_bswap32(tk_u32[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + } +#undef W +#undef tk +#undef k +#undef W_u32 +#undef tk_u32 +#undef k_u32 + wipememory(&tkk, sizeof(tkk)); + } + + return 0; +} + + +static gcry_err_code_t +rijndael_setkey (void *context, const byte *key, const unsigned keylen) +{ + RIJNDAEL_context *ctx = context; + return do_setkey (ctx, key, keylen); +} + + +/* Make a decryption key from an encryption key. */ +static void +prepare_decryption( RIJNDAEL_context *ctx ) +{ + int r; + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_prepare_decryption (ctx); + } +#endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_prepare_decryption (ctx); + } +#endif /*USE_SSSE3*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_prepare_decryption (ctx); + } +#endif /*USE_SSSE3*/ +#ifdef USE_PADLOCK + else if (ctx->use_padlock) + { + /* Padlock does not need decryption subkeys. 
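+         For the generic C path below, the decryption schedule is the
+         standard "equivalent inverse cipher" one: every encryption
+         round key word wi[j] is replaced by wo[j] = InvMixColumns(wi[j]),
+         computed byte-wise through decT[sbox[b]] (see rijndael-tables.h).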
*/ + } +#endif /*USE_PADLOCK*/ + else + { + const byte *sbox = ((const byte *)encT) + 1; + + prefetch_enc(); + prefetch_dec(); + + ctx->keyschdec32[0][0] = ctx->keyschenc32[0][0]; + ctx->keyschdec32[0][1] = ctx->keyschenc32[0][1]; + ctx->keyschdec32[0][2] = ctx->keyschenc32[0][2]; + ctx->keyschdec32[0][3] = ctx->keyschenc32[0][3]; + + for (r = 1; r < ctx->rounds; r++) + { + u32 *wi = ctx->keyschenc32[r]; + u32 *wo = ctx->keyschdec32[r]; + u32 wt; + + wt = wi[0]; + wo[0] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) + ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) + ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) + ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); + + wt = wi[1]; + wo[1] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) + ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) + ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) + ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); + + wt = wi[2]; + wo[2] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) + ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) + ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) + ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); + + wt = wi[3]; + wo[3] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) + ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) + ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) + ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); + } + + ctx->keyschdec32[r][0] = ctx->keyschenc32[r][0]; + ctx->keyschdec32[r][1] = ctx->keyschenc32[r][1]; + ctx->keyschdec32[r][2] = ctx->keyschenc32[r][2]; + ctx->keyschdec32[r][3] = ctx->keyschenc32[r][3]; + } +} + + +#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) +/* Encrypt one block. A and B may be the same. */ +static unsigned int +do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, + const unsigned char *a) +{ +#define rk (ctx->keyschenc32) + const byte *sbox = ((const byte *)encT) + 1; + int rounds = ctx->rounds; + int r; + u32 sa[4]; + u32 sb[4]; + + sb[0] = buf_get_le32(a + 0); + sb[1] = buf_get_le32(a + 4); + sb[2] = buf_get_le32(a + 8); + sb[3] = buf_get_le32(a + 12); + + sa[0] = sb[0] ^ rk[0][0]; + sa[1] = sb[1] ^ rk[0][1]; + sa[2] = sb[2] ^ rk[0][2]; + sa[3] = sb[3] ^ rk[0][3]; + + sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); + sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); + sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); + sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); + sa[0] = rk[1][0] ^ sb[0]; + + sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); + sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); + sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); + sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); + sa[1] = rk[1][1] ^ sb[1]; + + sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); + sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); + sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); + sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); + sa[2] = rk[1][2] ^ sb[2]; + + sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); + sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); + sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); + sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); + sa[3] = rk[1][3] ^ sb[3]; + + for (r = 2; r < rounds; r++) + { + sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); + sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); + sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); + sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); + sa[0] = rk[r][0] ^ sb[0]; + + sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); + sa[0] ^= rol(encT[(byte)(sa[1] >> (1 
* 8))], (1 * 8)); + sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); + sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); + sa[1] = rk[r][1] ^ sb[1]; + + sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); + sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); + sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); + sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); + sa[2] = rk[r][2] ^ sb[2]; + + sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); + sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); + sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); + sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); + sa[3] = rk[r][3] ^ sb[3]; + + r++; + + sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); + sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); + sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); + sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); + sa[0] = rk[r][0] ^ sb[0]; + + sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); + sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); + sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); + sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); + sa[1] = rk[r][1] ^ sb[1]; + + sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); + sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); + sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); + sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); + sa[2] = rk[r][2] ^ sb[2]; + + sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); + sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); + sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); + sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); + sa[3] = rk[r][3] ^ sb[3]; + } + + /* Last round is special. */ + + sb[0] = (sbox[(byte)(sa[0] >> (0 * 8)) * 4]) << (0 * 8); + sb[3] = (sbox[(byte)(sa[0] >> (1 * 8)) * 4]) << (1 * 8); + sb[2] = (sbox[(byte)(sa[0] >> (2 * 8)) * 4]) << (2 * 8); + sb[1] = (sbox[(byte)(sa[0] >> (3 * 8)) * 4]) << (3 * 8); + sa[0] = rk[r][0] ^ sb[0]; + + sb[1] ^= (sbox[(byte)(sa[1] >> (0 * 8)) * 4]) << (0 * 8); + sa[0] ^= (sbox[(byte)(sa[1] >> (1 * 8)) * 4]) << (1 * 8); + sb[3] ^= (sbox[(byte)(sa[1] >> (2 * 8)) * 4]) << (2 * 8); + sb[2] ^= (sbox[(byte)(sa[1] >> (3 * 8)) * 4]) << (3 * 8); + sa[1] = rk[r][1] ^ sb[1]; + + sb[2] ^= (sbox[(byte)(sa[2] >> (0 * 8)) * 4]) << (0 * 8); + sa[1] ^= (sbox[(byte)(sa[2] >> (1 * 8)) * 4]) << (1 * 8); + sa[0] ^= (sbox[(byte)(sa[2] >> (2 * 8)) * 4]) << (2 * 8); + sb[3] ^= (sbox[(byte)(sa[2] >> (3 * 8)) * 4]) << (3 * 8); + sa[2] = rk[r][2] ^ sb[2]; + + sb[3] ^= (sbox[(byte)(sa[3] >> (0 * 8)) * 4]) << (0 * 8); + sa[2] ^= (sbox[(byte)(sa[3] >> (1 * 8)) * 4]) << (1 * 8); + sa[1] ^= (sbox[(byte)(sa[3] >> (2 * 8)) * 4]) << (2 * 8); + sa[0] ^= (sbox[(byte)(sa[3] >> (3 * 8)) * 4]) << (3 * 8); + sa[3] = rk[r][3] ^ sb[3]; + + buf_put_le32(b + 0, sa[0]); + buf_put_le32(b + 4, sa[1]); + buf_put_le32(b + 8, sa[2]); + buf_put_le32(b + 12, sa[3]); +#undef rk + + return (56 + 2*sizeof(int)); +} +#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ + + +static unsigned int +do_encrypt (const RIJNDAEL_context *ctx, + unsigned char *bx, const unsigned char *ax) +{ +#ifdef USE_AMD64_ASM +# ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS + return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, + encT); +# else + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. 
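+   * The call is routed through inline asm so the compiler need not
+   * treat the XMM registers as clobbered: %rdi/%rsi/%rdx/%rcx carry
+   * the four arguments per the SysV ABI, the fifth argument (encT)
+   * is passed in %r8, and the result comes back in %rax.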
*/ + const void *key = ctx->keyschenc; + uintptr_t rounds = ctx->rounds; + uintptr_t ret; + asm volatile ("movq %[encT], %%r8\n\t" + "callq *%[ret]\n\t" + : [ret] "=a" (ret), + "+D" (key), + "+S" (bx), + "+d" (ax), + "+c" (rounds) + : "0" (_gcry_aes_amd64_encrypt_block), + [encT] "r" (encT) + : "cc", "memory", "r8", "r9", "r10", "r11"); + return ret; +# endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */ +#elif defined(USE_ARM_ASM) + return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, encT); +#else + return do_encrypt_fn (ctx, bx, ax); +#endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/ +} + + +static unsigned int +rijndael_encrypt (void *context, byte *b, const byte *a) +{ + RIJNDAEL_context *ctx = context; + + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + return ctx->encrypt_fn (ctx, b, a); +} + + +/* Bulk encryption of complete blocks in CFB mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn_depth = 0; + + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_ARM_CE*/ + else + { + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the IV. */ + burn_depth = encrypt_fn (ctx, iv, iv); + /* XOR the input with the IV and store input into IV. */ + buf_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); +} + + +/* Bulk encryption of complete blocks in CBC mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. 
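+   When CBC_MAC is set the loop below keeps writing to the same output
+   block, so only the final C_n = E(P_n xor C_{n-1}) -- the CBC-MAC
+   value -- is retained.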
*/ +void +_gcry_aes_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *last_iv; + unsigned int burn_depth = 0; + + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); + burn_depth = 0; + } +#endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); + burn_depth = 0; + } +#endif /*USE_ARM_CE*/ + else + { + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + + last_iv = iv; + + for ( ;nblocks; nblocks-- ) + { + buf_xor(outbuf, inbuf, last_iv, BLOCKSIZE); + + burn_depth = encrypt_fn (ctx, outbuf, outbuf); + + last_iv = outbuf; + inbuf += BLOCKSIZE; + if (!cbc_mac) + outbuf += BLOCKSIZE; + } + + if (last_iv != iv) + buf_cpy (iv, last_iv, BLOCKSIZE); + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); +} + + +/* Bulk encryption of complete blocks in CTR mode. Caller needs to + make sure that CTR is aligned on a 16 byte boundary if AESNI; the + minimum alignment is for an u32. This function is only intended + for the bulk encryption feature of cipher.c. CTR is expected to be + of size BLOCKSIZE. */ +void +_gcry_aes_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn_depth = 0; + int i; + + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); + burn_depth = 0; + } +#endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); + burn_depth = 0; + } +#endif /*USE_ARM_CE*/ + else + { + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp; + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + burn_depth = encrypt_fn (ctx, tmp.x1, ctr); + /* XOR the input with the encrypted counter and store in output. */ + buf_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + /* Increment the counter. */ + for (i = BLOCKSIZE; i > 0; i--) + { + ctr[i-1]++; + if (ctr[i-1]) + break; + } + } + + wipememory(&tmp, sizeof(tmp)); + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); +} + + + +#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) +/* Decrypt one block. A and B may be the same. 
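+   This is the equivalent inverse cipher: the round keys in keyschdec32
+   were already run through InvMixColumns by prepare_decryption, so each
+   round is a plain decT lookup pass, with inv_sbox used only in the
+   final round.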
*/ +static unsigned int +do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, + const unsigned char *a) +{ +#define rk (ctx->keyschdec32) + int rounds = ctx->rounds; + int r; + u32 sa[4]; + u32 sb[4]; + + sb[0] = buf_get_le32(a + 0); + sb[1] = buf_get_le32(a + 4); + sb[2] = buf_get_le32(a + 8); + sb[3] = buf_get_le32(a + 12); + + sa[0] = sb[0] ^ rk[rounds][0]; + sa[1] = sb[1] ^ rk[rounds][1]; + sa[2] = sb[2] ^ rk[rounds][2]; + sa[3] = sb[3] ^ rk[rounds][3]; + + for (r = rounds - 1; r > 1; r--) + { + sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); + sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); + sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); + sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); + sa[0] = rk[r][0] ^ sb[0]; + + sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); + sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); + sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); + sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); + sa[1] = rk[r][1] ^ sb[1]; + + sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); + sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); + sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); + sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); + sa[2] = rk[r][2] ^ sb[2]; + + sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); + sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); + sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); + sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); + sa[3] = rk[r][3] ^ sb[3]; + + r--; + + sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); + sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); + sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); + sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); + sa[0] = rk[r][0] ^ sb[0]; + + sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); + sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); + sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); + sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); + sa[1] = rk[r][1] ^ sb[1]; + + sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); + sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); + sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); + sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); + sa[2] = rk[r][2] ^ sb[2]; + + sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); + sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); + sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); + sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); + sa[3] = rk[r][3] ^ sb[3]; + } + + sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); + sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); + sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); + sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); + sa[0] = rk[1][0] ^ sb[0]; + + sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); + sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); + sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); + sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); + sa[1] = rk[1][1] ^ sb[1]; + + sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); + sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); + sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); + sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); + sa[2] = rk[1][2] ^ sb[2]; + + sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); + sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); + sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); + sa[2] ^= rol(decT[(byte)(sa[3] 
>> (3 * 8))], (3 * 8)); + sa[3] = rk[1][3] ^ sb[3]; + + /* Last round is special. */ + sb[0] = inv_sbox[(byte)(sa[0] >> (0 * 8))] << (0 * 8); + sb[1] = inv_sbox[(byte)(sa[0] >> (1 * 8))] << (1 * 8); + sb[2] = inv_sbox[(byte)(sa[0] >> (2 * 8))] << (2 * 8); + sb[3] = inv_sbox[(byte)(sa[0] >> (3 * 8))] << (3 * 8); + sa[0] = sb[0] ^ rk[0][0]; + + sb[1] ^= inv_sbox[(byte)(sa[1] >> (0 * 8))] << (0 * 8); + sb[2] ^= inv_sbox[(byte)(sa[1] >> (1 * 8))] << (1 * 8); + sb[3] ^= inv_sbox[(byte)(sa[1] >> (2 * 8))] << (2 * 8); + sa[0] ^= inv_sbox[(byte)(sa[1] >> (3 * 8))] << (3 * 8); + sa[1] = sb[1] ^ rk[0][1]; + + sb[2] ^= inv_sbox[(byte)(sa[2] >> (0 * 8))] << (0 * 8); + sb[3] ^= inv_sbox[(byte)(sa[2] >> (1 * 8))] << (1 * 8); + sa[0] ^= inv_sbox[(byte)(sa[2] >> (2 * 8))] << (2 * 8); + sa[1] ^= inv_sbox[(byte)(sa[2] >> (3 * 8))] << (3 * 8); + sa[2] = sb[2] ^ rk[0][2]; + + sb[3] ^= inv_sbox[(byte)(sa[3] >> (0 * 8))] << (0 * 8); + sa[0] ^= inv_sbox[(byte)(sa[3] >> (1 * 8))] << (1 * 8); + sa[1] ^= inv_sbox[(byte)(sa[3] >> (2 * 8))] << (2 * 8); + sa[2] ^= inv_sbox[(byte)(sa[3] >> (3 * 8))] << (3 * 8); + sa[3] = sb[3] ^ rk[0][3]; + + buf_put_le32(b + 0, sa[0]); + buf_put_le32(b + 4, sa[1]); + buf_put_le32(b + 8, sa[2]); + buf_put_le32(b + 12, sa[3]); +#undef rk + + return (56+2*sizeof(int)); +} +#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ + + +/* Decrypt one block. AX and BX may be the same. */ +static unsigned int +do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, + const unsigned char *ax) +{ +#ifdef USE_AMD64_ASM +# ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS + return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, + &dec_tables); +# else + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. */ + const void *key = ctx->keyschdec; + uintptr_t rounds = ctx->rounds; + uintptr_t ret; + asm volatile ("movq %[dectabs], %%r8\n\t" + "callq *%[ret]\n\t" + : [ret] "=a" (ret), + "+D" (key), + "+S" (bx), + "+d" (ax), + "+c" (rounds) + : "0" (_gcry_aes_amd64_decrypt_block), + [dectabs] "r" (&dec_tables) + : "cc", "memory", "r8", "r9", "r10", "r11"); + return ret; +# endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */ +#elif defined(USE_ARM_ASM) + return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, + &dec_tables); +#else + return do_decrypt_fn (ctx, bx, ax); +#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ +} + + +static inline void +check_decryption_preparation (RIJNDAEL_context *ctx) +{ + if ( !ctx->decryption_prepared ) + { + prepare_decryption ( ctx ); + ctx->decryption_prepared = 1; + } +} + + +static unsigned int +rijndael_decrypt (void *context, byte *b, const byte *a) +{ + RIJNDAEL_context *ctx = context; + + check_decryption_preparation (ctx); + + if (ctx->prefetch_dec_fn) + ctx->prefetch_dec_fn(); + + return ctx->decrypt_fn (ctx, b, a); +} + + +/* Bulk decryption of complete blocks in CFB mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. 
*/ +void +_gcry_aes_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn_depth = 0; + + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_ARM_CE*/ + else + { + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + burn_depth = encrypt_fn (ctx, iv, iv); + buf_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); +} + + +/* Bulk decryption of complete blocks in CBC mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn_depth = 0; + + check_decryption_preparation (ctx); + + if (ctx->prefetch_dec_fn) + ctx->prefetch_dec_fn(); + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_ARM_CE*/ + else + { + unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16; + rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + + burn_depth = decrypt_fn (ctx, savebuf, inbuf); + + buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE); + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); +} + + + +/* Bulk encryption/decryption of complete blocks in OCB mode. 
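+   A single dispatcher serves both directions.  The generic fallback
+   keeps the OCB state in c->u_iv.iv (Offset) and c->u_ctr.ctr
+   (Checksum) and stages each block in the aligned l_tmp buffer, so
+   INBUF and OUTBUF may overlap.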
*/ +size_t +_gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn_depth = 0; + + if (encrypt) + { + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + } + else + { + check_decryption_preparation (ctx); + + if (ctx->prefetch_dec_fn) + ctx->prefetch_dec_fn(); + } + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + burn_depth = 0; + } +#endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + burn_depth = 0; + } +#endif /*USE_ARM_CE*/ + else if (encrypt) + { + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + u64 i = ++c->u_mode.ocb.data_nblocks; + const unsigned char *l = ocb_get_l(c, i); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); + buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); + buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + } + else + { + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + u64 i = ++c->u_mode.ocb.data_nblocks; + const unsigned char *l = ocb_get_l(c, i); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); + buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1); + buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); + buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); + + return 0; +} + + +/* Bulk authentication of complete blocks in OCB mode. 
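+   Processes the AAD blocks: Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor
+   Offset_i), with the AAD offset chain kept separately from the data
+   chain in c->u_mode.ocb.aad_offset and aad_sum.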
*/ +size_t +_gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + unsigned int burn_depth = 0; + + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_ocb_auth (c, abuf, nblocks); + burn_depth = 0; + } +#endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks); + burn_depth = 0; + } +#endif /*USE_ARM_CE*/ + else + { + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + u64 i = ++c->u_mode.ocb.aad_nblocks; + const unsigned char *l = ocb_get_l(c, i); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE); + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + buf_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE); + burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); + buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE); + + abuf += BLOCKSIZE; + } + + wipememory(&l_tmp, sizeof(l_tmp)); + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); + + return 0; +} + + + +/* Run the self-tests for AES 128. Returns NULL on success. */ +static const char* +selftest_basic_128 (void) +{ + RIJNDAEL_context *ctx; + unsigned char *ctxmem; + unsigned char scratch[16]; + + /* The test vectors are from the AES supplied ones; more or less + randomly taken from ecb_tbl.txt (I=42,81,14) */ +#if 1 + static const unsigned char plaintext_128[16] = + { + 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33, + 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A + }; + static const unsigned char key_128[16] = + { + 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0, + 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA + }; + static const unsigned char ciphertext_128[16] = + { + 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2, + 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD + }; +#else + /* Test vectors from fips-197, appendix C. */ +# warning debug test vectors in use + static const unsigned char plaintext_128[16] = + { + 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, + 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff + }; + static const unsigned char key_128[16] = + { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, + 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f + /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ + /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ + }; + static const unsigned char ciphertext_128[16] = + { + 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, + 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a + }; +#endif + + /* Because gcc/ld can only align the CTX struct on 8 bytes on the + stack, we need to allocate that context on the heap. */ + ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); + if (!ctx) + return "failed to allocate memory"; + + rijndael_setkey (ctx, key_128, sizeof (key_128)); + rijndael_encrypt (ctx, scratch, plaintext_128); + if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) + { + xfree (ctxmem); + return "AES-128 test encryption failed."; + } + rijndael_decrypt (ctx, scratch, scratch); + xfree (ctxmem); + if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) + return "AES-128 test decryption failed."; + + return NULL; +} + +/* Run the self-tests for AES 192. 
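+   Same known-answer pattern as the AES-128 test above: encrypt one
+   reference block, then decrypt in place and compare with the
+   original plaintext.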
Returns NULL on success. */ +static const char* +selftest_basic_192 (void) +{ + RIJNDAEL_context *ctx; + unsigned char *ctxmem; + unsigned char scratch[16]; + + static unsigned char plaintext_192[16] = + { + 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4, + 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72 + }; + static unsigned char key_192[24] = + { + 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C, + 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16, + 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20 + }; + static const unsigned char ciphertext_192[16] = + { + 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC, + 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA + }; + + ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); + if (!ctx) + return "failed to allocate memory"; + rijndael_setkey (ctx, key_192, sizeof(key_192)); + rijndael_encrypt (ctx, scratch, plaintext_192); + if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) + { + xfree (ctxmem); + return "AES-192 test encryption failed."; + } + rijndael_decrypt (ctx, scratch, scratch); + xfree (ctxmem); + if (memcmp (scratch, plaintext_192, sizeof (plaintext_192))) + return "AES-192 test decryption failed."; + + return NULL; +} + + +/* Run the self-tests for AES 256. Returns NULL on success. */ +static const char* +selftest_basic_256 (void) +{ + RIJNDAEL_context *ctx; + unsigned char *ctxmem; + unsigned char scratch[16]; + + static unsigned char plaintext_256[16] = + { + 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, + 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 + }; + static unsigned char key_256[32] = + { + 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10, + 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A, + 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24, + 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E + }; + static const unsigned char ciphertext_256[16] = + { + 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71, + 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3 + }; + + ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); + if (!ctx) + return "failed to allocate memory"; + rijndael_setkey (ctx, key_256, sizeof(key_256)); + rijndael_encrypt (ctx, scratch, plaintext_256); + if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) + { + xfree (ctxmem); + return "AES-256 test encryption failed."; + } + rijndael_decrypt (ctx, scratch, scratch); + xfree (ctxmem); + if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) + return "AES-256 test decryption failed."; + + return NULL; +} + + +/* Run the self-tests for AES-CTR-128, tests IV increment of bulk CTR + encryption. Returns NULL on success. */ +static const char* +selftest_ctr_128 (void) +{ + const int nblocks = 8+1; + const int blocksize = BLOCKSIZE; + const int context_size = sizeof(RIJNDAEL_context); + + return _gcry_selftest_helper_ctr("AES", &rijndael_setkey, + &rijndael_encrypt, &_gcry_aes_ctr_enc, nblocks, blocksize, + context_size); +} + + +/* Run the self-tests for AES-CBC-128, tests bulk CBC decryption. + Returns NULL on success. */ +static const char* +selftest_cbc_128 (void) +{ + const int nblocks = 8+2; + const int blocksize = BLOCKSIZE; + const int context_size = sizeof(RIJNDAEL_context); + + return _gcry_selftest_helper_cbc("AES", &rijndael_setkey, + &rijndael_encrypt, &_gcry_aes_cbc_dec, nblocks, blocksize, + context_size); +} + + +/* Run the self-tests for AES-CFB-128, tests bulk CFB decryption. + Returns NULL on success. 
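+   (The _gcry_selftest_helper_* routines from cipher-selftest.h are
+   meant to cross-check the bulk implementation against
+   block-at-a-time processing of the same data, exercising whichever
+   accelerated path was selected at setkey time.)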
*/ +static const char* +selftest_cfb_128 (void) +{ + const int nblocks = 8+2; + const int blocksize = BLOCKSIZE; + const int context_size = sizeof(RIJNDAEL_context); + + return _gcry_selftest_helper_cfb("AES", &rijndael_setkey, + &rijndael_encrypt, &_gcry_aes_cfb_dec, nblocks, blocksize, + context_size); +} + + +/* Run all the self-tests and return NULL on success. This function + is used for the on-the-fly self-tests. */ +static const char * +selftest (void) +{ + const char *r; + + if ( (r = selftest_basic_128 ()) + || (r = selftest_basic_192 ()) + || (r = selftest_basic_256 ()) ) + return r; + + if ( (r = selftest_ctr_128 ()) ) + return r; + + if ( (r = selftest_cbc_128 ()) ) + return r; + + if ( (r = selftest_cfb_128 ()) ) + return r; + + return r; +} + + +/* SP800-38a.pdf for AES-128. */ +static const char * +selftest_fips_128_38a (int requested_mode) +{ + static const struct tv + { + int mode; + const unsigned char key[16]; + const unsigned char iv[16]; + struct + { + const unsigned char input[16]; + const unsigned char output[16]; + } data[4]; + } tv[2] = + { + { + GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */ + { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, + 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + { + { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, + 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, + { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, + 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, + + { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, + 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, + { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f, + 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } }, + + { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, + 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, + { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40, + 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } }, + + { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, + 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, + { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e, + 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } } + } + }, + { + GCRY_CIPHER_MODE_OFB, + { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, + 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + { + { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, + 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, + { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, + 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, + + { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, + 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, + { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03, + 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } }, + + { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, + 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, + { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6, + 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } }, + + { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, + 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, + { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78, + 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } }, + } + } + }; + unsigned char scratch[16]; + gpg_error_t err; + int tvi, idx; + gcry_cipher_hd_t hdenc = NULL; + gcry_cipher_hd_t hddec = NULL; + +#define Fail(a) do { \ + _gcry_cipher_close (hdenc); \ + _gcry_cipher_close (hddec); \ + return a; \ + 
} while (0) + + gcry_assert (sizeof tv[0].data[0].input == sizeof scratch); + gcry_assert (sizeof tv[0].data[0].output == sizeof scratch); + + for (tvi=0; tvi < DIM (tv); tvi++) + if (tv[tvi].mode == requested_mode) + break; + if (tvi == DIM (tv)) + Fail ("no test data for this mode"); + + err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0); + if (err) + Fail ("open"); + err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0); + if (err) + Fail ("open"); + err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key); + if (!err) + err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key); + if (err) + Fail ("set key"); + err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv); + if (!err) + err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv); + if (err) + Fail ("set IV"); + for (idx=0; idx < DIM (tv[tvi].data); idx++) + { + err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch, + tv[tvi].data[idx].input, + sizeof tv[tvi].data[idx].input); + if (err) + Fail ("encrypt command"); + if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch)) + Fail ("encrypt mismatch"); + err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch, + tv[tvi].data[idx].output, + sizeof tv[tvi].data[idx].output); + if (err) + Fail ("decrypt command"); + if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch)) + Fail ("decrypt mismatch"); + } + +#undef Fail + _gcry_cipher_close (hdenc); + _gcry_cipher_close (hddec); + return NULL; +} + + +/* Complete selftest for AES-128 with all modes and driver code. */ +static gpg_err_code_t +selftest_fips_128 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + what = "low-level"; + errtxt = selftest_basic_128 (); + if (errtxt) + goto failed; + + if (extended) + { + what = "cfb"; + errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB); + if (errtxt) + goto failed; + + what = "ofb"; + errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cipher", GCRY_CIPHER_AES128, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + +/* Complete selftest for AES-192. */ +static gpg_err_code_t +selftest_fips_192 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + (void)extended; /* No extended tests available. */ + + what = "low-level"; + errtxt = selftest_basic_192 (); + if (errtxt) + goto failed; + + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cipher", GCRY_CIPHER_AES192, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Complete selftest for AES-256. */ +static gpg_err_code_t +selftest_fips_256 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + (void)extended; /* No extended tests available. */ + + what = "low-level"; + errtxt = selftest_basic_256 (); + if (errtxt) + goto failed; + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cipher", GCRY_CIPHER_AES256, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +/* Run a full self-test for ALGO and return 0 on success. 
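+   Dispatches on ALGO to the per-key-size FIPS tests; in extended mode
+   the AES-128 test additionally runs the SP800-38a CFB and OFB
+   known-answer vectors defined above.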
*/ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_CIPHER_AES128: + ec = selftest_fips_128 (extended, report); + break; + case GCRY_CIPHER_AES192: + ec = selftest_fips_192 (extended, report); + break; + case GCRY_CIPHER_AES256: + ec = selftest_fips_256 (extended, report); + break; + default: + ec = GPG_ERR_CIPHER_ALGO; + break; + + } + return ec; +} + + + + +static const char *rijndael_names[] = + { + "RIJNDAEL", + "AES128", + "AES-128", + NULL + }; + +static gcry_cipher_oid_spec_t rijndael_oids[] = + { + { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, + { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, + { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, + { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_aes = + { + GCRY_CIPHER_AES, {0, 1}, + "AES", rijndael_names, rijndael_oids, 16, 128, + sizeof (RIJNDAEL_context), + rijndael_setkey, rijndael_encrypt, rijndael_decrypt, + NULL, NULL, + run_selftests + }; + + +static const char *rijndael192_names[] = + { + "RIJNDAEL192", + "AES-192", + NULL + }; + +static gcry_cipher_oid_spec_t rijndael192_oids[] = + { + { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, + { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, + { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, + { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_aes192 = + { + GCRY_CIPHER_AES192, {0, 1}, + "AES192", rijndael192_names, rijndael192_oids, 16, 192, + sizeof (RIJNDAEL_context), + rijndael_setkey, rijndael_encrypt, rijndael_decrypt, + NULL, NULL, + run_selftests + }; + + +static const char *rijndael256_names[] = + { + "RIJNDAEL256", + "AES-256", + NULL + }; + +static gcry_cipher_oid_spec_t rijndael256_oids[] = + { + { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, + { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, + { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, + { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_aes256 = + { + GCRY_CIPHER_AES256, {0, 1}, + "AES256", rijndael256_names, rijndael256_oids, 16, 256, + sizeof (RIJNDAEL_context), + rijndael_setkey, rijndael_encrypt, rijndael_decrypt, + NULL, NULL, + run_selftests + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/rmd160.c b/libotr/libgcrypt-1.8.7/cipher/rmd160.c new file mode 100644 index 0000000..0a019b9 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rmd160.c @@ -0,0 +1,508 @@ +/* rmd160.c - RIPE-MD160 + * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "hash-common.h" +#include "cipher.h" /* Only used for the rmd160_hash_buffer() prototype. */ + +#include "bithelp.h" +#include "bufhelp.h" + +/********************************* + * RIPEMD-160 is not patented, see (as of 25.10.97) + * http://www.esat.kuleuven.ac.be/~bosselae/ripemd160.html + * Note that the code uses Little Endian byteorder, which is good for + * 386 etc, but we must add some conversion when used on a big endian box. + * + * + * Pseudo-code for RIPEMD-160 + * + * RIPEMD-160 is an iterative hash function that operates on 32-bit words. + * The round function takes as input a 5-word chaining variable and a 16-word + * message block and maps this to a new chaining variable. All operations are + * defined on 32-bit words. Padding is identical to that of MD4. + * + * + * RIPEMD-160: definitions + * + * + * nonlinear functions at bit level: exor, mux, -, mux, - + * + * f(j, x, y, z) = x XOR y XOR z (0 <= j <= 15) + * f(j, x, y, z) = (x AND y) OR (NOT(x) AND z) (16 <= j <= 31) + * f(j, x, y, z) = (x OR NOT(y)) XOR z (32 <= j <= 47) + * f(j, x, y, z) = (x AND z) OR (y AND NOT(z)) (48 <= j <= 63) + * f(j, x, y, z) = x XOR (y OR NOT(z)) (64 <= j <= 79) + * + * + * added constants (hexadecimal) + * + * K(j) = 0x00000000 (0 <= j <= 15) + * K(j) = 0x5A827999 (16 <= j <= 31) int(2**30 x sqrt(2)) + * K(j) = 0x6ED9EBA1 (32 <= j <= 47) int(2**30 x sqrt(3)) + * K(j) = 0x8F1BBCDC (48 <= j <= 63) int(2**30 x sqrt(5)) + * K(j) = 0xA953FD4E (64 <= j <= 79) int(2**30 x sqrt(7)) + * K'(j) = 0x50A28BE6 (0 <= j <= 15) int(2**30 x cbrt(2)) + * K'(j) = 0x5C4DD124 (16 <= j <= 31) int(2**30 x cbrt(3)) + * K'(j) = 0x6D703EF3 (32 <= j <= 47) int(2**30 x cbrt(5)) + * K'(j) = 0x7A6D76E9 (48 <= j <= 63) int(2**30 x cbrt(7)) + * K'(j) = 0x00000000 (64 <= j <= 79) + * + * + * selection of message word + * + * r(j) = j (0 <= j <= 15) + * r(16..31) = 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8 + * r(32..47) = 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12 + * r(48..63) = 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2 + * r(64..79) = 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13 + * r0(0..15) = 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12 + * r0(16..31)= 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2 + * r0(32..47)= 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13 + * r0(48..63)= 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14 + * r0(64..79)= 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11 + * + * + * amount for rotate left (rol) + * + * s(0..15) = 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8 + * s(16..31) = 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12 + * s(32..47) = 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5 + * s(48..63) = 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12 + * s(64..79) = 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6 + * s'(0..15) = 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6 + * s'(16..31)= 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11 + * s'(32..47)= 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5 + * s'(48..63)= 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8 + * s'(64..79)= 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 
13, 11, 11 + * + * + * initial value (hexadecimal) + * + * h0 = 0x67452301; h1 = 0xEFCDAB89; h2 = 0x98BADCFE; h3 = 0x10325476; + * h4 = 0xC3D2E1F0; + * + * + * RIPEMD-160: pseudo-code + * + * It is assumed that the message after padding consists of t 16-word blocks + * that will be denoted with X[i][j], with 0 <= i <= t-1 and 0 <= j <= 15. + * The symbol [+] denotes addition modulo 2**32 and rol_s denotes cyclic left + * shift (rotate) over s positions. + * + * + * for i := 0 to t-1 { + * A := h0; B := h1; C := h2; D = h3; E = h4; + * A' := h0; B' := h1; C' := h2; D' = h3; E' = h4; + * for j := 0 to 79 { + * T := rol_s(j)(A [+] f(j, B, C, D) [+] X[i][r(j)] [+] K(j)) [+] E; + * A := E; E := D; D := rol_10(C); C := B; B := T; + * T := rol_s'(j)(A' [+] f(79-j, B', C', D') [+] X[i][r'(j)] + [+] K'(j)) [+] E'; + * A' := E'; E' := D'; D' := rol_10(C'); C' := B'; B' := T; + * } + * T := h1 [+] C [+] D'; h1 := h2 [+] D [+] E'; h2 := h3 [+] E [+] A'; + * h3 := h4 [+] A [+] B'; h4 := h0 [+] B [+] C'; h0 := T; + * } + */ + +/* Some examples: + * "" 9c1185a5c5e9fc54612808977ee8f548b2258d31 + * "a" 0bdc9d2d256b3ee9daae347be6f4dc835a467ffe + * "abc" 8eb208f7e05d987a9b044a8e98c6b087f15a0bfc + * "message digest" 5d0689ef49d2fae572b881b123a85ffa21595f36 + * "a...z" f71c27109c692c1b56bbdceb5b9d2865b3708dbc + * "abcdbcde...nopq" 12a053384a9c0c88e405a06c27dcf49ada62eb2b + * "A...Za...z0...9" b0e20b6e3116640286ed3a87a5713079b21f5189 + * 8 times "1234567890" 9b752e45573d4b39f4dbd3323cab82bf63326bfb + * 1 million times "a" 52783243c1697bdbe16d37f97f68f08325dc1528 + */ + +typedef struct +{ + gcry_md_block_ctx_t bctx; + u32 h0,h1,h2,h3,h4; +} RMD160_CONTEXT; + + +static unsigned int +transform ( void *ctx, const unsigned char *data, size_t nblks ); + +static void +rmd160_init (void *context, unsigned int flags) +{ + RMD160_CONTEXT *hd = context; + + (void)flags; + + hd->h0 = 0x67452301; + hd->h1 = 0xEFCDAB89; + hd->h2 = 0x98BADCFE; + hd->h3 = 0x10325476; + hd->h4 = 0xC3D2E1F0; + + hd->bctx.nblocks = 0; + hd->bctx.nblocks_high = 0; + hd->bctx.count = 0; + hd->bctx.blocksize = 64; + hd->bctx.bwrite = transform; +} + + +/**************** + * Transform the message X which consists of 16 32-bit-words + */ +static unsigned int +transform_blk ( void *ctx, const unsigned char *data ) +{ + RMD160_CONTEXT *hd = ctx; + register u32 al, ar, bl, br, cl, cr, dl, dr, el, er; + u32 x[16]; + int i; + + for ( i = 0; i < 16; i++ ) + x[i] = buf_get_le32(data + i * 4); + +#define K0 0x00000000 +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xA953FD4E +#define KK0 0x50A28BE6 +#define KK1 0x5C4DD124 +#define KK2 0x6D703EF3 +#define KK3 0x7A6D76E9 +#define KK4 0x00000000 +#define F0(x,y,z) ( (x) ^ (y) ^ (z) ) +#define F1(x,y,z) ( ((x) & (y)) | (~(x) & (z)) ) +#define F2(x,y,z) ( ((x) | ~(y)) ^ (z) ) +#define F3(x,y,z) ( ((x) & (z)) | ((y) & ~(z)) ) +#define F4(x,y,z) ( (x) ^ ((y) | ~(z)) ) +#define R(a,b,c,d,e,f,k,r,s) do { a += f(b,c,d) + k + x[r]; \ + a = rol(a,s) + e; \ + c = rol(c,10); \ + } while(0) + + /* left lane and right lanes interleaved */ + al = ar = hd->h0; + bl = br = hd->h1; + cl = cr = hd->h2; + dl = dr = hd->h3; + el = er = hd->h4; + R( al, bl, cl, dl, el, F0, K0, 0, 11 ); + R( ar, br, cr, dr, er, F4, KK0, 5, 8); + R( el, al, bl, cl, dl, F0, K0, 1, 14 ); + R( er, ar, br, cr, dr, F4, KK0, 14, 9); + R( dl, el, al, bl, cl, F0, K0, 2, 15 ); + R( dr, er, ar, br, cr, F4, KK0, 7, 9); + R( cl, dl, el, al, bl, F0, K0, 3, 12 ); + R( cr, dr, er, ar, br, F4, KK0, 0, 11); + R( bl, cl, dl, el, al, 
F0, K0, 4, 5 ); + R( br, cr, dr, er, ar, F4, KK0, 9, 13); + R( al, bl, cl, dl, el, F0, K0, 5, 8 ); + R( ar, br, cr, dr, er, F4, KK0, 2, 15); + R( el, al, bl, cl, dl, F0, K0, 6, 7 ); + R( er, ar, br, cr, dr, F4, KK0, 11, 15); + R( dl, el, al, bl, cl, F0, K0, 7, 9 ); + R( dr, er, ar, br, cr, F4, KK0, 4, 5); + R( cl, dl, el, al, bl, F0, K0, 8, 11 ); + R( cr, dr, er, ar, br, F4, KK0, 13, 7); + R( bl, cl, dl, el, al, F0, K0, 9, 13 ); + R( br, cr, dr, er, ar, F4, KK0, 6, 7); + R( al, bl, cl, dl, el, F0, K0, 10, 14 ); + R( ar, br, cr, dr, er, F4, KK0, 15, 8); + R( el, al, bl, cl, dl, F0, K0, 11, 15 ); + R( er, ar, br, cr, dr, F4, KK0, 8, 11); + R( dl, el, al, bl, cl, F0, K0, 12, 6 ); + R( dr, er, ar, br, cr, F4, KK0, 1, 14); + R( cl, dl, el, al, bl, F0, K0, 13, 7 ); + R( cr, dr, er, ar, br, F4, KK0, 10, 14); + R( bl, cl, dl, el, al, F0, K0, 14, 9 ); + R( br, cr, dr, er, ar, F4, KK0, 3, 12); + R( al, bl, cl, dl, el, F0, K0, 15, 8 ); + R( ar, br, cr, dr, er, F4, KK0, 12, 6); + R( el, al, bl, cl, dl, F1, K1, 7, 7 ); + R( er, ar, br, cr, dr, F3, KK1, 6, 9); + R( dl, el, al, bl, cl, F1, K1, 4, 6 ); + R( dr, er, ar, br, cr, F3, KK1, 11, 13); + R( cl, dl, el, al, bl, F1, K1, 13, 8 ); + R( cr, dr, er, ar, br, F3, KK1, 3, 15); + R( bl, cl, dl, el, al, F1, K1, 1, 13 ); + R( br, cr, dr, er, ar, F3, KK1, 7, 7); + R( al, bl, cl, dl, el, F1, K1, 10, 11 ); + R( ar, br, cr, dr, er, F3, KK1, 0, 12); + R( el, al, bl, cl, dl, F1, K1, 6, 9 ); + R( er, ar, br, cr, dr, F3, KK1, 13, 8); + R( dl, el, al, bl, cl, F1, K1, 15, 7 ); + R( dr, er, ar, br, cr, F3, KK1, 5, 9); + R( cl, dl, el, al, bl, F1, K1, 3, 15 ); + R( cr, dr, er, ar, br, F3, KK1, 10, 11); + R( bl, cl, dl, el, al, F1, K1, 12, 7 ); + R( br, cr, dr, er, ar, F3, KK1, 14, 7); + R( al, bl, cl, dl, el, F1, K1, 0, 12 ); + R( ar, br, cr, dr, er, F3, KK1, 15, 7); + R( el, al, bl, cl, dl, F1, K1, 9, 15 ); + R( er, ar, br, cr, dr, F3, KK1, 8, 12); + R( dl, el, al, bl, cl, F1, K1, 5, 9 ); + R( dr, er, ar, br, cr, F3, KK1, 12, 7); + R( cl, dl, el, al, bl, F1, K1, 2, 11 ); + R( cr, dr, er, ar, br, F3, KK1, 4, 6); + R( bl, cl, dl, el, al, F1, K1, 14, 7 ); + R( br, cr, dr, er, ar, F3, KK1, 9, 15); + R( al, bl, cl, dl, el, F1, K1, 11, 13 ); + R( ar, br, cr, dr, er, F3, KK1, 1, 13); + R( el, al, bl, cl, dl, F1, K1, 8, 12 ); + R( er, ar, br, cr, dr, F3, KK1, 2, 11); + R( dl, el, al, bl, cl, F2, K2, 3, 11 ); + R( dr, er, ar, br, cr, F2, KK2, 15, 9); + R( cl, dl, el, al, bl, F2, K2, 10, 13 ); + R( cr, dr, er, ar, br, F2, KK2, 5, 7); + R( bl, cl, dl, el, al, F2, K2, 14, 6 ); + R( br, cr, dr, er, ar, F2, KK2, 1, 15); + R( al, bl, cl, dl, el, F2, K2, 4, 7 ); + R( ar, br, cr, dr, er, F2, KK2, 3, 11); + R( el, al, bl, cl, dl, F2, K2, 9, 14 ); + R( er, ar, br, cr, dr, F2, KK2, 7, 8); + R( dl, el, al, bl, cl, F2, K2, 15, 9 ); + R( dr, er, ar, br, cr, F2, KK2, 14, 6); + R( cl, dl, el, al, bl, F2, K2, 8, 13 ); + R( cr, dr, er, ar, br, F2, KK2, 6, 6); + R( bl, cl, dl, el, al, F2, K2, 1, 15 ); + R( br, cr, dr, er, ar, F2, KK2, 9, 14); + R( al, bl, cl, dl, el, F2, K2, 2, 14 ); + R( ar, br, cr, dr, er, F2, KK2, 11, 12); + R( el, al, bl, cl, dl, F2, K2, 7, 8 ); + R( er, ar, br, cr, dr, F2, KK2, 8, 13); + R( dl, el, al, bl, cl, F2, K2, 0, 13 ); + R( dr, er, ar, br, cr, F2, KK2, 12, 5); + R( cl, dl, el, al, bl, F2, K2, 6, 6 ); + R( cr, dr, er, ar, br, F2, KK2, 2, 14); + R( bl, cl, dl, el, al, F2, K2, 13, 5 ); + R( br, cr, dr, er, ar, F2, KK2, 10, 13); + R( al, bl, cl, dl, el, F2, K2, 11, 12 ); + R( ar, br, cr, dr, er, F2, KK2, 0, 13); + R( el, al, bl, cl, dl, F2, K2, 5, 7 ); + R( er, ar, br, 
cr, dr, F2, KK2, 4, 7); + R( dl, el, al, bl, cl, F2, K2, 12, 5 ); + R( dr, er, ar, br, cr, F2, KK2, 13, 5); + R( cl, dl, el, al, bl, F3, K3, 1, 11 ); + R( cr, dr, er, ar, br, F1, KK3, 8, 15); + R( bl, cl, dl, el, al, F3, K3, 9, 12 ); + R( br, cr, dr, er, ar, F1, KK3, 6, 5); + R( al, bl, cl, dl, el, F3, K3, 11, 14 ); + R( ar, br, cr, dr, er, F1, KK3, 4, 8); + R( el, al, bl, cl, dl, F3, K3, 10, 15 ); + R( er, ar, br, cr, dr, F1, KK3, 1, 11); + R( dl, el, al, bl, cl, F3, K3, 0, 14 ); + R( dr, er, ar, br, cr, F1, KK3, 3, 14); + R( cl, dl, el, al, bl, F3, K3, 8, 15 ); + R( cr, dr, er, ar, br, F1, KK3, 11, 14); + R( bl, cl, dl, el, al, F3, K3, 12, 9 ); + R( br, cr, dr, er, ar, F1, KK3, 15, 6); + R( al, bl, cl, dl, el, F3, K3, 4, 8 ); + R( ar, br, cr, dr, er, F1, KK3, 0, 14); + R( el, al, bl, cl, dl, F3, K3, 13, 9 ); + R( er, ar, br, cr, dr, F1, KK3, 5, 6); + R( dl, el, al, bl, cl, F3, K3, 3, 14 ); + R( dr, er, ar, br, cr, F1, KK3, 12, 9); + R( cl, dl, el, al, bl, F3, K3, 7, 5 ); + R( cr, dr, er, ar, br, F1, KK3, 2, 12); + R( bl, cl, dl, el, al, F3, K3, 15, 6 ); + R( br, cr, dr, er, ar, F1, KK3, 13, 9); + R( al, bl, cl, dl, el, F3, K3, 14, 8 ); + R( ar, br, cr, dr, er, F1, KK3, 9, 12); + R( el, al, bl, cl, dl, F3, K3, 5, 6 ); + R( er, ar, br, cr, dr, F1, KK3, 7, 5); + R( dl, el, al, bl, cl, F3, K3, 6, 5 ); + R( dr, er, ar, br, cr, F1, KK3, 10, 15); + R( cl, dl, el, al, bl, F3, K3, 2, 12 ); + R( cr, dr, er, ar, br, F1, KK3, 14, 8); + R( bl, cl, dl, el, al, F4, K4, 4, 9 ); + R( br, cr, dr, er, ar, F0, KK4, 12, 8); + R( al, bl, cl, dl, el, F4, K4, 0, 15 ); + R( ar, br, cr, dr, er, F0, KK4, 15, 5); + R( el, al, bl, cl, dl, F4, K4, 5, 5 ); + R( er, ar, br, cr, dr, F0, KK4, 10, 12); + R( dl, el, al, bl, cl, F4, K4, 9, 11 ); + R( dr, er, ar, br, cr, F0, KK4, 4, 9); + R( cl, dl, el, al, bl, F4, K4, 7, 6 ); + R( cr, dr, er, ar, br, F0, KK4, 1, 12); + R( bl, cl, dl, el, al, F4, K4, 12, 8 ); + R( br, cr, dr, er, ar, F0, KK4, 5, 5); + R( al, bl, cl, dl, el, F4, K4, 2, 13 ); + R( ar, br, cr, dr, er, F0, KK4, 8, 14); + R( el, al, bl, cl, dl, F4, K4, 10, 12 ); + R( er, ar, br, cr, dr, F0, KK4, 7, 6); + R( dl, el, al, bl, cl, F4, K4, 14, 5 ); + R( dr, er, ar, br, cr, F0, KK4, 6, 8); + R( cl, dl, el, al, bl, F4, K4, 1, 12 ); + R( cr, dr, er, ar, br, F0, KK4, 2, 13); + R( bl, cl, dl, el, al, F4, K4, 3, 13 ); + R( br, cr, dr, er, ar, F0, KK4, 13, 6); + R( al, bl, cl, dl, el, F4, K4, 8, 14 ); + R( ar, br, cr, dr, er, F0, KK4, 14, 5); + R( el, al, bl, cl, dl, F4, K4, 11, 11 ); + R( er, ar, br, cr, dr, F0, KK4, 0, 15); + R( dl, el, al, bl, cl, F4, K4, 6, 8 ); + R( dr, er, ar, br, cr, F0, KK4, 3, 13); + R( cl, dl, el, al, bl, F4, K4, 15, 5 ); + R( cr, dr, er, ar, br, F0, KK4, 9, 11); + R( bl, cl, dl, el, al, F4, K4, 13, 6 ); + R( br, cr, dr, er, ar, F0, KK4, 11, 11); + + dr += cl + hd->h1; + hd->h1 = hd->h2 + dl + er; + hd->h2 = hd->h3 + el + ar; + hd->h3 = hd->h4 + al + br; + hd->h4 = hd->h0 + bl + cr; + hd->h0 = dr; + + return /*burn_stack*/ 104+5*sizeof(void*); +} + + +static unsigned int +transform ( void *c, const unsigned char *data, size_t nblks ) +{ + unsigned int burn; + + do + { + burn = transform_blk (c, data); + data += 64; + } + while (--nblks); + + return burn; +} + + +/* + * The routine terminates the computation + */ +static void +rmd160_final( void *context ) +{ + RMD160_CONTEXT *hd = context; + u32 t, th, msb, lsb; + byte *p; + unsigned int burn; + + _gcry_md_block_write(hd, NULL, 0); /* flush */; + + t = hd->bctx.nblocks; + if (sizeof t == sizeof hd->bctx.nblocks) + th = hd->bctx.nblocks_high; + else 
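+    /* NBLOCKS is wider than 32 bits here, so the high part of the
+       64 bit block counter is kept in NBLOCKS itself and not in
+       NBLOCKS_HIGH.  */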
+ th = hd->bctx.nblocks >> 32; + + /* multiply by 64 to make a byte count */ + lsb = t << 6; + msb = (th << 6) | (t >> 26); + /* add the count */ + t = lsb; + if( (lsb += hd->bctx.count) < t ) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 29; + + if( hd->bctx.count < 56 ) /* enough room */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ + while( hd->bctx.count < 56 ) + hd->bctx.buf[hd->bctx.count++] = 0; /* pad */ + } + else /* need one extra block */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ + while( hd->bctx.count < 64 ) + hd->bctx.buf[hd->bctx.count++] = 0; + _gcry_md_block_write(hd, NULL, 0); /* flush */; + memset(hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */ + } + /* append the 64 bit count */ + buf_put_le32(hd->bctx.buf + 56, lsb); + buf_put_le32(hd->bctx.buf + 60, msb); + burn = transform ( hd, hd->bctx.buf, 1 ); + _gcry_burn_stack (burn); + + p = hd->bctx.buf; +#define X(a) do { buf_put_le32(p, hd->h##a); p += 4; } while(0) + X(0); + X(1); + X(2); + X(3); + X(4); +#undef X +} + +static byte * +rmd160_read( void *context ) +{ + RMD160_CONTEXT *hd = context; + + return hd->bctx.buf; +} + + + +/**************** + * Shortcut functions which puts the hash value of the supplied buffer + * into outbuf which must have a size of 20 bytes. + */ +void +_gcry_rmd160_hash_buffer (void *outbuf, const void *buffer, size_t length ) +{ + RMD160_CONTEXT hd; + + rmd160_init (&hd, 0); + _gcry_md_block_write ( &hd, buffer, length ); + rmd160_final ( &hd ); + memcpy ( outbuf, hd.bctx.buf, 20 ); +} + +static byte asn[15] = /* Object ID is 1.3.36.3.2.1 */ + { 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x24, 0x03, + 0x02, 0x01, 0x05, 0x00, 0x04, 0x14 }; + +static gcry_md_oid_spec_t oid_spec_rmd160[] = + { + /* rsaSignatureWithripemd160 */ + { "1.3.36.3.3.1.2" }, + /* TeleTrust hash algorithm. */ + { "1.3.36.3.2.1" }, + { NULL } + }; + +gcry_md_spec_t _gcry_digest_spec_rmd160 = + { + GCRY_MD_RMD160, {0, 0}, + "RIPEMD160", asn, DIM (asn), oid_spec_rmd160, 20, + rmd160_init, _gcry_md_block_write, rmd160_final, rmd160_read, NULL, + sizeof (RMD160_CONTEXT) + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/rsa-common.c b/libotr/libgcrypt-1.8.7/cipher/rsa-common.c new file mode 100644 index 0000000..29b7bc8 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rsa-common.c @@ -0,0 +1,1038 @@ +/* rsa-common.c - Supporting functions for RSA + * Copyright (C) 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/* Turn VALUE into an octet string and store it in an allocated buffer
+   at R_FRAME or - if R_FRAME is NULL - copy it into the caller
+   provided buffer SPACE; either SPACE or R_FRAME may be used.  If
+   SPACE is not NULL, the caller must provide a buffer of at least
+   NBYTES.  If the resulting octet string is shorter than NBYTES pad
+   it to the left with zeroes.  If VALUE does not fit into NBYTES
+   return an error code. */
+static gpg_err_code_t
+octet_string_from_mpi (unsigned char **r_frame, void *space,
+                       gcry_mpi_t value, size_t nbytes)
+{
+  return _gcry_mpi_to_octet_string (r_frame, space, value, nbytes);
+}
+
+
+
+/* Encode {VALUE,VALUELEN} for an NBITS key using the pkcs#1 block
+   type 2 padding.  On success the result is stored as a new MPI at
+   R_RESULT.  On error the value at R_RESULT is undefined.
+
+   If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
+   the seed instead of using a random string for it.  This feature is
+   only useful for regression tests.  Note that this value may not
+   contain zero bytes.
+
+   We encode the value in this way:
+
+     0  2  RND(n bytes)  0  VALUE
+
+   0   is a marker we unfortunately can't encode because we return an
+       MPI which strips all leading zeroes.
+   2   is the block type.
+   RND are non-zero random bytes.
+
+   (Note that OpenPGP includes the cipher algorithm and a checksum in
+   VALUE; the caller needs to prepare the value accordingly.)
+  */
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_enc (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen,
+                                const unsigned char *random_override,
+                                size_t random_override_len)
+{
+  gcry_err_code_t rc = 0;
+  unsigned char *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  int i;
+  size_t n;
+  unsigned char *p;
+
+  if (valuelen + 7 > nframe || !nframe)
+    {
+      /* Can't encode a VALUELEN byte value in an NFRAME byte frame.  */
+      return GPG_ERR_TOO_SHORT; /* The key is too short.  */
+    }
+
+  if ( !(frame = xtrymalloc_secure (nframe)))
+    return gpg_err_code_from_syserror ();
+
+  n = 0;
+  frame[n++] = 0;
+  frame[n++] = 2; /* block type */
+  i = nframe - 3 - valuelen;
+  gcry_assert (i > 0);
+
+  if (random_override)
+    {
+      int j;
+
+      if (random_override_len != i)
+        {
+          xfree (frame);
+          return GPG_ERR_INV_ARG;
+        }
+      /* Check that random does not include a zero byte.  */
+      for (j=0; j < random_override_len; j++)
+        if (!random_override[j])
+          {
+            xfree (frame);
+            return GPG_ERR_INV_ARG;
+          }
+      memcpy (frame + n, random_override, random_override_len);
+      n += random_override_len;
+    }
+  else
+    {
+      p = _gcry_random_bytes_secure (i, GCRY_STRONG_RANDOM);
+      /* Replace zero bytes by new values.  */
+      for (;;)
+        {
+          int j, k;
+          unsigned char *pp;
+
+          /* Count the zero bytes.  */
+          for (j=k=0; j < i; j++)
+            {
+              if (!p[j])
+                k++;
+            }
+          if (!k)
+            break; /* Okay: no (more) zero bytes.  */
+
+          k += k/128 + 3; /* Better get some more.
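+                             Some of the fresh bytes may again be
+                             zero and are then skipped below, so we
+                             ask for a few extra ones.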
+                             */
+          pp = _gcry_random_bytes_secure (k, GCRY_STRONG_RANDOM);
+          for (j=0; j < i && k; )
+            {
+              if (!p[j])
+                p[j] = pp[--k];
+              if (p[j])
+                j++;
+            }
+          xfree (pp);
+        }
+      memcpy (frame+n, p, i);
+      n += i;
+      xfree (p);
+    }
+
+  frame[n++] = 0;
+  memcpy (frame+n, value, valuelen);
+  n += valuelen;
+  gcry_assert (n == nframe);
+
+  rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+  if (!rc && DBG_CIPHER)
+    log_mpidump ("PKCS#1 block type 2 encoded data", *r_result);
+  xfree (frame);
+
+  return rc;
+}
+
+
+/* Decode a plaintext in VALUE assuming pkcs#1 block type 2 padding.
+   NBITS is the size of the secret key.  On success the result is
+   stored as a newly allocated buffer at R_RESULT and its valid length
+   at R_RESULTLEN.  On error NULL is stored at R_RESULT.  */
+gpg_err_code_t
+_gcry_rsa_pkcs1_decode_for_enc (unsigned char **r_result, size_t *r_resultlen,
+                                unsigned int nbits, gcry_mpi_t value)
+{
+  gcry_error_t err;
+  unsigned char *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  size_t n;
+
+  *r_result = NULL;
+
+  if ( !(frame = xtrymalloc_secure (nframe)))
+    return gpg_err_code_from_syserror ();
+
+  err = _gcry_mpi_print (GCRYMPI_FMT_USG, frame, nframe, &n, value);
+  if (err)
+    {
+      xfree (frame);
+      return gcry_err_code (err);
+    }
+
+  nframe = n; /* Set NFRAME to the actual length.  */
+
+  /* FRAME = 0x00 || 0x02 || PS || 0x00 || M
+
+     pkcs#1 requires that the first byte is zero.  Our MPIs usually
+     strip leading zero bytes; thus we are not able to detect them.
+     However due to the way gcry_mpi_print is implemented we may see
+     leading zero bytes nevertheless.  We handle this by making the
+     first zero byte optional.  */
+  if (nframe < 4)
+    {
+      xfree (frame);
+      return GPG_ERR_ENCODING_PROBLEM;  /* Too short.  */
+    }
+  n = 0;
+  if (!frame[0])
+    n++;
+  if (frame[n++] != 0x02)
+    {
+      xfree (frame);
+      return GPG_ERR_ENCODING_PROBLEM;  /* Wrong block type.  */
+    }
+
+  /* Skip the non-zero random bytes and the terminating zero byte.  */
+  for (; n < nframe && frame[n] != 0x00; n++)
+    ;
+  if (n+1 >= nframe)
+    {
+      xfree (frame);
+      return GPG_ERR_ENCODING_PROBLEM; /* No zero byte.  */
+    }
+  n++; /* Skip the zero byte.  */
+
+  /* To avoid an extra allocation we reuse the frame buffer.  The only
+     caller of this function will anyway free the result soon.  */
+  memmove (frame, frame + n, nframe - n);
+  *r_result = frame;
+  *r_resultlen = nframe - n;
+
+  if (DBG_CIPHER)
+    log_printhex ("value extracted from PKCS#1 block type 2 encoded data",
+                  *r_result, *r_resultlen);
+
+  return 0;
+}
+
+
+/* Encode {VALUE,VALUELEN} for an NBITS key and hash algorithm ALGO
+   using the pkcs#1 block type 1 padding.  On success the result is
+   stored as a new MPI at R_RESULT.  On error the value at R_RESULT is
+   undefined.
+
+   We encode the value in this way:
+
+     0  1  PAD(n bytes)  0  ASN(asnlen bytes)  VALUE(valuelen bytes)
+
+   0   is a marker we unfortunately can't encode because we return an
+       MPI which strips all leading zeroes.
+   1   is the block type.
+   PAD consists of 0xff bytes.
+   0   marks the end of the padding.
+   ASN is the DER encoding of the hash algorithm; along with the VALUE
+       it yields a valid DER encoding.
+
+   (Note that PGP prior to version 2.3 encoded the message digest as:
+      0   1   MD(16 bytes)   0   PAD(n bytes)   1
+    The MD is always 16 bytes here because it's always MD5.  GnuPG
+    does not support pre-v2.3 signatures, but I'm including this
+    comment so the information is easily found if needed.)
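+
+    For example, with a 2048 bit key (NFRAME = 256) and SHA-256
+    (ASNLEN = 19, VALUELEN = 32) the assembled frame is
+
+      0x00 0x01 <202 octets of 0xff> 0x00 <19 octet ASN> <32 octet digest>
+
+    i.e. 2 + 202 + 1 + 19 + 32 = 256 octets.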
+*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen,
+                                int algo)
+{
+  gcry_err_code_t rc = 0;
+  byte asn[100];
+  byte *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  int i;
+  size_t n;
+  size_t asnlen, dlen;
+
+  asnlen = DIM(asn);
+  dlen = _gcry_md_get_algo_dlen (algo);
+
+  if (_gcry_md_algo_info (algo, GCRYCTL_GET_ASNOID, asn, &asnlen))
+    {
+      /* We don't have an ASN OID for this hash algorithm.  */
+      return GPG_ERR_NOT_IMPLEMENTED;
+    }
+
+  if ( valuelen != dlen )
+    {
+      /* Hash value does not match the length of the digest for
+         the given algorithm.  */
+      return GPG_ERR_CONFLICT;
+    }
+
+  if ( !dlen || dlen + asnlen + 4 > nframe)
+    {
+      /* Can't encode a DLEN byte digest MD into an NFRAME byte
+         frame.  */
+      return GPG_ERR_TOO_SHORT;
+    }
+
+  if ( !(frame = xtrymalloc (nframe)) )
+    return gpg_err_code_from_syserror ();
+
+  /* Assemble the pkcs#1 block type 1.  */
+  n = 0;
+  frame[n++] = 0;
+  frame[n++] = 1; /* block type */
+  i = nframe - valuelen - asnlen - 3;
+  gcry_assert (i > 1);
+  memset (frame+n, 0xff, i);
+  n += i;
+  frame[n++] = 0;
+  memcpy (frame+n, asn, asnlen);
+  n += asnlen;
+  memcpy (frame+n, value, valuelen);
+  n += valuelen;
+  gcry_assert (n == nframe);
+
+  /* Convert it into an MPI.  */
+  rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+  if (!rc && DBG_CIPHER)
+    log_mpidump ("PKCS#1 block type 1 encoded data", *r_result);
+  xfree (frame);
+
+  return rc;
+}
+
+/* Encode {VALUE,VALUELEN} for an NBITS key using the pkcs#1 block
+   type 1 padding.  On success the result is stored as a new MPI at
+   R_RESULT.  On error the value at R_RESULT is undefined.
+
+   We encode the value in this way:
+
+     0  1  PAD(n bytes)  0  VALUE(valuelen bytes)
+
+   0   is a marker we unfortunately can't encode because we return an
+       MPI which strips all leading zeroes.
+   1   is the block type.
+   PAD consists of 0xff bytes.
+   0   marks the end of the padding.
+
+   (Note that PGP prior to version 2.3 encoded the message digest as:
+      0   1   MD(16 bytes)   0   PAD(n bytes)   1
+    The MD is always 16 bytes here because it's always MD5.  GnuPG
+    does not support pre-v2.3 signatures, but I'm including this
+    comment so the information is easily found if needed.)
+*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_raw_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+                                    const unsigned char *value, size_t valuelen)
+{
+  gcry_err_code_t rc = 0;
+  gcry_error_t err;
+  byte *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  int i;
+  size_t n;
+
+  if ( !valuelen || valuelen + 4 > nframe)
+    {
+      /* Can't encode a VALUELEN byte value into an NFRAME byte
+         frame.  */
+      return GPG_ERR_TOO_SHORT;
+    }
+
+  if ( !(frame = xtrymalloc (nframe)) )
+    return gpg_err_code_from_syserror ();
+
+  /* Assemble the pkcs#1 block type 1.  */
+  n = 0;
+  frame[n++] = 0;
+  frame[n++] = 1; /* block type */
+  i = nframe - valuelen - 3;
+  gcry_assert (i > 1);
+  memset (frame+n, 0xff, i);
+  n += i;
+  frame[n++] = 0;
+  memcpy (frame+n, value, valuelen);
+  n += valuelen;
+  gcry_assert (n == nframe);
+
+  /* Convert it into an MPI.  */
+  err = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+  if (err)
+    rc = gcry_err_code (err);
+  else if (DBG_CIPHER)
+    log_mpidump ("PKCS#1 block type 1 encoded data", *r_result);
+  xfree (frame);
+
+  return rc;
+}
+
+
+/* Mask generation function for OAEP.  See RFC-3447 B.2.1.
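+
+   The mask is the concatenation
+
+     Hash(SEED || C(0)) || Hash(SEED || C(1)) || ...
+
+   truncated to OUTLEN octets, where C(i) is the 4 octet big endian
+   encoding of the counter i; this is what the loop below computes.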
*/ +static gcry_err_code_t +mgf1 (unsigned char *output, size_t outlen, unsigned char *seed, size_t seedlen, + int algo) +{ + size_t dlen, nbytes, n; + int idx; + gcry_md_hd_t hd; + gcry_err_code_t err; + + err = _gcry_md_open (&hd, algo, 0); + if (err) + return err; + + dlen = _gcry_md_get_algo_dlen (algo); + + /* We skip step 1 which would be assert(OUTLEN <= 2^32). The loop + in step 3 is merged with step 4 by concatenating no more octets + than what would fit into OUTPUT. The ceiling for the counter IDX + is implemented indirectly. */ + nbytes = 0; /* Step 2. */ + idx = 0; + while ( nbytes < outlen ) + { + unsigned char c[4], *digest; + + if (idx) + _gcry_md_reset (hd); + + c[0] = (idx >> 24) & 0xFF; + c[1] = (idx >> 16) & 0xFF; + c[2] = (idx >> 8) & 0xFF; + c[3] = idx & 0xFF; + idx++; + + _gcry_md_write (hd, seed, seedlen); + _gcry_md_write (hd, c, 4); + digest = _gcry_md_read (hd, 0); + + n = (outlen - nbytes < dlen)? (outlen - nbytes) : dlen; + memcpy (output+nbytes, digest, n); + nbytes += n; + } + + _gcry_md_close (hd); + return GPG_ERR_NO_ERROR; +} + + +/* RFC-3447 (pkcs#1 v2.1) OAEP encoding. NBITS is the length of the + key measured in bits. ALGO is the hash function; it must be a + valid and usable algorithm. {VALUE,VALUELEN} is the message to + encrypt. {LABEL,LABELLEN} is the optional label to be associated + with the message, if LABEL is NULL the default is to use the empty + string as label. On success the encoded ciphertext is returned at + R_RESULT. + + If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as + the seed instead of using a random string for it. This feature is + only useful for regression tests. + + Here is figure 1 from the RFC depicting the process: + + +----------+---------+-------+ + DB = | lHash | PS | M | + +----------+---------+-------+ + | + +----------+ V + | seed |--> MGF ---> xor + +----------+ | + | | + +--+ V | + |00| xor <----- MGF <-----| + +--+ | | + | | | + V V V + +--+----------+----------------------------+ + EM = |00|maskedSeed| maskedDB | + +--+----------+----------------------------+ + */ +gpg_err_code_t +_gcry_rsa_oaep_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo, + const unsigned char *value, size_t valuelen, + const unsigned char *label, size_t labellen, + const void *random_override, size_t random_override_len) +{ + gcry_err_code_t rc = 0; + unsigned char *frame = NULL; + size_t nframe = (nbits+7) / 8; + unsigned char *p; + size_t hlen; + size_t n; + + *r_result = NULL; + + /* Set defaults for LABEL. */ + if (!label || !labellen) + { + label = (const unsigned char*)""; + labellen = 0; + } + + hlen = _gcry_md_get_algo_dlen (algo); + + /* We skip step 1a which would be to check that LABELLEN is not + greater than 2^61-1. See rfc-3447 7.1.1. */ + + /* Step 1b. Note that the obsolete rfc-2437 uses the check: + valuelen > nframe - 2 * hlen - 1 . */ + if (valuelen > nframe - 2 * hlen - 2 || !nframe) + { + /* Can't encode a VALUELEN value in a NFRAME bytes frame. */ + return GPG_ERR_TOO_SHORT; /* The key is too short. */ + } + + /* Allocate the frame. */ + frame = xtrycalloc_secure (1, nframe); + if (!frame) + return gpg_err_code_from_syserror (); + + /* Step 2a: Compute the hash of the label. We store it in the frame + where later the maskedDB will commence. */ + _gcry_md_hash_buffer (algo, frame + 1 + hlen, label, labellen); + + /* Step 2b: Set octet string to zero. */ + /* This has already been done while allocating FRAME. */ + + /* Step 2c: Create DB by concatenating lHash, PS, 0x01 and M. 
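+     DB starts at FRAME+1+HLEN, where the lHash was already stored in
+     step 2a; the PS part consists of nframe - valuelen - 2*hlen - 2
+     zero octets and is already in place because the frame was
+     allocated with xtrycalloc_secure.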
+  */
+  n = nframe - valuelen - 1;
+  frame[n] = 0x01;
+  memcpy (frame + n + 1, value, valuelen);
+
+  /* Step 2d: Generate seed.  We store it where the maskedSeed will go
+     later. */
+  if (random_override)
+    {
+      if (random_override_len != hlen)
+        {
+          xfree (frame);
+          return GPG_ERR_INV_ARG;
+        }
+      memcpy (frame + 1, random_override, hlen);
+    }
+  else
+    _gcry_randomize (frame + 1, hlen, GCRY_STRONG_RANDOM);
+
+  /* Step 2e and 2f: Create maskedDB.  */
+  {
+    unsigned char *dmask;
+
+    dmask = xtrymalloc_secure (nframe - hlen - 1);
+    if (!dmask)
+      {
+        rc = gpg_err_code_from_syserror ();
+        xfree (frame);
+        return rc;
+      }
+    rc = mgf1 (dmask, nframe - hlen - 1, frame+1, hlen, algo);
+    if (rc)
+      {
+        xfree (dmask);
+        xfree (frame);
+        return rc;
+      }
+    for (n = 1 + hlen, p = dmask; n < nframe; n++)
+      frame[n] ^= *p++;
+    xfree (dmask);
+  }
+
+  /* Step 2g and 2h: Create maskedSeed.  */
+  {
+    unsigned char *smask;
+
+    smask = xtrymalloc_secure (hlen);
+    if (!smask)
+      {
+        rc = gpg_err_code_from_syserror ();
+        xfree (frame);
+        return rc;
+      }
+    rc = mgf1 (smask, hlen, frame + 1 + hlen, nframe - hlen - 1, algo);
+    if (rc)
+      {
+        xfree (smask);
+        xfree (frame);
+        return rc;
+      }
+    for (n = 1, p = smask; n < 1 + hlen; n++)
+      frame[n] ^= *p++;
+    xfree (smask);
+  }
+
+  /* Step 2i: Concatenate 0x00, maskedSeed and maskedDB.  */
+  /* This has already been done by using in-place operations.  */
+
+  /* Convert the stuff into an MPI as expected by the caller.  */
+  rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, nframe, NULL);
+  if (!rc && DBG_CIPHER)
+    log_mpidump ("OAEP encoded data", *r_result);
+  xfree (frame);
+
+  return rc;
+}
+
+
+/* RFC-3447 (pkcs#1 v2.1) OAEP decoding.  NBITS is the length of the
+   key measured in bits.  ALGO is the hash function; it must be a
+   valid and usable algorithm.  VALUE is the raw decrypted message.
+   {LABEL,LABELLEN} is the optional label to be associated with the
+   message, if LABEL is NULL the default is to use the empty string as
+   label.  On success the plaintext is returned as a newly allocated
+   buffer at R_RESULT; its valid length is stored at R_RESULTLEN.  On
+   error NULL is stored at R_RESULT.  */
+gpg_err_code_t
+_gcry_rsa_oaep_decode (unsigned char **r_result, size_t *r_resultlen,
+                       unsigned int nbits, int algo,
+                       gcry_mpi_t value,
+                       const unsigned char *label, size_t labellen)
+{
+  gcry_err_code_t rc;
+  unsigned char *frame = NULL; /* Encoded message (EM).  */
+  unsigned char *masked_seed;  /* Points into FRAME.  */
+  unsigned char *masked_db;    /* Points into FRAME.  */
+  unsigned char *seed = NULL;  /* Allocated space for the seed and DB.  */
+  unsigned char *db;           /* Points into SEED.  */
+  unsigned char *lhash = NULL; /* Hash of the label.  */
+  size_t nframe;               /* Length of the ciphertext (EM).  */
+  size_t hlen;                 /* Length of the hash digest.  */
+  size_t db_len;               /* Length of DB and masked_db.  */
+  size_t nkey = (nbits+7)/8;   /* Length of the key in bytes.  */
+  int failed = 0;              /* Error indicator.  */
+  size_t n;
+
+  *r_result = NULL;
+
+  /* This code is implemented as described by rfc-3447 7.1.2.  */
+
+  /* Set defaults for LABEL.  */
+  if (!label || !labellen)
+    {
+      label = (const unsigned char*)"";
+      labellen = 0;
+    }
+
+  /* Get the length of the digest.  */
+  hlen = _gcry_md_get_algo_dlen (algo);
+
+  /* Hash the label right away.  */
+  lhash = xtrymalloc (hlen);
+  if (!lhash)
+    return gpg_err_code_from_syserror ();
+  _gcry_md_hash_buffer (algo, lhash, label, labellen);
+
+  /* Turn the MPI into an octet string.  If the octet string is
+     shorter than the key we pad it to the left with zeroes.  This may
+     happen due to the leading zero in OAEP frames and due to the
+     following random octets (seed^mask) which may have leading zero
+     bytes.  This all is needed to cope with our leading zeroes
+     suppressing MPI implementation.  The code implicitly implements
+     Step 1b (bail out if NFRAME != N).  */
+  rc = octet_string_from_mpi (&frame, NULL, value, nkey);
+  if (rc)
+    {
+      xfree (lhash);
+      return GPG_ERR_ENCODING_PROBLEM;
+    }
+  nframe = nkey;
+
+  /* Step 1c: Check that the key is long enough.  */
+  if ( nframe < 2 * hlen + 2 )
+    {
+      xfree (frame);
+      xfree (lhash);
+      return GPG_ERR_ENCODING_PROBLEM;
+    }
+
+  /* Step 2 has already been done by the caller and the
+     octet_string_from_mpi above.  */
+
+  /* Allocate space for SEED and DB.  */
+  seed = xtrymalloc_secure (nframe - 1);
+  if (!seed)
+    {
+      rc = gpg_err_code_from_syserror ();
+      xfree (frame);
+      xfree (lhash);
+      return rc;
+    }
+  db = seed + hlen;
+
+  /* To avoid chosen ciphertext attacks from now on we make sure to
+     run all code even in the error case; this avoids possible timing
+     attacks as described by Manger.  */
+
+  /* Step 3a: Hash the label.  */
+  /* This has already been done.  */
+
+  /* Step 3b: Separate the encoded message.  */
+  masked_seed = frame + 1;
+  masked_db = frame + 1 + hlen;
+  db_len = nframe - 1 - hlen;
+
+  /* Step 3c and 3d: seed = maskedSeed ^ mgf(maskedDB, hlen).  */
+  if (mgf1 (seed, hlen, masked_db, db_len, algo))
+    failed = 1;
+  for (n = 0; n < hlen; n++)
+    seed[n] ^= masked_seed[n];
+
+  /* Step 3e and 3f: db = maskedDB ^ mgf(seed, db_len).  */
+  if (mgf1 (db, db_len, seed, hlen, algo))
+    failed = 1;
+  for (n = 0; n < db_len; n++)
+    db[n] ^= masked_db[n];
+
+  /* Step 3g: Check lhash, a possibly empty padding string terminated
+     by 0x01, and that the first byte of EM is 0.  */
+  if (memcmp (lhash, db, hlen))
+    failed = 1;
+  for (n = hlen; n < db_len; n++)
+    if (db[n] == 0x01)
+      break;
+  if (n == db_len)
+    failed = 1;
+  if (frame[0])
+    failed = 1;
+
+  xfree (lhash);
+  xfree (frame);
+  if (failed)
+    {
+      xfree (seed);
+      return GPG_ERR_ENCODING_PROBLEM;
+    }
+
+  /* Step 4: Output M.  */
+  /* To avoid an extra allocation we reuse the seed buffer.  The only
+     caller of this function will anyway free the result soon.  */
+  n++;
+  memmove (seed, db + n, db_len - n);
+  *r_result = seed;
+  *r_resultlen = db_len - n;
+  seed = NULL;
+
+  if (DBG_CIPHER)
+    log_printhex ("value extracted from OAEP encoded data",
+                  *r_result, *r_resultlen);
+
+  return 0;
+}
+
+
+/* RFC-3447 (pkcs#1 v2.1) PSS encoding.  Encode {VALUE,VALUELEN} for
+   an NBITS key.  Note that VALUE is already the mHash from the
+   picture below.  ALGO is a valid hash algorithm and SALTLEN is the
+   length of salt to be used.  On success the result is stored as a
+   new MPI at R_RESULT.  On error the value at R_RESULT is undefined.
+
+   If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
+   the salt instead of using a random string for the salt.  This
+   feature is only useful for regression tests.
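+
+   (A SALTLEN of zero yields a deterministic signature; a common
+   choice otherwise is to use the digest length of ALGO.)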
+ + Here is figure 2 from the RFC (errata 595 applied) depicting the + process: + + +-----------+ + | M | + +-----------+ + | + V + Hash + | + V + +--------+----------+----------+ + M' = |Padding1| mHash | salt | + +--------+----------+----------+ + | + +--------+----------+ V + DB = |Padding2| salt | Hash + +--------+----------+ | + | | + V | +----+ + xor <--- MGF <---| |0xbc| + | | +----+ + | | | + V V V + +-------------------+----------+----+ + EM = | maskedDB | H |0xbc| + +-------------------+----------+----+ + + */ +gpg_err_code_t +_gcry_rsa_pss_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo, + const unsigned char *value, size_t valuelen, int saltlen, + const void *random_override, size_t random_override_len) +{ + gcry_err_code_t rc = 0; + size_t hlen; /* Length of the hash digest. */ + unsigned char *em = NULL; /* Encoded message. */ + size_t emlen = (nbits+7)/8; /* Length in bytes of EM. */ + unsigned char *h; /* Points into EM. */ + unsigned char *buf = NULL; /* Help buffer. */ + size_t buflen; /* Length of BUF. */ + unsigned char *mhash; /* Points into BUF. */ + unsigned char *salt; /* Points into BUF. */ + unsigned char *dbmask; /* Points into BUF. */ + unsigned char *p; + size_t n; + + /* This code is implemented as described by rfc-3447 9.1.1. */ + + /* Get the length of the digest. */ + hlen = _gcry_md_get_algo_dlen (algo); + gcry_assert (hlen); /* We expect a valid ALGO here. */ + + /* Allocate a help buffer and setup some pointers. */ + buflen = 8 + hlen + saltlen + (emlen - hlen - 1); + buf = xtrymalloc (buflen); + if (!buf) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + mhash = buf + 8; + salt = mhash + hlen; + dbmask= salt + saltlen; + + /* Step 2: That would be: mHash = Hash(M) but our input is already + mHash thus we do only a consistency check and copy to MHASH. */ + if (valuelen != hlen) + { + rc = GPG_ERR_INV_LENGTH; + goto leave; + } + memcpy (mhash, value, hlen); + + /* Step 3: Check length constraints. */ + if (emlen < hlen + saltlen + 2) + { + rc = GPG_ERR_TOO_SHORT; + goto leave; + } + + /* Allocate space for EM. */ + em = xtrymalloc (emlen); + if (!em) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + h = em + emlen - 1 - hlen; + + /* Step 4: Create a salt. */ + if (saltlen) + { + if (random_override) + { + if (random_override_len != saltlen) + { + rc = GPG_ERR_INV_ARG; + goto leave; + } + memcpy (salt, random_override, saltlen); + } + else + _gcry_randomize (salt, saltlen, GCRY_STRONG_RANDOM); + } + + /* Step 5 and 6: M' = Hash(Padding1 || mHash || salt). */ + memset (buf, 0, 8); /* Padding. */ + _gcry_md_hash_buffer (algo, h, buf, 8 + hlen + saltlen); + + /* Step 7 and 8: DB = PS || 0x01 || salt. */ + /* Note that we use EM to store DB and later Xor in-place. */ + p = em + emlen - 1 - hlen - saltlen - 1; + memset (em, 0, p - em); + *p++ = 0x01; + memcpy (p, salt, saltlen); + + /* Step 9: dbmask = MGF(H, emlen - hlen - 1). */ + mgf1 (dbmask, emlen - hlen - 1, h, hlen, algo); + + /* Step 10: maskedDB = DB ^ dbMask */ + for (n = 0, p = dbmask; n < emlen - hlen - 1; n++, p++) + em[n] ^= *p; + + /* Step 11: Set the leftmost bits to zero. */ + em[0] &= 0xFF >> (8 * emlen - nbits); + + /* Step 12: EM = maskedDB || H || 0xbc. */ + em[emlen-1] = 0xbc; + + /* Convert EM into an MPI. 
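+     _gcry_mpi_scan with GCRYMPI_FMT_USG treats EM as an unsigned big
+     endian octet string, so a leading zero octet of EM is not
+     preserved by the resulting MPI.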
*/ + rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, em, emlen, NULL); + if (!rc && DBG_CIPHER) + log_mpidump ("PSS encoded data", *r_result); + + leave: + if (em) + { + wipememory (em, emlen); + xfree (em); + } + if (buf) + { + wipememory (buf, buflen); + xfree (buf); + } + return rc; +} + + +/* Verify a signature assuming PSS padding. VALUE is the hash of the + message (mHash) encoded as an MPI; its length must match the digest + length of ALGO. ENCODED is the output of the RSA public key + function (EM). NBITS is the size of the public key. ALGO is the + hash algorithm and SALTLEN is the length of the used salt. The + function returns 0 on success or on error code. */ +gpg_err_code_t +_gcry_rsa_pss_verify (gcry_mpi_t value, gcry_mpi_t encoded, + unsigned int nbits, int algo, size_t saltlen) +{ + gcry_err_code_t rc = 0; + size_t hlen; /* Length of the hash digest. */ + unsigned char *em = NULL; /* Encoded message. */ + size_t emlen = (nbits+7)/8; /* Length in bytes of EM. */ + unsigned char *salt; /* Points into EM. */ + unsigned char *h; /* Points into EM. */ + unsigned char *buf = NULL; /* Help buffer. */ + size_t buflen; /* Length of BUF. */ + unsigned char *dbmask; /* Points into BUF. */ + unsigned char *mhash; /* Points into BUF. */ + unsigned char *p; + size_t n; + + /* This code is implemented as described by rfc-3447 9.1.2. */ + + /* Get the length of the digest. */ + hlen = _gcry_md_get_algo_dlen (algo); + gcry_assert (hlen); /* We expect a valid ALGO here. */ + + /* Allocate a help buffer and setup some pointers. + This buffer is used for two purposes: + +------------------------------+-------+ + 1. | dbmask | mHash | + +------------------------------+-------+ + emlen - hlen - 1 hlen + + +----------+-------+---------+-+-------+ + 2. | padding1 | mHash | salt | | mHash | + +----------+-------+---------+-+-------+ + 8 hlen saltlen hlen + */ + buflen = 8 + hlen + saltlen; + if (buflen < emlen - hlen - 1) + buflen = emlen - hlen - 1; + buflen += hlen; + buf = xtrymalloc (buflen); + if (!buf) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + dbmask = buf; + mhash = buf + buflen - hlen; + + /* Step 2: That would be: mHash = Hash(M) but our input is already + mHash thus we only need to convert VALUE into MHASH. */ + rc = octet_string_from_mpi (NULL, mhash, value, hlen); + if (rc) + goto leave; + + /* Convert the signature into an octet string. */ + rc = octet_string_from_mpi (&em, NULL, encoded, emlen); + if (rc) + goto leave; + + /* Step 3: Check length of EM. Because we internally use MPI + functions we can't do this properly; EMLEN is always the length + of the key because octet_string_from_mpi needs to left pad the + result with zero to cope with the fact that our MPIs suppress all + leading zeroes. Thus what we test here are merely the digest and + salt lengths to the key. */ + if (emlen < hlen + saltlen + 2) + { + rc = GPG_ERR_TOO_SHORT; /* For the hash and saltlen. */ + goto leave; + } + + /* Step 4: Check last octet. */ + if (em[emlen - 1] != 0xbc) + { + rc = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + /* Step 5: Split EM. */ + h = em + emlen - 1 - hlen; + + /* Step 6: Check the leftmost bits. */ + if ((em[0] & ~(0xFF >> (8 * emlen - nbits)))) + { + rc = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + /* Step 7: dbmask = MGF(H, emlen - hlen - 1). */ + mgf1 (dbmask, emlen - hlen - 1, h, hlen, algo); + + /* Step 8: maskedDB = DB ^ dbMask. */ + for (n = 0, p = dbmask; n < emlen - hlen - 1; n++, p++) + em[n] ^= *p; + + /* Step 9: Set leftmost bits in DB to zero. 
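+     This mirrors step 11 of the encoding operation so that the
+     padding check below sees the same DB the signer produced.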
*/ + em[0] &= 0xFF >> (8 * emlen - nbits); + + /* Step 10: Check the padding of DB. */ + for (n = 0; n < emlen - hlen - saltlen - 2 && !em[n]; n++) + ; + if (n != emlen - hlen - saltlen - 2 || em[n++] != 1) + { + rc = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + /* Step 11: Extract salt from DB. */ + salt = em + n; + + /* Step 12: M' = (0x)00 00 00 00 00 00 00 00 || mHash || salt */ + memset (buf, 0, 8); + memcpy (buf+8, mhash, hlen); + memcpy (buf+8+hlen, salt, saltlen); + + /* Step 13: H' = Hash(M'). */ + _gcry_md_hash_buffer (algo, buf, buf, 8 + hlen + saltlen); + + /* Step 14: Check H == H'. */ + rc = memcmp (h, buf, hlen) ? GPG_ERR_BAD_SIGNATURE : GPG_ERR_NO_ERROR; + + leave: + if (em) + { + wipememory (em, emlen); + xfree (em); + } + if (buf) + { + wipememory (buf, buflen); + xfree (buf); + } + return rc; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/rsa.c b/libotr/libgcrypt-1.8.7/cipher/rsa.c new file mode 100644 index 0000000..575ea94 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/rsa.c @@ -0,0 +1,2035 @@ +/* rsa.c - RSA implementation + * Copyright (C) 1997, 1998, 1999 by Werner Koch (dd9jn) + * Copyright (C) 2000, 2001, 2002, 2003, 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* This code uses an algorithm protected by U.S. Patent #4,405,829 + which expired on September 20, 2000. The patent holder placed that + patent into the public domain on Sep 6th, 2000. +*/ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "pubkey-internal.h" + + +typedef struct +{ + gcry_mpi_t n; /* modulus */ + gcry_mpi_t e; /* exponent */ +} RSA_public_key; + + +typedef struct +{ + gcry_mpi_t n; /* public modulus */ + gcry_mpi_t e; /* public exponent */ + gcry_mpi_t d; /* exponent */ + gcry_mpi_t p; /* prime p. */ + gcry_mpi_t q; /* prime q. */ + gcry_mpi_t u; /* inverse of p mod q. */ +} RSA_secret_key; + + +static const char *rsa_names[] = + { + "rsa", + "openpgp-rsa", + "oid.1.2.840.113549.1.1.1", + NULL, + }; + + +/* A sample 2048 bit RSA key used for the selftests. 
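+   The key is given as an S-expression string; the matching
+   public-only part follows below.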
*/ +static const char sample_secret_key[] = +" (private-key" +" (rsa" +" (n #009F56231A3D82E3E7D613D59D53E9AB921BEF9F08A782AED0B6E46ADBC853EC" +" 7C71C422435A3CD8FA0DB9EFD55CD3295BADC4E8E2E2B94E15AE82866AB8ADE8" +" 7E469FAE76DC3577DE87F1F419C4EB41123DFAF8D16922D5EDBAD6E9076D5A1C" +" 958106F0AE5E2E9193C6B49124C64C2A241C4075D4AF16299EB87A6585BAE917" +" DEF27FCDD165764D069BC18D16527B29DAAB549F7BBED4A7C6A842D203ED6613" +" 6E2411744E432CD26D940132F25874483DCAEECDFD95744819CBCF1EA810681C" +" 42907EBCB1C7EAFBE75C87EC32C5413EA10476545D3FC7B2ADB1B66B7F200918" +" 664B0E5261C2895AA28B0DE321E921B3F877172CCCAB81F43EF98002916156F6CB#)" +" (e #010001#)" +" (d #07EF82500C403899934FE993AC5A36F14FF2DF38CF1EF315F205EE4C83EDAA19" +" 8890FC23DE9AA933CAFB37B6A8A8DBA675411958337287310D3FF2F1DDC0CB93" +" 7E70F57F75F833C021852B631D2B9A520E4431A03C5C3FCB5742DCD841D9FB12" +" 771AA1620DCEC3F1583426066ED9DC3F7028C5B59202C88FDF20396E2FA0EC4F" +" 5A22D9008F3043673931BC14A5046D6327398327900867E39CC61B2D1AFE2F48" +" EC8E1E3861C68D257D7425F4E6F99ABD77D61F10CA100EFC14389071831B33DD" +" 69CC8EABEF860D1DC2AAA84ABEAE5DFC91BC124DAF0F4C8EF5BBEA436751DE84" +" 3A8063E827A024466F44C28614F93B0732A100D4A0D86D532FE1E22C7725E401#)" +" (p #00C29D438F115825779631CD665A5739367F3E128ADC29766483A46CA80897E0" +" 79B32881860B8F9A6A04C2614A904F6F2578DAE13EA67CD60AE3D0AA00A1FF9B" +" 441485E44B2DC3D0B60260FBFE073B5AC72FAF67964DE15C8212C389D20DB9CF" +" 54AF6AEF5C4196EAA56495DD30CF709F499D5AB30CA35E086C2A1589D6283F1783#)" +" (q #00D1984135231CB243FE959C0CBEF551EDD986AD7BEDF71EDF447BE3DA27AF46" +" 79C974A6FA69E4D52FE796650623DE70622862713932AA2FD9F2EC856EAEAA77" +" 88B4EA6084DC81C902F014829B18EA8B2666EC41586818E0589E18876065F97E" +" 8D22CE2DA53A05951EC132DCEF41E70A9C35F4ACC268FFAC2ADF54FA1DA110B919#)" +" (u #67CF0FD7635205DD80FA814EE9E9C267C17376BF3209FB5D1BC42890D2822A04" +" 479DAF4D5B6ED69D0F8D1AF94164D07F8CD52ECEFE880641FA0F41DDAB1785E4" +" A37A32F997A516480B4CD4F6482B9466A1765093ED95023CA32D5EDC1E34CEE9" +" AF595BC51FE43C4BF810FA225AF697FB473B83815966188A4312C048B885E3F7#)))"; + +/* A sample 2048 bit RSA key used for the selftests (public only). */ +static const char sample_public_key[] = +" (public-key" +" (rsa" +" (n #009F56231A3D82E3E7D613D59D53E9AB921BEF9F08A782AED0B6E46ADBC853EC" +" 7C71C422435A3CD8FA0DB9EFD55CD3295BADC4E8E2E2B94E15AE82866AB8ADE8" +" 7E469FAE76DC3577DE87F1F419C4EB41123DFAF8D16922D5EDBAD6E9076D5A1C" +" 958106F0AE5E2E9193C6B49124C64C2A241C4075D4AF16299EB87A6585BAE917" +" DEF27FCDD165764D069BC18D16527B29DAAB549F7BBED4A7C6A842D203ED6613" +" 6E2411744E432CD26D940132F25874483DCAEECDFD95744819CBCF1EA810681C" +" 42907EBCB1C7EAFBE75C87EC32C5413EA10476545D3FC7B2ADB1B66B7F200918" +" 664B0E5261C2895AA28B0DE321E921B3F877172CCCAB81F43EF98002916156F6CB#)" +" (e #010001#)))"; + + +static int test_keys (RSA_secret_key *sk, unsigned nbits); +static int check_secret_key (RSA_secret_key *sk); +static void public (gcry_mpi_t output, gcry_mpi_t input, RSA_public_key *skey); +static void secret (gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey); +static unsigned int rsa_get_nbits (gcry_sexp_t parms); + + +/* Check that a freshly generated key actually works. Returns 0 on success. */ +static int +test_keys (RSA_secret_key *sk, unsigned int nbits) +{ + int result = -1; /* Default to failure. 
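+                        Set to 0 only after all of the
+                        encrypt/decrypt and sign/verify checks
+                        below have passed.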
*/ + RSA_public_key pk; + gcry_mpi_t plaintext = mpi_new (nbits); + gcry_mpi_t ciphertext = mpi_new (nbits); + gcry_mpi_t decr_plaintext = mpi_new (nbits); + gcry_mpi_t signature = mpi_new (nbits); + + /* Put the relevant parameters into a public key structure. */ + pk.n = sk->n; + pk.e = sk->e; + + /* Create a random plaintext. */ + _gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM); + + /* Encrypt using the public key. */ + public (ciphertext, plaintext, &pk); + + /* Check that the cipher text does not match the plaintext. */ + if (!mpi_cmp (ciphertext, plaintext)) + goto leave; /* Ciphertext is identical to the plaintext. */ + + /* Decrypt using the secret key. */ + secret (decr_plaintext, ciphertext, sk); + + /* Check that the decrypted plaintext matches the original plaintext. */ + if (mpi_cmp (decr_plaintext, plaintext)) + goto leave; /* Plaintext does not match. */ + + /* Create another random plaintext as data for signature checking. */ + _gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM); + + /* Use the RSA secret function to create a signature of the plaintext. */ + secret (signature, plaintext, sk); + + /* Use the RSA public function to verify this signature. */ + public (decr_plaintext, signature, &pk); + if (mpi_cmp (decr_plaintext, plaintext)) + goto leave; /* Signature does not match. */ + + /* Modify the signature and check that the signing fails. */ + mpi_add_ui (signature, signature, 1); + public (decr_plaintext, signature, &pk); + if (!mpi_cmp (decr_plaintext, plaintext)) + goto leave; /* Signature matches but should not. */ + + result = 0; /* All tests succeeded. */ + + leave: + _gcry_mpi_release (signature); + _gcry_mpi_release (decr_plaintext); + _gcry_mpi_release (ciphertext); + _gcry_mpi_release (plaintext); + return result; +} + + +/* Callback used by the prime generation to test whether the exponent + is suitable. Returns 0 if the test has been passed. */ +static int +check_exponent (void *arg, gcry_mpi_t a) +{ + gcry_mpi_t e = arg; + gcry_mpi_t tmp; + int result; + + mpi_sub_ui (a, a, 1); + tmp = _gcry_mpi_alloc_like (a); + result = !mpi_gcd(tmp, e, a); /* GCD is not 1. */ + _gcry_mpi_release (tmp); + mpi_add_ui (a, a, 1); + return result; +} + +/**************** + * Generate a key pair with a key of size NBITS. + * USE_E = 0 let Libcgrypt decide what exponent to use. + * = 1 request the use of a "secure" exponent; this is required by some + * specification to be 65537. + * > 2 Use this public exponent. If the given exponent + * is not odd one is internally added to it. + * TRANSIENT_KEY: If true, generate the primes using the standard RNG. + * Returns: 2 structures filled with all needed values + */ +static gpg_err_code_t +generate_std (RSA_secret_key *sk, unsigned int nbits, unsigned long use_e, + int transient_key) +{ + gcry_mpi_t p, q; /* the two primes */ + gcry_mpi_t d; /* the private key */ + gcry_mpi_t u; + gcry_mpi_t t1, t2; + gcry_mpi_t n; /* the public key */ + gcry_mpi_t e; /* the exponent */ + gcry_mpi_t phi; /* helper: (p-1)(q-1) */ + gcry_mpi_t g; + gcry_mpi_t f; + gcry_random_level_t random_level; + + if (fips_mode ()) + { + if (nbits < 1024) + return GPG_ERR_INV_VALUE; + if (transient_key) + return GPG_ERR_INV_VALUE; + } + + /* The random quality depends on the transient_key flag. */ + random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM; + + /* Make sure that nbits is even so that we generate p, q of equal size. 
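+     An odd NBITS is rounded up, so the modulus may turn out one bit
+     larger than requested.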
*/ + if ( (nbits&1) ) + nbits++; + + if (use_e == 1) /* Alias for a secure value */ + use_e = 65537; /* as demanded by Sphinx. */ + + /* Public exponent: + In general we use 41 as this is quite fast and more secure than the + commonly used 17. Benchmarking the RSA verify function + with a 1024 bit key yields (2001-11-08): + e=17 0.54 ms + e=41 0.75 ms + e=257 0.95 ms + e=65537 1.80 ms + */ + e = mpi_alloc( (32+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB ); + if (!use_e) + mpi_set_ui (e, 41); /* This is a reasonable secure and fast value */ + else + { + use_e |= 1; /* make sure this is odd */ + mpi_set_ui (e, use_e); + } + + n = mpi_new (nbits); + + p = q = NULL; + do + { + /* select two (very secret) primes */ + if (p) + _gcry_mpi_release (p); + if (q) + _gcry_mpi_release (q); + if (use_e) + { /* Do an extra test to ensure that the given exponent is + suitable. */ + p = _gcry_generate_secret_prime (nbits/2, random_level, + check_exponent, e); + q = _gcry_generate_secret_prime (nbits/2, random_level, + check_exponent, e); + } + else + { /* We check the exponent later. */ + p = _gcry_generate_secret_prime (nbits/2, random_level, NULL, NULL); + q = _gcry_generate_secret_prime (nbits/2, random_level, NULL, NULL); + } + if (mpi_cmp (p, q) > 0 ) /* p shall be smaller than q (for calc of u)*/ + mpi_swap(p,q); + /* calculate the modulus */ + mpi_mul( n, p, q ); + } + while ( mpi_get_nbits(n) != nbits ); + + /* calculate Euler totient: phi = (p-1)(q-1) */ + t1 = mpi_alloc_secure( mpi_get_nlimbs(p) ); + t2 = mpi_alloc_secure( mpi_get_nlimbs(p) ); + phi = mpi_snew ( nbits ); + g = mpi_snew ( nbits ); + f = mpi_snew ( nbits ); + mpi_sub_ui( t1, p, 1 ); + mpi_sub_ui( t2, q, 1 ); + mpi_mul( phi, t1, t2 ); + mpi_gcd (g, t1, t2); + mpi_fdiv_q(f, phi, g); + + while (!mpi_gcd(t1, e, phi)) /* (while gcd is not 1) */ + { + if (use_e) + BUG (); /* The prime generator already made sure that we + never can get to here. */ + mpi_add_ui (e, e, 2); + } + + /* calculate the secret key d = e^-1 mod phi */ + d = mpi_snew ( nbits ); + mpi_invm (d, e, f ); + /* calculate the inverse of p and q (used for chinese remainder theorem)*/ + u = mpi_snew ( nbits ); + mpi_invm(u, p, q ); + + if( DBG_CIPHER ) + { + log_mpidump(" p= ", p ); + log_mpidump(" q= ", q ); + log_mpidump("phi= ", phi ); + log_mpidump(" g= ", g ); + log_mpidump(" f= ", f ); + log_mpidump(" n= ", n ); + log_mpidump(" e= ", e ); + log_mpidump(" d= ", d ); + log_mpidump(" u= ", u ); + } + + _gcry_mpi_release (t1); + _gcry_mpi_release (t2); + _gcry_mpi_release (phi); + _gcry_mpi_release (f); + _gcry_mpi_release (g); + + sk->n = n; + sk->e = e; + sk->p = p; + sk->q = q; + sk->d = d; + sk->u = u; + + /* Now we can test our keys. */ + if (test_keys (sk, nbits - 64)) + { + _gcry_mpi_release (sk->n); sk->n = NULL; + _gcry_mpi_release (sk->e); sk->e = NULL; + _gcry_mpi_release (sk->p); sk->p = NULL; + _gcry_mpi_release (sk->q); sk->q = NULL; + _gcry_mpi_release (sk->d); sk->d = NULL; + _gcry_mpi_release (sk->u); sk->u = NULL; + fips_signal_error ("self-test after key generation failed"); + return GPG_ERR_SELFTEST_FAILED; + } + + return 0; +} + + +/**************** + * Generate a key pair with a key of size NBITS. + * USE_E = 0 let Libcgrypt decide what exponent to use. + * = 1 request the use of a "secure" exponent; this is required by some + * specification to be 65537. + * > 2 Use this public exponent. If the given exponent + * is not odd one is internally added to it. 
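The exponent arithmetic above is easier to follow with toy numbers.
With p = 11 and q = 17 we get phi = 160 and gcd(p-1, q-1) = 2, hence
f = lcm(10, 16) = 80; for e = 7 the secret exponent is d = 23, because
7 * 23 = 161 = 2 * 80 + 1.  A self-contained sketch (tiny primes for
illustration only, never for real keys):

#include <stdio.h>

/* Extended Euclid: the inverse of a modulo m, assuming gcd(a,m) == 1,
   which is exactly what the gcd loop above guarantees before mpi_invm
   runs.  */
static long
invmod (long a, long m)
{
  long t = 0, newt = 1, r = m, newr = a % m;
  while (newr)
    {
      long q = r / newr, tmp;
      tmp = t - q * newt; t = newt; newt = tmp;
      tmp = r - q * newr; r = newr; newr = tmp;
    }
  return t < 0 ? t + m : t;
}

static long
powmod (long b, long e, long m)
{
  long r = 1;
  for (b %= m; e; e >>= 1, b = b * b % m)
    if (e & 1)
      r = r * b % m;
  return r;
}

int
main (void)
{
  long p = 11, q = 17, n = p * q;   /* n = 187 */
  long f = (p - 1) * (q - 1) / 2;   /* lcm(10, 16) = 80; the 2 is gcd(10, 16) */
  long e = 7, d = invmod (e, f);    /* d = 23 */
  long m = 42;
  /* m^(e*d) mod n must give m back; this is the invariant test_keys
     relies on.  */
  printf ("%ld -> %ld\n", m, powmod (powmod (m, e, n), d, n));
  return 0;
}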
+ * TESTPARMS: If set, do not generate but only test whether the given
+ *            p and q are probably prime.  Returns a key filled with
+ *            zeroes so as not to break code calling this function.
+ * TRANSIENT_KEY:  If true, generate the primes using the standard RNG.
+ * Returns: 2 structures filled with all needed values
+ */
+static gpg_err_code_t
+generate_fips (RSA_secret_key *sk, unsigned int nbits, unsigned long use_e,
+               gcry_sexp_t testparms, int transient_key)
+{
+  gcry_mpi_t p, q; /* the two primes */
+  gcry_mpi_t d;    /* the private key */
+  gcry_mpi_t u;
+  gcry_mpi_t p1, q1;
+  gcry_mpi_t n;    /* the public key */
+  gcry_mpi_t e;    /* the exponent */
+  gcry_mpi_t g;
+  gcry_mpi_t minp;
+  gcry_mpi_t diff, mindiff;
+  gcry_random_level_t random_level;
+  unsigned int pbits = nbits/2;
+  unsigned int i;
+  int pqswitch;
+  gpg_err_code_t ec = GPG_ERR_NO_PRIME;
+
+  if (nbits < 1024 || (nbits & 0x1FF))
+    return GPG_ERR_INV_VALUE;
+  if (_gcry_enforced_fips_mode() && nbits != 2048 && nbits != 3072)
+    return GPG_ERR_INV_VALUE;
+
+  /* The random quality depends on the transient_key flag.  */
+  random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM;
+
+  if (testparms)
+    {
+      /* Parameters to derive the key are given.  */
+      /* Note that we explicitly need to setup the values of tbl
+         because some compilers (e.g. OpenWatcom, IRIX) don't allow
+         initializing a structure with automatic variables.  */
+      struct { const char *name; gcry_mpi_t *value; } tbl[] = {
+        { "e" },
+        { "p" },
+        { "q" },
+        { NULL }
+      };
+      int idx;
+      gcry_sexp_t oneparm;
+
+      tbl[0].value = &e;
+      tbl[1].value = &p;
+      tbl[2].value = &q;
+
+      for (idx=0; tbl[idx].name; idx++)
+        {
+          oneparm = sexp_find_token (testparms, tbl[idx].name, 0);
+          if (oneparm)
+            {
+              *tbl[idx].value = sexp_nth_mpi (oneparm, 1, GCRYMPI_FMT_USG);
+              sexp_release (oneparm);
+            }
+        }
+      for (idx=0; tbl[idx].name; idx++)
+        if (!*tbl[idx].value)
+          break;
+      if (tbl[idx].name)
+        {
+          /* At least one parameter is missing.
*/ + for (idx=0; tbl[idx].name; idx++) + _gcry_mpi_release (*tbl[idx].value); + return GPG_ERR_MISSING_VALUE; + } + } + else + { + if (use_e < 65537) + use_e = 65537; /* This is the smallest value allowed by FIPS */ + + e = mpi_alloc ((32+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB); + + use_e |= 1; /* make sure this is odd */ + mpi_set_ui (e, use_e); + + p = mpi_snew (pbits); + q = mpi_snew (pbits); + } + + n = mpi_new (nbits); + d = mpi_snew (nbits); + u = mpi_snew (nbits); + + /* prepare approximate minimum p and q */ + minp = mpi_new (pbits); + mpi_set_ui (minp, 0xB504F334); + mpi_lshift (minp, minp, pbits - 32); + + /* prepare minimum p and q difference */ + diff = mpi_new (pbits); + mindiff = mpi_new (pbits - 99); + mpi_set_ui (mindiff, 1); + mpi_lshift (mindiff, mindiff, pbits - 100); + + p1 = mpi_snew (pbits); + q1 = mpi_snew (pbits); + g = mpi_snew (pbits); + + retry: + /* generate p and q */ + for (i = 0; i < 5 * pbits; i++) + { + ploop: + if (!testparms) + { + _gcry_mpi_randomize (p, pbits, random_level); + } + if (mpi_cmp (p, minp) < 0) + { + if (testparms) + goto err; + goto ploop; + } + + mpi_sub_ui (p1, p, 1); + if (mpi_gcd (g, p1, e)) + { + if (_gcry_fips186_4_prime_check (p, pbits) != GPG_ERR_NO_ERROR) + { + /* not a prime */ + if (testparms) + goto err; + } + else + break; + } + else if (testparms) + goto err; + } + if (i >= 5 * pbits) + goto err; + + for (i = 0; i < 5 * pbits; i++) + { + qloop: + if (!testparms) + { + _gcry_mpi_randomize (q, pbits, random_level); + } + if (mpi_cmp (q, minp) < 0) + { + if (testparms) + goto err; + goto qloop; + } + if (mpi_cmp (p, q) > 0) + { + pqswitch = 1; + mpi_sub (diff, p, q); + } + else + { + pqswitch = 0; + mpi_sub (diff, q, p); + } + if (mpi_cmp (diff, mindiff) < 0) + { + if (testparms) + goto err; + goto qloop; + } + + mpi_sub_ui (q1, q, 1); + if (mpi_gcd (g, q1, e)) + { + if (_gcry_fips186_4_prime_check (q, pbits) != GPG_ERR_NO_ERROR) + { + /* not a prime */ + if (testparms) + goto err; + } + else + break; + } + else if (testparms) + goto err; + } + if (i >= 5 * pbits) + goto err; + + if (testparms) + { + mpi_clear (p); + mpi_clear (q); + } + else + { + gcry_mpi_t f; + + if (pqswitch) + { + gcry_mpi_t tmp; + + tmp = p; + p = q; + q = tmp; + } + + f = mpi_snew (nbits); + + /* calculate the modulus */ + mpi_mul (n, p, q); + + /* calculate the secret key d = e^1 mod phi */ + mpi_gcd (g, p1, q1); + mpi_fdiv_q (f, p1, g); + mpi_mul (f, f, q1); + + mpi_invm (d, e, f); + + _gcry_mpi_release (f); + + if (mpi_get_nbits (d) < pbits) + goto retry; + + /* calculate the inverse of p and q (used for chinese remainder theorem)*/ + mpi_invm (u, p, q ); + } + + ec = 0; + + if (DBG_CIPHER) + { + log_mpidump(" p= ", p ); + log_mpidump(" q= ", q ); + log_mpidump(" n= ", n ); + log_mpidump(" e= ", e ); + log_mpidump(" d= ", d ); + log_mpidump(" u= ", u ); + } + + err: + + _gcry_mpi_release (p1); + _gcry_mpi_release (q1); + _gcry_mpi_release (g); + _gcry_mpi_release (minp); + _gcry_mpi_release (mindiff); + _gcry_mpi_release (diff); + + sk->n = n; + sk->e = e; + sk->p = p; + sk->q = q; + sk->d = d; + sk->u = u; + + /* Now we can test our keys. 
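Despite the shorthand in its comment, the code computes
d = e^-1 mod lcm(p-1, q-1), the same value generate_std derives.  The
two magic numbers above also deserve a note: 0xB504F334 is
ceil(sqrt(2) * 2^31), so shifting it left by pbits - 32 approximates
sqrt(2) * 2^(pbits-1), the FIPS 186-4 lower bound on p and q that
guarantees their product really has nbits bits; and
mindiff = 2^(pbits-100) enforces the required minimum distance
|p - q| > 2^(pbits-100).  A quick sanity check of the constant
(illustrative only; link with -lm):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* Prints B504F334, i.e. ceil(sqrt(2) * 2^31).  */
  printf ("%08lX\n", (unsigned long) ceil (sqrt (2.0) * 2147483648.0));
  return 0;
}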
*/ + if (ec || (!testparms && test_keys (sk, nbits - 64))) + { + _gcry_mpi_release (sk->n); sk->n = NULL; + _gcry_mpi_release (sk->e); sk->e = NULL; + _gcry_mpi_release (sk->p); sk->p = NULL; + _gcry_mpi_release (sk->q); sk->q = NULL; + _gcry_mpi_release (sk->d); sk->d = NULL; + _gcry_mpi_release (sk->u); sk->u = NULL; + if (!ec) + { + fips_signal_error ("self-test after key generation failed"); + return GPG_ERR_SELFTEST_FAILED; + } + } + + return ec; +} + + +/* Helper for generate_x931. */ +static gcry_mpi_t +gen_x931_parm_xp (unsigned int nbits) +{ + gcry_mpi_t xp; + + xp = mpi_snew (nbits); + _gcry_mpi_randomize (xp, nbits, GCRY_VERY_STRONG_RANDOM); + + /* The requirement for Xp is: + + sqrt{2}*2^{nbits-1} <= xp <= 2^{nbits} - 1 + + We set the two high order bits to 1 to satisfy the lower bound. + By using mpi_set_highbit we make sure that the upper bound is + satisfied as well. */ + mpi_set_highbit (xp, nbits-1); + mpi_set_bit (xp, nbits-2); + gcry_assert ( mpi_get_nbits (xp) == nbits ); + + return xp; +} + + +/* Helper for generate_x931. */ +static gcry_mpi_t +gen_x931_parm_xi (void) +{ + gcry_mpi_t xi; + + xi = mpi_snew (101); + _gcry_mpi_randomize (xi, 101, GCRY_VERY_STRONG_RANDOM); + mpi_set_highbit (xi, 100); + gcry_assert ( mpi_get_nbits (xi) == 101 ); + + return xi; +} + + + +/* Variant of the standard key generation code using the algorithm + from X9.31. Using this algorithm has the advantage that the + generation can be made deterministic which is required for CAVS + testing. */ +static gpg_err_code_t +generate_x931 (RSA_secret_key *sk, unsigned int nbits, unsigned long e_value, + gcry_sexp_t deriveparms, int *swapped) +{ + gcry_mpi_t p, q; /* The two primes. */ + gcry_mpi_t e; /* The public exponent. */ + gcry_mpi_t n; /* The public key. */ + gcry_mpi_t d; /* The private key */ + gcry_mpi_t u; /* The inverse of p and q. */ + gcry_mpi_t pm1; /* p - 1 */ + gcry_mpi_t qm1; /* q - 1 */ + gcry_mpi_t phi; /* Euler totient. */ + gcry_mpi_t f, g; /* Helper. */ + + *swapped = 0; + + if (e_value == 1) /* Alias for a secure value. */ + e_value = 65537; + + /* Point 1 of section 4.1: k = 1024 + 256s with S >= 0 */ + if (nbits < 1024 || (nbits % 256)) + return GPG_ERR_INV_VALUE; + + /* Point 2: 2 <= bitlength(e) < 2^{k-2} + Note that we do not need to check the upper bound because we use + an unsigned long for E and thus there is no way for E to reach + that limit. */ + if (e_value < 3) + return GPG_ERR_INV_VALUE; + + /* Our implementation requires E to be odd. */ + if (!(e_value & 1)) + return GPG_ERR_INV_VALUE; + + /* Point 3: e > 0 or e 0 if it is to be randomly generated. + We support only a fixed E and thus there is no need for an extra test. */ + + + /* Compute or extract the derive parameters. */ + { + gcry_mpi_t xp1 = NULL; + gcry_mpi_t xp2 = NULL; + gcry_mpi_t xp = NULL; + gcry_mpi_t xq1 = NULL; + gcry_mpi_t xq2 = NULL; + gcry_mpi_t xq = NULL; + gcry_mpi_t tmpval; + + if (!deriveparms) + { + /* Not given: Generate them. */ + xp = gen_x931_parm_xp (nbits/2); + /* Make sure that |xp - xq| > 2^{nbits - 100} holds. */ + tmpval = mpi_snew (nbits/2); + do + { + _gcry_mpi_release (xq); + xq = gen_x931_parm_xp (nbits/2); + mpi_sub (tmpval, xp, xq); + } + while (mpi_get_nbits (tmpval) <= (nbits/2 - 100)); + _gcry_mpi_release (tmpval); + + xp1 = gen_x931_parm_xi (); + xp2 = gen_x931_parm_xi (); + xq1 = gen_x931_parm_xi (); + xq2 = gen_x931_parm_xi (); + + } + else + { + /* Parameters to derive the key are given. 
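The bound in gen_x931_parm_xp comes almost for free: with the two top
bits forced to 1, xp >= 2^(nbits-1) + 2^(nbits-2) = 1.5 * 2^(nbits-1),
and 1.5 exceeds sqrt(2) ~ 1.4142, so the lower limit holds without
comparing against an exact sqrt(2) constant.  A toy check at nbits = 8
(illustrative only):

#include <stdio.h>

int
main (void)
{
  unsigned int nbits = 8;
  unsigned int xp_min = (1u << (nbits - 1)) | (1u << (nbits - 2));
  unsigned int bound = 182;   /* ceil(sqrt(2) * 2^(nbits-1)) = ceil(181.02) */
  printf ("%u >= %u\n", xp_min, bound);   /* 192 >= 182 */
  return 0;
}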
*/ + /* Note that we explicitly need to setup the values of tbl + because some compilers (e.g. OpenWatcom, IRIX) don't allow + to initialize a structure with automatic variables. */ + struct { const char *name; gcry_mpi_t *value; } tbl[] = { + { "Xp1" }, + { "Xp2" }, + { "Xp" }, + { "Xq1" }, + { "Xq2" }, + { "Xq" }, + { NULL } + }; + int idx; + gcry_sexp_t oneparm; + + tbl[0].value = &xp1; + tbl[1].value = &xp2; + tbl[2].value = &xp; + tbl[3].value = &xq1; + tbl[4].value = &xq2; + tbl[5].value = &xq; + + for (idx=0; tbl[idx].name; idx++) + { + oneparm = sexp_find_token (deriveparms, tbl[idx].name, 0); + if (oneparm) + { + *tbl[idx].value = sexp_nth_mpi (oneparm, 1, GCRYMPI_FMT_USG); + sexp_release (oneparm); + } + } + for (idx=0; tbl[idx].name; idx++) + if (!*tbl[idx].value) + break; + if (tbl[idx].name) + { + /* At least one parameter is missing. */ + for (idx=0; tbl[idx].name; idx++) + _gcry_mpi_release (*tbl[idx].value); + return GPG_ERR_MISSING_VALUE; + } + } + + e = mpi_alloc_set_ui (e_value); + + /* Find two prime numbers. */ + p = _gcry_derive_x931_prime (xp, xp1, xp2, e, NULL, NULL); + q = _gcry_derive_x931_prime (xq, xq1, xq2, e, NULL, NULL); + _gcry_mpi_release (xp); xp = NULL; + _gcry_mpi_release (xp1); xp1 = NULL; + _gcry_mpi_release (xp2); xp2 = NULL; + _gcry_mpi_release (xq); xq = NULL; + _gcry_mpi_release (xq1); xq1 = NULL; + _gcry_mpi_release (xq2); xq2 = NULL; + if (!p || !q) + { + _gcry_mpi_release (p); + _gcry_mpi_release (q); + _gcry_mpi_release (e); + return GPG_ERR_NO_PRIME; + } + } + + + /* Compute the public modulus. We make sure that p is smaller than + q to allow the use of the CRT. */ + if (mpi_cmp (p, q) > 0 ) + { + mpi_swap (p, q); + *swapped = 1; + } + n = mpi_new (nbits); + mpi_mul (n, p, q); + + /* Compute the Euler totient: phi = (p-1)(q-1) */ + pm1 = mpi_snew (nbits/2); + qm1 = mpi_snew (nbits/2); + phi = mpi_snew (nbits); + mpi_sub_ui (pm1, p, 1); + mpi_sub_ui (qm1, q, 1); + mpi_mul (phi, pm1, qm1); + + g = mpi_snew (nbits); + gcry_assert (mpi_gcd (g, e, phi)); + + /* Compute: f = lcm(p-1,q-1) = phi / gcd(p-1,q-1) */ + mpi_gcd (g, pm1, qm1); + f = pm1; pm1 = NULL; + _gcry_mpi_release (qm1); qm1 = NULL; + mpi_fdiv_q (f, phi, g); + _gcry_mpi_release (phi); phi = NULL; + d = g; g = NULL; + /* Compute the secret key: d = e^{-1} mod lcm(p-1,q-1) */ + mpi_invm (d, e, f); + + /* Compute the inverse of p and q. */ + u = f; f = NULL; + mpi_invm (u, p, q ); + + if( DBG_CIPHER ) + { + if (*swapped) + log_debug ("p and q are swapped\n"); + log_mpidump(" p", p ); + log_mpidump(" q", q ); + log_mpidump(" n", n ); + log_mpidump(" e", e ); + log_mpidump(" d", d ); + log_mpidump(" u", u ); + } + + + sk->n = n; + sk->e = e; + sk->p = p; + sk->q = q; + sk->d = d; + sk->u = u; + + /* Now we can test our keys. */ + if (test_keys (sk, nbits - 64)) + { + _gcry_mpi_release (sk->n); sk->n = NULL; + _gcry_mpi_release (sk->e); sk->e = NULL; + _gcry_mpi_release (sk->p); sk->p = NULL; + _gcry_mpi_release (sk->q); sk->q = NULL; + _gcry_mpi_release (sk->d); sk->d = NULL; + _gcry_mpi_release (sk->u); sk->u = NULL; + fips_signal_error ("self-test after key generation failed"); + return GPG_ERR_SELFTEST_FAILED; + } + + return 0; +} + + +/**************** + * Test whether the secret key is valid. + * Returns: true if this is a valid key. 
+ */ +static int +check_secret_key( RSA_secret_key *sk ) +{ + int rc; + gcry_mpi_t temp = mpi_alloc( mpi_get_nlimbs(sk->p)*2 ); + + mpi_mul(temp, sk->p, sk->q ); + rc = mpi_cmp( temp, sk->n ); + mpi_free(temp); + return !rc; +} + + + +/**************** + * Public key operation. Encrypt INPUT with PKEY and put result into OUTPUT. + * + * c = m^e mod n + * + * Where c is OUTPUT, m is INPUT and e,n are elements of PKEY. + */ +static void +public(gcry_mpi_t output, gcry_mpi_t input, RSA_public_key *pkey ) +{ + if( output == input ) /* powm doesn't like output and input the same */ + { + gcry_mpi_t x = mpi_alloc( mpi_get_nlimbs(input)*2 ); + mpi_powm( x, input, pkey->e, pkey->n ); + mpi_set(output, x); + mpi_free(x); + } + else + mpi_powm( output, input, pkey->e, pkey->n ); +} + +#if 0 +static void +stronger_key_check ( RSA_secret_key *skey ) +{ + gcry_mpi_t t = mpi_alloc_secure ( 0 ); + gcry_mpi_t t1 = mpi_alloc_secure ( 0 ); + gcry_mpi_t t2 = mpi_alloc_secure ( 0 ); + gcry_mpi_t phi = mpi_alloc_secure ( 0 ); + + /* check that n == p * q */ + mpi_mul( t, skey->p, skey->q); + if (mpi_cmp( t, skey->n) ) + log_info ( "RSA Oops: n != p * q\n" ); + + /* check that p is less than q */ + if( mpi_cmp( skey->p, skey->q ) > 0 ) + { + log_info ("RSA Oops: p >= q - fixed\n"); + _gcry_mpi_swap ( skey->p, skey->q); + } + + /* check that e divides neither p-1 nor q-1 */ + mpi_sub_ui(t, skey->p, 1 ); + mpi_fdiv_r(t, t, skey->e ); + if ( !mpi_cmp_ui( t, 0) ) + log_info ( "RSA Oops: e divides p-1\n" ); + mpi_sub_ui(t, skey->q, 1 ); + mpi_fdiv_r(t, t, skey->e ); + if ( !mpi_cmp_ui( t, 0) ) + log_info ( "RSA Oops: e divides q-1\n" ); + + /* check that d is correct */ + mpi_sub_ui( t1, skey->p, 1 ); + mpi_sub_ui( t2, skey->q, 1 ); + mpi_mul( phi, t1, t2 ); + gcry_mpi_gcd(t, t1, t2); + mpi_fdiv_q(t, phi, t); + mpi_invm(t, skey->e, t ); + if ( mpi_cmp(t, skey->d ) ) + { + log_info ( "RSA Oops: d is wrong - fixed\n"); + mpi_set (skey->d, t); + log_printmpi (" fixed d", skey->d); + } + + /* check for correctness of u */ + mpi_invm(t, skey->p, skey->q ); + if ( mpi_cmp(t, skey->u ) ) + { + log_info ( "RSA Oops: u is wrong - fixed\n"); + mpi_set (skey->u, t); + log_printmpi (" fixed u", skey->u); + } + + log_info ( "RSA secret key check finished\n"); + + mpi_free (t); + mpi_free (t1); + mpi_free (t2); + mpi_free (phi); +} +#endif + + + +/* Secret key operation - standard version. + * + * m = c^d mod n + */ +static void +secret_core_std (gcry_mpi_t M, gcry_mpi_t C, + gcry_mpi_t D, gcry_mpi_t N) +{ + mpi_powm (M, C, D, N); +} + + +/* Secret key operation - using the CRT. 
+ * + * m1 = c ^ (d mod (p-1)) mod p + * m2 = c ^ (d mod (q-1)) mod q + * h = u * (m2 - m1) mod q + * m = m1 + h * p + */ +static void +secret_core_crt (gcry_mpi_t M, gcry_mpi_t C, + gcry_mpi_t D, unsigned int Nlimbs, + gcry_mpi_t P, gcry_mpi_t Q, gcry_mpi_t U) +{ + gcry_mpi_t m1 = mpi_alloc_secure ( Nlimbs + 1 ); + gcry_mpi_t m2 = mpi_alloc_secure ( Nlimbs + 1 ); + gcry_mpi_t h = mpi_alloc_secure ( Nlimbs + 1 ); + gcry_mpi_t D_blind = mpi_alloc_secure ( Nlimbs + 1 ); + gcry_mpi_t r; + unsigned int r_nbits; + + r_nbits = mpi_get_nbits (P) / 4; + if (r_nbits < 96) + r_nbits = 96; + r = mpi_secure_new (r_nbits); + + /* d_blind = (d mod (p-1)) + (p-1) * r */ + /* m1 = c ^ d_blind mod p */ + _gcry_mpi_randomize (r, r_nbits, GCRY_WEAK_RANDOM); + mpi_set_highbit (r, r_nbits - 1); + mpi_sub_ui ( h, P, 1 ); + mpi_mul ( D_blind, h, r ); + mpi_fdiv_r ( h, D, h ); + mpi_add ( D_blind, D_blind, h ); + mpi_powm ( m1, C, D_blind, P ); + + /* d_blind = (d mod (q-1)) + (q-1) * r */ + /* m2 = c ^ d_blind mod q */ + _gcry_mpi_randomize (r, r_nbits, GCRY_WEAK_RANDOM); + mpi_set_highbit (r, r_nbits - 1); + mpi_sub_ui ( h, Q, 1 ); + mpi_mul ( D_blind, h, r ); + mpi_fdiv_r ( h, D, h ); + mpi_add ( D_blind, D_blind, h ); + mpi_powm ( m2, C, D_blind, Q ); + + mpi_free ( r ); + mpi_free ( D_blind ); + + /* h = u * ( m2 - m1 ) mod q */ + mpi_sub ( h, m2, m1 ); + if ( mpi_has_sign ( h ) ) + mpi_add ( h, h, Q ); + mpi_mulm ( h, U, h, Q ); + + /* m = m1 + h * p */ + mpi_mul ( h, h, P ); + mpi_add ( M, m1, h ); + + mpi_free ( h ); + mpi_free ( m1 ); + mpi_free ( m2 ); +} + + +/* Secret key operation. + * Encrypt INPUT with SKEY and put result into + * OUTPUT. SKEY has the secret key parameters. + */ +static void +secret (gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey ) +{ + /* Remove superfluous leading zeroes from INPUT. */ + mpi_normalize (input); + + if (!skey->p || !skey->q || !skey->u) + { + secret_core_std (output, input, skey->d, skey->n); + } + else + { + secret_core_crt (output, input, skey->d, mpi_get_nlimbs (skey->n), + skey->p, skey->q, skey->u); + } +} + + +static void +secret_blinded (gcry_mpi_t output, gcry_mpi_t input, + RSA_secret_key *sk, unsigned int nbits) +{ + gcry_mpi_t r; /* Random number needed for blinding. */ + gcry_mpi_t ri; /* Modular multiplicative inverse of r. */ + gcry_mpi_t bldata; /* Blinded data to decrypt. */ + + /* First, we need a random number r between 0 and n - 1, which is + * relatively prime to n (i.e. it is neither p nor q). The random + * number needs to be only unpredictable, thus we employ the + * gcry_create_nonce function by using GCRY_WEAK_RANDOM with + * gcry_mpi_randomize. */ + r = mpi_snew (nbits); + ri = mpi_snew (nbits); + bldata = mpi_snew (nbits); + + do + { + _gcry_mpi_randomize (r, nbits, GCRY_WEAK_RANDOM); + mpi_mod (r, r, sk->n); + } + while (!mpi_invm (ri, r, sk->n)); + + /* Do blinding. We calculate: y = (x * r^e) mod n, where r is the + * random number, e is the public exponent, x is the non-blinded + * input data and n is the RSA modulus. */ + mpi_powm (bldata, r, sk->e, sk->n); + mpi_mulm (bldata, bldata, input, sk->n); + + /* Perform decryption. */ + secret (output, bldata, sk); + _gcry_mpi_release (bldata); + + /* Undo blinding. Here we calculate: y = (x * r^-1) mod n, where x + * is the blinded decrypted data, ri is the modular multiplicative + * inverse of r and n is the RSA modulus. 
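With the toy key from the earlier sketch (p = 11, q = 17, e = 7,
d = 23, u = p^-1 mod q = 14), the CRT recombination described above can
be traced end to end.  An illustrative, self-contained version of the
textbook formulas (the real code additionally blinds the two exponents):

#include <stdio.h>

static long
powmod (long b, long e, long m)
{
  long r = 1;
  for (b %= m; e; e >>= 1, b = b * b % m)
    if (e & 1)
      r = r * b % m;
  return r;
}

int
main (void)
{
  long p = 11, q = 17, n = 187, e = 7, d = 23;
  long u = 14;                           /* 11 * 14 = 154 = 9 * 17 + 1 */
  long c = powmod (42, e, n);            /* ciphertext of m = 42 */
  long m1 = powmod (c, d % (p - 1), p);
  long m2 = powmod (c, d % (q - 1), q);
  long h = u * ((m2 - m1 + q) % q) % q;  /* "+ q" mirrors the sign fixup */
  printf ("%ld\n", m1 + h * p);          /* prints 42 */
  return 0;
}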
*/ + mpi_mulm (output, output, ri, sk->n); + + _gcry_mpi_release (r); + _gcry_mpi_release (ri); +} + + +/********************************************* + ************** interface ****************** + *********************************************/ + +static gcry_err_code_t +rsa_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) +{ + gpg_err_code_t ec; + unsigned int nbits; + unsigned long evalue; + RSA_secret_key sk; + gcry_sexp_t deriveparms; + int flags = 0; + gcry_sexp_t l1; + gcry_sexp_t swap_info = NULL; + + memset (&sk, 0, sizeof sk); + + ec = _gcry_pk_util_get_nbits (genparms, &nbits); + if (ec) + return ec; + + ec = _gcry_pk_util_get_rsa_use_e (genparms, &evalue); + if (ec) + return ec; + + /* Parse the optional flags list. */ + l1 = sexp_find_token (genparms, "flags", 0); + if (l1) + { + ec = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + sexp_release (l1); + if (ec) + return ec; + } + + deriveparms = (genparms? + sexp_find_token (genparms, "derive-parms", 0) : NULL); + if (!deriveparms) + { + /* Parse the optional "use-x931" flag. */ + l1 = sexp_find_token (genparms, "use-x931", 0); + if (l1) + { + flags |= PUBKEY_FLAG_USE_X931; + sexp_release (l1); + } + } + + if (deriveparms || (flags & PUBKEY_FLAG_USE_X931)) + { + int swapped; + ec = generate_x931 (&sk, nbits, evalue, deriveparms, &swapped); + sexp_release (deriveparms); + if (!ec && swapped) + ec = sexp_new (&swap_info, "(misc-key-info(p-q-swapped))", 0, 1); + } + else + { + /* Parse the optional "transient-key" flag. */ + if (!(flags & PUBKEY_FLAG_TRANSIENT_KEY)) + { + l1 = sexp_find_token (genparms, "transient-key", 0); + if (l1) + { + flags |= PUBKEY_FLAG_TRANSIENT_KEY; + sexp_release (l1); + } + } + deriveparms = (genparms? sexp_find_token (genparms, "test-parms", 0) + /**/ : NULL); + + /* Generate. */ + if (deriveparms || fips_mode()) + { + ec = generate_fips (&sk, nbits, evalue, deriveparms, + !!(flags & PUBKEY_FLAG_TRANSIENT_KEY)); + } + else + { + ec = generate_std (&sk, nbits, evalue, + !!(flags & PUBKEY_FLAG_TRANSIENT_KEY)); + } + sexp_release (deriveparms); + } + + if (!ec) + { + ec = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (rsa(n%m)(e%m)))" + " (private-key" + " (rsa(n%m)(e%m)(d%m)(p%m)(q%m)(u%m)))" + " %S)", + sk.n, sk.e, + sk.n, sk.e, sk.d, sk.p, sk.q, sk.u, + swap_info); + } + + mpi_free (sk.n); + mpi_free (sk.e); + mpi_free (sk.p); + mpi_free (sk.q); + mpi_free (sk.d); + mpi_free (sk.u); + sexp_release (swap_info); + + return ec; +} + + +static gcry_err_code_t +rsa_check_secret_key (gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL}; + + /* To check the key we need the optional parameters. */ + rc = sexp_extract_param (keyparms, NULL, "nedpqu", + &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u, + NULL); + if (rc) + goto leave; + + if (!check_secret_key (&sk)) + rc = GPG_ERR_BAD_SECKEY; + + leave: + _gcry_mpi_release (sk.n); + _gcry_mpi_release (sk.e); + _gcry_mpi_release (sk.d); + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.u); + if (DBG_CIPHER) + log_debug ("rsa_testkey => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +rsa_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t data = NULL; + RSA_public_key pk = {NULL, NULL}; + gcry_mpi_t ciph = NULL; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, + rsa_get_nbits (keyparms)); + + /* Extract the data. 
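Numerically, the blinding done by secret_blinded is a one-liner to
undo: decrypting y = x * r^e mod n yields x^d * r mod n, so a single
multiplication by r^-1 recovers x^d.  A toy-number sketch (same
illustrative key as before):

#include <stdio.h>

static long
powmod (long b, long e, long m)
{
  long r = 1;
  for (b %= m; e; e >>= 1, b = b * b % m)
    if (e & 1)
      r = r * b % m;
  return r;
}

int
main (void)
{
  long n = 187, e = 7, d = 23;   /* toy key: p = 11, q = 17 */
  long x = 42, r = 5, ri = 75;   /* 5 * 75 = 375 = 2 * 187 + 1 */
  long y = x * powmod (r, e, n) % n;       /* blind: y = x * r^e mod n */
  long s = powmod (y, d, n) * ri % n;      /* decrypt, then unblind */
  printf ("%ld %ld\n", s, powmod (x, d, n)); /* both print 168 */
  return 0;
}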
*/ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("rsa_encrypt data", data); + if (!data || mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "ne", &pk.n, &pk.e, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("rsa_encrypt n", pk.n); + log_mpidump ("rsa_encrypt e", pk.e); + } + + /* Do RSA computation and build result. */ + ciph = mpi_new (0); + public (ciph, data, &pk); + if (DBG_CIPHER) + log_mpidump ("rsa_encrypt res", ciph); + if ((ctx.flags & PUBKEY_FLAG_FIXEDLEN)) + { + /* We need to make sure to return the correct length to avoid + problems with missing leading zeroes. */ + unsigned char *em; + size_t emlen = (mpi_get_nbits (pk.n)+7)/8; + + rc = _gcry_mpi_to_octet_string (&em, NULL, ciph, emlen); + if (!rc) + { + rc = sexp_build (r_ciph, NULL, "(enc-val(rsa(a%b)))", (int)emlen, em); + xfree (em); + } + } + else + rc = sexp_build (r_ciph, NULL, "(enc-val(rsa(a%m)))", ciph); + + leave: + _gcry_mpi_release (ciph); + _gcry_mpi_release (pk.n); + _gcry_mpi_release (pk.e); + _gcry_mpi_release (data); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("rsa_encrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +rsa_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) + +{ + gpg_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t data = NULL; + RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL}; + gcry_mpi_t plain = NULL; + unsigned char *unpad = NULL; + size_t unpadlen = 0; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, + rsa_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_preparse_encval (s_data, rsa_names, &l1, &ctx); + if (rc) + goto leave; + rc = sexp_extract_param (l1, NULL, "a", &data, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("rsa_decrypt data", data); + if (mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "nedp?q?u?", + &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u, + NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("rsa_decrypt n", sk.n); + log_printmpi ("rsa_decrypt e", sk.e); + if (!fips_mode ()) + { + log_printmpi ("rsa_decrypt d", sk.d); + log_printmpi ("rsa_decrypt p", sk.p); + log_printmpi ("rsa_decrypt q", sk.q); + log_printmpi ("rsa_decrypt u", sk.u); + } + } + + /* Better make sure that there are no superfluous leading zeroes in + the input and it has not been "padded" using multiples of N. + This mitigates side-channel attacks (CVE-2013-4576). */ + mpi_normalize (data); + mpi_fdiv_r (data, data, sk.n); + + /* Allocate MPI for the plaintext. */ + plain = mpi_snew (ctx.nbits); + + /* We use blinding by default to mitigate timing attacks which can + be practically mounted over the network as shown by Brumley and + Boney in 2003. */ + if ((ctx.flags & PUBKEY_FLAG_NO_BLINDING)) + secret (plain, data, &sk); + else + secret_blinded (plain, data, &sk, ctx.nbits); + + if (DBG_CIPHER) + log_printmpi ("rsa_decrypt res", plain); + + /* Reverse the encoding and build the s-expression. 
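The normalization before the private-key operation is cheap insurance:
c and c + k*n are the same residue and decrypt identically, so an
attacker gains nothing legitimate by padding a ciphertext with
multiples of n, but such oversized inputs could otherwise feed
attacker-controlled, data-dependent work into the modexp (the side
channel behind the CVE-2013-4576 reference above).  A toy check:

#include <stdio.h>

static long
powmod (long b, long e, long m)
{
  long r = 1;
  for (b %= m; e; e >>= 1, b = b * b % m)
    if (e & 1)
      r = r * b % m;
  return r;
}

int
main (void)
{
  long n = 187, d = 23, c = 15;  /* toy ciphertext of m = 42 */
  /* Same residue, same plaintext: both values printed are 42.  */
  printf ("%ld %ld\n", powmod (c, d, n), powmod ((c + 3 * n) % n, d, n));
  return 0;
}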
*/ + switch (ctx.encoding) + { + case PUBKEY_ENC_PKCS1: + rc = _gcry_rsa_pkcs1_decode_for_enc (&unpad, &unpadlen, ctx.nbits, plain); + mpi_free (plain); + plain = NULL; + if (!rc) + rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad); + break; + + case PUBKEY_ENC_OAEP: + rc = _gcry_rsa_oaep_decode (&unpad, &unpadlen, + ctx.nbits, ctx.hash_algo, + plain, ctx.label, ctx.labellen); + mpi_free (plain); + plain = NULL; + if (!rc) + rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad); + break; + + default: + /* Raw format. For backward compatibility we need to assume a + signed mpi by using the sexp format string "%m". */ + rc = sexp_build (r_plain, NULL, + (ctx.flags & PUBKEY_FLAG_LEGACYRESULT) + ? "%m":"(value %m)", plain); + break; + } + + leave: + xfree (unpad); + _gcry_mpi_release (plain); + _gcry_mpi_release (sk.n); + _gcry_mpi_release (sk.e); + _gcry_mpi_release (sk.d); + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.u); + _gcry_mpi_release (data); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("rsa_decrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +rsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gpg_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t data = NULL; + RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL}; + RSA_public_key pk; + gcry_mpi_t sig = NULL; + gcry_mpi_t result = NULL; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, + rsa_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("rsa_sign data", data); + if (mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "nedp?q?u?", + &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u, + NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("rsa_sign n", sk.n); + log_printmpi ("rsa_sign e", sk.e); + if (!fips_mode ()) + { + log_printmpi ("rsa_sign d", sk.d); + log_printmpi ("rsa_sign p", sk.p); + log_printmpi ("rsa_sign q", sk.q); + log_printmpi ("rsa_sign u", sk.u); + } + } + + /* Do RSA computation. */ + sig = mpi_new (0); + if ((ctx.flags & PUBKEY_FLAG_NO_BLINDING)) + secret (sig, data, &sk); + else + secret_blinded (sig, data, &sk, ctx.nbits); + if (DBG_CIPHER) + log_printmpi ("rsa_sign res", sig); + + /* Check that the created signature is good. This detects a failure + of the CRT algorithm (Lenstra's attack on RSA's use of the CRT). */ + result = mpi_new (0); + pk.n = sk.n; + pk.e = sk.e; + public (result, sig, &pk); + if (mpi_cmp (result, data)) + { + rc = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + /* Convert the result. */ + if ((ctx.flags & PUBKEY_FLAG_FIXEDLEN)) + { + /* We need to make sure to return the correct length to avoid + problems with missing leading zeroes. 
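The verify-before-return step in rsa_sign guards against the Lenstra
fault attack mentioned above: if a fault corrupts the mod-p half m1
while m2 stays correct, the bogus signature s' still satisfies
s'^e == m (mod q) but not (mod p), so gcd(s'^e - m, n) factors n.  A
toy demonstration with the illustrative key from the earlier sketches:

#include <stdio.h>

static long
powmod (long b, long e, long m)
{
  long r = 1;
  for (b %= m; e; e >>= 1, b = b * b % m)
    if (e & 1)
      r = r * b % m;
  return r;
}

static long
gcd (long a, long b)
{
  while (b)
    {
      long t = a % b;
      a = b;
      b = t;
    }
  return a;
}

int
main (void)
{
  long n = 187, e = 7, m = 42;  /* toy key: p = 11, q = 17, d = 23 */
  long s_good = 168;            /* 42^23 mod 187 */
  long s_bad = 15;              /* CRT recombination with m1 off by one */
  printf ("%ld\n", gcd ((powmod (s_good, e, n) - m + n) % n, n)); /* 187 */
  printf ("%ld\n", gcd ((powmod (s_bad, e, n) - m + n) % n, n));  /* 17 == q */
  return 0;
}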
*/ + unsigned char *em; + size_t emlen = (mpi_get_nbits (sk.n)+7)/8; + + rc = _gcry_mpi_to_octet_string (&em, NULL, sig, emlen); + if (!rc) + { + rc = sexp_build (r_sig, NULL, "(sig-val(rsa(s%b)))", (int)emlen, em); + xfree (em); + } + } + else + rc = sexp_build (r_sig, NULL, "(sig-val(rsa(s%M)))", sig); + + + leave: + _gcry_mpi_release (result); + _gcry_mpi_release (sig); + _gcry_mpi_release (sk.n); + _gcry_mpi_release (sk.e); + _gcry_mpi_release (sk.d); + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.u); + _gcry_mpi_release (data); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("rsa_sign => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +rsa_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t sig = NULL; + gcry_mpi_t data = NULL; + RSA_public_key pk = { NULL, NULL }; + gcry_mpi_t result = NULL; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, + rsa_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("rsa_verify data", data); + if (mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the signature value. */ + rc = _gcry_pk_util_preparse_sigval (s_sig, rsa_names, &l1, NULL); + if (rc) + goto leave; + rc = sexp_extract_param (l1, NULL, "s", &sig, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("rsa_verify sig", sig); + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "ne", &pk.n, &pk.e, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("rsa_verify n", pk.n); + log_printmpi ("rsa_verify e", pk.e); + } + + /* Do RSA computation and compare. */ + result = mpi_new (0); + public (result, sig, &pk); + if (DBG_CIPHER) + log_printmpi ("rsa_verify cmp", result); + if (ctx.verify_cmp) + rc = ctx.verify_cmp (&ctx, result); + else + rc = mpi_cmp (result, data) ? GPG_ERR_BAD_SIGNATURE : 0; + + leave: + _gcry_mpi_release (result); + _gcry_mpi_release (pk.n); + _gcry_mpi_release (pk.e); + _gcry_mpi_release (data); + _gcry_mpi_release (sig); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("rsa_verify => %s\n", rc?gpg_strerror (rc):"Good"); + return rc; +} + + + +/* Return the number of bits for the key described by PARMS. On error + * 0 is returned. The format of PARMS starts with the algorithm name; + * for example: + * + * (rsa + * (n <mpi>) + * (e <mpi>)) + * + * More parameters may be given but we only need N here. + */ +static unsigned int +rsa_get_nbits (gcry_sexp_t parms) +{ + gcry_sexp_t l1; + gcry_mpi_t n; + unsigned int nbits; + + l1 = sexp_find_token (parms, "n", 1); + if (!l1) + return 0; /* Parameter N not found. */ + + n = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + nbits = n? mpi_get_nbits (n) : 0; + _gcry_mpi_release (n); + return nbits; +} + + +/* Compute a keygrip. MD is the hash context which we are going to + update. KEYPARAM is an S-expression with the key parameters, this + is usually a public key but may also be a secret key. 
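Concretely, compute_keygrip below ends up hashing only the raw
big-endian bytes of the modulus; through the public API the same value
is returned by gcry_pk_get_keygrip.  A hedged sketch of the direct
computation (hypothetical helper, not part of this file):

#include <gcrypt.h>

/* SHA-1 over the raw bytes of n, mirroring what compute_keygrip
   feeds into the caller-provided digest handle.  */
static void
rsa_keygrip_from_n (const unsigned char *n, size_t nlen,
                    unsigned char grip[20])
{
  gcry_md_hash_buffer (GCRY_MD_SHA1, grip, n, nlen);
}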
An example + of such an S-expression is: + + (rsa + (n #00B...#) + (e #010001#)) + + PKCS-15 says that for RSA only the modulus should be hashed - + however, it is not clear whether this is meant to use the raw bytes + (assuming this is an unsigned integer) or whether the DER required + 0 should be prefixed. We hash the raw bytes. */ +static gpg_err_code_t +compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparam) +{ + gcry_sexp_t l1; + const char *data; + size_t datalen; + + l1 = sexp_find_token (keyparam, "n", 1); + if (!l1) + return GPG_ERR_NO_OBJ; + + data = sexp_nth_data (l1, 1, &datalen); + if (!data) + { + sexp_release (l1); + return GPG_ERR_NO_OBJ; + } + + _gcry_md_write (md, data, datalen); + sexp_release (l1); + + return 0; +} + + + + +/* + Self-test section. + */ + +static const char * +selftest_sign_2048 (gcry_sexp_t pkey, gcry_sexp_t skey) +{ + static const char sample_data[] = + "(data (flags pkcs1)" + " (hash sha256 #11223344556677889900aabbccddeeff" + /**/ "102030405060708090a0b0c0d0f01121#))"; + static const char sample_data_bad[] = + "(data (flags pkcs1)" + " (hash sha256 #11223344556677889900aabbccddeeff" + /**/ "802030405060708090a0b0c0d0f01121#))"; + + const char *errtxt = NULL; + gcry_error_t err; + gcry_sexp_t data = NULL; + gcry_sexp_t data_bad = NULL; + gcry_sexp_t sig = NULL; + /* raw signature data reference */ + const char ref_data[] = + "6252a19a11e1d5155ed9376036277193d644fa239397fff03e9b92d6f86415d6" + "d30da9273775f290e580d038295ff8ff89522becccfa6ae870bf76b76df402a8" + "54f69347e3db3de8e1e7d4dada281ec556810c7a8ecd0b5f51f9b1c0e7aa7557" + "61aa2b8ba5f811304acc6af0eca41fe49baf33bf34eddaf44e21e036ac7f0b68" + "03cdef1c60021fb7b5b97ebacdd88ab755ce29af568dbc5728cc6e6eff42618d" + "62a0386ca8beed46402bdeeef29b6a3feded906bace411a06a39192bf516ae10" + "67e4320fa8ea113968525f4574d022a3ceeaafdc41079efe1f22cc94bf59d8d3" + "328085da9674857db56de5978a62394aab48aa3b72e23a1b16260cfd9daafe65"; + gcry_mpi_t ref_mpi = NULL; + gcry_mpi_t sig_mpi = NULL; + + err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data)); + if (!err) + err = sexp_sscan (&data_bad, NULL, + sample_data_bad, strlen (sample_data_bad)); + if (err) + { + errtxt = "converting data failed"; + goto leave; + } + + err = _gcry_pk_sign (&sig, data, skey); + if (err) + { + errtxt = "signing failed"; + goto leave; + } + + err = _gcry_mpi_scan(&ref_mpi, GCRYMPI_FMT_HEX, ref_data, 0, NULL); + if (err) + { + errtxt = "converting ref_data to mpi failed"; + goto leave; + } + + err = _gcry_sexp_extract_param(sig, "sig-val!rsa", "s", &sig_mpi, NULL); + if (err) + { + errtxt = "extracting signature data failed"; + goto leave; + } + + if (mpi_cmp (sig_mpi, ref_mpi)) + { + errtxt = "signature does not match reference data"; + goto leave; + } + + err = _gcry_pk_verify (sig, data, pkey); + if (err) + { + errtxt = "verify failed"; + goto leave; + } + err = _gcry_pk_verify (sig, data_bad, pkey); + if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE) + { + errtxt = "bad signature not detected"; + goto leave; + } + + + leave: + sexp_release (sig); + sexp_release (data_bad); + sexp_release (data); + _gcry_mpi_release (ref_mpi); + _gcry_mpi_release (sig_mpi); + return errtxt; +} + + + +/* Given an S-expression ENCR_DATA of the form: + + (enc-val + (rsa + (a a-value))) + + as returned by gcry_pk_decrypt, return the the A-VALUE. On error, + return NULL. 
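The sample_data in selftest_sign_2048 shows the canonical way to hand a
prehashed message to the sign operation: a (data ...) S-expression
carrying the pkcs1 flag and the raw digest.  Built programmatically
with the public API it might look like this; a hedged sketch in which
"digest" stands for a caller-supplied 32-byte SHA-256 value (error
handling trimmed):

#include <gcrypt.h>

static gcry_error_t
sign_prehashed (gcry_sexp_t *r_sig, const unsigned char digest[32],
                gcry_sexp_t seckey)
{
  gcry_sexp_t data;
  gcry_error_t err;

  err = gcry_sexp_build (&data, NULL,
                         "(data (flags pkcs1) (hash sha256 %b))",
                         32, digest);
  if (err)
    return err;
  err = gcry_pk_sign (r_sig, data, seckey);
  gcry_sexp_release (data);
  return err;
}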
*/ +static gcry_mpi_t +extract_a_from_sexp (gcry_sexp_t encr_data) +{ + gcry_sexp_t l1, l2, l3; + gcry_mpi_t a_value; + + l1 = sexp_find_token (encr_data, "enc-val", 0); + if (!l1) + return NULL; + l2 = sexp_find_token (l1, "rsa", 0); + sexp_release (l1); + if (!l2) + return NULL; + l3 = sexp_find_token (l2, "a", 0); + sexp_release (l2); + if (!l3) + return NULL; + a_value = sexp_nth_mpi (l3, 1, 0); + sexp_release (l3); + + return a_value; +} + + +static const char * +selftest_encr_2048 (gcry_sexp_t pkey, gcry_sexp_t skey) +{ + const char *errtxt = NULL; + gcry_error_t err; + static const char plaintext[] = + "Jim quickly realized that the beautiful gowns are expensive."; + gcry_sexp_t plain = NULL; + gcry_sexp_t encr = NULL; + gcry_mpi_t ciphertext = NULL; + gcry_sexp_t decr = NULL; + char *decr_plaintext = NULL; + gcry_sexp_t tmplist = NULL; + /* expected result of encrypting the plaintext with sample_secret_key */ + static const char ref_data[] = + "18022e2593a402a737caaa93b4c7e750e20ca265452980e1d6b7710fbd3e" + "7dce72be5c2110fb47691cb38f42170ee3b4a37f2498d4a51567d762585e" + "4cb81d04fbc7df4144f8e5eac2d4b8688521b64011f11d7ad53f4c874004" + "819856f2e2a6f83d1c9c4e73ac26089789c14482b0b8d44139133c88c4a5" + "2dba9dd6d6ffc622666b7d129168333d999706af30a2d7d272db7734e5ed" + "fb8c64ea3018af3ad20f4a013a5060cb0f5e72753967bebe294280a6ed0d" + "dbd3c4f11d0a8696e9d32a0dc03deb0b5e49b2cbd1503392642d4e1211f3" + "e8e2ee38abaa3671ccd57fcde8ca76e85fd2cb77c35706a970a213a27352" + "cec92a9604d543ddb5fc478ff50e0622"; + gcry_mpi_t ref_mpi = NULL; + + /* Put the plaintext into an S-expression. */ + err = sexp_build (&plain, NULL, "(data (flags raw) (value %s))", plaintext); + if (err) + { + errtxt = "converting data failed"; + goto leave; + } + + /* Encrypt. */ + err = _gcry_pk_encrypt (&encr, plain, pkey); + if (err) + { + errtxt = "encrypt failed"; + goto leave; + } + + err = _gcry_mpi_scan(&ref_mpi, GCRYMPI_FMT_HEX, ref_data, 0, NULL); + if (err) + { + errtxt = "converting encrydata to mpi failed"; + goto leave; + } + + /* Extraxt the ciphertext from the returned S-expression. */ + /*sexp_dump (encr);*/ + ciphertext = extract_a_from_sexp (encr); + if (!ciphertext) + { + errtxt = "gcry_pk_decrypt returned garbage"; + goto leave; + } + + /* Check that the ciphertext does no match the plaintext. */ + /* _gcry_log_printmpi ("plaintext", plaintext); */ + /* _gcry_log_printmpi ("ciphertxt", ciphertext); */ + if (mpi_cmp (ref_mpi, ciphertext)) + { + errtxt = "ciphertext doesn't match reference data"; + goto leave; + } + + /* Decrypt. */ + err = _gcry_pk_decrypt (&decr, encr, skey); + if (err) + { + errtxt = "decrypt failed"; + goto leave; + } + + /* Extract the decrypted data from the S-expression. Note that the + output of gcry_pk_decrypt depends on whether a flags lists occurs + in its input data. Because we passed the output of + gcry_pk_encrypt directly to gcry_pk_decrypt, such a flag value + won't be there as of today. To be prepared for future changes we + take care of it anyway. */ + tmplist = sexp_find_token (decr, "value", 0); + if (tmplist) + decr_plaintext = sexp_nth_string (tmplist, 1); + else + decr_plaintext = sexp_nth_string (decr, 0); + if (!decr_plaintext) + { + errtxt = "decrypt returned no plaintext"; + goto leave; + } + + /* Check that the decrypted plaintext matches the original plaintext. 
*/ + if (strcmp (plaintext, decr_plaintext)) + { + errtxt = "mismatch"; + goto leave; + } + + leave: + sexp_release (tmplist); + xfree (decr_plaintext); + sexp_release (decr); + _gcry_mpi_release (ciphertext); + _gcry_mpi_release (ref_mpi); + sexp_release (encr); + sexp_release (plain); + return errtxt; +} + + +static gpg_err_code_t +selftests_rsa (selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + gcry_error_t err; + gcry_sexp_t skey = NULL; + gcry_sexp_t pkey = NULL; + + /* Convert the S-expressions into the internal representation. */ + what = "convert"; + err = sexp_sscan (&skey, NULL, sample_secret_key, strlen (sample_secret_key)); + if (!err) + err = sexp_sscan (&pkey, NULL, + sample_public_key, strlen (sample_public_key)); + if (err) + { + errtxt = _gcry_strerror (err); + goto failed; + } + + what = "key consistency"; + err = _gcry_pk_testkey (skey); + if (err) + { + errtxt = _gcry_strerror (err); + goto failed; + } + + what = "sign"; + errtxt = selftest_sign_2048 (pkey, skey); + if (errtxt) + goto failed; + + what = "encrypt"; + errtxt = selftest_encr_2048 (pkey, skey); + if (errtxt) + goto failed; + + sexp_release (pkey); + sexp_release (skey); + return 0; /* Succeeded. */ + + failed: + sexp_release (pkey); + sexp_release (skey); + if (report) + report ("pubkey", GCRY_PK_RSA, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + (void)extended; + + switch (algo) + { + case GCRY_PK_RSA: + ec = selftests_rsa (report); + break; + default: + ec = GPG_ERR_PUBKEY_ALGO; + break; + + } + return ec; +} + + + + +gcry_pk_spec_t _gcry_pubkey_spec_rsa = + { + GCRY_PK_RSA, { 0, 1 }, + (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR), + "RSA", rsa_names, + "ne", "nedpqu", "a", "s", "n", + rsa_generate, + rsa_check_secret_key, + rsa_encrypt, + rsa_decrypt, + rsa_sign, + rsa_verify, + rsa_get_nbits, + run_selftests, + compute_keygrip + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/salsa20-amd64.S b/libotr/libgcrypt-1.8.7/cipher/salsa20-amd64.S new file mode 100644 index 0000000..470c32a --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/salsa20-amd64.S @@ -0,0 +1,931 @@ +/* salsa20-amd64.S - AMD64 implementation of Salsa20 + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on public domain implementation by D. J. Bernstein at + * http://cr.yp.to/snuffle.html + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +.text + +.align 8 +.globl _gcry_salsa20_amd64_keysetup +ELF(.type _gcry_salsa20_amd64_keysetup,@function;) +_gcry_salsa20_amd64_keysetup: + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%eax + movl 12(%rsi),%r10d + movl %r8d,20(%rdi) + movl %r9d,40(%rdi) + movl %eax,60(%rdi) + movl %r10d,48(%rdi) + cmp $256,%rdx + jb .L_kbits128 +.L_kbits256: + movl 16(%rsi),%edx + movl 20(%rsi),%ecx + movl 24(%rsi),%r8d + movl 28(%rsi),%esi + movl %edx,28(%rdi) + movl %ecx,16(%rdi) + movl %r8d,36(%rdi) + movl %esi,56(%rdi) + mov $1634760805,%rsi + mov $857760878,%rdx + mov $2036477234,%rcx + mov $1797285236,%r8 + movl %esi,0(%rdi) + movl %edx,4(%rdi) + movl %ecx,8(%rdi) + movl %r8d,12(%rdi) + jmp .L_keysetupdone +.L_kbits128: + movl 0(%rsi),%edx + movl 4(%rsi),%ecx + movl 8(%rsi),%r8d + movl 12(%rsi),%esi + movl %edx,28(%rdi) + movl %ecx,16(%rdi) + movl %r8d,36(%rdi) + movl %esi,56(%rdi) + mov $1634760805,%rsi + mov $824206446,%rdx + mov $2036477238,%rcx + mov $1797285236,%r8 + movl %esi,0(%rdi) + movl %edx,4(%rdi) + movl %ecx,8(%rdi) + movl %r8d,12(%rdi) +.L_keysetupdone: + ret + +.align 8 +.globl _gcry_salsa20_amd64_ivsetup +ELF(.type _gcry_salsa20_amd64_ivsetup,@function;) +_gcry_salsa20_amd64_ivsetup: + movl 0(%rsi),%r8d + movl 4(%rsi),%esi + mov $0,%r9 + mov $0,%rax + movl %r8d,24(%rdi) + movl %esi,44(%rdi) + movl %r9d,32(%rdi) + movl %eax,52(%rdi) + ret + +.align 8 +.globl _gcry_salsa20_amd64_encrypt_blocks +ELF(.type _gcry_salsa20_amd64_encrypt_blocks,@function;) +_gcry_salsa20_amd64_encrypt_blocks: + /* + * Modifications to original implementation: + * - Number of rounds passing in register %r8 (for Salsa20/12). + * - Length is input as number of blocks, so don't handle tail bytes + * (this is done in salsa20.c). + */ + push %rbx + shlq $6, %rcx /* blocks to bytes */ + mov %r8, %rbx + mov %rsp,%r11 + and $31,%r11 + add $384,%r11 + sub %r11,%rsp + mov %rdi,%r8 + mov %rsi,%rsi + mov %rdx,%rdi + mov %rcx,%rdx + cmp $0,%rdx + jbe .L_done +.L_start: + cmp $256,%rdx + jb .L_bytes_are_64_128_or_192 + movdqa 0(%r8),%xmm0 + pshufd $0x55,%xmm0,%xmm1 + pshufd $0xaa,%xmm0,%xmm2 + pshufd $0xff,%xmm0,%xmm3 + pshufd $0x00,%xmm0,%xmm0 + movdqa %xmm1,0(%rsp) + movdqa %xmm2,16(%rsp) + movdqa %xmm3,32(%rsp) + movdqa %xmm0,48(%rsp) + movdqa 16(%r8),%xmm0 + pshufd $0xaa,%xmm0,%xmm1 + pshufd $0xff,%xmm0,%xmm2 + pshufd $0x00,%xmm0,%xmm3 + pshufd $0x55,%xmm0,%xmm0 + movdqa %xmm1,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm3,96(%rsp) + movdqa %xmm0,112(%rsp) + movdqa 32(%r8),%xmm0 + pshufd $0xff,%xmm0,%xmm1 + pshufd $0x55,%xmm0,%xmm2 + pshufd $0xaa,%xmm0,%xmm0 + movdqa %xmm1,128(%rsp) + movdqa %xmm2,144(%rsp) + movdqa %xmm0,160(%rsp) + movdqa 48(%r8),%xmm0 + pshufd $0x00,%xmm0,%xmm1 + pshufd $0xaa,%xmm0,%xmm2 + pshufd $0xff,%xmm0,%xmm0 + movdqa %xmm1,176(%rsp) + movdqa %xmm2,192(%rsp) + movdqa %xmm0,208(%rsp) +.L_bytesatleast256: + movl 32(%r8),%ecx + movl 52(%r8),%r9d + movl %ecx,224(%rsp) + movl %r9d,240(%rsp) + add $1,%ecx + adc $0,%r9d + movl %ecx,4+224(%rsp) + movl %r9d,4+240(%rsp) + add $1,%ecx + adc $0,%r9d + movl %ecx,8+224(%rsp) + movl %r9d,8+240(%rsp) + add $1,%ecx + adc $0,%r9d + movl %ecx,12+224(%rsp) + movl %r9d,12+240(%rsp) + add $1,%ecx + adc $0,%r9d + movl %ecx,32(%r8) + movl %r9d,52(%r8) + movq %rdx,288(%rsp) + mov %rbx,%rdx + movdqa 0(%rsp),%xmm0 + movdqa 16(%rsp),%xmm1 + movdqa 32(%rsp),%xmm2 + movdqa 192(%rsp),%xmm3 + movdqa 208(%rsp),%xmm4 + movdqa 64(%rsp),%xmm5 + movdqa 80(%rsp),%xmm6 + movdqa 112(%rsp),%xmm7 + movdqa 128(%rsp),%xmm8 + movdqa 144(%rsp),%xmm9 + movdqa 
160(%rsp),%xmm10 + movdqa 240(%rsp),%xmm11 + movdqa 48(%rsp),%xmm12 + movdqa 96(%rsp),%xmm13 + movdqa 176(%rsp),%xmm14 + movdqa 224(%rsp),%xmm15 +.L_mainloop1: + movdqa %xmm1,256(%rsp) + movdqa %xmm2,272(%rsp) + movdqa %xmm13,%xmm1 + paddd %xmm12,%xmm1 + movdqa %xmm1,%xmm2 + pslld $7,%xmm1 + pxor %xmm1,%xmm14 + psrld $25,%xmm2 + pxor %xmm2,%xmm14 + movdqa %xmm7,%xmm1 + paddd %xmm0,%xmm1 + movdqa %xmm1,%xmm2 + pslld $7,%xmm1 + pxor %xmm1,%xmm11 + psrld $25,%xmm2 + pxor %xmm2,%xmm11 + movdqa %xmm12,%xmm1 + paddd %xmm14,%xmm1 + movdqa %xmm1,%xmm2 + pslld $9,%xmm1 + pxor %xmm1,%xmm15 + psrld $23,%xmm2 + pxor %xmm2,%xmm15 + movdqa %xmm0,%xmm1 + paddd %xmm11,%xmm1 + movdqa %xmm1,%xmm2 + pslld $9,%xmm1 + pxor %xmm1,%xmm9 + psrld $23,%xmm2 + pxor %xmm2,%xmm9 + movdqa %xmm14,%xmm1 + paddd %xmm15,%xmm1 + movdqa %xmm1,%xmm2 + pslld $13,%xmm1 + pxor %xmm1,%xmm13 + psrld $19,%xmm2 + pxor %xmm2,%xmm13 + movdqa %xmm11,%xmm1 + paddd %xmm9,%xmm1 + movdqa %xmm1,%xmm2 + pslld $13,%xmm1 + pxor %xmm1,%xmm7 + psrld $19,%xmm2 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm1 + paddd %xmm13,%xmm1 + movdqa %xmm1,%xmm2 + pslld $18,%xmm1 + pxor %xmm1,%xmm12 + psrld $14,%xmm2 + pxor %xmm2,%xmm12 + movdqa 256(%rsp),%xmm1 + movdqa %xmm12,256(%rsp) + movdqa %xmm9,%xmm2 + paddd %xmm7,%xmm2 + movdqa %xmm2,%xmm12 + pslld $18,%xmm2 + pxor %xmm2,%xmm0 + psrld $14,%xmm12 + pxor %xmm12,%xmm0 + movdqa %xmm5,%xmm2 + paddd %xmm1,%xmm2 + movdqa %xmm2,%xmm12 + pslld $7,%xmm2 + pxor %xmm2,%xmm3 + psrld $25,%xmm12 + pxor %xmm12,%xmm3 + movdqa 272(%rsp),%xmm2 + movdqa %xmm0,272(%rsp) + movdqa %xmm6,%xmm0 + paddd %xmm2,%xmm0 + movdqa %xmm0,%xmm12 + pslld $7,%xmm0 + pxor %xmm0,%xmm4 + psrld $25,%xmm12 + pxor %xmm12,%xmm4 + movdqa %xmm1,%xmm0 + paddd %xmm3,%xmm0 + movdqa %xmm0,%xmm12 + pslld $9,%xmm0 + pxor %xmm0,%xmm10 + psrld $23,%xmm12 + pxor %xmm12,%xmm10 + movdqa %xmm2,%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,%xmm12 + pslld $9,%xmm0 + pxor %xmm0,%xmm8 + psrld $23,%xmm12 + pxor %xmm12,%xmm8 + movdqa %xmm3,%xmm0 + paddd %xmm10,%xmm0 + movdqa %xmm0,%xmm12 + pslld $13,%xmm0 + pxor %xmm0,%xmm5 + psrld $19,%xmm12 + pxor %xmm12,%xmm5 + movdqa %xmm4,%xmm0 + paddd %xmm8,%xmm0 + movdqa %xmm0,%xmm12 + pslld $13,%xmm0 + pxor %xmm0,%xmm6 + psrld $19,%xmm12 + pxor %xmm12,%xmm6 + movdqa %xmm10,%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,%xmm12 + pslld $18,%xmm0 + pxor %xmm0,%xmm1 + psrld $14,%xmm12 + pxor %xmm12,%xmm1 + movdqa 256(%rsp),%xmm0 + movdqa %xmm1,256(%rsp) + movdqa %xmm4,%xmm1 + paddd %xmm0,%xmm1 + movdqa %xmm1,%xmm12 + pslld $7,%xmm1 + pxor %xmm1,%xmm7 + psrld $25,%xmm12 + pxor %xmm12,%xmm7 + movdqa %xmm8,%xmm1 + paddd %xmm6,%xmm1 + movdqa %xmm1,%xmm12 + pslld $18,%xmm1 + pxor %xmm1,%xmm2 + psrld $14,%xmm12 + pxor %xmm12,%xmm2 + movdqa 272(%rsp),%xmm12 + movdqa %xmm2,272(%rsp) + movdqa %xmm14,%xmm1 + paddd %xmm12,%xmm1 + movdqa %xmm1,%xmm2 + pslld $7,%xmm1 + pxor %xmm1,%xmm5 + psrld $25,%xmm2 + pxor %xmm2,%xmm5 + movdqa %xmm0,%xmm1 + paddd %xmm7,%xmm1 + movdqa %xmm1,%xmm2 + pslld $9,%xmm1 + pxor %xmm1,%xmm10 + psrld $23,%xmm2 + pxor %xmm2,%xmm10 + movdqa %xmm12,%xmm1 + paddd %xmm5,%xmm1 + movdqa %xmm1,%xmm2 + pslld $9,%xmm1 + pxor %xmm1,%xmm8 + psrld $23,%xmm2 + pxor %xmm2,%xmm8 + movdqa %xmm7,%xmm1 + paddd %xmm10,%xmm1 + movdqa %xmm1,%xmm2 + pslld $13,%xmm1 + pxor %xmm1,%xmm4 + psrld $19,%xmm2 + pxor %xmm2,%xmm4 + movdqa %xmm5,%xmm1 + paddd %xmm8,%xmm1 + movdqa %xmm1,%xmm2 + pslld $13,%xmm1 + pxor %xmm1,%xmm14 + psrld $19,%xmm2 + pxor %xmm2,%xmm14 + movdqa %xmm10,%xmm1 + paddd %xmm4,%xmm1 + movdqa %xmm1,%xmm2 + pslld $18,%xmm1 + pxor %xmm1,%xmm0 
+ psrld $14,%xmm2 + pxor %xmm2,%xmm0 + movdqa 256(%rsp),%xmm1 + movdqa %xmm0,256(%rsp) + movdqa %xmm8,%xmm0 + paddd %xmm14,%xmm0 + movdqa %xmm0,%xmm2 + pslld $18,%xmm0 + pxor %xmm0,%xmm12 + psrld $14,%xmm2 + pxor %xmm2,%xmm12 + movdqa %xmm11,%xmm0 + paddd %xmm1,%xmm0 + movdqa %xmm0,%xmm2 + pslld $7,%xmm0 + pxor %xmm0,%xmm6 + psrld $25,%xmm2 + pxor %xmm2,%xmm6 + movdqa 272(%rsp),%xmm2 + movdqa %xmm12,272(%rsp) + movdqa %xmm3,%xmm0 + paddd %xmm2,%xmm0 + movdqa %xmm0,%xmm12 + pslld $7,%xmm0 + pxor %xmm0,%xmm13 + psrld $25,%xmm12 + pxor %xmm12,%xmm13 + movdqa %xmm1,%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,%xmm12 + pslld $9,%xmm0 + pxor %xmm0,%xmm15 + psrld $23,%xmm12 + pxor %xmm12,%xmm15 + movdqa %xmm2,%xmm0 + paddd %xmm13,%xmm0 + movdqa %xmm0,%xmm12 + pslld $9,%xmm0 + pxor %xmm0,%xmm9 + psrld $23,%xmm12 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm0 + paddd %xmm15,%xmm0 + movdqa %xmm0,%xmm12 + pslld $13,%xmm0 + pxor %xmm0,%xmm11 + psrld $19,%xmm12 + pxor %xmm12,%xmm11 + movdqa %xmm13,%xmm0 + paddd %xmm9,%xmm0 + movdqa %xmm0,%xmm12 + pslld $13,%xmm0 + pxor %xmm0,%xmm3 + psrld $19,%xmm12 + pxor %xmm12,%xmm3 + movdqa %xmm15,%xmm0 + paddd %xmm11,%xmm0 + movdqa %xmm0,%xmm12 + pslld $18,%xmm0 + pxor %xmm0,%xmm1 + psrld $14,%xmm12 + pxor %xmm12,%xmm1 + movdqa %xmm9,%xmm0 + paddd %xmm3,%xmm0 + movdqa %xmm0,%xmm12 + pslld $18,%xmm0 + pxor %xmm0,%xmm2 + psrld $14,%xmm12 + pxor %xmm12,%xmm2 + movdqa 256(%rsp),%xmm12 + movdqa 272(%rsp),%xmm0 + sub $2,%rdx + ja .L_mainloop1 + paddd 48(%rsp),%xmm12 + paddd 112(%rsp),%xmm7 + paddd 160(%rsp),%xmm10 + paddd 208(%rsp),%xmm4 + movd %xmm12,%rdx + movd %xmm7,%rcx + movd %xmm10,%r9 + movd %xmm4,%rax + pshufd $0x39,%xmm12,%xmm12 + pshufd $0x39,%xmm7,%xmm7 + pshufd $0x39,%xmm10,%xmm10 + pshufd $0x39,%xmm4,%xmm4 + xorl 0(%rsi),%edx + xorl 4(%rsi),%ecx + xorl 8(%rsi),%r9d + xorl 12(%rsi),%eax + movl %edx,0(%rdi) + movl %ecx,4(%rdi) + movl %r9d,8(%rdi) + movl %eax,12(%rdi) + movd %xmm12,%rdx + movd %xmm7,%rcx + movd %xmm10,%r9 + movd %xmm4,%rax + pshufd $0x39,%xmm12,%xmm12 + pshufd $0x39,%xmm7,%xmm7 + pshufd $0x39,%xmm10,%xmm10 + pshufd $0x39,%xmm4,%xmm4 + xorl 64(%rsi),%edx + xorl 68(%rsi),%ecx + xorl 72(%rsi),%r9d + xorl 76(%rsi),%eax + movl %edx,64(%rdi) + movl %ecx,68(%rdi) + movl %r9d,72(%rdi) + movl %eax,76(%rdi) + movd %xmm12,%rdx + movd %xmm7,%rcx + movd %xmm10,%r9 + movd %xmm4,%rax + pshufd $0x39,%xmm12,%xmm12 + pshufd $0x39,%xmm7,%xmm7 + pshufd $0x39,%xmm10,%xmm10 + pshufd $0x39,%xmm4,%xmm4 + xorl 128(%rsi),%edx + xorl 132(%rsi),%ecx + xorl 136(%rsi),%r9d + xorl 140(%rsi),%eax + movl %edx,128(%rdi) + movl %ecx,132(%rdi) + movl %r9d,136(%rdi) + movl %eax,140(%rdi) + movd %xmm12,%rdx + movd %xmm7,%rcx + movd %xmm10,%r9 + movd %xmm4,%rax + xorl 192(%rsi),%edx + xorl 196(%rsi),%ecx + xorl 200(%rsi),%r9d + xorl 204(%rsi),%eax + movl %edx,192(%rdi) + movl %ecx,196(%rdi) + movl %r9d,200(%rdi) + movl %eax,204(%rdi) + paddd 176(%rsp),%xmm14 + paddd 0(%rsp),%xmm0 + paddd 64(%rsp),%xmm5 + paddd 128(%rsp),%xmm8 + movd %xmm14,%rdx + movd %xmm0,%rcx + movd %xmm5,%r9 + movd %xmm8,%rax + pshufd $0x39,%xmm14,%xmm14 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x39,%xmm5,%xmm5 + pshufd $0x39,%xmm8,%xmm8 + xorl 16(%rsi),%edx + xorl 20(%rsi),%ecx + xorl 24(%rsi),%r9d + xorl 28(%rsi),%eax + movl %edx,16(%rdi) + movl %ecx,20(%rdi) + movl %r9d,24(%rdi) + movl %eax,28(%rdi) + movd %xmm14,%rdx + movd %xmm0,%rcx + movd %xmm5,%r9 + movd %xmm8,%rax + pshufd $0x39,%xmm14,%xmm14 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x39,%xmm5,%xmm5 + pshufd $0x39,%xmm8,%xmm8 + xorl 80(%rsi),%edx + xorl 84(%rsi),%ecx + 
xorl 88(%rsi),%r9d + xorl 92(%rsi),%eax + movl %edx,80(%rdi) + movl %ecx,84(%rdi) + movl %r9d,88(%rdi) + movl %eax,92(%rdi) + movd %xmm14,%rdx + movd %xmm0,%rcx + movd %xmm5,%r9 + movd %xmm8,%rax + pshufd $0x39,%xmm14,%xmm14 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x39,%xmm5,%xmm5 + pshufd $0x39,%xmm8,%xmm8 + xorl 144(%rsi),%edx + xorl 148(%rsi),%ecx + xorl 152(%rsi),%r9d + xorl 156(%rsi),%eax + movl %edx,144(%rdi) + movl %ecx,148(%rdi) + movl %r9d,152(%rdi) + movl %eax,156(%rdi) + movd %xmm14,%rdx + movd %xmm0,%rcx + movd %xmm5,%r9 + movd %xmm8,%rax + xorl 208(%rsi),%edx + xorl 212(%rsi),%ecx + xorl 216(%rsi),%r9d + xorl 220(%rsi),%eax + movl %edx,208(%rdi) + movl %ecx,212(%rdi) + movl %r9d,216(%rdi) + movl %eax,220(%rdi) + paddd 224(%rsp),%xmm15 + paddd 240(%rsp),%xmm11 + paddd 16(%rsp),%xmm1 + paddd 80(%rsp),%xmm6 + movd %xmm15,%rdx + movd %xmm11,%rcx + movd %xmm1,%r9 + movd %xmm6,%rax + pshufd $0x39,%xmm15,%xmm15 + pshufd $0x39,%xmm11,%xmm11 + pshufd $0x39,%xmm1,%xmm1 + pshufd $0x39,%xmm6,%xmm6 + xorl 32(%rsi),%edx + xorl 36(%rsi),%ecx + xorl 40(%rsi),%r9d + xorl 44(%rsi),%eax + movl %edx,32(%rdi) + movl %ecx,36(%rdi) + movl %r9d,40(%rdi) + movl %eax,44(%rdi) + movd %xmm15,%rdx + movd %xmm11,%rcx + movd %xmm1,%r9 + movd %xmm6,%rax + pshufd $0x39,%xmm15,%xmm15 + pshufd $0x39,%xmm11,%xmm11 + pshufd $0x39,%xmm1,%xmm1 + pshufd $0x39,%xmm6,%xmm6 + xorl 96(%rsi),%edx + xorl 100(%rsi),%ecx + xorl 104(%rsi),%r9d + xorl 108(%rsi),%eax + movl %edx,96(%rdi) + movl %ecx,100(%rdi) + movl %r9d,104(%rdi) + movl %eax,108(%rdi) + movd %xmm15,%rdx + movd %xmm11,%rcx + movd %xmm1,%r9 + movd %xmm6,%rax + pshufd $0x39,%xmm15,%xmm15 + pshufd $0x39,%xmm11,%xmm11 + pshufd $0x39,%xmm1,%xmm1 + pshufd $0x39,%xmm6,%xmm6 + xorl 160(%rsi),%edx + xorl 164(%rsi),%ecx + xorl 168(%rsi),%r9d + xorl 172(%rsi),%eax + movl %edx,160(%rdi) + movl %ecx,164(%rdi) + movl %r9d,168(%rdi) + movl %eax,172(%rdi) + movd %xmm15,%rdx + movd %xmm11,%rcx + movd %xmm1,%r9 + movd %xmm6,%rax + xorl 224(%rsi),%edx + xorl 228(%rsi),%ecx + xorl 232(%rsi),%r9d + xorl 236(%rsi),%eax + movl %edx,224(%rdi) + movl %ecx,228(%rdi) + movl %r9d,232(%rdi) + movl %eax,236(%rdi) + paddd 96(%rsp),%xmm13 + paddd 144(%rsp),%xmm9 + paddd 192(%rsp),%xmm3 + paddd 32(%rsp),%xmm2 + movd %xmm13,%rdx + movd %xmm9,%rcx + movd %xmm3,%r9 + movd %xmm2,%rax + pshufd $0x39,%xmm13,%xmm13 + pshufd $0x39,%xmm9,%xmm9 + pshufd $0x39,%xmm3,%xmm3 + pshufd $0x39,%xmm2,%xmm2 + xorl 48(%rsi),%edx + xorl 52(%rsi),%ecx + xorl 56(%rsi),%r9d + xorl 60(%rsi),%eax + movl %edx,48(%rdi) + movl %ecx,52(%rdi) + movl %r9d,56(%rdi) + movl %eax,60(%rdi) + movd %xmm13,%rdx + movd %xmm9,%rcx + movd %xmm3,%r9 + movd %xmm2,%rax + pshufd $0x39,%xmm13,%xmm13 + pshufd $0x39,%xmm9,%xmm9 + pshufd $0x39,%xmm3,%xmm3 + pshufd $0x39,%xmm2,%xmm2 + xorl 112(%rsi),%edx + xorl 116(%rsi),%ecx + xorl 120(%rsi),%r9d + xorl 124(%rsi),%eax + movl %edx,112(%rdi) + movl %ecx,116(%rdi) + movl %r9d,120(%rdi) + movl %eax,124(%rdi) + movd %xmm13,%rdx + movd %xmm9,%rcx + movd %xmm3,%r9 + movd %xmm2,%rax + pshufd $0x39,%xmm13,%xmm13 + pshufd $0x39,%xmm9,%xmm9 + pshufd $0x39,%xmm3,%xmm3 + pshufd $0x39,%xmm2,%xmm2 + xorl 176(%rsi),%edx + xorl 180(%rsi),%ecx + xorl 184(%rsi),%r9d + xorl 188(%rsi),%eax + movl %edx,176(%rdi) + movl %ecx,180(%rdi) + movl %r9d,184(%rdi) + movl %eax,188(%rdi) + movd %xmm13,%rdx + movd %xmm9,%rcx + movd %xmm3,%r9 + movd %xmm2,%rax + xorl 240(%rsi),%edx + xorl 244(%rsi),%ecx + xorl 248(%rsi),%r9d + xorl 252(%rsi),%eax + movl %edx,240(%rdi) + movl %ecx,244(%rdi) + movl %r9d,248(%rdi) + movl 
%eax,252(%rdi) + movq 288(%rsp),%rdx + sub $256,%rdx + add $256,%rsi + add $256,%rdi + cmp $256,%rdx + jae .L_bytesatleast256 + cmp $0,%rdx + jbe .L_done +.L_bytes_are_64_128_or_192: + movq %rdx,288(%rsp) + movdqa 0(%r8),%xmm0 + movdqa 16(%r8),%xmm1 + movdqa 32(%r8),%xmm2 + movdqa 48(%r8),%xmm3 + movdqa %xmm1,%xmm4 + mov %rbx,%rdx +.L_mainloop2: + paddd %xmm0,%xmm4 + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm6 + pslld $7,%xmm4 + psrld $25,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm6,%xmm3 + paddd %xmm3,%xmm5 + movdqa %xmm3,%xmm4 + movdqa %xmm5,%xmm6 + pslld $9,%xmm5 + psrld $23,%xmm6 + pxor %xmm5,%xmm2 + pshufd $0x93,%xmm3,%xmm3 + pxor %xmm6,%xmm2 + paddd %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm4,%xmm6 + pslld $13,%xmm4 + psrld $19,%xmm6 + pxor %xmm4,%xmm1 + pshufd $0x4e,%xmm2,%xmm2 + pxor %xmm6,%xmm1 + paddd %xmm1,%xmm5 + movdqa %xmm3,%xmm4 + movdqa %xmm5,%xmm6 + pslld $18,%xmm5 + psrld $14,%xmm6 + pxor %xmm5,%xmm0 + pshufd $0x39,%xmm1,%xmm1 + pxor %xmm6,%xmm0 + paddd %xmm0,%xmm4 + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm6 + pslld $7,%xmm4 + psrld $25,%xmm6 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm1 + paddd %xmm1,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm5,%xmm6 + pslld $9,%xmm5 + psrld $23,%xmm6 + pxor %xmm5,%xmm2 + pshufd $0x93,%xmm1,%xmm1 + pxor %xmm6,%xmm2 + paddd %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm4,%xmm6 + pslld $13,%xmm4 + psrld $19,%xmm6 + pxor %xmm4,%xmm3 + pshufd $0x4e,%xmm2,%xmm2 + pxor %xmm6,%xmm3 + paddd %xmm3,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm5,%xmm6 + pslld $18,%xmm5 + psrld $14,%xmm6 + pxor %xmm5,%xmm0 + pshufd $0x39,%xmm3,%xmm3 + pxor %xmm6,%xmm0 + paddd %xmm0,%xmm4 + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm6 + pslld $7,%xmm4 + psrld $25,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm6,%xmm3 + paddd %xmm3,%xmm5 + movdqa %xmm3,%xmm4 + movdqa %xmm5,%xmm6 + pslld $9,%xmm5 + psrld $23,%xmm6 + pxor %xmm5,%xmm2 + pshufd $0x93,%xmm3,%xmm3 + pxor %xmm6,%xmm2 + paddd %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm4,%xmm6 + pslld $13,%xmm4 + psrld $19,%xmm6 + pxor %xmm4,%xmm1 + pshufd $0x4e,%xmm2,%xmm2 + pxor %xmm6,%xmm1 + paddd %xmm1,%xmm5 + movdqa %xmm3,%xmm4 + movdqa %xmm5,%xmm6 + pslld $18,%xmm5 + psrld $14,%xmm6 + pxor %xmm5,%xmm0 + pshufd $0x39,%xmm1,%xmm1 + pxor %xmm6,%xmm0 + paddd %xmm0,%xmm4 + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm6 + pslld $7,%xmm4 + psrld $25,%xmm6 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm1 + paddd %xmm1,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm5,%xmm6 + pslld $9,%xmm5 + psrld $23,%xmm6 + pxor %xmm5,%xmm2 + pshufd $0x93,%xmm1,%xmm1 + pxor %xmm6,%xmm2 + paddd %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm4,%xmm6 + pslld $13,%xmm4 + psrld $19,%xmm6 + pxor %xmm4,%xmm3 + pshufd $0x4e,%xmm2,%xmm2 + pxor %xmm6,%xmm3 + sub $4,%rdx + paddd %xmm3,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm5,%xmm6 + pslld $18,%xmm5 + pxor %xmm7,%xmm7 + psrld $14,%xmm6 + pxor %xmm5,%xmm0 + pshufd $0x39,%xmm3,%xmm3 + pxor %xmm6,%xmm0 + ja .L_mainloop2 + paddd 0(%r8),%xmm0 + paddd 16(%r8),%xmm1 + paddd 32(%r8),%xmm2 + paddd 48(%r8),%xmm3 + movd %xmm0,%rdx + movd %xmm1,%rcx + movd %xmm2,%rax + movd %xmm3,%r10 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x39,%xmm1,%xmm1 + pshufd $0x39,%xmm2,%xmm2 + pshufd $0x39,%xmm3,%xmm3 + xorl 0(%rsi),%edx + xorl 48(%rsi),%ecx + xorl 32(%rsi),%eax + xorl 16(%rsi),%r10d + movl %edx,0(%rdi) + movl %ecx,48(%rdi) + movl %eax,32(%rdi) + movl %r10d,16(%rdi) + movd %xmm0,%rdx + movd %xmm1,%rcx + movd %xmm2,%rax + movd %xmm3,%r10 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x39,%xmm1,%xmm1 + pshufd $0x39,%xmm2,%xmm2 + pshufd $0x39,%xmm3,%xmm3 + xorl 20(%rsi),%edx + xorl 4(%rsi),%ecx + xorl 
52(%rsi),%eax + xorl 36(%rsi),%r10d + movl %edx,20(%rdi) + movl %ecx,4(%rdi) + movl %eax,52(%rdi) + movl %r10d,36(%rdi) + movd %xmm0,%rdx + movd %xmm1,%rcx + movd %xmm2,%rax + movd %xmm3,%r10 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x39,%xmm1,%xmm1 + pshufd $0x39,%xmm2,%xmm2 + pshufd $0x39,%xmm3,%xmm3 + xorl 40(%rsi),%edx + xorl 24(%rsi),%ecx + xorl 8(%rsi),%eax + xorl 56(%rsi),%r10d + movl %edx,40(%rdi) + movl %ecx,24(%rdi) + movl %eax,8(%rdi) + movl %r10d,56(%rdi) + movd %xmm0,%rdx + movd %xmm1,%rcx + movd %xmm2,%rax + movd %xmm3,%r10 + xorl 60(%rsi),%edx + xorl 44(%rsi),%ecx + xorl 28(%rsi),%eax + xorl 12(%rsi),%r10d + movl %edx,60(%rdi) + movl %ecx,44(%rdi) + movl %eax,28(%rdi) + movl %r10d,12(%rdi) + movq 288(%rsp),%rdx + movl 32(%r8),%ecx + movl 52(%r8),%eax + add $1,%ecx + adc $0,%eax + movl %ecx,32(%r8) + movl %eax,52(%r8) + cmp $64,%rdx + ja .L_bytes_are_128_or_192 +.L_done: + add %r11,%rsp + mov %r11,%rax + pop %rbx + ret +.L_bytes_are_128_or_192: + sub $64,%rdx + add $64,%rdi + add $64,%rsi + jmp .L_bytes_are_64_128_or_192 +ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;) + +#endif /*defined(USE_SALSA20)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/salsa20-armv7-neon.S b/libotr/libgcrypt-1.8.7/cipher/salsa20-armv7-neon.S new file mode 100644 index 0000000..3686e3f --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/salsa20-armv7-neon.S @@ -0,0 +1,899 @@ +/* salsa-armv7-neon.S - ARM NEON implementation of Salsa20 cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) && defined(USE_SALSA20) + +/* + * Based on public domain implementation from SUPERCOP benchmarking framework + * by Peter Schwabe and D. J. Bernstein. Paper about the implementation at: + * http://cryptojedi.org/papers/#neoncrypto + */ + +.syntax unified +.arm +.fpu neon +.text + +.align 2 +.globl _gcry_arm_neon_salsa20_encrypt +.type _gcry_arm_neon_salsa20_encrypt,%function; +_gcry_arm_neon_salsa20_encrypt: + /* Modifications: + * - arguments changed to (void *c, const void *m, unsigned int nblks, + * void *ctx, unsigned int rounds) from (void *c, const void *m, + * unsigned long long mlen, const void *n, const void *k) + * - nonce and key read from 'ctx' as well as sigma and counter. + * - read in counter from 'ctx' at the start. + * - update counter in 'ctx' at the end. + * - length is input as number of blocks, so don't handle tail bytes + * (this is done in salsa20.c). 
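The modification list above pins down the calling convention for this NEON entry point; restated as a C prototype (matching the declaration that salsa20.c, later in this same patch, uses for it):

/* Encrypt NBLKS consecutive 64-byte blocks from M into C using the
 * 16-word Salsa20 state at CTX; the 64-bit block counter kept inside
 * CTX is advanced before returning.  When M is NULL, raw keystream is
 * written to C instead (that is how salsa20_core_neon drives it), and
 * the return value is apparently a stack-depth figure used for
 * burn-stack accounting. */
unsigned int _gcry_arm_neon_salsa20_encrypt (void *c, const void *m,
                                             unsigned int nblks,
                                             void *ctx, unsigned int rounds);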
+ */ + lsl r2,r2,#6 + vpush {q4,q5,q6,q7} + mov r12,sp + sub sp,sp,#352 + and sp,sp,#0xffffffe0 + strd r4,[sp,#0] + strd r6,[sp,#8] + strd r8,[sp,#16] + strd r10,[sp,#24] + str r14,[sp,#224] + str r12,[sp,#228] + str r0,[sp,#232] + str r1,[sp,#236] + str r2,[sp,#240] + ldr r4,[r12,#64] + str r4,[sp,#244] + mov r2,r3 + add r3,r2,#48 + vld1.8 {q3},[r2] + add r0,r2,#32 + add r14,r2,#40 + vmov.i64 q3,#0xff + str r14,[sp,#160] + ldrd r8,[r2,#4] + vld1.8 {d0},[r0] + ldrd r4,[r2,#20] + vld1.8 {d8-d9},[r2]! + ldrd r6,[r0,#0] + vmov d4,d9 + ldr r0,[r14] + vrev64.i32 d0,d0 + ldr r1,[r14,#4] + vld1.8 {d10-d11},[r2] + strd r6,[sp,#32] + sub r2,r2,#16 + strd r0,[sp,#40] + vmov d5,d11 + strd r8,[sp,#48] + vext.32 d1,d0,d10,#1 + strd r4,[sp,#56] + ldr r1,[r2,#0] + vshr.u32 q3,q3,#7 + ldr r4,[r2,#12] + vext.32 d3,d11,d9,#1 + ldr r11,[r2,#16] + vext.32 d2,d8,d0,#1 + ldr r8,[r2,#28] + vext.32 d0,d10,d8,#1 + ldr r0,[r3,#0] + add r2,r2,#44 + vmov q4,q3 + vld1.8 {d6-d7},[r14] + vadd.i64 q3,q3,q4 + ldr r5,[r3,#4] + add r12,sp,#256 + vst1.8 {d4-d5},[r12,: 128] + ldr r10,[r3,#8] + add r14,sp,#272 + vst1.8 {d2-d3},[r14,: 128] + ldr r9,[r3,#12] + vld1.8 {d2-d3},[r3] + strd r0,[sp,#64] + ldr r0,[sp,#240] + strd r4,[sp,#72] + strd r10,[sp,#80] + strd r8,[sp,#88] + nop + cmp r0,#192 + blo .L_mlenlowbelow192 +.L_mlenatleast192: + ldrd r2,[sp,#48] + vext.32 d7,d6,d6,#1 + vmov q8,q1 + ldrd r6,[sp,#32] + vld1.8 {d18-d19},[r12,: 128] + vmov q10,q0 + str r0,[sp,#240] + vext.32 d4,d7,d19,#1 + vmov q11,q8 + vext.32 d10,d18,d7,#1 + vadd.i64 q3,q3,q4 + ldrd r0,[sp,#64] + vld1.8 {d24-d25},[r14,: 128] + vmov d5,d24 + add r8,sp,#288 + ldrd r4,[sp,#72] + vmov d11,d25 + add r9,sp,#304 + ldrd r10,[sp,#80] + vst1.8 {d4-d5},[r8,: 128] + strd r2,[sp,#96] + vext.32 d7,d6,d6,#1 + vmov q13,q10 + strd r6,[sp,#104] + vmov d13,d24 + vst1.8 {d10-d11},[r9,: 128] + add r2,sp,#320 + vext.32 d12,d7,d19,#1 + vmov d15,d25 + add r6,sp,#336 + ldr r12,[sp,#244] + vext.32 d14,d18,d7,#1 + vadd.i64 q3,q3,q4 + ldrd r8,[sp,#88] + vst1.8 {d12-d13},[r2,: 128] + ldrd r2,[sp,#56] + vst1.8 {d14-d15},[r6,: 128] + ldrd r6,[sp,#40] +.L_mainloop2: + str r12,[sp,#248] + vadd.i32 q4,q10,q8 + vadd.i32 q9,q13,q11 + add r12,r0,r2 + add r14,r5,r1 + vshl.i32 q12,q4,#7 + vshl.i32 q14,q9,#7 + vshr.u32 q4,q4,#25 + vshr.u32 q9,q9,#25 + eor r4,r4,r12,ROR #25 + eor r7,r7,r14,ROR #25 + add r12,r4,r0 + add r14,r7,r5 + veor q5,q5,q12 + veor q7,q7,q14 + veor q4,q5,q4 + veor q5,q7,q9 + eor r6,r6,r12,ROR #23 + eor r3,r3,r14,ROR #23 + add r12,r6,r4 + str r7,[sp,#116] + add r7,r3,r7 + ldr r14,[sp,#108] + vadd.i32 q7,q8,q4 + vadd.i32 q9,q11,q5 + vshl.i32 q12,q7,#9 + vshl.i32 q14,q9,#9 + vshr.u32 q7,q7,#23 + vshr.u32 q9,q9,#23 + veor q2,q2,q12 + veor q6,q6,q14 + veor q2,q2,q7 + veor q6,q6,q9 + eor r2,r2,r12,ROR #19 + str r2,[sp,#120] + eor r1,r1,r7,ROR #19 + ldr r7,[sp,#96] + add r2,r2,r6 + str r6,[sp,#112] + add r6,r1,r3 + ldr r12,[sp,#104] + vadd.i32 q7,q4,q2 + vext.32 q4,q4,q4,#3 + vadd.i32 q9,q5,q6 + vshl.i32 q12,q7,#13 + vext.32 q5,q5,q5,#3 + vshl.i32 q14,q9,#13 + eor r0,r0,r2,ROR #14 + eor r2,r5,r6,ROR #14 + str r3,[sp,#124] + add r3,r10,r12 + ldr r5,[sp,#100] + add r6,r9,r11 + vshr.u32 q7,q7,#19 + vshr.u32 q9,q9,#19 + veor q10,q10,q12 + veor q12,q13,q14 + eor r8,r8,r3,ROR #25 + eor r3,r5,r6,ROR #25 + add r5,r8,r10 + add r6,r3,r9 + veor q7,q10,q7 + veor q9,q12,q9 + eor r5,r7,r5,ROR #23 + eor r6,r14,r6,ROR #23 + add r7,r5,r8 + add r14,r6,r3 + vadd.i32 q10,q2,q7 + vswp d4,d5 + vadd.i32 q12,q6,q9 + vshl.i32 q13,q10,#18 + vswp d12,d13 + vshl.i32 q14,q12,#18 + eor r7,r12,r7,ROR #19 + eor 
r11,r11,r14,ROR #19 + add r12,r7,r5 + add r14,r11,r6 + vshr.u32 q10,q10,#14 + vext.32 q7,q7,q7,#1 + vshr.u32 q12,q12,#14 + veor q8,q8,q13 + vext.32 q9,q9,q9,#1 + veor q11,q11,q14 + eor r10,r10,r12,ROR #14 + eor r9,r9,r14,ROR #14 + add r12,r0,r3 + add r14,r2,r4 + veor q8,q8,q10 + veor q10,q11,q12 + eor r1,r1,r12,ROR #25 + eor r7,r7,r14,ROR #25 + add r12,r1,r0 + add r14,r7,r2 + vadd.i32 q11,q4,q8 + vadd.i32 q12,q5,q10 + vshl.i32 q13,q11,#7 + vshl.i32 q14,q12,#7 + eor r5,r5,r12,ROR #23 + eor r6,r6,r14,ROR #23 + vshr.u32 q11,q11,#25 + vshr.u32 q12,q12,#25 + add r12,r5,r1 + add r14,r6,r7 + veor q7,q7,q13 + veor q9,q9,q14 + veor q7,q7,q11 + veor q9,q9,q12 + vadd.i32 q11,q8,q7 + vadd.i32 q12,q10,q9 + vshl.i32 q13,q11,#9 + vshl.i32 q14,q12,#9 + eor r3,r3,r12,ROR #19 + str r7,[sp,#104] + eor r4,r4,r14,ROR #19 + ldr r7,[sp,#112] + add r12,r3,r5 + str r6,[sp,#108] + add r6,r4,r6 + ldr r14,[sp,#116] + eor r0,r0,r12,ROR #14 + str r5,[sp,#96] + eor r5,r2,r6,ROR #14 + ldr r2,[sp,#120] + vshr.u32 q11,q11,#23 + vshr.u32 q12,q12,#23 + veor q2,q2,q13 + veor q6,q6,q14 + veor q2,q2,q11 + veor q6,q6,q12 + add r6,r10,r14 + add r12,r9,r8 + vadd.i32 q11,q7,q2 + vext.32 q7,q7,q7,#3 + vadd.i32 q12,q9,q6 + vshl.i32 q13,q11,#13 + vext.32 q9,q9,q9,#3 + vshl.i32 q14,q12,#13 + vshr.u32 q11,q11,#19 + vshr.u32 q12,q12,#19 + eor r11,r11,r6,ROR #25 + eor r2,r2,r12,ROR #25 + add r6,r11,r10 + str r3,[sp,#100] + add r3,r2,r9 + ldr r12,[sp,#124] + veor q4,q4,q13 + veor q5,q5,q14 + veor q4,q4,q11 + veor q5,q5,q12 + eor r6,r7,r6,ROR #23 + eor r3,r12,r3,ROR #23 + add r7,r6,r11 + add r12,r3,r2 + vadd.i32 q11,q2,q4 + vswp d4,d5 + vadd.i32 q12,q6,q5 + vshl.i32 q13,q11,#18 + vswp d12,d13 + vshl.i32 q14,q12,#18 + eor r7,r14,r7,ROR #19 + eor r8,r8,r12,ROR #19 + add r12,r7,r6 + add r14,r8,r3 + vshr.u32 q11,q11,#14 + vext.32 q4,q4,q4,#1 + vshr.u32 q12,q12,#14 + veor q8,q8,q13 + vext.32 q5,q5,q5,#1 + veor q10,q10,q14 + eor r10,r10,r12,ROR #14 + veor q8,q8,q11 + eor r9,r9,r14,ROR #14 + veor q10,q10,q12 + vadd.i32 q11,q7,q8 + vadd.i32 q12,q9,q10 + add r12,r0,r2 + add r14,r5,r1 + vshl.i32 q13,q11,#7 + vshl.i32 q14,q12,#7 + vshr.u32 q11,q11,#25 + vshr.u32 q12,q12,#25 + eor r4,r4,r12,ROR #25 + eor r7,r7,r14,ROR #25 + add r12,r4,r0 + add r14,r7,r5 + veor q4,q4,q13 + veor q5,q5,q14 + veor q4,q4,q11 + veor q5,q5,q12 + eor r6,r6,r12,ROR #23 + eor r3,r3,r14,ROR #23 + add r12,r6,r4 + str r7,[sp,#116] + add r7,r3,r7 + ldr r14,[sp,#108] + vadd.i32 q11,q8,q4 + vadd.i32 q12,q10,q5 + vshl.i32 q13,q11,#9 + vshl.i32 q14,q12,#9 + vshr.u32 q11,q11,#23 + vshr.u32 q12,q12,#23 + veor q2,q2,q13 + veor q6,q6,q14 + veor q2,q2,q11 + veor q6,q6,q12 + eor r2,r2,r12,ROR #19 + str r2,[sp,#120] + eor r1,r1,r7,ROR #19 + ldr r7,[sp,#96] + add r2,r2,r6 + str r6,[sp,#112] + add r6,r1,r3 + ldr r12,[sp,#104] + vadd.i32 q11,q4,q2 + vext.32 q4,q4,q4,#3 + vadd.i32 q12,q5,q6 + vshl.i32 q13,q11,#13 + vext.32 q5,q5,q5,#3 + vshl.i32 q14,q12,#13 + eor r0,r0,r2,ROR #14 + eor r2,r5,r6,ROR #14 + str r3,[sp,#124] + add r3,r10,r12 + ldr r5,[sp,#100] + add r6,r9,r11 + vshr.u32 q11,q11,#19 + vshr.u32 q12,q12,#19 + veor q7,q7,q13 + veor q9,q9,q14 + eor r8,r8,r3,ROR #25 + eor r3,r5,r6,ROR #25 + add r5,r8,r10 + add r6,r3,r9 + veor q7,q7,q11 + veor q9,q9,q12 + eor r5,r7,r5,ROR #23 + eor r6,r14,r6,ROR #23 + add r7,r5,r8 + add r14,r6,r3 + vadd.i32 q11,q2,q7 + vswp d4,d5 + vadd.i32 q12,q6,q9 + vshl.i32 q13,q11,#18 + vswp d12,d13 + vshl.i32 q14,q12,#18 + eor r7,r12,r7,ROR #19 + eor r11,r11,r14,ROR #19 + add r12,r7,r5 + add r14,r11,r6 + vshr.u32 q11,q11,#14 + vext.32 q7,q7,q7,#1 + vshr.u32 q12,q12,#14 
+ veor q8,q8,q13 + vext.32 q9,q9,q9,#1 + veor q10,q10,q14 + eor r10,r10,r12,ROR #14 + eor r9,r9,r14,ROR #14 + add r12,r0,r3 + add r14,r2,r4 + veor q8,q8,q11 + veor q11,q10,q12 + eor r1,r1,r12,ROR #25 + eor r7,r7,r14,ROR #25 + add r12,r1,r0 + add r14,r7,r2 + vadd.i32 q10,q4,q8 + vadd.i32 q12,q5,q11 + vshl.i32 q13,q10,#7 + vshl.i32 q14,q12,#7 + eor r5,r5,r12,ROR #23 + eor r6,r6,r14,ROR #23 + vshr.u32 q10,q10,#25 + vshr.u32 q12,q12,#25 + add r12,r5,r1 + add r14,r6,r7 + veor q7,q7,q13 + veor q9,q9,q14 + veor q7,q7,q10 + veor q9,q9,q12 + vadd.i32 q10,q8,q7 + vadd.i32 q12,q11,q9 + vshl.i32 q13,q10,#9 + vshl.i32 q14,q12,#9 + eor r3,r3,r12,ROR #19 + str r7,[sp,#104] + eor r4,r4,r14,ROR #19 + ldr r7,[sp,#112] + add r12,r3,r5 + str r6,[sp,#108] + add r6,r4,r6 + ldr r14,[sp,#116] + eor r0,r0,r12,ROR #14 + str r5,[sp,#96] + eor r5,r2,r6,ROR #14 + ldr r2,[sp,#120] + vshr.u32 q10,q10,#23 + vshr.u32 q12,q12,#23 + veor q2,q2,q13 + veor q6,q6,q14 + veor q2,q2,q10 + veor q6,q6,q12 + add r6,r10,r14 + add r12,r9,r8 + vadd.i32 q12,q7,q2 + vext.32 q10,q7,q7,#3 + vadd.i32 q7,q9,q6 + vshl.i32 q14,q12,#13 + vext.32 q13,q9,q9,#3 + vshl.i32 q9,q7,#13 + vshr.u32 q12,q12,#19 + vshr.u32 q7,q7,#19 + eor r11,r11,r6,ROR #25 + eor r2,r2,r12,ROR #25 + add r6,r11,r10 + str r3,[sp,#100] + add r3,r2,r9 + ldr r12,[sp,#124] + veor q4,q4,q14 + veor q5,q5,q9 + veor q4,q4,q12 + veor q7,q5,q7 + eor r6,r7,r6,ROR #23 + eor r3,r12,r3,ROR #23 + add r7,r6,r11 + add r12,r3,r2 + vadd.i32 q5,q2,q4 + vswp d4,d5 + vadd.i32 q9,q6,q7 + vshl.i32 q12,q5,#18 + vswp d12,d13 + vshl.i32 q14,q9,#18 + eor r7,r14,r7,ROR #19 + eor r8,r8,r12,ROR #19 + add r12,r7,r6 + add r14,r8,r3 + vshr.u32 q15,q5,#14 + vext.32 q5,q4,q4,#1 + vshr.u32 q4,q9,#14 + veor q8,q8,q12 + vext.32 q7,q7,q7,#1 + veor q9,q11,q14 + eor r10,r10,r12,ROR #14 + ldr r12,[sp,#248] + veor q8,q8,q15 + eor r9,r9,r14,ROR #14 + veor q11,q9,q4 + subs r12,r12,#4 + bhi .L_mainloop2 + strd r8,[sp,#112] + ldrd r8,[sp,#64] + strd r2,[sp,#120] + ldrd r2,[sp,#96] + add r0,r0,r8 + strd r10,[sp,#96] + add r1,r1,r9 + ldrd r10,[sp,#48] + ldrd r8,[sp,#72] + add r2,r2,r10 + strd r6,[sp,#128] + add r3,r3,r11 + ldrd r6,[sp,#104] + ldrd r10,[sp,#32] + ldr r12,[sp,#236] + add r4,r4,r8 + add r5,r5,r9 + add r6,r6,r10 + add r7,r7,r11 + cmp r12,#0 + beq .L_nomessage1 + ldr r8,[r12,#0] + ldr r9,[r12,#4] + ldr r10,[r12,#8] + ldr r11,[r12,#12] + eor r0,r0,r8 + ldr r8,[r12,#16] + eor r1,r1,r9 + ldr r9,[r12,#20] + eor r2,r2,r10 + ldr r10,[r12,#24] + eor r3,r3,r11 + ldr r11,[r12,#28] + eor r4,r4,r8 + eor r5,r5,r9 + eor r6,r6,r10 + eor r7,r7,r11 +.L_nomessage1: + ldr r14,[sp,#232] + vadd.i32 q4,q8,q1 + str r0,[r14,#0] + add r0,sp,#304 + str r1,[r14,#4] + vld1.8 {d16-d17},[r0,: 128] + str r2,[r14,#8] + vadd.i32 q5,q8,q5 + str r3,[r14,#12] + add r0,sp,#288 + str r4,[r14,#16] + vld1.8 {d16-d17},[r0,: 128] + str r5,[r14,#20] + vadd.i32 q9,q10,q0 + str r6,[r14,#24] + vadd.i32 q2,q8,q2 + str r7,[r14,#28] + vmov.i64 q8,#0xffffffff + ldrd r6,[sp,#128] + vext.32 d20,d8,d10,#1 + ldrd r0,[sp,#40] + vext.32 d25,d9,d11,#1 + ldrd r2,[sp,#120] + vbif q4,q9,q8 + ldrd r4,[sp,#56] + vext.32 d21,d5,d19,#1 + add r6,r6,r0 + vext.32 d24,d4,d18,#1 + add r7,r7,r1 + vbif q2,q5,q8 + add r2,r2,r4 + vrev64.i32 q5,q10 + add r3,r3,r5 + vrev64.i32 q9,q12 + adds r0,r0,#3 + vswp d5,d9 + adc r1,r1,#0 + strd r0,[sp,#40] + ldrd r8,[sp,#112] + ldrd r0,[sp,#88] + ldrd r10,[sp,#96] + ldrd r4,[sp,#80] + add r0,r8,r0 + add r1,r9,r1 + add r4,r10,r4 + add r5,r11,r5 + add r8,r14,#64 + cmp r12,#0 + beq .L_nomessage2 + ldr r9,[r12,#32] + ldr r10,[r12,#36] + ldr 
r11,[r12,#40] + ldr r14,[r12,#44] + eor r6,r6,r9 + ldr r9,[r12,#48] + eor r7,r7,r10 + ldr r10,[r12,#52] + eor r4,r4,r11 + ldr r11,[r12,#56] + eor r5,r5,r14 + ldr r14,[r12,#60] + add r12,r12,#64 + eor r2,r2,r9 + vld1.8 {d20-d21},[r12]! + veor q4,q4,q10 + eor r3,r3,r10 + vld1.8 {d20-d21},[r12]! + veor q5,q5,q10 + eor r0,r0,r11 + vld1.8 {d20-d21},[r12]! + veor q2,q2,q10 + eor r1,r1,r14 + vld1.8 {d20-d21},[r12]! + veor q9,q9,q10 +.L_nomessage2: + vst1.8 {d8-d9},[r8]! + vst1.8 {d10-d11},[r8]! + vmov.i64 q4,#0xff + vst1.8 {d4-d5},[r8]! + vst1.8 {d18-d19},[r8]! + str r6,[r8,#-96] + add r6,sp,#336 + str r7,[r8,#-92] + add r7,sp,#320 + str r4,[r8,#-88] + vadd.i32 q2,q11,q1 + vld1.8 {d10-d11},[r6,: 128] + vadd.i32 q5,q5,q7 + vld1.8 {d14-d15},[r7,: 128] + vadd.i32 q9,q13,q0 + vadd.i32 q6,q7,q6 + str r5,[r8,#-84] + vext.32 d14,d4,d10,#1 + str r2,[r8,#-80] + vext.32 d21,d5,d11,#1 + str r3,[r8,#-76] + vbif q2,q9,q8 + str r0,[r8,#-72] + vext.32 d15,d13,d19,#1 + vshr.u32 q4,q4,#7 + str r1,[r8,#-68] + vext.32 d20,d12,d18,#1 + vbif q6,q5,q8 + ldr r0,[sp,#240] + vrev64.i32 q5,q7 + vrev64.i32 q7,q10 + vswp d13,d5 + vadd.i64 q3,q3,q4 + sub r0,r0,#192 + cmp r12,#0 + beq .L_nomessage21 + vld1.8 {d16-d17},[r12]! + veor q2,q2,q8 + vld1.8 {d16-d17},[r12]! + veor q5,q5,q8 + vld1.8 {d16-d17},[r12]! + veor q6,q6,q8 + vld1.8 {d16-d17},[r12]! + veor q7,q7,q8 +.L_nomessage21: + vst1.8 {d4-d5},[r8]! + vst1.8 {d10-d11},[r8]! + vst1.8 {d12-d13},[r8]! + vst1.8 {d14-d15},[r8]! + str r12,[sp,#236] + add r14,sp,#272 + add r12,sp,#256 + str r8,[sp,#232] + cmp r0,#192 + bhs .L_mlenatleast192 +.L_mlenlowbelow192: + cmp r0,#0 + beq .L_done + b .L_mlenatleast1 +.L_nextblock: + sub r0,r0,#64 +.L_mlenatleast1: +.L_handleblock: + str r0,[sp,#248] + ldrd r2,[sp,#48] + ldrd r6,[sp,#32] + ldrd r0,[sp,#64] + ldrd r4,[sp,#72] + ldrd r10,[sp,#80] + ldrd r8,[sp,#88] + strd r2,[sp,#96] + strd r6,[sp,#104] + ldrd r2,[sp,#56] + ldrd r6,[sp,#40] + ldr r12,[sp,#244] +.L_mainloop1: + str r12,[sp,#252] + add r12,r0,r2 + add r14,r5,r1 + eor r4,r4,r12,ROR #25 + eor r7,r7,r14,ROR #25 + add r12,r4,r0 + add r14,r7,r5 + eor r6,r6,r12,ROR #23 + eor r3,r3,r14,ROR #23 + add r12,r6,r4 + str r7,[sp,#132] + add r7,r3,r7 + ldr r14,[sp,#104] + eor r2,r2,r12,ROR #19 + str r6,[sp,#128] + eor r1,r1,r7,ROR #19 + ldr r7,[sp,#100] + add r6,r2,r6 + str r2,[sp,#120] + add r2,r1,r3 + ldr r12,[sp,#96] + eor r0,r0,r6,ROR #14 + str r3,[sp,#124] + eor r2,r5,r2,ROR #14 + ldr r3,[sp,#108] + add r5,r10,r14 + add r6,r9,r11 + eor r8,r8,r5,ROR #25 + eor r5,r7,r6,ROR #25 + add r6,r8,r10 + add r7,r5,r9 + eor r6,r12,r6,ROR #23 + eor r3,r3,r7,ROR #23 + add r7,r6,r8 + add r12,r3,r5 + eor r7,r14,r7,ROR #19 + eor r11,r11,r12,ROR #19 + add r12,r7,r6 + add r14,r11,r3 + eor r10,r10,r12,ROR #14 + eor r9,r9,r14,ROR #14 + add r12,r0,r5 + add r14,r2,r4 + eor r1,r1,r12,ROR #25 + eor r7,r7,r14,ROR #25 + add r12,r1,r0 + add r14,r7,r2 + eor r6,r6,r12,ROR #23 + eor r3,r3,r14,ROR #23 + add r12,r6,r1 + str r7,[sp,#104] + add r7,r3,r7 + ldr r14,[sp,#128] + eor r5,r5,r12,ROR #19 + str r3,[sp,#108] + eor r4,r4,r7,ROR #19 + ldr r7,[sp,#132] + add r12,r5,r6 + str r6,[sp,#96] + add r3,r4,r3 + ldr r6,[sp,#120] + eor r0,r0,r12,ROR #14 + str r5,[sp,#100] + eor r5,r2,r3,ROR #14 + ldr r3,[sp,#124] + add r2,r10,r7 + add r12,r9,r8 + eor r11,r11,r2,ROR #25 + eor r2,r6,r12,ROR #25 + add r6,r11,r10 + add r12,r2,r9 + eor r6,r14,r6,ROR #23 + eor r3,r3,r12,ROR #23 + add r12,r6,r11 + add r14,r3,r2 + eor r7,r7,r12,ROR #19 + eor r8,r8,r14,ROR #19 + add r12,r7,r6 + add r14,r8,r3 + eor r10,r10,r12,ROR #14 + eor r9,r9,r14,ROR 
#14 + ldr r12,[sp,#252] + subs r12,r12,#2 + bhi .L_mainloop1 + strd r6,[sp,#128] + strd r2,[sp,#120] + strd r10,[sp,#112] + strd r8,[sp,#136] + ldrd r2,[sp,#96] + ldrd r6,[sp,#104] + ldrd r8,[sp,#64] + ldrd r10,[sp,#48] + add r0,r0,r8 + add r1,r1,r9 + add r2,r2,r10 + add r3,r3,r11 + ldrd r8,[sp,#72] + ldrd r10,[sp,#32] + add r4,r4,r8 + add r5,r5,r9 + add r6,r6,r10 + add r7,r7,r11 + ldr r12,[sp,#236] + cmp r12,#0 + beq .L_nomessage10 + ldr r8,[r12,#0] + ldr r9,[r12,#4] + ldr r10,[r12,#8] + ldr r11,[r12,#12] + eor r0,r0,r8 + ldr r8,[r12,#16] + eor r1,r1,r9 + ldr r9,[r12,#20] + eor r2,r2,r10 + ldr r10,[r12,#24] + eor r3,r3,r11 + ldr r11,[r12,#28] + eor r4,r4,r8 + eor r5,r5,r9 + eor r6,r6,r10 + eor r7,r7,r11 +.L_nomessage10: + ldr r14,[sp,#232] + str r0,[r14,#0] + str r1,[r14,#4] + str r2,[r14,#8] + str r3,[r14,#12] + str r4,[r14,#16] + str r5,[r14,#20] + str r6,[r14,#24] + str r7,[r14,#28] + ldrd r6,[sp,#128] + ldrd r10,[sp,#112] + ldrd r0,[sp,#40] + ldrd r4,[sp,#80] + add r6,r6,r0 + add r7,r7,r1 + add r10,r10,r4 + add r11,r11,r5 + adds r0,r0,#1 + adc r1,r1,#0 + strd r0,[sp,#40] + ldrd r2,[sp,#120] + ldrd r8,[sp,#136] + ldrd r4,[sp,#56] + ldrd r0,[sp,#88] + add r2,r2,r4 + add r3,r3,r5 + add r0,r8,r0 + add r1,r9,r1 + cmp r12,#0 + beq .L_nomessage11 + ldr r4,[r12,#32] + ldr r5,[r12,#36] + ldr r8,[r12,#40] + ldr r9,[r12,#44] + eor r6,r6,r4 + ldr r4,[r12,#48] + eor r7,r7,r5 + ldr r5,[r12,#52] + eor r10,r10,r8 + ldr r8,[r12,#56] + eor r11,r11,r9 + ldr r9,[r12,#60] + eor r2,r2,r4 + eor r3,r3,r5 + eor r0,r0,r8 + eor r1,r1,r9 + add r4,r12,#64 + str r4,[sp,#236] +.L_nomessage11: + str r6,[r14,#32] + str r7,[r14,#36] + str r10,[r14,#40] + str r11,[r14,#44] + str r2,[r14,#48] + str r3,[r14,#52] + str r0,[r14,#56] + str r1,[r14,#60] + add r0,r14,#64 + str r0,[sp,#232] + ldr r0,[sp,#248] + cmp r0,#64 + bhi .L_nextblock +.L_done: + ldr r2,[sp,#160] + ldrd r4,[sp,#0] + ldrd r6,[sp,#8] + ldrd r8,[sp,#16] + ldrd r10,[sp,#24] + ldr r12,[sp,#228] + ldr r14,[sp,#224] + ldrd r0,[sp,#40] + strd r0,[r2] + sub r0,r12,sp + mov sp,r12 + vpop {q4,q5,q6,q7} + add r0,r0,#64 + bx lr +.size _gcry_arm_neon_salsa20_encrypt,.-_gcry_arm_neon_salsa20_encrypt; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/salsa20.c b/libotr/libgcrypt-1.8.7/cipher/salsa20.c new file mode 100644 index 0000000..9768198 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/salsa20.c @@ -0,0 +1,597 @@ +/* salsa20.c - Bernstein's Salsa20 cipher + * Copyright (C) 2012 Simon Josefsson, Niels Möller + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * For a description of the algorithm, see: + * http://cr.yp.to/snuffle/spec.pdf + * http://cr.yp.to/snuffle/design.pdf + */ + +/* The code is based on the code in Nettle + (git commit id 9d2d8ddaee35b91a4e1a32ae77cba04bea3480e7) + which in turn is based on + salsa20-ref.c version 20051118 + D. J. 
Bernstein + Public domain. +*/ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" + + +/* USE_AMD64 indicates whether to compile with AMD64 code. */ +#undef USE_AMD64 +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AMD64 1 +#endif + +/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */ +#undef USE_ARM_NEON_ASM +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_ARM_NEON_ASM 1 +# endif +#endif /*ENABLE_NEON_SUPPORT*/ + + +#define SALSA20_MIN_KEY_SIZE 16 /* Bytes. */ +#define SALSA20_MAX_KEY_SIZE 32 /* Bytes. */ +#define SALSA20_BLOCK_SIZE 64 /* Bytes. */ +#define SALSA20_IV_SIZE 8 /* Bytes. */ +#define SALSA20_INPUT_LENGTH 16 /* Bytes. */ + +/* Number of rounds. The standard uses 20 rounds. In any case the + number of rounds must be even. */ +#define SALSA20_ROUNDS 20 +#define SALSA20R12_ROUNDS 12 + + +struct SALSA20_context_s; + +typedef unsigned int (*salsa20_core_t) (u32 *dst, struct SALSA20_context_s *ctx, + unsigned int rounds); +typedef void (* salsa20_keysetup_t)(struct SALSA20_context_s *ctx, + const byte *key, int keylen); +typedef void (* salsa20_ivsetup_t)(struct SALSA20_context_s *ctx, + const byte *iv); + +typedef struct SALSA20_context_s +{ + /* Indices 1-4 and 11-14 holds the key (two identical copies for the + shorter key size), indices 0, 5, 10, 15 are constant, indices 6, 7 + are the IV, and indices 8, 9 are the block counter: + + C K K K + K C I I + B B C K + K K K C + */ + u32 input[SALSA20_INPUT_LENGTH]; + u32 pad[SALSA20_INPUT_LENGTH]; + unsigned int unused; /* bytes in the pad. */ +#ifdef USE_ARM_NEON_ASM + int use_neon; +#endif + salsa20_keysetup_t keysetup; + salsa20_ivsetup_t ivsetup; + salsa20_core_t core; +} SALSA20_context_t; + + +/* The masking of the right shift is needed to allow n == 0 (using + just 32 - n and 64 - n results in undefined behaviour). Most uses + of these macros use a constant and non-zero rotation count. */ +#define ROTL32(n,x) (((x)<<(n)) | ((x)>>((-(n)&31)))) + + +#define LE_SWAP32(v) le_bswap32(v) + +#define LE_READ_UINT32(p) buf_get_le32(p) + + +static void salsa20_setiv (void *context, const byte *iv, size_t ivlen); +static const char *selftest (void); + + +#ifdef USE_AMD64 + +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. */ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_EXTRA_STACK (10 * 16) +#else +# define ASM_FUNC_ABI +# define ASM_EXTRA_STACK 0 +#endif + +/* AMD64 assembly implementations of Salsa20. 
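One detail worth calling out in the macros above: ROTL32 masks its right-shift count as (-(n) & 31) because, as its comment notes, shifting a 32-bit value by 32 is undefined behaviour in C. A self-contained restatement of the idiom:

#include <stdint.h>

/* Rotate X left by N bits.  For 1 <= n <= 31, (-n) & 31 == 32 - n;
 * for n == 0 it yields 0, so neither shift count ever reaches the
 * undefined value 32. */
static inline uint32_t
rotl32 (uint32_t x, unsigned int n)
{
  return (x << n) | (x >> ((-n) & 31));
}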
*/ +void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits) + ASM_FUNC_ABI; +void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv) + ASM_FUNC_ABI; +unsigned int +_gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst, + size_t len, int rounds) ASM_FUNC_ABI; + +static void +salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen) +{ + _gcry_salsa20_amd64_keysetup(ctx->input, key, keylen * 8); +} + +static void +salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv) +{ + _gcry_salsa20_amd64_ivsetup(ctx->input, iv); +} + +static unsigned int +salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds) +{ + memset(dst, 0, SALSA20_BLOCK_SIZE); + return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds) + + ASM_EXTRA_STACK; +} + +#else /* USE_AMD64 */ + + + +#if 0 +# define SALSA20_CORE_DEBUG(i) do { \ + unsigned debug_j; \ + for (debug_j = 0; debug_j < 16; debug_j++) \ + { \ + if (debug_j == 0) \ + fprintf(stderr, "%2d:", (i)); \ + else if (debug_j % 4 == 0) \ + fprintf(stderr, "\n "); \ + fprintf(stderr, " %8x", pad[debug_j]); \ + } \ + fprintf(stderr, "\n"); \ + } while (0) +#else +# define SALSA20_CORE_DEBUG(i) +#endif + +#define QROUND(x0, x1, x2, x3) \ + do { \ + x1 ^= ROTL32 ( 7, x0 + x3); \ + x2 ^= ROTL32 ( 9, x1 + x0); \ + x3 ^= ROTL32 (13, x2 + x1); \ + x0 ^= ROTL32 (18, x3 + x2); \ + } while(0) + +static unsigned int +salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned rounds) +{ + u32 pad[SALSA20_INPUT_LENGTH], *src = ctx->input; + unsigned int i; + + memcpy (pad, src, sizeof(pad)); + for (i = 0; i < rounds; i += 2) + { + SALSA20_CORE_DEBUG (i); + QROUND (pad[0], pad[4], pad[8], pad[12]); + QROUND (pad[5], pad[9], pad[13], pad[1] ); + QROUND (pad[10], pad[14], pad[2], pad[6] ); + QROUND (pad[15], pad[3], pad[7], pad[11]); + + SALSA20_CORE_DEBUG (i+1); + QROUND (pad[0], pad[1], pad[2], pad[3] ); + QROUND (pad[5], pad[6], pad[7], pad[4] ); + QROUND (pad[10], pad[11], pad[8], pad[9] ); + QROUND (pad[15], pad[12], pad[13], pad[14]); + } + SALSA20_CORE_DEBUG (i); + + for (i = 0; i < SALSA20_INPUT_LENGTH; i++) + { + u32 t = pad[i] + src[i]; + dst[i] = LE_SWAP32 (t); + } + + /* Update counter. */ + if (!++src[8]) + src[9]++; + + /* burn_stack */ + return ( 3*sizeof (void*) \ + + 2*sizeof (void*) \ + + 64 \ + + sizeof (unsigned int) \ + + sizeof (u32) ); +} +#undef QROUND +#undef SALSA20_CORE_DEBUG + +static void +salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen) +{ + /* These constants are the little endian encoding of the string + "expand 32-byte k". For the 128 bit variant, the "32" in that + string will be fixed up to "16". */ + ctx->input[0] = 0x61707865; /* "apxe" */ + ctx->input[5] = 0x3320646e; /* "3 dn" */ + ctx->input[10] = 0x79622d32; /* "yb-2" */ + ctx->input[15] = 0x6b206574; /* "k et" */ + + ctx->input[1] = LE_READ_UINT32(key + 0); + ctx->input[2] = LE_READ_UINT32(key + 4); + ctx->input[3] = LE_READ_UINT32(key + 8); + ctx->input[4] = LE_READ_UINT32(key + 12); + if (keylen == SALSA20_MAX_KEY_SIZE) /* 256 bits */ + { + ctx->input[11] = LE_READ_UINT32(key + 16); + ctx->input[12] = LE_READ_UINT32(key + 20); + ctx->input[13] = LE_READ_UINT32(key + 24); + ctx->input[14] = LE_READ_UINT32(key + 28); + } + else /* 128 bits */ + { + ctx->input[11] = ctx->input[1]; + ctx->input[12] = ctx->input[2]; + ctx->input[13] = ctx->input[3]; + ctx->input[14] = ctx->input[4]; + + ctx->input[5] -= 0x02000000; /* Change to "1 dn". 
*/ + ctx->input[10] += 0x00000004; /* Change to "yb-6". */ + } +} + +static void salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv) +{ + ctx->input[6] = LE_READ_UINT32(iv + 0); + ctx->input[7] = LE_READ_UINT32(iv + 4); + /* Reset the block counter. */ + ctx->input[8] = 0; + ctx->input[9] = 0; +} + +#endif /*!USE_AMD64*/ + +#ifdef USE_ARM_NEON_ASM + +/* ARM NEON implementation of Salsa20. */ +unsigned int +_gcry_arm_neon_salsa20_encrypt(void *c, const void *m, unsigned int nblks, + void *k, unsigned int rounds); + +static unsigned int +salsa20_core_neon (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds) +{ + return _gcry_arm_neon_salsa20_encrypt(dst, NULL, 1, ctx->input, rounds); +} + +static void salsa20_ivsetup_neon(SALSA20_context_t *ctx, const byte *iv) +{ + memcpy(ctx->input + 8, iv, 8); + /* Reset the block counter. */ + memset(ctx->input + 10, 0, 8); +} + +static void +salsa20_keysetup_neon(SALSA20_context_t *ctx, const byte *key, int klen) +{ + static const unsigned char sigma32[16] = "expand 32-byte k"; + static const unsigned char sigma16[16] = "expand 16-byte k"; + + if (klen == 16) + { + memcpy (ctx->input, key, 16); + memcpy (ctx->input + 4, key, 16); /* Duplicate 128-bit key. */ + memcpy (ctx->input + 12, sigma16, 16); + } + else + { + /* 32-byte key */ + memcpy (ctx->input, key, 32); + memcpy (ctx->input + 12, sigma32, 16); + } +} + +#endif /*USE_ARM_NEON_ASM*/ + + +static gcry_err_code_t +salsa20_do_setkey (SALSA20_context_t *ctx, + const byte *key, unsigned int keylen) +{ + static int initialized; + static const char *selftest_failed; + + if (!initialized ) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("SALSA20 selftest failed (%s)\n", selftest_failed ); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + if (keylen != SALSA20_MIN_KEY_SIZE + && keylen != SALSA20_MAX_KEY_SIZE) + return GPG_ERR_INV_KEYLEN; + + /* Default ops. */ + ctx->keysetup = salsa20_keysetup; + ctx->ivsetup = salsa20_ivsetup; + ctx->core = salsa20_core; + +#ifdef USE_ARM_NEON_ASM + ctx->use_neon = (_gcry_get_hw_features () & HWF_ARM_NEON) != 0; + if (ctx->use_neon) + { + /* Use ARM NEON ops instead. */ + ctx->keysetup = salsa20_keysetup_neon; + ctx->ivsetup = salsa20_ivsetup_neon; + ctx->core = salsa20_core_neon; + } +#endif + + ctx->keysetup (ctx, key, keylen); + + /* We default to a zero nonce. */ + salsa20_setiv (ctx, NULL, 0); + + return 0; +} + + +static gcry_err_code_t +salsa20_setkey (void *context, const byte *key, unsigned int keylen) +{ + SALSA20_context_t *ctx = (SALSA20_context_t *)context; + gcry_err_code_t rc = salsa20_do_setkey (ctx, key, keylen); + _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); + return rc; +} + + +static void +salsa20_setiv (void *context, const byte *iv, size_t ivlen) +{ + SALSA20_context_t *ctx = (SALSA20_context_t *)context; + byte tmp[SALSA20_IV_SIZE]; + + if (iv && ivlen != SALSA20_IV_SIZE) + log_info ("WARNING: salsa20_setiv: bad ivlen=%u\n", (u32)ivlen); + + if (!iv || ivlen != SALSA20_IV_SIZE) + memset (tmp, 0, sizeof(tmp)); + else + memcpy (tmp, iv, SALSA20_IV_SIZE); + + ctx->ivsetup (ctx, tmp); + + /* Reset the unused pad bytes counter. */ + ctx->unused = 0; + + wipememory (tmp, sizeof(tmp)); +} + + + +/* Note: This function requires LENGTH > 0. 
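The salsa20_do_encrypt_stream routine that follows first drains keystream bytes left over in ctx->pad from a previous call (ctx->unused tracks how many remain), then XORs whole 64-byte blocks, saving any tail of the final block for next time. A stripped-down model of that buffering, with a hypothetical stream_ctx type and a gen_block() stand-in for ctx->core:

#include <stddef.h>
#include <stdint.h>

typedef struct { uint8_t pad[64]; unsigned int unused; } stream_ctx; /* sketch */
void gen_block (stream_ctx *ctx);  /* stands in for ctx->core: writes 64
                                      keystream bytes into pad and bumps
                                      the block counter */

static void
xor_stream (stream_ctx *ctx, uint8_t *out, const uint8_t *in, size_t len)
{
  /* Serve leftovers from the tail of the previous pad first. */
  while (ctx->unused && len)
    {
      *out++ = *in++ ^ ctx->pad[64 - ctx->unused];
      ctx->unused--;
      len--;
    }
  while (len)
    {
      size_t i, n = len < 64 ? len : 64;
      gen_block (ctx);                     /* next 64 keystream bytes */
      for (i = 0; i < n; i++)
        out[i] = in[i] ^ ctx->pad[i];
      ctx->unused = 64 - n;                /* 0 if the block was fully used */
      out += n; in += n; len -= n;
    }
}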
*/ +static void +salsa20_do_encrypt_stream (SALSA20_context_t *ctx, + byte *outbuf, const byte *inbuf, + size_t length, unsigned rounds) +{ + unsigned int nburn, burn = 0; + + if (ctx->unused) + { + unsigned char *p = (void*)ctx->pad; + size_t n; + + gcry_assert (ctx->unused < SALSA20_BLOCK_SIZE); + + n = ctx->unused; + if (n > length) + n = length; + buf_xor (outbuf, inbuf, p + SALSA20_BLOCK_SIZE - ctx->unused, n); + length -= n; + outbuf += n; + inbuf += n; + ctx->unused -= n; + if (!length) + return; + gcry_assert (!ctx->unused); + } + +#ifdef USE_AMD64 + if (length >= SALSA20_BLOCK_SIZE) + { + size_t nblocks = length / SALSA20_BLOCK_SIZE; + burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf, + nblocks, rounds); + burn += ASM_EXTRA_STACK; + length -= SALSA20_BLOCK_SIZE * nblocks; + outbuf += SALSA20_BLOCK_SIZE * nblocks; + inbuf += SALSA20_BLOCK_SIZE * nblocks; + } +#endif + +#ifdef USE_ARM_NEON_ASM + if (ctx->use_neon && length >= SALSA20_BLOCK_SIZE) + { + unsigned int nblocks = length / SALSA20_BLOCK_SIZE; + _gcry_arm_neon_salsa20_encrypt (outbuf, inbuf, nblocks, ctx->input, + rounds); + length -= SALSA20_BLOCK_SIZE * nblocks; + outbuf += SALSA20_BLOCK_SIZE * nblocks; + inbuf += SALSA20_BLOCK_SIZE * nblocks; + } +#endif + + while (length > 0) + { + /* Create the next pad and bump the block counter. Note that it + is the user's duty to change to another nonce not later than + after 2^70 processed bytes. */ + nburn = ctx->core (ctx->pad, ctx, rounds); + burn = nburn > burn ? nburn : burn; + + if (length <= SALSA20_BLOCK_SIZE) + { + buf_xor (outbuf, inbuf, ctx->pad, length); + ctx->unused = SALSA20_BLOCK_SIZE - length; + break; + } + buf_xor (outbuf, inbuf, ctx->pad, SALSA20_BLOCK_SIZE); + length -= SALSA20_BLOCK_SIZE; + outbuf += SALSA20_BLOCK_SIZE; + inbuf += SALSA20_BLOCK_SIZE; + } + + _gcry_burn_stack (burn); +} + + +static void +salsa20_encrypt_stream (void *context, + byte *outbuf, const byte *inbuf, size_t length) +{ + SALSA20_context_t *ctx = (SALSA20_context_t *)context; + + if (length) + salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20_ROUNDS); +} + + +static void +salsa20r12_encrypt_stream (void *context, + byte *outbuf, const byte *inbuf, size_t length) +{ + SALSA20_context_t *ctx = (SALSA20_context_t *)context; + + if (length) + salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20R12_ROUNDS); +} + + +static const char* +selftest (void) +{ + byte ctxbuf[sizeof(SALSA20_context_t) + 15]; + SALSA20_context_t *ctx; + byte scratch[8+1]; + byte buf[256+64+4]; + int i; + + static byte key_1[] = + { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const byte nonce_1[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const byte plaintext_1[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const byte ciphertext_1[] = + { 0xE3, 0xBE, 0x8F, 0xDD, 0x8B, 0xEC, 0xA2, 0xE3}; + + /* 16-byte alignment required for amd64 implementation. 
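The selftest below needs its SALSA20_context_t aligned to 16 bytes for the AMD64 code path, so it over-allocates ctxbuf by 15 bytes and rounds the pointer up. The idiom in isolation:

#include <stdint.h>

/* Round P up to the next 16-byte boundary: adding 15 guarantees the
 * masked result is never below P, and the mask clears the low four
 * address bits. */
static void *
align16 (void *p)
{
  return (void *)(((uintptr_t)p + 15) & ~(uintptr_t)15);
}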
*/ + ctx = (SALSA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); + + salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + scratch[8] = 0; + salsa20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); + if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) + return "Salsa20 encryption test 1 failed."; + if (scratch[8]) + return "Salsa20 wrote too much."; + salsa20_setkey( ctx, key_1, sizeof(key_1)); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + salsa20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); + if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) + return "Salsa20 decryption test 1 failed."; + + for (i = 0; i < sizeof buf; i++) + buf[i] = i; + salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + /*encrypt*/ + salsa20_encrypt_stream (ctx, buf, buf, sizeof buf); + /*decrypt*/ + salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + salsa20_encrypt_stream (ctx, buf, buf, 1); + salsa20_encrypt_stream (ctx, buf+1, buf+1, (sizeof buf)-1-1); + salsa20_encrypt_stream (ctx, buf+(sizeof buf)-1, buf+(sizeof buf)-1, 1); + for (i = 0; i < sizeof buf; i++) + if (buf[i] != (byte)i) + return "Salsa20 encryption test 2 failed."; + + return NULL; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_salsa20 = + { + GCRY_CIPHER_SALSA20, + {0, 0}, /* flags */ + "SALSA20", /* name */ + NULL, /* aliases */ + NULL, /* oids */ + 1, /* blocksize in bytes. */ + SALSA20_MAX_KEY_SIZE*8, /* standard key length in bits. */ + sizeof (SALSA20_context_t), + salsa20_setkey, + NULL, + NULL, + salsa20_encrypt_stream, + salsa20_encrypt_stream, + NULL, + NULL, + salsa20_setiv + }; + +gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12 = + { + GCRY_CIPHER_SALSA20R12, + {0, 0}, /* flags */ + "SALSA20R12", /* name */ + NULL, /* aliases */ + NULL, /* oids */ + 1, /* blocksize in bytes. */ + SALSA20_MAX_KEY_SIZE*8, /* standard key length in bits. */ + sizeof (SALSA20_context_t), + salsa20_setkey, + NULL, + NULL, + salsa20r12_encrypt_stream, + salsa20r12_encrypt_stream, + NULL, + NULL, + salsa20_setiv + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/scrypt.c b/libotr/libgcrypt-1.8.7/cipher/scrypt.c new file mode 100644 index 0000000..13fd1cf --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/scrypt.c @@ -0,0 +1,322 @@ +/* scrypt.c - Scrypt password-based key derivation function. + * Copyright (C) 2012 Simon Josefsson + * Copyright (C) 2013 Christian Grothoff + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
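For orientation before the scrypt code that follows: _gcry_kdf_scrypt (defined below) receives the CPU/memory cost N through its subalgo argument and the parallelization parameter p through iterations, with the block-size parameter r fixed at 8 for GCRY_KDF_SCRYPT. Through libgcrypt's public KDF interface a call would presumably look like this (illustrative parameters, error handling elided):

#include <gcrypt.h>

/* Derive a 32-byte key: N = 16384 goes in the subalgo slot, p = 1 in
 * the iterations slot; salt and passphrase are made-up example values. */
static gcry_error_t
derive_example (unsigned char key[32])
{
  return gcry_kdf_derive ("passphrase", 10, GCRY_KDF_SCRYPT,
                          16384 /* N */, "salty", 5,
                          1 /* p */, 32, key);
}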
+ */ + +/* Adapted from the nettle, low-level cryptographics library for + * libgcrypt by Christian Grothoff; original license: + * + * Copyright (C) 2012 Simon Josefsson + * + * The nettle library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * The nettle library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the nettle library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02111-1301, USA. + */ + +#include <config.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "kdf-internal.h" +#include "bufhelp.h" + +/* We really need a 64 bit type for this code. */ +#define SALSA20_INPUT_LENGTH 16 + +#define ROTL32(n,x) (((x)<<(n)) | ((x)>>(32-(n)))) + + +/* Reads a 64-bit integer, in network, big-endian, byte order */ +#define READ_UINT64(p) buf_get_be64(p) + + +/* And the other, little-endian, byteorder */ +#define LE_READ_UINT64(p) buf_get_le64(p) + +#define LE_SWAP32(v) le_bswap32(v) + + +#define QROUND(x0, x1, x2, x3) do { \ + x1 ^= ROTL32(7, x0 + x3); \ + x2 ^= ROTL32(9, x1 + x0); \ + x3 ^= ROTL32(13, x2 + x1); \ + x0 ^= ROTL32(18, x3 + x2); \ + } while(0) + + +static void +salsa20_core (u32 *dst, const u32 *src, unsigned int rounds) +{ + u32 x[SALSA20_INPUT_LENGTH]; + unsigned i; + + assert ( (rounds & 1) == 0); + + for (i = 0; i < SALSA20_INPUT_LENGTH; i++) + x[i] = LE_SWAP32(src[i]); + + for (i = 0; i < rounds;i += 2) + { + QROUND(x[0], x[4], x[8], x[12]); + QROUND(x[5], x[9], x[13], x[1]); + QROUND(x[10], x[14], x[2], x[6]); + QROUND(x[15], x[3], x[7], x[11]); + + QROUND(x[0], x[1], x[2], x[3]); + QROUND(x[5], x[6], x[7], x[4]); + QROUND(x[10], x[11], x[8], x[9]); + QROUND(x[15], x[12], x[13], x[14]); + } + + for (i = 0; i < SALSA20_INPUT_LENGTH; i++) + { + u32 t = x[i] + LE_SWAP32(src[i]); + dst[i] = LE_SWAP32(t); + } +} + + +static void +scrypt_block_mix (u32 r, unsigned char *B, unsigned char *tmp2) +{ + u64 i; + unsigned char *X = tmp2; + unsigned char *Y = tmp2 + 64; + +#if 0 + if (r == 1) + { + for (i = 0; i < 2 * r; i++) + { + size_t j; + printf ("B[%d] = ", (int)i); + for (j = 0; j < 64; j++) + { + if (j && !(j % 16)) + printf ("\n "); + printf (" %02x", B[i * 64 + j]); + } + putchar ('\n'); + } + } +#endif + + /* X = B[2 * r - 1] */ + memcpy (X, &B[(2 * r - 1) * 64], 64); + + /* for i = 0 to 2 * r - 1 do */ + for (i = 0; i <= 2 * r - 1; i++) + { + /* T = X xor B[i] */ + buf_xor(X, X, &B[i * 64], 64); + + /* X = Salsa (T) */ + salsa20_core ((u32*)(void*)X, (u32*)(void*)X, 8); + + /* Y[i] = X */ + memcpy (&Y[i * 64], X, 64); + } + + for (i = 0; i < r; i++) + { + memcpy (&B[i * 64], &Y[2 * i * 64], 64); + memcpy (&B[(r + i) * 64], &Y[(2 * i + 1) * 64], 64); + } + +#if 0 + if (r==1) + { + for (i = 0; i < 2 * r; i++) + { + size_t j; + printf ("B'[%d] =", (int)i); + for (j = 0; j < 64; j++) + { + if (j && !(j % 16)) + printf ("\n "); + printf (" %02x", B[i * 64 + j]); + } + putchar ('\n'); + } + } +#endif +} + + +static void +scrypt_ro_mix (u32 r, unsigned char *B, u64 
N, + unsigned char *tmp1, unsigned char *tmp2) +{ + unsigned char *X = B, *T = B; + u64 i; + +#if 0 + if (r == 1) + { + printf ("B = "); + for (i = 0; i < 128 * r; i++) + { + if (i && !(i % 16)) + printf ("\n "); + printf (" %02x", B[i]); + } + putchar ('\n'); + } +#endif + + /* for i = 0 to N - 1 do */ + for (i = 0; i <= N - 1; i++) + { + /* V[i] = X */ + memcpy (&tmp1[i * 128 * r], X, 128 * r); + + /* X = ScryptBlockMix (X) */ + scrypt_block_mix (r, X, tmp2); + } + + /* for i = 0 to N - 1 do */ + for (i = 0; i <= N - 1; i++) + { + u64 j; + + /* j = Integerify (X) mod N */ + j = LE_READ_UINT64 (&X[128 * r - 64]) % N; + + /* T = X xor V[j] */ + buf_xor (T, T, &tmp1[j * 128 * r], 128 * r); + + /* X = scryptBlockMix (T) */ + scrypt_block_mix (r, T, tmp2); + } + +#if 0 + if (r == 1) + { + printf ("B' ="); + for (i = 0; i < 128 * r; i++) + { + if (i && !(i % 16)) + printf ("\n "); + printf (" %02x", B[i]); + } + putchar ('\n'); + } +#endif +} + + +/* + * + */ +gcry_err_code_t +_gcry_kdf_scrypt (const unsigned char *passwd, size_t passwdlen, + int algo, int subalgo, + const unsigned char *salt, size_t saltlen, + unsigned long iterations, + size_t dkLen, unsigned char *DK) +{ + u64 N = subalgo; /* CPU/memory cost parameter. */ + u32 r; /* Block size. */ + u32 p = iterations; /* Parallelization parameter. */ + + gpg_err_code_t ec; + u32 i; + unsigned char *B = NULL; + unsigned char *tmp1 = NULL; + unsigned char *tmp2 = NULL; + size_t r128; + size_t nbytes; + + if (subalgo < 1 || !iterations) + return GPG_ERR_INV_VALUE; + + if (algo == GCRY_KDF_SCRYPT) + r = 8; + else if (algo == 41) /* Hack to allow the use of all test vectors. */ + r = 1; + else + return GPG_ERR_UNKNOWN_ALGORITHM; + + r128 = r * 128; + if (r128 / 128 != r) + return GPG_ERR_ENOMEM; + + nbytes = p * r128; + if (r128 && nbytes / r128 != p) + return GPG_ERR_ENOMEM; + + nbytes = N * r128; + if (r128 && nbytes / r128 != N) + return GPG_ERR_ENOMEM; + + nbytes = 64 + r128; + if (nbytes < r128) + return GPG_ERR_ENOMEM; + + B = xtrymalloc (p * r128); + if (!B) + { + ec = gpg_err_code_from_syserror (); + goto leave; + } + + tmp1 = xtrymalloc (N * r128); + if (!tmp1) + { + ec = gpg_err_code_from_syserror (); + goto leave; + } + + tmp2 = xtrymalloc (64 + r128); + if (!tmp2) + { + ec = gpg_err_code_from_syserror (); + goto leave; + } + + ec = _gcry_kdf_pkdf2 (passwd, passwdlen, GCRY_MD_SHA256, salt, saltlen, + 1 /* iterations */, p * r128, B); + + for (i = 0; !ec && i < p; i++) + scrypt_ro_mix (r, &B[i * r128], N, tmp1, tmp2); + + for (i = 0; !ec && i < p; i++) + ec = _gcry_kdf_pkdf2 (passwd, passwdlen, GCRY_MD_SHA256, B, p * r128, + 1 /* iterations */, dkLen, DK); + + leave: + xfree (tmp2); + xfree (tmp1); + xfree (B); + + return ec; +} diff --git a/libotr/libgcrypt-1.8.7/cipher/seed.c b/libotr/libgcrypt-1.8.7/cipher/seed.c new file mode 100644 index 0000000..9f87c05 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/seed.c @@ -0,0 +1,476 @@ +/* SEED for libgcrypt + * Copyright (C) 2006 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -- + * This implementation was provided for libgcrypt in public domain + * by Hye-Shik Chang <perky@FreeBSD.org>, July 2006. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" + +#define NUMKC 16 + +#define GETU32(pt) buf_get_be32(pt) +#define PUTU32(ct, st) buf_put_be32(ct, st) + +union wordbuf +{ + u32 w; + byte b[4]; +}; + +#ifdef WORDS_BIGENDIAN +#define b0 b[3] +#define b1 b[2] +#define b2 b[1] +#define b3 b[0] +#else +#define b0 b[0] +#define b1 b[1] +#define b2 b[2] +#define b3 b[3] +#endif + +static const char *selftest(void); + +typedef struct +{ + u32 keyschedule[32]; +} SEED_context; + +static const u32 SS0[256] = { + 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, 0x14445054, 0x1d0d111c, + 0x2c8ca0ac, 0x25052124, 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c, + 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360, 0x28082028, 0x04444044, + 0x20002020, 0x1d8d919c, 0x20c0e0e0, 0x22c2e2e0, 0x08c8c0c8, 0x17071314, + 0x2585a1a4, 0x0f8f838c, 0x03030300, 0x3b4b7378, 0x3b8bb3b8, 0x13031310, + 0x12c2d2d0, 0x2ecee2ec, 0x30407070, 0x0c8c808c, 0x3f0f333c, 0x2888a0a8, + 0x32023230, 0x1dcdd1dc, 0x36c6f2f4, 0x34447074, 0x2ccce0ec, 0x15859194, + 0x0b0b0308, 0x17475354, 0x1c4c505c, 0x1b4b5358, 0x3d8db1bc, 0x01010100, + 0x24042024, 0x1c0c101c, 0x33437370, 0x18889098, 0x10001010, 0x0cccc0cc, + 0x32c2f2f0, 0x19c9d1d8, 0x2c0c202c, 0x27c7e3e4, 0x32427270, 0x03838380, + 0x1b8b9398, 0x11c1d1d0, 0x06868284, 0x09c9c1c8, 0x20406060, 0x10405050, + 0x2383a3a0, 0x2bcbe3e8, 0x0d0d010c, 0x3686b2b4, 0x1e8e929c, 0x0f4f434c, + 0x3787b3b4, 0x1a4a5258, 0x06c6c2c4, 0x38487078, 0x2686a2a4, 0x12021210, + 0x2f8fa3ac, 0x15c5d1d4, 0x21416160, 0x03c3c3c0, 0x3484b0b4, 0x01414140, + 0x12425250, 0x3d4d717c, 0x0d8d818c, 0x08080008, 0x1f0f131c, 0x19899198, + 0x00000000, 0x19091118, 0x04040004, 0x13435350, 0x37c7f3f4, 0x21c1e1e0, + 0x3dcdf1fc, 0x36467274, 0x2f0f232c, 0x27072324, 0x3080b0b0, 0x0b8b8388, + 0x0e0e020c, 0x2b8ba3a8, 0x2282a2a0, 0x2e4e626c, 0x13839390, 0x0d4d414c, + 0x29496168, 0x3c4c707c, 0x09090108, 0x0a0a0208, 0x3f8fb3bc, 0x2fcfe3ec, + 0x33c3f3f0, 0x05c5c1c4, 0x07878384, 0x14041014, 0x3ecef2fc, 0x24446064, + 0x1eced2dc, 0x2e0e222c, 0x0b4b4348, 0x1a0a1218, 0x06060204, 0x21012120, + 0x2b4b6368, 0x26466264, 0x02020200, 0x35c5f1f4, 0x12829290, 0x0a8a8288, + 0x0c0c000c, 0x3383b3b0, 0x3e4e727c, 0x10c0d0d0, 0x3a4a7278, 0x07474344, + 0x16869294, 0x25c5e1e4, 0x26062224, 0x00808080, 0x2d8da1ac, 0x1fcfd3dc, + 0x2181a1a0, 0x30003030, 0x37073334, 0x2e8ea2ac, 0x36063234, 0x15051114, + 0x22022220, 0x38083038, 0x34c4f0f4, 0x2787a3a4, 0x05454144, 0x0c4c404c, + 0x01818180, 0x29c9e1e8, 0x04848084, 0x17879394, 0x35053134, 0x0bcbc3c8, + 0x0ecec2cc, 0x3c0c303c, 0x31417170, 0x11011110, 0x07c7c3c4, 0x09898188, + 0x35457174, 0x3bcbf3f8, 0x1acad2d8, 0x38c8f0f8, 0x14849094, 0x19495158, + 0x02828280, 0x04c4c0c4, 0x3fcff3fc, 0x09494148, 0x39093138, 0x27476364, + 0x00c0c0c0, 0x0fcfc3cc, 0x17c7d3d4, 0x3888b0b8, 0x0f0f030c, 0x0e8e828c, + 0x02424240, 0x23032320, 0x11819190, 0x2c4c606c, 0x1bcbd3d8, 0x2484a0a4, + 0x34043034, 0x31c1f1f0, 0x08484048, 0x02c2c2c0, 0x2f4f636c, 0x3d0d313c, + 0x2d0d212c, 0x00404040, 0x3e8eb2bc, 
0x3e0e323c, 0x3c8cb0bc, 0x01c1c1c0, + 0x2a8aa2a8, 0x3a8ab2b8, 0x0e4e424c, 0x15455154, 0x3b0b3338, 0x1cccd0dc, + 0x28486068, 0x3f4f737c, 0x1c8c909c, 0x18c8d0d8, 0x0a4a4248, 0x16465254, + 0x37477374, 0x2080a0a0, 0x2dcde1ec, 0x06464244, 0x3585b1b4, 0x2b0b2328, + 0x25456164, 0x3acaf2f8, 0x23c3e3e0, 0x3989b1b8, 0x3181b1b0, 0x1f8f939c, + 0x1e4e525c, 0x39c9f1f8, 0x26c6e2e4, 0x3282b2b0, 0x31013130, 0x2acae2e8, + 0x2d4d616c, 0x1f4f535c, 0x24c4e0e4, 0x30c0f0f0, 0x0dcdc1cc, 0x08888088, + 0x16061214, 0x3a0a3238, 0x18485058, 0x14c4d0d4, 0x22426260, 0x29092128, + 0x07070304, 0x33033330, 0x28c8e0e8, 0x1b0b1318, 0x05050104, 0x39497178, + 0x10809090, 0x2a4a6268, 0x2a0a2228, 0x1a8a9298, +}; + +static const u32 SS1[256] = { + 0x38380830, 0xe828c8e0, 0x2c2d0d21, 0xa42686a2, 0xcc0fcfc3, 0xdc1eced2, + 0xb03383b3, 0xb83888b0, 0xac2f8fa3, 0x60204060, 0x54154551, 0xc407c7c3, + 0x44044440, 0x6c2f4f63, 0x682b4b63, 0x581b4b53, 0xc003c3c3, 0x60224262, + 0x30330333, 0xb43585b1, 0x28290921, 0xa02080a0, 0xe022c2e2, 0xa42787a3, + 0xd013c3d3, 0x90118191, 0x10110111, 0x04060602, 0x1c1c0c10, 0xbc3c8cb0, + 0x34360632, 0x480b4b43, 0xec2fcfe3, 0x88088880, 0x6c2c4c60, 0xa82888a0, + 0x14170713, 0xc404c4c0, 0x14160612, 0xf434c4f0, 0xc002c2c2, 0x44054541, + 0xe021c1e1, 0xd416c6d2, 0x3c3f0f33, 0x3c3d0d31, 0x8c0e8e82, 0x98188890, + 0x28280820, 0x4c0e4e42, 0xf436c6f2, 0x3c3e0e32, 0xa42585a1, 0xf839c9f1, + 0x0c0d0d01, 0xdc1fcfd3, 0xd818c8d0, 0x282b0b23, 0x64264662, 0x783a4a72, + 0x24270723, 0x2c2f0f23, 0xf031c1f1, 0x70324272, 0x40024242, 0xd414c4d0, + 0x40014141, 0xc000c0c0, 0x70334373, 0x64274763, 0xac2c8ca0, 0x880b8b83, + 0xf437c7f3, 0xac2d8da1, 0x80008080, 0x1c1f0f13, 0xc80acac2, 0x2c2c0c20, + 0xa82a8aa2, 0x34340430, 0xd012c2d2, 0x080b0b03, 0xec2ecee2, 0xe829c9e1, + 0x5c1d4d51, 0x94148490, 0x18180810, 0xf838c8f0, 0x54174753, 0xac2e8ea2, + 0x08080800, 0xc405c5c1, 0x10130313, 0xcc0dcdc1, 0x84068682, 0xb83989b1, + 0xfc3fcff3, 0x7c3d4d71, 0xc001c1c1, 0x30310131, 0xf435c5f1, 0x880a8a82, + 0x682a4a62, 0xb03181b1, 0xd011c1d1, 0x20200020, 0xd417c7d3, 0x00020202, + 0x20220222, 0x04040400, 0x68284860, 0x70314171, 0x04070703, 0xd81bcbd3, + 0x9c1d8d91, 0x98198991, 0x60214161, 0xbc3e8eb2, 0xe426c6e2, 0x58194951, + 0xdc1dcdd1, 0x50114151, 0x90108090, 0xdc1cccd0, 0x981a8a92, 0xa02383a3, + 0xa82b8ba3, 0xd010c0d0, 0x80018181, 0x0c0f0f03, 0x44074743, 0x181a0a12, + 0xe023c3e3, 0xec2ccce0, 0x8c0d8d81, 0xbc3f8fb3, 0x94168692, 0x783b4b73, + 0x5c1c4c50, 0xa02282a2, 0xa02181a1, 0x60234363, 0x20230323, 0x4c0d4d41, + 0xc808c8c0, 0x9c1e8e92, 0x9c1c8c90, 0x383a0a32, 0x0c0c0c00, 0x2c2e0e22, + 0xb83a8ab2, 0x6c2e4e62, 0x9c1f8f93, 0x581a4a52, 0xf032c2f2, 0x90128292, + 0xf033c3f3, 0x48094941, 0x78384870, 0xcc0cccc0, 0x14150511, 0xf83bcbf3, + 0x70304070, 0x74354571, 0x7c3f4f73, 0x34350531, 0x10100010, 0x00030303, + 0x64244460, 0x6c2d4d61, 0xc406c6c2, 0x74344470, 0xd415c5d1, 0xb43484b0, + 0xe82acae2, 0x08090901, 0x74364672, 0x18190911, 0xfc3ecef2, 0x40004040, + 0x10120212, 0xe020c0e0, 0xbc3d8db1, 0x04050501, 0xf83acaf2, 0x00010101, + 0xf030c0f0, 0x282a0a22, 0x5c1e4e52, 0xa82989a1, 0x54164652, 0x40034343, + 0x84058581, 0x14140410, 0x88098981, 0x981b8b93, 0xb03080b0, 0xe425c5e1, + 0x48084840, 0x78394971, 0x94178793, 0xfc3cccf0, 0x1c1e0e12, 0x80028282, + 0x20210121, 0x8c0c8c80, 0x181b0b13, 0x5c1f4f53, 0x74374773, 0x54144450, + 0xb03282b2, 0x1c1d0d11, 0x24250521, 0x4c0f4f43, 0x00000000, 0x44064642, + 0xec2dcde1, 0x58184850, 0x50124252, 0xe82bcbe3, 0x7c3e4e72, 0xd81acad2, + 0xc809c9c1, 0xfc3dcdf1, 0x30300030, 0x94158591, 0x64254561, 0x3c3c0c30, + 0xb43686b2, 0xe424c4e0, 
0xb83b8bb3, 0x7c3c4c70, 0x0c0e0e02, 0x50104050, + 0x38390931, 0x24260622, 0x30320232, 0x84048480, 0x68294961, 0x90138393, + 0x34370733, 0xe427c7e3, 0x24240420, 0xa42484a0, 0xc80bcbc3, 0x50134353, + 0x080a0a02, 0x84078783, 0xd819c9d1, 0x4c0c4c40, 0x80038383, 0x8c0f8f83, + 0xcc0ecec2, 0x383b0b33, 0x480a4a42, 0xb43787b3, +}; + +static const u32 SS2[256] = { + 0xa1a82989, 0x81840585, 0xd2d416c6, 0xd3d013c3, 0x50541444, 0x111c1d0d, + 0xa0ac2c8c, 0x21242505, 0x515c1d4d, 0x43400343, 0x10181808, 0x121c1e0e, + 0x51501141, 0xf0fc3ccc, 0xc2c80aca, 0x63602343, 0x20282808, 0x40440444, + 0x20202000, 0x919c1d8d, 0xe0e020c0, 0xe2e022c2, 0xc0c808c8, 0x13141707, + 0xa1a42585, 0x838c0f8f, 0x03000303, 0x73783b4b, 0xb3b83b8b, 0x13101303, + 0xd2d012c2, 0xe2ec2ece, 0x70703040, 0x808c0c8c, 0x333c3f0f, 0xa0a82888, + 0x32303202, 0xd1dc1dcd, 0xf2f436c6, 0x70743444, 0xe0ec2ccc, 0x91941585, + 0x03080b0b, 0x53541747, 0x505c1c4c, 0x53581b4b, 0xb1bc3d8d, 0x01000101, + 0x20242404, 0x101c1c0c, 0x73703343, 0x90981888, 0x10101000, 0xc0cc0ccc, + 0xf2f032c2, 0xd1d819c9, 0x202c2c0c, 0xe3e427c7, 0x72703242, 0x83800383, + 0x93981b8b, 0xd1d011c1, 0x82840686, 0xc1c809c9, 0x60602040, 0x50501040, + 0xa3a02383, 0xe3e82bcb, 0x010c0d0d, 0xb2b43686, 0x929c1e8e, 0x434c0f4f, + 0xb3b43787, 0x52581a4a, 0xc2c406c6, 0x70783848, 0xa2a42686, 0x12101202, + 0xa3ac2f8f, 0xd1d415c5, 0x61602141, 0xc3c003c3, 0xb0b43484, 0x41400141, + 0x52501242, 0x717c3d4d, 0x818c0d8d, 0x00080808, 0x131c1f0f, 0x91981989, + 0x00000000, 0x11181909, 0x00040404, 0x53501343, 0xf3f437c7, 0xe1e021c1, + 0xf1fc3dcd, 0x72743646, 0x232c2f0f, 0x23242707, 0xb0b03080, 0x83880b8b, + 0x020c0e0e, 0xa3a82b8b, 0xa2a02282, 0x626c2e4e, 0x93901383, 0x414c0d4d, + 0x61682949, 0x707c3c4c, 0x01080909, 0x02080a0a, 0xb3bc3f8f, 0xe3ec2fcf, + 0xf3f033c3, 0xc1c405c5, 0x83840787, 0x10141404, 0xf2fc3ece, 0x60642444, + 0xd2dc1ece, 0x222c2e0e, 0x43480b4b, 0x12181a0a, 0x02040606, 0x21202101, + 0x63682b4b, 0x62642646, 0x02000202, 0xf1f435c5, 0x92901282, 0x82880a8a, + 0x000c0c0c, 0xb3b03383, 0x727c3e4e, 0xd0d010c0, 0x72783a4a, 0x43440747, + 0x92941686, 0xe1e425c5, 0x22242606, 0x80800080, 0xa1ac2d8d, 0xd3dc1fcf, + 0xa1a02181, 0x30303000, 0x33343707, 0xa2ac2e8e, 0x32343606, 0x11141505, + 0x22202202, 0x30383808, 0xf0f434c4, 0xa3a42787, 0x41440545, 0x404c0c4c, + 0x81800181, 0xe1e829c9, 0x80840484, 0x93941787, 0x31343505, 0xc3c80bcb, + 0xc2cc0ece, 0x303c3c0c, 0x71703141, 0x11101101, 0xc3c407c7, 0x81880989, + 0x71743545, 0xf3f83bcb, 0xd2d81aca, 0xf0f838c8, 0x90941484, 0x51581949, + 0x82800282, 0xc0c404c4, 0xf3fc3fcf, 0x41480949, 0x31383909, 0x63642747, + 0xc0c000c0, 0xc3cc0fcf, 0xd3d417c7, 0xb0b83888, 0x030c0f0f, 0x828c0e8e, + 0x42400242, 0x23202303, 0x91901181, 0x606c2c4c, 0xd3d81bcb, 0xa0a42484, + 0x30343404, 0xf1f031c1, 0x40480848, 0xc2c002c2, 0x636c2f4f, 0x313c3d0d, + 0x212c2d0d, 0x40400040, 0xb2bc3e8e, 0x323c3e0e, 0xb0bc3c8c, 0xc1c001c1, + 0xa2a82a8a, 0xb2b83a8a, 0x424c0e4e, 0x51541545, 0x33383b0b, 0xd0dc1ccc, + 0x60682848, 0x737c3f4f, 0x909c1c8c, 0xd0d818c8, 0x42480a4a, 0x52541646, + 0x73743747, 0xa0a02080, 0xe1ec2dcd, 0x42440646, 0xb1b43585, 0x23282b0b, + 0x61642545, 0xf2f83aca, 0xe3e023c3, 0xb1b83989, 0xb1b03181, 0x939c1f8f, + 0x525c1e4e, 0xf1f839c9, 0xe2e426c6, 0xb2b03282, 0x31303101, 0xe2e82aca, + 0x616c2d4d, 0x535c1f4f, 0xe0e424c4, 0xf0f030c0, 0xc1cc0dcd, 0x80880888, + 0x12141606, 0x32383a0a, 0x50581848, 0xd0d414c4, 0x62602242, 0x21282909, + 0x03040707, 0x33303303, 0xe0e828c8, 0x13181b0b, 0x01040505, 0x71783949, + 0x90901080, 0x62682a4a, 0x22282a0a, 0x92981a8a, +}; + +static const u32 SS3[256] = { + 
0x08303838, 0xc8e0e828, 0x0d212c2d, 0x86a2a426, 0xcfc3cc0f, 0xced2dc1e, + 0x83b3b033, 0x88b0b838, 0x8fa3ac2f, 0x40606020, 0x45515415, 0xc7c3c407, + 0x44404404, 0x4f636c2f, 0x4b63682b, 0x4b53581b, 0xc3c3c003, 0x42626022, + 0x03333033, 0x85b1b435, 0x09212829, 0x80a0a020, 0xc2e2e022, 0x87a3a427, + 0xc3d3d013, 0x81919011, 0x01111011, 0x06020406, 0x0c101c1c, 0x8cb0bc3c, + 0x06323436, 0x4b43480b, 0xcfe3ec2f, 0x88808808, 0x4c606c2c, 0x88a0a828, + 0x07131417, 0xc4c0c404, 0x06121416, 0xc4f0f434, 0xc2c2c002, 0x45414405, + 0xc1e1e021, 0xc6d2d416, 0x0f333c3f, 0x0d313c3d, 0x8e828c0e, 0x88909818, + 0x08202828, 0x4e424c0e, 0xc6f2f436, 0x0e323c3e, 0x85a1a425, 0xc9f1f839, + 0x0d010c0d, 0xcfd3dc1f, 0xc8d0d818, 0x0b23282b, 0x46626426, 0x4a72783a, + 0x07232427, 0x0f232c2f, 0xc1f1f031, 0x42727032, 0x42424002, 0xc4d0d414, + 0x41414001, 0xc0c0c000, 0x43737033, 0x47636427, 0x8ca0ac2c, 0x8b83880b, + 0xc7f3f437, 0x8da1ac2d, 0x80808000, 0x0f131c1f, 0xcac2c80a, 0x0c202c2c, + 0x8aa2a82a, 0x04303434, 0xc2d2d012, 0x0b03080b, 0xcee2ec2e, 0xc9e1e829, + 0x4d515c1d, 0x84909414, 0x08101818, 0xc8f0f838, 0x47535417, 0x8ea2ac2e, + 0x08000808, 0xc5c1c405, 0x03131013, 0xcdc1cc0d, 0x86828406, 0x89b1b839, + 0xcff3fc3f, 0x4d717c3d, 0xc1c1c001, 0x01313031, 0xc5f1f435, 0x8a82880a, + 0x4a62682a, 0x81b1b031, 0xc1d1d011, 0x00202020, 0xc7d3d417, 0x02020002, + 0x02222022, 0x04000404, 0x48606828, 0x41717031, 0x07030407, 0xcbd3d81b, + 0x8d919c1d, 0x89919819, 0x41616021, 0x8eb2bc3e, 0xc6e2e426, 0x49515819, + 0xcdd1dc1d, 0x41515011, 0x80909010, 0xccd0dc1c, 0x8a92981a, 0x83a3a023, + 0x8ba3a82b, 0xc0d0d010, 0x81818001, 0x0f030c0f, 0x47434407, 0x0a12181a, + 0xc3e3e023, 0xcce0ec2c, 0x8d818c0d, 0x8fb3bc3f, 0x86929416, 0x4b73783b, + 0x4c505c1c, 0x82a2a022, 0x81a1a021, 0x43636023, 0x03232023, 0x4d414c0d, + 0xc8c0c808, 0x8e929c1e, 0x8c909c1c, 0x0a32383a, 0x0c000c0c, 0x0e222c2e, + 0x8ab2b83a, 0x4e626c2e, 0x8f939c1f, 0x4a52581a, 0xc2f2f032, 0x82929012, + 0xc3f3f033, 0x49414809, 0x48707838, 0xccc0cc0c, 0x05111415, 0xcbf3f83b, + 0x40707030, 0x45717435, 0x4f737c3f, 0x05313435, 0x00101010, 0x03030003, + 0x44606424, 0x4d616c2d, 0xc6c2c406, 0x44707434, 0xc5d1d415, 0x84b0b434, + 0xcae2e82a, 0x09010809, 0x46727436, 0x09111819, 0xcef2fc3e, 0x40404000, + 0x02121012, 0xc0e0e020, 0x8db1bc3d, 0x05010405, 0xcaf2f83a, 0x01010001, + 0xc0f0f030, 0x0a22282a, 0x4e525c1e, 0x89a1a829, 0x46525416, 0x43434003, + 0x85818405, 0x04101414, 0x89818809, 0x8b93981b, 0x80b0b030, 0xc5e1e425, + 0x48404808, 0x49717839, 0x87939417, 0xccf0fc3c, 0x0e121c1e, 0x82828002, + 0x01212021, 0x8c808c0c, 0x0b13181b, 0x4f535c1f, 0x47737437, 0x44505414, + 0x82b2b032, 0x0d111c1d, 0x05212425, 0x4f434c0f, 0x00000000, 0x46424406, + 0xcde1ec2d, 0x48505818, 0x42525012, 0xcbe3e82b, 0x4e727c3e, 0xcad2d81a, + 0xc9c1c809, 0xcdf1fc3d, 0x00303030, 0x85919415, 0x45616425, 0x0c303c3c, + 0x86b2b436, 0xc4e0e424, 0x8bb3b83b, 0x4c707c3c, 0x0e020c0e, 0x40505010, + 0x09313839, 0x06222426, 0x02323032, 0x84808404, 0x49616829, 0x83939013, + 0x07333437, 0xc7e3e427, 0x04202424, 0x84a0a424, 0xcbc3c80b, 0x43535013, + 0x0a02080a, 0x87838407, 0xc9d1d819, 0x4c404c0c, 0x83838003, 0x8f838c0f, + 0xcec2cc0e, 0x0b33383b, 0x4a42480a, 0x87b3b437, +}; + +static const u32 KC[NUMKC] = { + 0x9e3779b9, 0x3c6ef373, 0x78dde6e6, 0xf1bbcdcc, + 0xe3779b99, 0xc6ef3733, 0x8dde6e67, 0x1bbcdccf, + 0x3779b99e, 0x6ef3733c, 0xdde6e678, 0xbbcdccf1, + 0x779b99e3, 0xef3733c6, 0xde6e678d, 0xbcdccf1b, +}; + + + +/* Perform the key setup. 
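+
+   The KC[] round constants above are successive one-bit left rotations
+   of the golden-ratio word 0x9e3779b9 (about 2^32/phi).  A minimal
+   sketch, assuming a 32-bit left-rotate helper rol32, that would
+   regenerate the table:
+
+     for (i = 0; i < NUMKC; i++)
+       KC[i] = rol32 (0x9e3779b9, i);
+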
+ */ +static gcry_err_code_t +do_setkey (SEED_context *ctx, const byte *key, const unsigned keylen) +{ + static int initialized = 0; + static const char *selftest_failed=0; + u32 x1, x2, x3, x4; + union wordbuf t0, t1; + u32 *keyout = ctx->keyschedule; + int i; + + if (!initialized) + { + initialized = 1; + selftest_failed = selftest (); + if( selftest_failed ) + log_error ("%s\n", selftest_failed ); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + if (keylen != 16) + return GPG_ERR_INV_KEYLEN; + + x1 = GETU32 (key); + x2 = GETU32 (key+4); + x3 = GETU32 (key+8); + x4 = GETU32 (key+12); + + for (i = 0; i < NUMKC; i++) + { + t0.w = x1 + x3 - KC[i]; + t1.w = x2 + KC[i] - x4; + *(keyout++) = SS0[t0.b0] ^ SS1[t0.b1] ^ SS2[t0.b2] ^ SS3[t0.b3]; + *(keyout++) = SS0[t1.b0] ^ SS1[t1.b1] ^ SS2[t1.b2] ^ SS3[t1.b3]; + + if (i % 2 == 0) + { + t0.w = x1; + x1 = (x1>>8) ^ (x2<<24); + x2 = (x2>>8) ^ (t0.w<<24); + } + else + { + t0.w = x3; + x3 = (x3<<8) ^ (x4>>24); + x4 = (x4<<8) ^ (t0.w>>24); + } + } + + return 0; +} + +static gcry_err_code_t +seed_setkey (void *context, const byte *key, const unsigned keylen) +{ + SEED_context *ctx = context; + + int rc = do_setkey (ctx, key, keylen); + _gcry_burn_stack (4*6 + sizeof(void*)*2 + sizeof(int)*2); + return rc; +} + + + +#define OP(X1, X2, X3, X4, rbase) \ + t0.w = X3 ^ ctx->keyschedule[rbase]; \ + t1.w = X4 ^ ctx->keyschedule[rbase+1]; \ + t1.w ^= t0.w; \ + t1.w = SS0[t1.b0] ^ SS1[t1.b1] ^ SS2[t1.b2] ^ SS3[t1.b3]; \ + t0.w += t1.w; \ + t0.w = SS0[t0.b0] ^ SS1[t0.b1] ^ SS2[t0.b2] ^ SS3[t0.b3]; \ + t1.w += t0.w; \ + t1.w = SS0[t1.b0] ^ SS1[t1.b1] ^ SS2[t1.b2] ^ SS3[t1.b3]; \ + t0.w += t1.w; \ + X1 ^= t0.w; \ + X2 ^= t1.w; + +/* Encrypt one block. inbuf and outbuf may be the same. */ +static void +do_encrypt (const SEED_context *ctx, byte *outbuf, const byte *inbuf) +{ + u32 x1, x2, x3, x4; + union wordbuf t0, t1; + + x1 = GETU32 (inbuf); + x2 = GETU32 (inbuf+4); + x3 = GETU32 (inbuf+8); + x4 = GETU32 (inbuf+12); + + OP (x1, x2, x3, x4, 0); + OP (x3, x4, x1, x2, 2); + OP (x1, x2, x3, x4, 4); + OP (x3, x4, x1, x2, 6); + OP (x1, x2, x3, x4, 8); + OP (x3, x4, x1, x2, 10); + OP (x1, x2, x3, x4, 12); + OP (x3, x4, x1, x2, 14); + OP (x1, x2, x3, x4, 16); + OP (x3, x4, x1, x2, 18); + OP (x1, x2, x3, x4, 20); + OP (x3, x4, x1, x2, 22); + OP (x1, x2, x3, x4, 24); + OP (x3, x4, x1, x2, 26); + OP (x1, x2, x3, x4, 28); + OP (x3, x4, x1, x2, 30); + + PUTU32 (outbuf, x3); + PUTU32 (outbuf+4, x4); + PUTU32 (outbuf+8, x1); + PUTU32 (outbuf+12, x2); +} + +static unsigned int +seed_encrypt (void *context, byte *outbuf, const byte *inbuf) +{ + SEED_context *ctx = context; + + do_encrypt (ctx, outbuf, inbuf); + return /*burn_stack*/ (4*6); +} + + + +/* Decrypt one block. inbuf and outbuf may be the same. 
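+   Identical to do_encrypt except that the round subkeys are consumed in
+   reverse order (rbase 30 down to 0).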
*/ +static void +do_decrypt (SEED_context *ctx, byte *outbuf, const byte *inbuf) +{ + u32 x1, x2, x3, x4; + union wordbuf t0, t1; + + x1 = GETU32 (inbuf); + x2 = GETU32 (inbuf+4); + x3 = GETU32 (inbuf+8); + x4 = GETU32 (inbuf+12); + + OP (x1, x2, x3, x4, 30); + OP (x3, x4, x1, x2, 28); + OP (x1, x2, x3, x4, 26); + OP (x3, x4, x1, x2, 24); + OP (x1, x2, x3, x4, 22); + OP (x3, x4, x1, x2, 20); + OP (x1, x2, x3, x4, 18); + OP (x3, x4, x1, x2, 16); + OP (x1, x2, x3, x4, 14); + OP (x3, x4, x1, x2, 12); + OP (x1, x2, x3, x4, 10); + OP (x3, x4, x1, x2, 8); + OP (x1, x2, x3, x4, 6); + OP (x3, x4, x1, x2, 4); + OP (x1, x2, x3, x4, 2); + OP (x3, x4, x1, x2, 0); + + PUTU32 (outbuf, x3); + PUTU32 (outbuf+4, x4); + PUTU32 (outbuf+8, x1); + PUTU32 (outbuf+12, x2); +} + +static unsigned int +seed_decrypt (void *context, byte *outbuf, const byte *inbuf) +{ + SEED_context *ctx = context; + + do_decrypt (ctx, outbuf, inbuf); + return /*burn_stack*/ (4*6); +} + + +/* Test a single encryption and decryption with each key size. */ +static const char* +selftest (void) +{ + SEED_context ctx; + byte scratch[16]; + + /* The test vector is taken from the appendix section B.3 of RFC4269. + */ + static const byte plaintext[16] = { + 0x83, 0xA2, 0xF8, 0xA2, 0x88, 0x64, 0x1F, 0xB9, + 0xA4, 0xE9, 0xA5, 0xCC, 0x2F, 0x13, 0x1C, 0x7D + }; + static const byte key[16] = { + 0x47, 0x06, 0x48, 0x08, 0x51, 0xE6, 0x1B, 0xE8, + 0x5D, 0x74, 0xBF, 0xB3, 0xFD, 0x95, 0x61, 0x85 + }; + static const byte ciphertext[16] = { + 0xEE, 0x54, 0xD1, 0x3E, 0xBC, 0xAE, 0x70, 0x6D, + 0x22, 0x6B, 0xC3, 0x14, 0x2C, 0xD4, 0x0D, 0x4A, + }; + + seed_setkey (&ctx, key, sizeof(key)); + seed_encrypt (&ctx, scratch, plaintext); + if (memcmp (scratch, ciphertext, sizeof (ciphertext))) + return "SEED test encryption failed."; + seed_decrypt (&ctx, scratch, scratch); + if (memcmp (scratch, plaintext, sizeof (plaintext))) + return "SEED test decryption failed."; + + return NULL; +} + + + +static gcry_cipher_oid_spec_t seed_oids[] = + { + { "1.2.410.200004.1.3", GCRY_CIPHER_MODE_ECB }, + { "1.2.410.200004.1.4", GCRY_CIPHER_MODE_CBC }, + { "1.2.410.200004.1.5", GCRY_CIPHER_MODE_CFB }, + { "1.2.410.200004.1.6", GCRY_CIPHER_MODE_OFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_seed = + { + GCRY_CIPHER_SEED, {0, 0}, + "SEED", NULL, seed_oids, 16, 128, sizeof (SEED_context), + seed_setkey, seed_encrypt, seed_decrypt, + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/serpent-armv7-neon.S b/libotr/libgcrypt-1.8.7/cipher/serpent-armv7-neon.S new file mode 100644 index 0000000..adff639 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/serpent-armv7-neon.S @@ -0,0 +1,1124 @@ +/* serpent-armv7-neon.S - ARM/NEON assembly implementation of Serpent cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) + +.text + +.syntax unified +.fpu neon +.arm + +/* ARM registers */ +#define RROUND r0 + +/* NEON vector registers */ +#define RA0 q0 +#define RA1 q1 +#define RA2 q2 +#define RA3 q3 +#define RA4 q4 +#define RB0 q5 +#define RB1 q6 +#define RB2 q7 +#define RB3 q8 +#define RB4 q9 + +#define RT0 q10 +#define RT1 q11 +#define RT2 q12 +#define RT3 q13 + +#define RA0d0 d0 +#define RA0d1 d1 +#define RA1d0 d2 +#define RA1d1 d3 +#define RA2d0 d4 +#define RA2d1 d5 +#define RA3d0 d6 +#define RA3d1 d7 +#define RA4d0 d8 +#define RA4d1 d9 +#define RB0d0 d10 +#define RB0d1 d11 +#define RB1d0 d12 +#define RB1d1 d13 +#define RB2d0 d14 +#define RB2d1 d15 +#define RB3d0 d16 +#define RB3d1 d17 +#define RB4d0 d18 +#define RB4d1 d19 +#define RT0d0 d20 +#define RT0d1 d21 +#define RT1d0 d22 +#define RT1d1 d23 +#define RT2d0 d24 +#define RT2d1 d25 + +/********************************************************************** + helper macros + **********************************************************************/ + +#define transpose_4x4(_q0, _q1, _q2, _q3) \ + vtrn.32 _q0, _q1; \ + vtrn.32 _q2, _q3; \ + vswp _q0##d1, _q2##d0; \ + vswp _q1##d1, _q3##d0; + +/********************************************************************** + 8-way serpent + **********************************************************************/ + +/* + * These are the S-Boxes of Serpent from following research paper. + * + * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference, + * (New York, New York, USA), p. 317–329, National Institute of Standards and + * Technology, 2000. + * + * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf + * + */ +#define SBOX0(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + veor a3, a3, a0; veor b3, b3, b0; vmov a4, a1; vmov b4, b1; \ + vand a1, a1, a3; vand b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \ + veor a1, a1, a0; veor b1, b1, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ + veor a0, a0, a4; veor b0, b0, b4; veor a4, a4, a3; veor b4, b4, b3; \ + veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a1; vorr b2, b2, b1; \ + veor a2, a2, a4; veor b2, b2, b4; vmvn a4, a4; vmvn b4, b4; \ + vorr a4, a4, a1; vorr b4, b4, b1; veor a1, a1, a3; veor b1, b1, b3; \ + veor a1, a1, a4; veor b1, b1, b4; vorr a3, a3, a0; vorr b3, b3, b0; \ + veor a1, a1, a3; veor b1, b1, b3; veor a4, a3; veor b4, b3; + +#define SBOX0_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmvn a2, a2; vmvn b2, b2; vmov a4, a1; vmov b4, b1; \ + vorr a1, a1, a0; vorr b1, b1, b0; vmvn a4, a4; vmvn b4, b4; \ + veor a1, a1, a2; veor b1, b1, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ + veor a1, a1, a3; veor b1, b1, b3; veor a0, a0, a4; veor b0, b0, b4; \ + veor a2, a2, a0; veor b2, b2, b0; vand a0, a0, a3; vand b0, b0, b3; \ + veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a1; vorr b0, b0, b1; \ + veor a0, a0, a2; veor b0, b0, b2; veor a3, a3, a4; veor b3, b3, b4; \ + veor a2, a2, a1; veor b2, b2, b1; veor a3, a3, a0; veor b3, b3, b0; \ + veor a3, a3, a1; veor b3, b3, b1;\ + vand a2, a2, a3; vand b2, b2, b3;\ + veor a4, a2; veor b4, b2; + +#define SBOX1(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmvn a0, a0; vmvn b0, b0; vmvn a2, a2; vmvn b2, b2; \ + vmov a4, a0; vmov b4, b0; vand a0, a0, a1; vand b0, b0, b1; \ + veor a2, a2, a0; veor b2, b2, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ + veor a3, a3, a2; veor b3, b3, b2; veor a1, a1, a0; veor b1, b1, b0; \ + veor 
a0, a0, a4; veor b0, b0, b4; vorr a4, a4, a1; vorr b4, b4, b1; \ + veor a1, a1, a3; veor b1, b1, b3; vorr a2, a2, a0; vorr b2, b2, b0; \ + vand a2, a2, a4; vand b2, b2, b4; veor a0, a0, a1; veor b0, b0, b1; \ + vand a1, a1, a2; vand b1, b1, b2;\ + veor a1, a1, a0; veor b1, b1, b0; vand a0, a0, a2; vand b0, b0, b2; \ + veor a0, a4; veor b0, b4; + +#define SBOX1_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmov a4, a1; vmov b4, b1; veor a1, a1, a3; veor b1, b1, b3; \ + vand a3, a3, a1; vand b3, b3, b1; veor a4, a4, a2; veor b4, b4, b2; \ + veor a3, a3, a0; veor b3, b3, b0; vorr a0, a0, a1; vorr b0, b0, b1; \ + veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a4; veor b0, b0, b4; \ + vorr a0, a0, a2; vorr b0, b0, b2; veor a1, a1, a3; veor b1, b1, b3; \ + veor a0, a0, a1; veor b0, b0, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ + veor a1, a1, a0; veor b1, b1, b0; vmvn a4, a4; vmvn b4, b4; \ + veor a4, a4, a1; veor b4, b4, b1; vorr a1, a1, a0; vorr b1, b1, b0; \ + veor a1, a1, a0; veor b1, b1, b0;\ + vorr a1, a1, a4; vorr b1, b1, b4;\ + veor a3, a1; veor b3, b1; + +#define SBOX2(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmov a4, a0; vmov b4, b0; vand a0, a0, a2; vand b0, b0, b2; \ + veor a0, a0, a3; veor b0, b0, b3; veor a2, a2, a1; veor b2, b2, b1; \ + veor a2, a2, a0; veor b2, b2, b0; vorr a3, a3, a4; vorr b3, b3, b4; \ + veor a3, a3, a1; veor b3, b3, b1; veor a4, a4, a2; veor b4, b4, b2; \ + vmov a1, a3; vmov b1, b3; vorr a3, a3, a4; vorr b3, b3, b4; \ + veor a3, a3, a0; veor b3, b3, b0; vand a0, a0, a1; vand b0, b0, b1; \ + veor a4, a4, a0; veor b4, b4, b0; veor a1, a1, a3; veor b1, b1, b3; \ + veor a1, a1, a4; veor b1, b1, b4; vmvn a4, a4; vmvn b4, b4; + +#define SBOX2_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + veor a2, a2, a3; veor b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ + vmov a4, a3; vmov b4, b3; vand a3, a3, a2; vand b3, b3, b2; \ + veor a3, a3, a1; veor b3, b3, b1; vorr a1, a1, a2; vorr b1, b1, b2; \ + veor a1, a1, a4; veor b1, b1, b4; vand a4, a4, a3; vand b4, b4, b3; \ + veor a2, a2, a3; veor b2, b2, b3; vand a4, a4, a0; vand b4, b4, b0; \ + veor a4, a4, a2; veor b4, b4, b2; vand a2, a2, a1; vand b2, b2, b1; \ + vorr a2, a2, a0; vorr b2, b2, b0; vmvn a3, a3; vmvn b3, b3; \ + veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a3; veor b0, b0, b3; \ + vand a0, a0, a1; vand b0, b0, b1; veor a3, a3, a4; veor b3, b3, b4; \ + veor a3, a0; veor b3, b0; + +#define SBOX3(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmov a4, a0; vmov b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ + veor a3, a3, a1; veor b3, b3, b1; vand a1, a1, a4; vand b1, b1, b4; \ + veor a4, a4, a2; veor b4, b4, b2; veor a2, a2, a3; veor b2, b2, b3; \ + vand a3, a3, a0; vand b3, b3, b0; vorr a4, a4, a1; vorr b4, b4, b1; \ + veor a3, a3, a4; veor b3, b3, b4; veor a0, a0, a1; veor b0, b0, b1; \ + vand a4, a4, a0; vand b4, b4, b0; veor a1, a1, a3; veor b1, b1, b3; \ + veor a4, a4, a2; veor b4, b4, b2; vorr a1, a1, a0; vorr b1, b1, b0; \ + veor a1, a1, a2; veor b1, b1, b2; veor a0, a0, a3; veor b0, b0, b3; \ + vmov a2, a1; vmov b2, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ + veor a1, a0; veor b1, b0; + +#define SBOX3_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmov a4, a2; vmov b4, b2; veor a2, a2, a1; veor b2, b2, b1; \ + veor a0, a0, a2; veor b0, b0, b2; vand a4, a4, a2; vand b4, b4, b2; \ + veor a4, a4, a0; veor b4, b4, b0; vand a0, a0, a1; vand b0, b0, b1; \ + veor a1, a1, a3; veor b1, b1, b3; vorr a3, a3, a4; vorr b3, b3, b4; \ + veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a3; veor b0, b0, b3; \ + 
veor a1, a1, a4; veor b1, b1, b4; vand a3, a3, a2; vand b3, b3, b2; \ + veor a3, a3, a1; veor b3, b3, b1; veor a1, a1, a0; veor b1, b1, b0; \ + vorr a1, a1, a2; vorr b1, b1, b2; veor a0, a0, a3; veor b0, b0, b3; \ + veor a1, a1, a4; veor b1, b1, b4;\ + veor a0, a1; veor b0, b1; + +#define SBOX4(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + veor a1, a1, a3; veor b1, b1, b3; vmvn a3, a3; vmvn b3, b3; \ + veor a2, a2, a3; veor b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ + vmov a4, a1; vmov b4, b1; vand a1, a1, a3; vand b1, b1, b3; \ + veor a1, a1, a2; veor b1, b1, b2; veor a4, a4, a3; veor b4, b4, b3; \ + veor a0, a0, a4; veor b0, b0, b4; vand a2, a2, a4; vand b2, b2, b4; \ + veor a2, a2, a0; veor b2, b2, b0; vand a0, a0, a1; vand b0, b0, b1; \ + veor a3, a3, a0; veor b3, b3, b0; vorr a4, a4, a1; vorr b4, b4, b1; \ + veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ + veor a0, a0, a2; veor b0, b0, b2; vand a2, a2, a3; vand b2, b2, b3; \ + vmvn a0, a0; vmvn b0, b0; veor a4, a2; veor b4, b2; + +#define SBOX4_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmov a4, a2; vmov b4, b2; vand a2, a2, a3; vand b2, b2, b3; \ + veor a2, a2, a1; veor b2, b2, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ + vand a1, a1, a0; vand b1, b1, b0; veor a4, a4, a2; veor b4, b4, b2; \ + veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a2; vand b1, b1, b2; \ + vmvn a0, a0; vmvn b0, b0; veor a3, a3, a4; veor b3, b3, b4; \ + veor a1, a1, a3; veor b1, b1, b3; vand a3, a3, a0; vand b3, b3, b0; \ + veor a3, a3, a2; veor b3, b3, b2; veor a0, a0, a1; veor b0, b0, b1; \ + vand a2, a2, a0; vand b2, b2, b0; veor a3, a3, a0; veor b3, b3, b0; \ + veor a2, a2, a4; veor b2, b2, b4;\ + vorr a2, a2, a3; vorr b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ + veor a2, a1; veor b2, b1; + +#define SBOX5(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + veor a0, a0, a1; veor b0, b0, b1; veor a1, a1, a3; veor b1, b1, b3; \ + vmvn a3, a3; vmvn b3, b3; vmov a4, a1; vmov b4, b1; \ + vand a1, a1, a0; vand b1, b1, b0; veor a2, a2, a3; veor b2, b2, b3; \ + veor a1, a1, a2; veor b1, b1, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ + veor a4, a4, a3; veor b4, b4, b3; vand a3, a3, a1; vand b3, b3, b1; \ + veor a3, a3, a0; veor b3, b3, b0; veor a4, a4, a1; veor b4, b4, b1; \ + veor a4, a4, a2; veor b4, b4, b2; veor a2, a2, a0; veor b2, b2, b0; \ + vand a0, a0, a3; vand b0, b0, b3; vmvn a2, a2; vmvn b2, b2; \ + veor a0, a0, a4; veor b0, b0, b4; vorr a4, a4, a3; vorr b4, b4, b3; \ + veor a2, a4; veor b2, b4; + +#define SBOX5_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmvn a1, a1; vmvn b1, b1; vmov a4, a3; vmov b4, b3; \ + veor a2, a2, a1; veor b2, b2, b1; vorr a3, a3, a0; vorr b3, b3, b0; \ + veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a1; vorr b2, b2, b1; \ + vand a2, a2, a0; vand b2, b2, b0; veor a4, a4, a3; veor b4, b4, b3; \ + veor a2, a2, a4; veor b2, b2, b4; vorr a4, a4, a0; vorr b4, b4, b0; \ + veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a2; vand b1, b1, b2; \ + veor a1, a1, a3; veor b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \ + vand a3, a3, a4; vand b3, b3, b4; veor a4, a4, a1; veor b4, b4, b1; \ + veor a3, a3, a4; veor b3, b3, b4; vmvn a4, a4; vmvn b4, b4; \ + veor a3, a0; veor b3, b0; + +#define SBOX6(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vmvn a2, a2; vmvn b2, b2; vmov a4, a3; vmov b4, b3; \ + vand a3, a3, a0; vand b3, b3, b0; veor a0, a0, a4; veor b0, b0, b4; \ + veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ + veor a1, a1, a3; veor b1, b1, b3; veor a2, a2, a0; veor b2, b2, b0; \ + 
vorr a0, a0, a1; vorr b0, b0, b1; veor a2, a2, a1; veor b2, b2, b1; \
+ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \
+ veor a0, a0, a2; veor b0, b0, b2; veor a4, a4, a3; veor b4, b4, b3; \
+ veor a4, a4, a0; veor b4, b4, b0; vmvn a3, a3; vmvn b3, b3; \
+ vand a2, a2, a4; vand b2, b2, b4;\
+ veor a2, a3; veor b2, b3;
+
+#define SBOX6_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ veor a0, a0, a2; veor b0, b0, b2; vmov a4, a2; vmov b4, b2; \
+ vand a2, a2, a0; vand b2, b2, b0; veor a4, a4, a3; veor b4, b4, b3; \
+ vmvn a2, a2; vmvn b2, b2; veor a3, a3, a1; veor b3, b3, b1; \
+ veor a2, a2, a3; veor b2, b2, b3; vorr a4, a4, a0; vorr b4, b4, b0; \
+ veor a0, a0, a2; veor b0, b0, b2; veor a3, a3, a4; veor b3, b3, b4; \
+ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a3; vand b1, b1, b3; \
+ veor a1, a1, a0; veor b1, b1, b0; veor a0, a0, a3; veor b0, b0, b3; \
+ vorr a0, a0, a2; vorr b0, b0, b2; veor a3, a3, a1; veor b3, b3, b1; \
+ veor a4, a0; veor b4, b0;
+
+#define SBOX7(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a1; vmov b4, b1; vorr a1, a1, a2; vorr b1, b1, b2; \
+ veor a1, a1, a3; veor b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \
+ veor a2, a2, a1; veor b2, b2, b1; vorr a3, a3, a4; vorr b3, b3, b4; \
+ vand a3, a3, a0; vand b3, b3, b0; veor a4, a4, a2; veor b4, b4, b2; \
+ veor a3, a3, a1; veor b3, b3, b1; vorr a1, a1, a4; vorr b1, b1, b4; \
+ veor a1, a1, a0; veor b1, b1, b0; vorr a0, a0, a4; vorr b0, b0, b4; \
+ veor a0, a0, a2; veor b0, b0, b2; veor a1, a1, a4; veor b1, b1, b4; \
+ veor a2, a2, a1; veor b2, b2, b1; vand a1, a1, a0; vand b1, b1, b0; \
+ veor a1, a1, a4; veor b1, b1, b4; vmvn a2, a2; vmvn b2, b2; \
+ vorr a2, a2, a0; vorr b2, b2, b0;\
+ veor a4, a2; veor b4, b2;
+
+#define SBOX7_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a2; vmov b4, b2; veor a2, a2, a0; veor b2, b2, b0; \
+ vand a0, a0, a3; vand b0, b0, b3; vorr a4, a4, a3; vorr b4, b4, b3; \
+ vmvn a2, a2; vmvn b2, b2; veor a3, a3, a1; veor b3, b3, b1; \
+ vorr a1, a1, a0; vorr b1, b1, b0; veor a0, a0, a2; veor b0, b0, b2; \
+ vand a2, a2, a4; vand b2, b2, b4; vand a3, a3, a4; vand b3, b3, b4; \
+ veor a1, a1, a2; veor b1, b1, b2; veor a2, a2, a0; veor b2, b2, b0; \
+ vorr a0, a0, a2; vorr b0, b0, b2; veor a4, a4, a1; veor b4, b4, b1; \
+ veor a0, a0, a3; veor b0, b0, b3; veor a3, a3, a4; veor b3, b3, b4; \
+ vorr a4, a4, a0; vorr b4, b4, b0; veor a3, a3, a2; veor b3, b3, b2; \
+ veor a4, a2; veor b4, b2;
+
+/* Apply SBOX number WHICH to the block. */
+#define SBOX(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ SBOX##which (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4)
+
+/* Apply inverse SBOX number WHICH to the block. */
+#define SBOX_INVERSE(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ SBOX##which##_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4)
+
+/* XOR round key into block state in a0,a1,a2,a3. a4 used as temporary. */
+#define BLOCK_XOR_KEY(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vdup.32 RT3, RT0d0[0]; \
+ vdup.32 RT1, RT0d0[1]; \
+ vdup.32 RT2, RT0d1[0]; \
+ vdup.32 RT0, RT0d1[1]; \
+ veor a0, a0, RT3; veor b0, b0, RT3; \
+ veor a1, a1, RT1; veor b1, b1, RT1; \
+ veor a2, a2, RT2; veor b2, b2, RT2; \
+ veor a3, a3, RT0; veor b3, b3, RT0;
+
+#define BLOCK_LOAD_KEY_ENC() \
+ vld1.8 {RT0d0, RT0d1}, [RROUND]!;
+
+#define BLOCK_LOAD_KEY_DEC() \
+ vld1.8 {RT0d0, RT0d1}, [RROUND]; \
+ sub RROUND, RROUND, #16
+
+/* Apply the linear transformation to BLOCK.
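+
+   Operating on each 32-bit element, this is the standard Serpent linear
+   transformation; an equivalent scalar sketch (rol being a 32-bit left
+   rotation):
+
+     x0 = rol(x0, 13);  x2 = rol(x2, 3);
+     x1 ^= x0 ^ x2;     x3 ^= x2 ^ (x0 << 3);
+     x1 = rol(x1, 1);   x3 = rol(x3, 7);
+     x0 ^= x1 ^ x3;     x2 ^= x3 ^ (x1 << 7);
+     x0 = rol(x0, 5);   x2 = rol(x2, 22);
+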
*/ +#define LINEAR_TRANSFORMATION(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vshl.u32 a4, a0, #13; vshl.u32 b4, b0, #13; \ + vshr.u32 a0, a0, #(32-13); vshr.u32 b0, b0, #(32-13); \ + veor a0, a0, a4; veor b0, b0, b4; \ + vshl.u32 a4, a2, #3; vshl.u32 b4, b2, #3; \ + vshr.u32 a2, a2, #(32-3); vshr.u32 b2, b2, #(32-3); \ + veor a2, a2, a4; veor b2, b2, b4; \ + veor a1, a0, a1; veor b1, b0, b1; \ + veor a1, a2, a1; veor b1, b2, b1; \ + vshl.u32 a4, a0, #3; vshl.u32 b4, b0, #3; \ + veor a3, a2, a3; veor b3, b2, b3; \ + veor a3, a4, a3; veor b3, b4, b3; \ + vshl.u32 a4, a1, #1; vshl.u32 b4, b1, #1; \ + vshr.u32 a1, a1, #(32-1); vshr.u32 b1, b1, #(32-1); \ + veor a1, a1, a4; veor b1, b1, b4; \ + vshl.u32 a4, a3, #7; vshl.u32 b4, b3, #7; \ + vshr.u32 a3, a3, #(32-7); vshr.u32 b3, b3, #(32-7); \ + veor a3, a3, a4; veor b3, b3, b4; \ + veor a0, a1, a0; veor b0, b1, b0; \ + veor a0, a3, a0; veor b0, b3, b0; \ + vshl.u32 a4, a1, #7; vshl.u32 b4, b1, #7; \ + veor a2, a3, a2; veor b2, b3, b2; \ + veor a2, a4, a2; veor b2, b4, b2; \ + vshl.u32 a4, a0, #5; vshl.u32 b4, b0, #5; \ + vshr.u32 a0, a0, #(32-5); vshr.u32 b0, b0, #(32-5); \ + veor a0, a0, a4; veor b0, b0, b4; \ + vshl.u32 a4, a2, #22; vshl.u32 b4, b2, #22; \ + vshr.u32 a2, a2, #(32-22); vshr.u32 b2, b2, #(32-22); \ + veor a2, a2, a4; veor b2, b2, b4; + +/* Apply the inverse linear transformation to BLOCK. */ +#define LINEAR_TRANSFORMATION_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ + vshr.u32 a4, a2, #22; vshr.u32 b4, b2, #22; \ + vshl.u32 a2, a2, #(32-22); vshl.u32 b2, b2, #(32-22); \ + veor a2, a2, a4; veor b2, b2, b4; \ + vshr.u32 a4, a0, #5; vshr.u32 b4, b0, #5; \ + vshl.u32 a0, a0, #(32-5); vshl.u32 b0, b0, #(32-5); \ + veor a0, a0, a4; veor b0, b0, b4; \ + vshl.u32 a4, a1, #7; vshl.u32 b4, b1, #7; \ + veor a2, a3, a2; veor b2, b3, b2; \ + veor a2, a4, a2; veor b2, b4, b2; \ + veor a0, a1, a0; veor b0, b1, b0; \ + veor a0, a3, a0; veor b0, b3, b0; \ + vshr.u32 a4, a3, #7; vshr.u32 b4, b3, #7; \ + vshl.u32 a3, a3, #(32-7); vshl.u32 b3, b3, #(32-7); \ + veor a3, a3, a4; veor b3, b3, b4; \ + vshr.u32 a4, a1, #1; vshr.u32 b4, b1, #1; \ + vshl.u32 a1, a1, #(32-1); vshl.u32 b1, b1, #(32-1); \ + veor a1, a1, a4; veor b1, b1, b4; \ + vshl.u32 a4, a0, #3; vshl.u32 b4, b0, #3; \ + veor a3, a2, a3; veor b3, b2, b3; \ + veor a3, a4, a3; veor b3, b4, b3; \ + veor a1, a0, a1; veor b1, b0, b1; \ + veor a1, a2, a1; veor b1, b2, b1; \ + vshr.u32 a4, a2, #3; vshr.u32 b4, b2, #3; \ + vshl.u32 a2, a2, #(32-3); vshl.u32 b2, b2, #(32-3); \ + veor a2, a2, a4; veor b2, b2, b4; \ + vshr.u32 a4, a0, #13; vshr.u32 b4, b0, #13; \ + vshl.u32 a0, a0, #(32-13); vshl.u32 b0, b0, #(32-13); \ + veor a0, a0, a4; veor b0, b0, b4; + +/* Apply a Serpent round to eight parallel blocks. This macro increments + `round'. */ +#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ + BLOCK_LOAD_KEY_ENC (); \ + SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ + LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); + +/* Apply the last Serpent round to eight parallel blocks. This macro increments + `round'. 
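+
+   Instead of the linear transformation, the output words are XORed with
+   one more subkey here, so the blocks leave the cipher whitened with the
+   last of the 33 subkeys.
+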
*/ +#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ + BLOCK_LOAD_KEY_ENC (); \ + SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); + +/* Apply an inverse Serpent round to eight parallel blocks. This macro + increments `round'. */ +#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \ + na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, \ + nb0, nb1, nb2, nb3, nb4) \ + LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ + SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \ + BLOCK_LOAD_KEY_DEC (); + +/* Apply the first inverse Serpent round to eight parallel blocks. This macro + increments `round'. */ +#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \ + na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, \ + nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ + BLOCK_LOAD_KEY_DEC (); \ + SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \ + BLOCK_LOAD_KEY_DEC (); + +.align 3 +.type __serpent_enc_blk8,%function; +__serpent_enc_blk8: + /* input: + * r0: round key pointer + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext + * blocks + * output: + * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel + * ciphertext blocks + */ + + transpose_4x4(RA0, RA1, RA2, RA3); + BLOCK_LOAD_KEY_ENC (); + transpose_4x4(RB0, RB1, RB2, RB3); + + ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, + RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); + ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, + RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); + ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2, + RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); + ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, + RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); + ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, + RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); + ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, + RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); + ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, + RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); + ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, + RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); + ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0, + RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0); + ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0, + RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0); + ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2, + RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2); + ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4, + RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4); + ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0, + RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0); + ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0, + RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0); + ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3, + RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3); + ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0, 
+ RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0); + ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4, + RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4); + ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4, + RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4); + ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2, + RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2); + ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3, + RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3); + ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4, + RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4); + ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4, + RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4); + ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0, + RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0); + ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4, + RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4); + ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, + RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); + ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, + RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); + ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2, + RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); + ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, + RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); + ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, + RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); + ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, + RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); + ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, + RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); + ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, + RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); + + transpose_4x4(RA4, RA1, RA2, RA0); + transpose_4x4(RB4, RB1, RB2, RB0); + + bx lr; +.size __serpent_enc_blk8,.-__serpent_enc_blk8; + +.align 3 +.type __serpent_dec_blk8,%function; +__serpent_dec_blk8: + /* input: + * r0: round key pointer + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * ciphertext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext + * blocks + */ + + add RROUND, RROUND, #(32*16); + + transpose_4x4(RA0, RA1, RA2, RA3); + BLOCK_LOAD_KEY_DEC (); + transpose_4x4(RB0, RB1, RB2, RB3); + + ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4, + RA3, RA0, RA1, RA4, RA2, + RB0, RB1, RB2, RB3, RB4, + RB3, RB0, RB1, RB4, RB2); + ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3, + RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3); + ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0, + RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0); + ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3, + RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3); + ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3, + RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3); + ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4, + RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4); + ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3, + RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3); + ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1, + RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1); + 
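/* The remaining inverse rounds repeat this pattern: undo the linear
+ transformation, apply the inverse S-box of the matching encryption
+ round and XOR in that round's subkey, with RROUND stepping backwards
+ through BLOCK_LOAD_KEY_DEC. */
+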
ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2, + RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2); + ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0, + RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0); + ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4, + RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4); + ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0, + RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0); + ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0, + RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0); + ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1, + RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1); + ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0, + RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0); + ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3, + RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3); + ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2, + RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2); + ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4, + RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4); + ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1, + RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1); + ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4, + RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4); + ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4, + RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4); + ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3, + RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3); + ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4, + RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4); + ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0, + RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0); + ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2, + RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2); + ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1, + RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1); + ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3, + RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3); + ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1, + RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1); + ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1, + RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1); + ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0, + RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0); + ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1, + RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1); + ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4, + RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4); + + transpose_4x4(RA0, RA1, RA2, RA3); + transpose_4x4(RB0, RB1, RB2, RB3); + + bx lr; +.size __serpent_dec_blk8,.-__serpent_dec_blk8; + +.align 3 +.globl _gcry_serpent_neon_ctr_enc +.type _gcry_serpent_neon_ctr_enc,%function; +_gcry_serpent_neon_ctr_enc: + /* input: + * r0: ctx, CTX + * r1: dst (8 blocks) + * r2: src (8 blocks) + * r3: iv + */ + + vmov.u8 RT1d0, #0xff; /* u64: -1 */ + push {r4,lr}; + vadd.u64 RT2d0, RT1d0, RT1d0; /* u64: -2 */ + vpush {RA4-RB2}; + + /* load IV and byteswap */ + vld1.8 {RA0}, 
[r3]; + vrev64.u8 RT0, RA0; /* be => le */ + ldr r4, [r3, #8]; + + /* construct IVs */ + vsub.u64 RA2d1, RT0d1, RT2d0; /* +2 */ + vsub.u64 RA1d1, RT0d1, RT1d0; /* +1 */ + cmp r4, #-1; + + vsub.u64 RB0d1, RA2d1, RT2d0; /* +4 */ + vsub.u64 RA3d1, RA2d1, RT1d0; /* +3 */ + ldr r4, [r3, #12]; + + vsub.u64 RB2d1, RB0d1, RT2d0; /* +6 */ + vsub.u64 RB1d1, RB0d1, RT1d0; /* +5 */ + + vsub.u64 RT2d1, RB2d1, RT2d0; /* +8 */ + vsub.u64 RB3d1, RB2d1, RT1d0; /* +7 */ + + vmov RA1d0, RT0d0; + vmov RA2d0, RT0d0; + vmov RA3d0, RT0d0; + vmov RB0d0, RT0d0; + rev r4, r4; + vmov RB1d0, RT0d0; + vmov RB2d0, RT0d0; + vmov RB3d0, RT0d0; + vmov RT2d0, RT0d0; + + /* check need for handling 64-bit overflow and carry */ + beq .Ldo_ctr_carry; + +.Lctr_carry_done: + /* le => be */ + vrev64.u8 RA1, RA1; + vrev64.u8 RA2, RA2; + vrev64.u8 RA3, RA3; + vrev64.u8 RB0, RB0; + vrev64.u8 RT2, RT2; + vrev64.u8 RB1, RB1; + vrev64.u8 RB2, RB2; + vrev64.u8 RB3, RB3; + /* store new IV */ + vst1.8 {RT2}, [r3]; + + bl __serpent_enc_blk8; + + vld1.8 {RT0, RT1}, [r2]!; + vld1.8 {RT2, RT3}, [r2]!; + veor RA4, RA4, RT0; + veor RA1, RA1, RT1; + vld1.8 {RT0, RT1}, [r2]!; + veor RA2, RA2, RT2; + veor RA0, RA0, RT3; + vld1.8 {RT2, RT3}, [r2]!; + veor RB4, RB4, RT0; + veor RT0, RT0; + veor RB1, RB1, RT1; + veor RT1, RT1; + veor RB2, RB2, RT2; + veor RT2, RT2; + veor RB0, RB0, RT3; + veor RT3, RT3; + + vst1.8 {RA4}, [r1]!; + vst1.8 {RA1}, [r1]!; + veor RA1, RA1; + vst1.8 {RA2}, [r1]!; + veor RA2, RA2; + vst1.8 {RA0}, [r1]!; + veor RA0, RA0; + vst1.8 {RB4}, [r1]!; + veor RB4, RB4; + vst1.8 {RB1}, [r1]!; + vst1.8 {RB2}, [r1]!; + vst1.8 {RB0}, [r1]!; + + vpop {RA4-RB2}; + + /* clear the used registers */ + veor RA3, RA3; + veor RB3, RB3; + + pop {r4,pc}; + +.Ldo_ctr_carry: + cmp r4, #-8; + blo .Lctr_carry_done; + beq .Lcarry_RT2; + + cmp r4, #-6; + blo .Lcarry_RB3; + beq .Lcarry_RB2; + + cmp r4, #-4; + blo .Lcarry_RB1; + beq .Lcarry_RB0; + + cmp r4, #-2; + blo .Lcarry_RA3; + beq .Lcarry_RA2; + + vsub.u64 RA1d0, RT1d0; +.Lcarry_RA2: + vsub.u64 RA2d0, RT1d0; +.Lcarry_RA3: + vsub.u64 RA3d0, RT1d0; +.Lcarry_RB0: + vsub.u64 RB0d0, RT1d0; +.Lcarry_RB1: + vsub.u64 RB1d0, RT1d0; +.Lcarry_RB2: + vsub.u64 RB2d0, RT1d0; +.Lcarry_RB3: + vsub.u64 RB3d0, RT1d0; +.Lcarry_RT2: + vsub.u64 RT2d0, RT1d0; + + b .Lctr_carry_done; +.size _gcry_serpent_neon_ctr_enc,.-_gcry_serpent_neon_ctr_enc; + +.align 3 +.globl _gcry_serpent_neon_cfb_dec +.type _gcry_serpent_neon_cfb_dec,%function; +_gcry_serpent_neon_cfb_dec: + /* input: + * r0: ctx, CTX + * r1: dst (8 blocks) + * r2: src (8 blocks) + * r3: iv + */ + + push {lr}; + vpush {RA4-RB2}; + + /* Load input */ + vld1.8 {RA0}, [r3]; + vld1.8 {RA1, RA2}, [r2]!; + vld1.8 {RA3}, [r2]!; + vld1.8 {RB0}, [r2]!; + vld1.8 {RB1, RB2}, [r2]!; + vld1.8 {RB3}, [r2]!; + + /* Update IV */ + vld1.8 {RT0}, [r2]!; + vst1.8 {RT0}, [r3]; + mov r3, lr; + sub r2, r2, #(8*16); + + bl __serpent_enc_blk8; + + vld1.8 {RT0, RT1}, [r2]!; + vld1.8 {RT2, RT3}, [r2]!; + veor RA4, RA4, RT0; + veor RA1, RA1, RT1; + vld1.8 {RT0, RT1}, [r2]!; + veor RA2, RA2, RT2; + veor RA0, RA0, RT3; + vld1.8 {RT2, RT3}, [r2]!; + veor RB4, RB4, RT0; + veor RT0, RT0; + veor RB1, RB1, RT1; + veor RT1, RT1; + veor RB2, RB2, RT2; + veor RT2, RT2; + veor RB0, RB0, RT3; + veor RT3, RT3; + + vst1.8 {RA4}, [r1]!; + vst1.8 {RA1}, [r1]!; + veor RA1, RA1; + vst1.8 {RA2}, [r1]!; + veor RA2, RA2; + vst1.8 {RA0}, [r1]!; + veor RA0, RA0; + vst1.8 {RB4}, [r1]!; + veor RB4, RB4; + vst1.8 {RB1}, [r1]!; + vst1.8 {RB2}, [r1]!; + vst1.8 {RB0}, [r1]!; + + vpop {RA4-RB2}; + + /* clear the used 
registers */ + veor RA3, RA3; + veor RB3, RB3; + + pop {pc}; +.size _gcry_serpent_neon_cfb_dec,.-_gcry_serpent_neon_cfb_dec; + +.align 3 +.globl _gcry_serpent_neon_cbc_dec +.type _gcry_serpent_neon_cbc_dec,%function; +_gcry_serpent_neon_cbc_dec: + /* input: + * r0: ctx, CTX + * r1: dst (8 blocks) + * r2: src (8 blocks) + * r3: iv + */ + + push {lr}; + vpush {RA4-RB2}; + + vld1.8 {RA0, RA1}, [r2]!; + vld1.8 {RA2, RA3}, [r2]!; + vld1.8 {RB0, RB1}, [r2]!; + vld1.8 {RB2, RB3}, [r2]!; + sub r2, r2, #(8*16); + + bl __serpent_dec_blk8; + + vld1.8 {RB4}, [r3]; + vld1.8 {RT0, RT1}, [r2]!; + vld1.8 {RT2, RT3}, [r2]!; + veor RA0, RA0, RB4; + veor RA1, RA1, RT0; + veor RA2, RA2, RT1; + vld1.8 {RT0, RT1}, [r2]!; + veor RA3, RA3, RT2; + veor RB0, RB0, RT3; + vld1.8 {RT2, RT3}, [r2]!; + veor RB1, RB1, RT0; + veor RT0, RT0; + veor RB2, RB2, RT1; + veor RT1, RT1; + veor RB3, RB3, RT2; + veor RT2, RT2; + vst1.8 {RT3}, [r3]; /* store new IV */ + veor RT3, RT3; + + vst1.8 {RA0, RA1}, [r1]!; + veor RA0, RA0; + veor RA1, RA1; + vst1.8 {RA2, RA3}, [r1]!; + veor RA2, RA2; + vst1.8 {RB0, RB1}, [r1]!; + veor RA3, RA3; + vst1.8 {RB2, RB3}, [r1]!; + veor RB3, RB3; + + vpop {RA4-RB2}; + + /* clear the used registers */ + veor RB4, RB4; + + pop {pc}; +.size _gcry_serpent_neon_cbc_dec,.-_gcry_serpent_neon_cbc_dec; + +.align 3 +.globl _gcry_serpent_neon_ocb_enc +.type _gcry_serpent_neon_ocb_enc,%function; +_gcry_serpent_neon_ocb_enc: + /* input: + * r0 : ctx, CTX + * r1 : dst (8 blocks) + * r2 : src (8 blocks) + * r3 : offset + * sp+0: checksum + * sp+4: L pointers (void *L[8]) + */ + + push {r4-r11, ip, lr}; + add ip, sp, #(10*4); + + vpush {RA4-RB2}; + + ldm ip, {r4, lr}; + + vld1.8 {RT0}, [r3]; + vld1.8 {RT1}, [r4]; + + /* Load L pointers */ + ldm lr!, {r5, r6, r7, r8}; + ldm lr, {r9, r10, r11, ip}; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + + vld1.8 {RA0, RA1}, [r2]!; + vld1.8 {RA2, RA3}, [r2]!; + vld1.8 {RB0, RB1}, [r2]!; + vld1.8 {RB2, RB3}, [r2]; + +#define OCB_INPUT(lreg, vreg) \ + vld1.8 {RT3}, [lreg]; \ + veor RT0, RT3; \ + veor RT1, vreg; \ + veor vreg, RT0; \ + vst1.8 {RT0}, [r1]!; + + OCB_INPUT(r5, RA0); + OCB_INPUT(r6, RA1); + OCB_INPUT(r7, RA2); + OCB_INPUT(r8, RA3); + OCB_INPUT(r9, RB0); + OCB_INPUT(r10, RB1); + OCB_INPUT(r11, RB2); + OCB_INPUT(ip, RB3); +#undef OCB_INPUT + + sub r1, r1, #(8*16); + vst1.8 {RT0}, [r3]; + vst1.8 {RT1}, [r4]; + mov r2, r1; + + bl __serpent_enc_blk8; + + vld1.8 {RT0, RT1}, [r1]!; + veor RT0, RA4, RT0; + veor RT1, RA1, RT1; + vld1.8 {RT2, RT3}, [r1]!; + vst1.8 {RT0, RT1}, [r2]!; + veor RT2, RA2, RT2; + veor RT3, RA0, RT3; + vld1.8 {RT0, RT1}, [r1]!; + vst1.8 {RT2, RT3}, [r2]!; + veor RT0, RB4, RT0; + veor RT1, RB1, RT1; + vld1.8 {RT2, RT3}, [r1]!; + vst1.8 {RT0, RT1}, [r2]!; + veor RT2, RB2, RT2; + veor RT3, RB0, RT3; + vst1.8 {RT2, RT3}, [r2]!; + + vpop {RA4-RB2}; + + /* clear the used registers */ + veor RA3, RA3; + veor RB3, RB3; + + pop {r4-r11, ip, pc}; +.size _gcry_serpent_neon_ocb_enc,.-_gcry_serpent_neon_ocb_enc; + +.align 3 +.globl _gcry_serpent_neon_ocb_dec +.type _gcry_serpent_neon_ocb_dec,%function; +_gcry_serpent_neon_ocb_dec: + /* input: + * r0 : ctx, CTX + * r1 : dst (8 blocks) + * r2 : src (8 blocks) + * r3 : offset + * sp+0: checksum + * sp+4: L pointers (void *L[8]) + */ + + push {r4-r11, ip, lr}; + add ip, sp, #(10*4); + + vpush {RA4-RB2}; + + ldm ip, {r4, lr}; + + vld1.8 {RT0}, [r3]; + + /* Load L pointers */ + ldm lr!, {r5, r6, r7, r8}; + ldm lr, 
{r9, r10, r11, ip}; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + + vld1.8 {RA0, RA1}, [r2]!; + vld1.8 {RA2, RA3}, [r2]!; + vld1.8 {RB0, RB1}, [r2]!; + vld1.8 {RB2, RB3}, [r2]; + +#define OCB_INPUT(lreg, vreg) \ + vld1.8 {RT3}, [lreg]; \ + veor RT0, RT3; \ + veor vreg, RT0; \ + vst1.8 {RT0}, [r1]!; + + OCB_INPUT(r5, RA0); + OCB_INPUT(r6, RA1); + OCB_INPUT(r7, RA2); + OCB_INPUT(r8, RA3); + OCB_INPUT(r9, RB0); + OCB_INPUT(r10, RB1); + OCB_INPUT(r11, RB2); + OCB_INPUT(ip, RB3); +#undef OCB_INPUT + + sub r1, r1, #(8*16); + vst1.8 {RT0}, [r3]; + mov r2, r1; + + bl __serpent_dec_blk8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vld1.8 {RA4}, [r4]; + + vld1.8 {RT0, RT1}, [r1]!; + veor RA0, RA0, RT0; + veor RA1, RA1, RT1; + vld1.8 {RT2, RT3}, [r1]!; + veor RA4, RA4, RA0; + vst1.8 {RA0, RA1}, [r2]!; + veor RA4, RA4, RA1; + veor RA2, RA2, RT2; + veor RA3, RA3, RT3; + vld1.8 {RT0, RT1}, [r1]!; + veor RA4, RA4, RA2; + vst1.8 {RA2, RA3}, [r2]!; + veor RA4, RA4, RA3; + veor RB0, RB0, RT0; + veor RB1, RB1, RT1; + vld1.8 {RT2, RT3}, [r1]!; + veor RA4, RA4, RB0; + vst1.8 {RB0, RB1}, [r2]!; + veor RA4, RA4, RB1; + veor RB2, RB2, RT2; + veor RB3, RB3, RT3; + veor RA4, RA4, RB2; + vst1.8 {RB2, RB3}, [r2]!; + + veor RA4, RA4, RB3; + vst1.8 {RA4}, [r4]; + + vpop {RA4-RB2}; + + /* clear the used registers */ + veor RB4, RB4; + + pop {r4-r11, ip, pc}; +.size _gcry_serpent_neon_ocb_dec,.-_gcry_serpent_neon_ocb_dec; + +.align 3 +.globl _gcry_serpent_neon_ocb_auth +.type _gcry_serpent_neon_ocb_auth,%function; +_gcry_serpent_neon_ocb_auth: + /* input: + * r0 : ctx, CTX + * r1 : abuf (8 blocks) + * r2 : offset + * r3 : checksum + * sp+0: L pointers (void *L[8]) + */ + + push {r5-r11, ip, lr}; + ldr lr, [sp, #(9*4)]; + + vpush {RA4-RB2}; + + vld1.8 {RT0}, [r2]; + + /* Load L pointers */ + ldm lr!, {r5, r6, r7, r8}; + ldm lr, {r9, r10, r11, ip}; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + + vld1.8 {RA0, RA1}, [r1]!; + vld1.8 {RA2, RA3}, [r1]!; + vld1.8 {RB0, RB1}, [r1]!; + vld1.8 {RB2, RB3}, [r1]; + +#define OCB_INPUT(lreg, vreg) \ + vld1.8 {RT3}, [lreg]; \ + veor RT0, RT3; \ + veor vreg, RT0; + + OCB_INPUT(r5, RA0); + OCB_INPUT(r6, RA1); + OCB_INPUT(r7, RA2); + OCB_INPUT(r8, RA3); + OCB_INPUT(r9, RB0); + OCB_INPUT(r10, RB1); + OCB_INPUT(r11, RB2); + OCB_INPUT(ip, RB3); +#undef OCB_INPUT + + vst1.8 {RT0}, [r2]; + + bl __serpent_enc_blk8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vld1.8 {RT0}, [r3]; + + veor RA4, RB4; + veor RA1, RB1; + veor RA2, RB2; + veor RA0, RB0; + + veor RA2, RT0; + veor RA1, RA4; + veor RA0, RA2; + + veor RA0, RA1; + + vst1.8 {RA0}, [r3]; + + vpop {RA4-RB2}; + + /* clear the used registers */ + veor RA3, RA3; + veor RB3, RB3; + + pop {r5-r11, ip, pc}; +.size _gcry_serpent_neon_ocb_auth,.-_gcry_serpent_neon_ocb_auth; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/serpent-avx2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/serpent-avx2-amd64.S new file mode 100644 index 0000000..8d60a15 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/serpent-avx2-amd64.S @@ -0,0 +1,1123 @@ +/* serpent-avx2-amd64.S - AVX2 implementation of Serpent cipher + * + * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \ + defined(ENABLE_AVX2_SUPPORT) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + +/* struct serpent_context: */ +#define ctx_keys 0 + +/* register macros */ +#define CTX %rdi + +/* vector registers */ +#define RA0 %ymm0 +#define RA1 %ymm1 +#define RA2 %ymm2 +#define RA3 %ymm3 +#define RA4 %ymm4 + +#define RB0 %ymm5 +#define RB1 %ymm6 +#define RB2 %ymm7 +#define RB3 %ymm8 +#define RB4 %ymm9 + +#define RNOT %ymm10 +#define RTMP0 %ymm11 +#define RTMP1 %ymm12 +#define RTMP2 %ymm13 +#define RTMP3 %ymm14 +#define RTMP4 %ymm15 + +#define RNOTx %xmm10 +#define RTMP0x %xmm11 +#define RTMP1x %xmm12 +#define RTMP2x %xmm13 +#define RTMP3x %xmm14 +#define RTMP4x %xmm15 + +/********************************************************************** + helper macros + **********************************************************************/ + +/* vector 32-bit rotation to left */ +#define vec_rol(reg, nleft, tmp) \ + vpslld $(nleft), reg, tmp; \ + vpsrld $(32 - (nleft)), reg, reg; \ + vpor tmp, reg, reg; + +/* vector 32-bit rotation to right */ +#define vec_ror(reg, nright, tmp) \ + vec_rol(reg, 32 - nright, tmp) + +/* 4x4 32-bit integer matrix transpose */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/********************************************************************** + 16-way serpent + **********************************************************************/ + +/* + * These are the S-Boxes of Serpent from following research paper. + * + * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference, + * (New York, New York, USA), p. 317–329, National Institute of Standards and + * Technology, 2000. 
+ * + * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf + * + */ +#define SBOX0(r0, r1, r2, r3, r4) \ + vpxor r0, r3, r3; vmovdqa r1, r4; \ + vpand r3, r1, r1; vpxor r2, r4, r4; \ + vpxor r0, r1, r1; vpor r3, r0, r0; \ + vpxor r4, r0, r0; vpxor r3, r4, r4; \ + vpxor r2, r3, r3; vpor r1, r2, r2; \ + vpxor r4, r2, r2; vpxor RNOT, r4, r4; \ + vpor r1, r4, r4; vpxor r3, r1, r1; \ + vpxor r4, r1, r1; vpor r0, r3, r3; \ + vpxor r3, r1, r1; vpxor r3, r4, r4; + +#define SBOX0_INVERSE(r0, r1, r2, r3, r4) \ + vpxor RNOT, r2, r2; vmovdqa r1, r4; \ + vpor r0, r1, r1; vpxor RNOT, r4, r4; \ + vpxor r2, r1, r1; vpor r4, r2, r2; \ + vpxor r3, r1, r1; vpxor r4, r0, r0; \ + vpxor r0, r2, r2; vpand r3, r0, r0; \ + vpxor r0, r4, r4; vpor r1, r0, r0; \ + vpxor r2, r0, r0; vpxor r4, r3, r3; \ + vpxor r1, r2, r2; vpxor r0, r3, r3; \ + vpxor r1, r3, r3; \ + vpand r3, r2, r2; \ + vpxor r2, r4, r4; + +#define SBOX1(r0, r1, r2, r3, r4) \ + vpxor RNOT, r0, r0; vpxor RNOT, r2, r2; \ + vmovdqa r0, r4; vpand r1, r0, r0; \ + vpxor r0, r2, r2; vpor r3, r0, r0; \ + vpxor r2, r3, r3; vpxor r0, r1, r1; \ + vpxor r4, r0, r0; vpor r1, r4, r4; \ + vpxor r3, r1, r1; vpor r0, r2, r2; \ + vpand r4, r2, r2; vpxor r1, r0, r0; \ + vpand r2, r1, r1; \ + vpxor r0, r1, r1; vpand r2, r0, r0; \ + vpxor r4, r0, r0; + +#define SBOX1_INVERSE(r0, r1, r2, r3, r4) \ + vmovdqa r1, r4; vpxor r3, r1, r1; \ + vpand r1, r3, r3; vpxor r2, r4, r4; \ + vpxor r0, r3, r3; vpor r1, r0, r0; \ + vpxor r3, r2, r2; vpxor r4, r0, r0; \ + vpor r2, r0, r0; vpxor r3, r1, r1; \ + vpxor r1, r0, r0; vpor r3, r1, r1; \ + vpxor r0, r1, r1; vpxor RNOT, r4, r4; \ + vpxor r1, r4, r4; vpor r0, r1, r1; \ + vpxor r0, r1, r1; \ + vpor r4, r1, r1; \ + vpxor r1, r3, r3; + +#define SBOX2(r0, r1, r2, r3, r4) \ + vmovdqa r0, r4; vpand r2, r0, r0; \ + vpxor r3, r0, r0; vpxor r1, r2, r2; \ + vpxor r0, r2, r2; vpor r4, r3, r3; \ + vpxor r1, r3, r3; vpxor r2, r4, r4; \ + vmovdqa r3, r1; vpor r4, r3, r3; \ + vpxor r0, r3, r3; vpand r1, r0, r0; \ + vpxor r0, r4, r4; vpxor r3, r1, r1; \ + vpxor r4, r1, r1; vpxor RNOT, r4, r4; + +#define SBOX2_INVERSE(r0, r1, r2, r3, r4) \ + vpxor r3, r2, r2; vpxor r0, r3, r3; \ + vmovdqa r3, r4; vpand r2, r3, r3; \ + vpxor r1, r3, r3; vpor r2, r1, r1; \ + vpxor r4, r1, r1; vpand r3, r4, r4; \ + vpxor r3, r2, r2; vpand r0, r4, r4; \ + vpxor r2, r4, r4; vpand r1, r2, r2; \ + vpor r0, r2, r2; vpxor RNOT, r3, r3; \ + vpxor r3, r2, r2; vpxor r3, r0, r0; \ + vpand r1, r0, r0; vpxor r4, r3, r3; \ + vpxor r0, r3, r3; + +#define SBOX3(r0, r1, r2, r3, r4) \ + vmovdqa r0, r4; vpor r3, r0, r0; \ + vpxor r1, r3, r3; vpand r4, r1, r1; \ + vpxor r2, r4, r4; vpxor r3, r2, r2; \ + vpand r0, r3, r3; vpor r1, r4, r4; \ + vpxor r4, r3, r3; vpxor r1, r0, r0; \ + vpand r0, r4, r4; vpxor r3, r1, r1; \ + vpxor r2, r4, r4; vpor r0, r1, r1; \ + vpxor r2, r1, r1; vpxor r3, r0, r0; \ + vmovdqa r1, r2; vpor r3, r1, r1; \ + vpxor r0, r1, r1; + +#define SBOX3_INVERSE(r0, r1, r2, r3, r4) \ + vmovdqa r2, r4; vpxor r1, r2, r2; \ + vpxor r2, r0, r0; vpand r2, r4, r4; \ + vpxor r0, r4, r4; vpand r1, r0, r0; \ + vpxor r3, r1, r1; vpor r4, r3, r3; \ + vpxor r3, r2, r2; vpxor r3, r0, r0; \ + vpxor r4, r1, r1; vpand r2, r3, r3; \ + vpxor r1, r3, r3; vpxor r0, r1, r1; \ + vpor r2, r1, r1; vpxor r3, r0, r0; \ + vpxor r4, r1, r1; \ + vpxor r1, r0, r0; + +#define SBOX4(r0, r1, r2, r3, r4) \ + vpxor r3, r1, r1; vpxor RNOT, r3, r3; \ + vpxor r3, r2, r2; vpxor r0, r3, r3; \ + vmovdqa r1, r4; vpand r3, r1, r1; \ + vpxor r2, r1, r1; vpxor r3, r4, r4; \ + vpxor r4, r0, r0; vpand 
r4, r2, r2; \
+ vpxor r0, r2, r2; vpand r1, r0, r0; \
+ vpxor r0, r3, r3; vpor r1, r4, r4; \
+ vpxor r0, r4, r4; vpor r3, r0, r0; \
+ vpxor r2, r0, r0; vpand r3, r2, r2; \
+ vpxor RNOT, r0, r0; vpxor r2, r4, r4;
+
+#define SBOX4_INVERSE(r0, r1, r2, r3, r4) \
+ vmovdqa r2, r4; vpand r3, r2, r2; \
+ vpxor r1, r2, r2; vpor r3, r1, r1; \
+ vpand r0, r1, r1; vpxor r2, r4, r4; \
+ vpxor r1, r4, r4; vpand r2, r1, r1; \
+ vpxor RNOT, r0, r0; vpxor r4, r3, r3; \
+ vpxor r3, r1, r1; vpand r0, r3, r3; \
+ vpxor r2, r3, r3; vpxor r1, r0, r0; \
+ vpand r0, r2, r2; vpxor r0, r3, r3; \
+ vpxor r4, r2, r2; \
+ vpor r3, r2, r2; vpxor r0, r3, r3; \
+ vpxor r1, r2, r2;
+
+#define SBOX5(r0, r1, r2, r3, r4) \
+ vpxor r1, r0, r0; vpxor r3, r1, r1; \
+ vpxor RNOT, r3, r3; vmovdqa r1, r4; \
+ vpand r0, r1, r1; vpxor r3, r2, r2; \
+ vpxor r2, r1, r1; vpor r4, r2, r2; \
+ vpxor r3, r4, r4; vpand r1, r3, r3; \
+ vpxor r0, r3, r3; vpxor r1, r4, r4; \
+ vpxor r2, r4, r4; vpxor r0, r2, r2; \
+ vpand r3, r0, r0; vpxor RNOT, r2, r2; \
+ vpxor r4, r0, r0; vpor r3, r4, r4; \
+ vpxor r4, r2, r2;
+
+#define SBOX5_INVERSE(r0, r1, r2, r3, r4) \
+ vpxor RNOT, r1, r1; vmovdqa r3, r4; \
+ vpxor r1, r2, r2; vpor r0, r3, r3; \
+ vpxor r2, r3, r3; vpor r1, r2, r2; \
+ vpand r0, r2, r2; vpxor r3, r4, r4; \
+ vpxor r4, r2, r2; vpor r0, r4, r4; \
+ vpxor r1, r4, r4; vpand r2, r1, r1; \
+ vpxor r3, r1, r1; vpxor r2, r4, r4; \
+ vpand r4, r3, r3; vpxor r1, r4, r4; \
+ vpxor r4, r3, r3; vpxor RNOT, r4, r4; \
+ vpxor r0, r3, r3;
+
+#define SBOX6(r0, r1, r2, r3, r4) \
+ vpxor RNOT, r2, r2; vmovdqa r3, r4; \
+ vpand r0, r3, r3; vpxor r4, r0, r0; \
+ vpxor r2, r3, r3; vpor r4, r2, r2; \
+ vpxor r3, r1, r1; vpxor r0, r2, r2; \
+ vpor r1, r0, r0; vpxor r1, r2, r2; \
+ vpxor r0, r4, r4; vpor r3, r0, r0; \
+ vpxor r2, r0, r0; vpxor r3, r4, r4; \
+ vpxor r0, r4, r4; vpxor RNOT, r3, r3; \
+ vpand r4, r2, r2; \
+ vpxor r3, r2, r2;
+
+#define SBOX6_INVERSE(r0, r1, r2, r3, r4) \
+ vpxor r2, r0, r0; vmovdqa r2, r4; \
+ vpand r0, r2, r2; vpxor r3, r4, r4; \
+ vpxor RNOT, r2, r2; vpxor r1, r3, r3; \
+ vpxor r3, r2, r2; vpor r0, r4, r4; \
+ vpxor r2, r0, r0; vpxor r4, r3, r3; \
+ vpxor r1, r4, r4; vpand r3, r1, r1; \
+ vpxor r0, r1, r1; vpxor r3, r0, r0; \
+ vpor r2, r0, r0; vpxor r1, r3, r3; \
+ vpxor r0, r4, r4;
+
+#define SBOX7(r0, r1, r2, r3, r4) \
+ vmovdqa r1, r4; vpor r2, r1, r1; \
+ vpxor r3, r1, r1; vpxor r2, r4, r4; \
+ vpxor r1, r2, r2; vpor r4, r3, r3; \
+ vpand r0, r3, r3; vpxor r2, r4, r4; \
+ vpxor r1, r3, r3; vpor r4, r1, r1; \
+ vpxor r0, r1, r1; vpor r4, r0, r0; \
+ vpxor r2, r0, r0; vpxor r4, r1, r1; \
+ vpxor r1, r2, r2; vpand r0, r1, r1; \
+ vpxor r4, r1, r1; vpxor RNOT, r2, r2; \
+ vpor r0, r2, r2; \
+ vpxor r2, r4, r4;
+
+#define SBOX7_INVERSE(r0, r1, r2, r3, r4) \
+ vmovdqa r2, r4; vpxor r0, r2, r2; \
+ vpand r3, r0, r0; vpor r3, r4, r4; \
+ vpxor RNOT, r2, r2; vpxor r1, r3, r3; \
+ vpor r0, r1, r1; vpxor r2, r0, r0; \
+ vpand r4, r2, r2; vpand r4, r3, r3; \
+ vpxor r2, r1, r1; vpxor r0, r2, r2; \
+ vpor r2, r0, r0; vpxor r1, r4, r4; \
+ vpxor r3, r0, r0; vpxor r4, r3, r3; \
+ vpor r0, r4, r4; vpxor r2, r3, r3; \
+ vpxor r2, r4, r4;
+
+/* Apply SBOX number WHICH to the block. */
+#define SBOX(which, r0, r1, r2, r3, r4) \
+ SBOX##which (r0, r1, r2, r3, r4)
+
+/* Apply inverse SBOX number WHICH to the block. */
+#define SBOX_INVERSE(which, r0, r1, r2, r3, r4) \
+ SBOX##which##_INVERSE (r0, r1, r2, r3, r4)
+
+/* XOR round key into block state in r0,r1,r2,r3. r4 used as temporary.
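+
+   Each 32-bit subkey word is fetched with vpbroadcastd, which repeats it
+   across all eight dword lanes of a ymm register, so a single vpxor
+   applies the same key word to the corresponding word of eight blocks at
+   once.
+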
*/ +#define BLOCK_XOR_KEY(r0, r1, r2, r3, r4, round) \ + vpbroadcastd (ctx_keys + (round) * 16 + 0 * 4)(CTX), r4; \ + vpxor r4, r0, r0; \ + vpbroadcastd (ctx_keys + (round) * 16 + 1 * 4)(CTX), r4; \ + vpxor r4, r1, r1; \ + vpbroadcastd (ctx_keys + (round) * 16 + 2 * 4)(CTX), r4; \ + vpxor r4, r2, r2; \ + vpbroadcastd (ctx_keys + (round) * 16 + 3 * 4)(CTX), r4; \ + vpxor r4, r3, r3; + +/* Apply the linear transformation to BLOCK. */ +#define LINEAR_TRANSFORMATION(r0, r1, r2, r3, r4) \ + vec_rol(r0, 13, r4); \ + vec_rol(r2, 3, r4); \ + vpxor r0, r1, r1; \ + vpxor r2, r1, r1; \ + vpslld $3, r0, r4; \ + vpxor r2, r3, r3; \ + vpxor r4, r3, r3; \ + vec_rol(r1, 1, r4); \ + vec_rol(r3, 7, r4); \ + vpxor r1, r0, r0; \ + vpxor r3, r0, r0; \ + vpslld $7, r1, r4; \ + vpxor r3, r2, r2; \ + vpxor r4, r2, r2; \ + vec_rol(r0, 5, r4); \ + vec_rol(r2, 22, r4); + +/* Apply the inverse linear transformation to BLOCK. */ +#define LINEAR_TRANSFORMATION_INVERSE(r0, r1, r2, r3, r4) \ + vec_ror(r2, 22, r4); \ + vec_ror(r0, 5, r4); \ + vpslld $7, r1, r4; \ + vpxor r3, r2, r2; \ + vpxor r4, r2, r2; \ + vpxor r1, r0, r0; \ + vpxor r3, r0, r0; \ + vec_ror(r3, 7, r4); \ + vec_ror(r1, 1, r4); \ + vpslld $3, r0, r4; \ + vpxor r2, r3, r3; \ + vpxor r4, r3, r3; \ + vpxor r0, r1, r1; \ + vpxor r2, r1, r1; \ + vec_ror(r2, 3, r4); \ + vec_ror(r0, 13, r4); + +/* Apply a Serpent round to sixteen parallel blocks. This macro increments + `round'. */ +#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round); \ + SBOX (which, a0, a1, a2, a3, a4); \ + BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round); \ + SBOX (which, b0, b1, b2, b3, b4); \ + LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4); \ + LINEAR_TRANSFORMATION (nb0, nb1, nb2, nb3, nb4); + +/* Apply the last Serpent round to sixteen parallel blocks. This macro + increments `round'. */ +#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round); \ + SBOX (which, a0, a1, a2, a3, a4); \ + BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round); \ + SBOX (which, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, ((round) + 1)); \ + BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, ((round) + 1)); + +/* Apply an inverse Serpent round to sixteen parallel blocks. This macro + increments `round'. */ +#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \ + na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, \ + nb0, nb1, nb2, nb3, nb4) \ + LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4); \ + LINEAR_TRANSFORMATION_INVERSE (b0, b1, b2, b3, b4); \ + SBOX_INVERSE (which, a0, a1, a2, a3, a4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \ + SBOX_INVERSE (which, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round); + +/* Apply the first inverse Serpent round to sixteen parallel blocks. This macro + increments `round'. 
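(Strictly speaking `round' is a literal subkey index here; the macro itself does + not modify it. Decryption begins by undoing the tail of encryption: subkey + `round + 1' is XORed off, the inverse S-box is applied, then subkey `round' is + XORed off, before ROUND_INVERSE walks the schedule down to 0. In C-like + pseudocode: block ^= K[round + 1]; block = Sinv[which] (block); + block ^= K[round];) 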
*/ +#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \ + na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, \ + nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, ((round) + 1)); \ + BLOCK_XOR_KEY (b0, b1, b2, b3, b4, ((round) + 1)); \ + SBOX_INVERSE (which, a0, a1, a2, a3, a4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \ + SBOX_INVERSE (which, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round); + +.text + +.align 8 +ELF(.type __serpent_enc_blk16,@function;) +__serpent_enc_blk16: + /* input: + * %rdi: ctx, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * plaintext blocks + * output: + * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: sixteen parallel + * ciphertext blocks + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); + + ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, + RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); + ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, + RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); + ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2, + RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); + ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, + RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); + ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, + RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); + ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, + RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); + ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, + RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); + ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, + RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); + ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0, + RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0); + ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0, + RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0); + ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2, + RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2); + ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4, + RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4); + ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0, + RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0); + ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0, + RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0); + ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3, + RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3); + ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0, + RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0); + ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4, + RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4); + ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4, + RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4); + ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2, + RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2); + ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3, + RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3); + ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4, + RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4); + ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4, + RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4); + ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, 
RA2, RA4, RA1, RA0, + RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0); + ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4, + RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4); + ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, + RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); + ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, + RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); + ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2, + RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); + ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, + RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); + ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, + RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); + ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, + RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); + ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, + RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); + ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, + RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); + + transpose_4x4(RA4, RA1, RA2, RA0, RA3, RTMP0, RTMP1); + transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1); + + ret; +ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;) + +.align 8 +ELF(.type __serpent_dec_blk16,@function;) +__serpent_dec_blk16: + /* input: + * %rdi: ctx, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * ciphertext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * plaintext blocks + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); + + ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4, + RA3, RA0, RA1, RA4, RA2, + RB0, RB1, RB2, RB3, RB4, + RB3, RB0, RB1, RB4, RB2); + ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3, + RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3); + ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0, + RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0); + ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3, + RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3); + ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3, + RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3); + ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4, + RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4); + ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3, + RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3); + ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1, + RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1); + ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2, + RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2); + ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0, + RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0); + ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4, + RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4); + ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0, + RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0); + ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0, + RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0); + ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1, + RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1); + 
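/* Each inverse round undoes the linear transformation first, then applies the + inverse S-box and strips that round's subkey; the subkey indices keep + counting down to 0 below. */ +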
ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0, + RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0); + ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3, + RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3); + ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2, + RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2); + ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4, + RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4); + ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1, + RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1); + ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4, + RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4); + ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4, + RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4); + ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3, + RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3); + ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4, + RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4); + ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0, + RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0); + ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2, + RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2); + ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1, + RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1); + ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3, + RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3); + ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1, + RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1); + ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1, + RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1); + ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0, + RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0); + ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1, + RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1); + ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4, + RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4); + + transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); + + ret; +ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +.align 8 +.globl _gcry_serpent_avx2_ctr_enc +ELF(.type _gcry_serpent_avx2_ctr_enc,@function;) +_gcry_serpent_avx2_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + + movq 8(%rcx), %rax; + bswapq %rax; + + vzeroupper; + + vbroadcasti128 .Lbswap128_mask RIP, RTMP3; + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ + vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), RTMP4x; + vpshufb RTMP3x, RTMP4x, RTMP4x; + vmovdqa RTMP4x, RTMP0x; + inc_le128(RTMP4x, RNOTx, RTMP1x); + vinserti128 $1, RTMP4x, RTMP0, RTMP0; + vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */ + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 16), %rax; + ja .Lhandle_ctr_carry; + + /* construct IVs */ + vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */ + vpshufb RTMP3, RTMP0, RA1; + vpsubq 
RTMP2, RTMP0, RTMP0; /* +5 ; +4 */ + vpshufb RTMP3, RTMP0, RA2; + vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */ + vpshufb RTMP3, RTMP0, RA3; + vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */ + vpshufb RTMP3, RTMP0, RB0; + vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */ + vpshufb RTMP3, RTMP0, RB1; + vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */ + vpshufb RTMP3, RTMP0, RB2; + vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */ + vpshufb RTMP3, RTMP0, RB3; + vpsubq RTMP2, RTMP0, RTMP0; /* +16 */ + vpshufb RTMP3x, RTMP0x, RTMP0x; + + jmp .Lctr_carry_done; + +.Lhandle_ctr_carry: + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */ + inc_le128(RTMP0, RNOT, RTMP1); + vextracti128 $1, RTMP0, RTMP0x; + vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */ + +.align 4 +.Lctr_carry_done: + /* store new IV */ + vmovdqu RTMP0x, (%rcx); + + call __serpent_enc_blk16; + + vpxor (0 * 32)(%rdx), RA4, RA4; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA0, RA0; + vpxor (4 * 32)(%rdx), RB4, RB4; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB0, RB0; + + vmovdqu RA4, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA0, (3 * 32)(%rsi); + vmovdqu RB4, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB0, (7 * 32)(%rsi); + + vzeroall; + + ret +ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;) + +.align 8 +.globl _gcry_serpent_avx2_cbc_dec +ELF(.type _gcry_serpent_avx2_cbc_dec,@function;) +_gcry_serpent_avx2_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + + vzeroupper; + + vmovdqu (0 * 32)(%rdx), RA0; + vmovdqu (1 * 32)(%rdx), RA1; + vmovdqu (2 * 32)(%rdx), RA2; + vmovdqu (3 * 32)(%rdx), RA3; + vmovdqu (4 * 32)(%rdx), RB0; + vmovdqu (5 * 32)(%rdx), RB1; + vmovdqu (6 * 32)(%rdx), RB2; + vmovdqu (7 * 32)(%rdx), RB3; + + call __serpent_dec_blk16; + + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RNOT; + vpxor RNOT, RA0, RA0; + vpxor (0 * 32 + 16)(%rdx), RA1, RA1; + vpxor (1 * 32 + 16)(%rdx), RA2, RA2; + vpxor (2 * 32 + 16)(%rdx), RA3, RA3; + vpxor (3 * 32 + 16)(%rdx), RB0, RB0; + vpxor (4 * 32 + 16)(%rdx), RB1, RB1; + vpxor (5 * 32 + 16)(%rdx), RB2, RB2; + vpxor (6 * 32 + 16)(%rdx), RB3, RB3; + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + + ret +ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;) + +.align 8 +.globl _gcry_serpent_avx2_cfb_dec +ELF(.type 
_gcry_serpent_avx2_cfb_dec,@function;) +_gcry_serpent_avx2_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + + vzeroupper; + + /* Load input */ + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RA0; + vmovdqu (0 * 32 + 16)(%rdx), RA1; + vmovdqu (1 * 32 + 16)(%rdx), RA2; + vmovdqu (2 * 32 + 16)(%rdx), RA3; + vmovdqu (3 * 32 + 16)(%rdx), RB0; + vmovdqu (4 * 32 + 16)(%rdx), RB1; + vmovdqu (5 * 32 + 16)(%rdx), RB2; + vmovdqu (6 * 32 + 16)(%rdx), RB3; + + /* Update IV */ + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); + + call __serpent_enc_blk16; + + vpxor (0 * 32)(%rdx), RA4, RA4; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA0, RA0; + vpxor (4 * 32)(%rdx), RB4, RB4; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB0, RB0; + + vmovdqu RA4, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA0, (3 * 32)(%rsi); + vmovdqu RB4, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB0, (7 * 32)(%rsi); + + vzeroall; + + ret +ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;) + +.align 8 +.globl _gcry_serpent_avx2_ocb_enc +ELF(.type _gcry_serpent_avx2_ocb_enc,@function;) + +_gcry_serpent_avx2_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rcx), RTMP0x; + vmovdqu (%r8), RTMP1x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RTMP1, RTMP1; \ + vpxor yreg, RNOT, yreg; \ + vmovdqu RNOT, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RA1); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, RA2); + OCB_INPUT(3, %r12, %r13, RA3); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, RB0); + OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, RB2); + OCB_INPUT(7, %r12, %r13, RB3); +#undef OCB_INPUT + + vextracti128 $1, RTMP1, RNOTx; + vmovdqu RTMP0x, (%rcx); + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%r8); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_enc_blk16; + + addq $(4 * 8), %rsp; + + vpxor (0 * 32)(%rsi), RA4, RA4; + vpxor (1 * 32)(%rsi), RA1, RA1; + vpxor (2 * 32)(%rsi), RA2, RA2; + vpxor (3 * 32)(%rsi), RA0, RA0; + vpxor (4 * 32)(%rsi), RB4, RB4; + vpxor (5 * 32)(%rsi), RB1, RB1; + vpxor (6 * 32)(%rsi), RB2, RB2; + vpxor (7 * 32)(%rsi), RB0, RB0; + + vmovdqu RA4, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + 
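/* __serpent_enc_blk16 returned its output in the RA4, RA1, RA2, RA0 / RB4, + RB1, RB2, RB0 register order (see its header), hence these shuffled + xors and stores. */ +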
vmovdqu RA0, (3 * 32)(%rsi); + vmovdqu RB4, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB0, (7 * 32)(%rsi); + + vzeroall; + + ret; +ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;) + +.align 8 +.globl _gcry_serpent_avx2_ocb_dec +ELF(.type _gcry_serpent_avx2_ocb_dec,@function;) + +_gcry_serpent_avx2_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rcx), RTMP0x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RNOT, yreg; \ + vmovdqu RNOT, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RA1); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, RA2); + OCB_INPUT(3, %r12, %r13, RA3); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, RB0); + OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, RB2); + OCB_INPUT(7, %r12, %r13, RB3); +#undef OCB_INPUT + + vmovdqu RTMP0x, (%rcx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_dec_blk16; + + addq $(4 * 8), %rsp; + + vmovdqu (%r8), RTMP1x; + + vpxor (0 * 32)(%rsi), RA0, RA0; + vpxor (1 * 32)(%rsi), RA1, RA1; + vpxor (2 * 32)(%rsi), RA2, RA2; + vpxor (3 * 32)(%rsi), RA3, RA3; + vpxor (4 * 32)(%rsi), RB0, RB0; + vpxor (5 * 32)(%rsi), RB1, RB1; + vpxor (6 * 32)(%rsi), RB2, RB2; + vpxor (7 * 32)(%rsi), RB3, RB3; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vmovdqu RA0, (0 * 32)(%rsi); + vpxor RA0, RTMP1, RTMP1; + vmovdqu RA1, (1 * 32)(%rsi); + vpxor RA1, RTMP1, RTMP1; + vmovdqu RA2, (2 * 32)(%rsi); + vpxor RA2, RTMP1, RTMP1; + vmovdqu RA3, (3 * 32)(%rsi); + vpxor RA3, RTMP1, RTMP1; + vmovdqu RB0, (4 * 32)(%rsi); + vpxor RB0, RTMP1, RTMP1; + vmovdqu RB1, (5 * 32)(%rsi); + vpxor RB1, RTMP1, RTMP1; + vmovdqu RB2, (6 * 32)(%rsi); + vpxor RB2, RTMP1, RTMP1; + vmovdqu RB3, (7 * 32)(%rsi); + vpxor RB3, RTMP1, RTMP1; + + vextracti128 $1, RTMP1, RNOTx; + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%r8); + + vzeroall; + + ret; +ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;) + +.align 8 +.globl _gcry_serpent_avx2_ocb_auth +ELF(.type _gcry_serpent_avx2_ocb_auth,@function;) + +_gcry_serpent_avx2_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (16 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rdx), RTMP0x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + +#define 
OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rsi), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RNOT, yreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RA1); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(2, %r10, %r11, RA2); + OCB_INPUT(3, %r12, %r13, RA3); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %r11, RB0); + OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(6, %r10, %r11, RB2); + OCB_INPUT(7, %r12, %r13, RB3); +#undef OCB_INPUT + + vmovdqu RTMP0x, (%rdx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_enc_blk16; + + addq $(4 * 8), %rsp; + + vpxor RA4, RB4, RA4; + vpxor RA1, RB1, RA1; + vpxor RA2, RB2, RA2; + vpxor RA0, RB0, RA0; + + vpxor RA4, RA1, RA1; + vpxor RA2, RA0, RA0; + + vpxor RA1, RA0, RTMP1; + + vextracti128 $1, RTMP1, RNOTx; + vpxor (%rcx), RTMP1x, RTMP1x; + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%rcx); + + vzeroall; + + ret; +ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;) + +.align 16 + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +#endif /*defined(USE_SERPENT) && defined(ENABLE_AVX2_SUPPORT)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/serpent-sse2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/serpent-sse2-amd64.S new file mode 100644 index 0000000..b149af2 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/serpent-sse2-amd64.S @@ -0,0 +1,1175 @@ +/* serpent-sse2-amd64.S - SSE2 implementation of Serpent cipher + * + * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + +/* struct serpent_context: */ +#define ctx_keys 0 + +/* register macros */ +#define CTX %rdi + +/* vector registers */ +#define RA0 %xmm0 +#define RA1 %xmm1 +#define RA2 %xmm2 +#define RA3 %xmm3 +#define RA4 %xmm4 + +#define RB0 %xmm5 +#define RB1 %xmm6 +#define RB2 %xmm7 +#define RB3 %xmm8 +#define RB4 %xmm9 + +#define RNOT %xmm10 +#define RTMP0 %xmm11 +#define RTMP1 %xmm12 +#define RTMP2 %xmm13 + +/********************************************************************** + helper macros + **********************************************************************/ + +/* vector 32-bit rotation to left */ +#define vec_rol(reg, nleft, tmp) \ + movdqa reg, tmp; \ + pslld $(nleft), tmp; \ + psrld $(32 - (nleft)), reg; \ + por tmp, reg; + +/* vector 32-bit rotation to right */ +#define vec_ror(reg, nright, tmp) \ + vec_rol(reg, 32 - nright, tmp) + +/* 4x4 32-bit integer matrix transpose */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ + movdqa x0, t2; \ + punpckhdq x1, t2; \ + punpckldq x1, x0; \ + \ + movdqa x2, t1; \ + punpckldq x3, t1; \ + punpckhdq x3, x2; \ + \ + movdqa x0, x1; \ + punpckhqdq t1, x1; \ + punpcklqdq t1, x0; \ + \ + movdqa t2, x3; \ + punpckhqdq x2, x3; \ + punpcklqdq x2, t2; \ + movdqa t2, x2; + +/* fill xmm register with 32-bit value from memory */ +#define pbroadcastd(mem32, xreg) \ + movd mem32, xreg; \ + pshufd $0, xreg, xreg; + +/* xor with unaligned memory operand */ +#define pxor_u(umem128, xreg, t) \ + movdqu umem128, t; \ + pxor t, xreg; + +/* 128-bit wide byte swap */ +#define pbswap(xreg, t0) \ + /* reorder 32-bit words, [a,b,c,d] => [d,c,b,a] */ \ + pshufd $0x1b, xreg, xreg; \ + /* reorder high&low 16-bit words, [d0,d1,c0,c1] => [d1,d0,c1,c0] */ \ + pshuflw $0xb1, xreg, xreg; \ + pshufhw $0xb1, xreg, xreg; \ + /* reorder bytes in 16-bit words */ \ + movdqa xreg, t0; \ + psrlw $8, t0; \ + psllw $8, xreg; \ + por t0, xreg; + +/********************************************************************** + 8-way serpent + **********************************************************************/ + +/* + * These are the S-Boxes of Serpent from following research paper. + * + * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference, + * (New York, New York, USA), p. 317–329, National Institute of Standards and + * Technology, 2000. 
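+ * + * In this bit-sliced representation each S-box is straight-line boolean + * logic over five working registers, with no data-dependent table lookups + * or branches (which also keeps it constant-time). As an illustrative C + * rendering, SBOX0 below corresponds to the following, where `r4' is the + * scratch register and XOR with the all-ones RNOT acts as bitwise NOT: + * + * r3 ^= r0; r4 = r1; + * r1 &= r3; r4 ^= r2; + * r1 ^= r0; r0 |= r3; + * r0 ^= r4; r4 ^= r3; + * r3 ^= r2; r2 |= r1; + * r2 ^= r4; r4 = ~r4; + * r4 |= r1; r1 ^= r3; + * r1 ^= r4; r3 |= r0; + * r1 ^= r3; r4 ^= r3;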
+ * + * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf + * + */ +#define SBOX0(r0, r1, r2, r3, r4) \ + pxor r0, r3; movdqa r1, r4; \ + pand r3, r1; pxor r2, r4; \ + pxor r0, r1; por r3, r0; \ + pxor r4, r0; pxor r3, r4; \ + pxor r2, r3; por r1, r2; \ + pxor r4, r2; pxor RNOT, r4; \ + por r1, r4; pxor r3, r1; \ + pxor r4, r1; por r0, r3; \ + pxor r3, r1; pxor r3, r4; + +#define SBOX0_INVERSE(r0, r1, r2, r3, r4) \ + pxor RNOT, r2; movdqa r1, r4; \ + por r0, r1; pxor RNOT, r4; \ + pxor r2, r1; por r4, r2; \ + pxor r3, r1; pxor r4, r0; \ + pxor r0, r2; pand r3, r0; \ + pxor r0, r4; por r1, r0; \ + pxor r2, r0; pxor r4, r3; \ + pxor r1, r2; pxor r0, r3; \ + pxor r1, r3; \ + pand r3, r2; \ + pxor r2, r4; + +#define SBOX1(r0, r1, r2, r3, r4) \ + pxor RNOT, r0; pxor RNOT, r2; \ + movdqa r0, r4; pand r1, r0; \ + pxor r0, r2; por r3, r0; \ + pxor r2, r3; pxor r0, r1; \ + pxor r4, r0; por r1, r4; \ + pxor r3, r1; por r0, r2; \ + pand r4, r2; pxor r1, r0; \ + pand r2, r1; \ + pxor r0, r1; pand r2, r0; \ + pxor r4, r0; + +#define SBOX1_INVERSE(r0, r1, r2, r3, r4) \ + movdqa r1, r4; pxor r3, r1; \ + pand r1, r3; pxor r2, r4; \ + pxor r0, r3; por r1, r0; \ + pxor r3, r2; pxor r4, r0; \ + por r2, r0; pxor r3, r1; \ + pxor r1, r0; por r3, r1; \ + pxor r0, r1; pxor RNOT, r4; \ + pxor r1, r4; por r0, r1; \ + pxor r0, r1; \ + por r4, r1; \ + pxor r1, r3; + +#define SBOX2(r0, r1, r2, r3, r4) \ + movdqa r0, r4; pand r2, r0; \ + pxor r3, r0; pxor r1, r2; \ + pxor r0, r2; por r4, r3; \ + pxor r1, r3; pxor r2, r4; \ + movdqa r3, r1; por r4, r3; \ + pxor r0, r3; pand r1, r0; \ + pxor r0, r4; pxor r3, r1; \ + pxor r4, r1; pxor RNOT, r4; + +#define SBOX2_INVERSE(r0, r1, r2, r3, r4) \ + pxor r3, r2; pxor r0, r3; \ + movdqa r3, r4; pand r2, r3; \ + pxor r1, r3; por r2, r1; \ + pxor r4, r1; pand r3, r4; \ + pxor r3, r2; pand r0, r4; \ + pxor r2, r4; pand r1, r2; \ + por r0, r2; pxor RNOT, r3; \ + pxor r3, r2; pxor r3, r0; \ + pand r1, r0; pxor r4, r3; \ + pxor r0, r3; + +#define SBOX3(r0, r1, r2, r3, r4) \ + movdqa r0, r4; por r3, r0; \ + pxor r1, r3; pand r4, r1; \ + pxor r2, r4; pxor r3, r2; \ + pand r0, r3; por r1, r4; \ + pxor r4, r3; pxor r1, r0; \ + pand r0, r4; pxor r3, r1; \ + pxor r2, r4; por r0, r1; \ + pxor r2, r1; pxor r3, r0; \ + movdqa r1, r2; por r3, r1; \ + pxor r0, r1; + +#define SBOX3_INVERSE(r0, r1, r2, r3, r4) \ + movdqa r2, r4; pxor r1, r2; \ + pxor r2, r0; pand r2, r4; \ + pxor r0, r4; pand r1, r0; \ + pxor r3, r1; por r4, r3; \ + pxor r3, r2; pxor r3, r0; \ + pxor r4, r1; pand r2, r3; \ + pxor r1, r3; pxor r0, r1; \ + por r2, r1; pxor r3, r0; \ + pxor r4, r1; \ + pxor r1, r0; + +#define SBOX4(r0, r1, r2, r3, r4) \ + pxor r3, r1; pxor RNOT, r3; \ + pxor r3, r2; pxor r0, r3; \ + movdqa r1, r4; pand r3, r1; \ + pxor r2, r1; pxor r3, r4; \ + pxor r4, r0; pand r4, r2; \ + pxor r0, r2; pand r1, r0; \ + pxor r0, r3; por r1, r4; \ + pxor r0, r4; por r3, r0; \ + pxor r2, r0; pand r3, r2; \ + pxor RNOT, r0; pxor r2, r4; + +#define SBOX4_INVERSE(r0, r1, r2, r3, r4) \ + movdqa r2, r4; pand r3, r2; \ + pxor r1, r2; por r3, r1; \ + pand r0, r1; pxor r2, r4; \ + pxor r1, r4; pand r2, r1; \ + pxor RNOT, r0; pxor r4, r3; \ + pxor r3, r1; pand r0, r3; \ + pxor r2, r3; pxor r1, r0; \ + pand r0, r2; pxor r0, r3; \ + pxor r4, r2; \ + por r3, r2; pxor r0, r3; \ + pxor r1, r2; + +#define SBOX5(r0, r1, r2, r3, r4) \ + pxor r1, r0; pxor r3, r1; \ + pxor RNOT, r3; movdqa r1, r4; \ + pand r0, r1; pxor r3, r2; \ + pxor r2, r1; por r4, r2; \ + pxor r3, r4; pand r1, r3; \ + pxor r0, r3; pxor r1, r4; \ + 
pxor r2, r4; pxor r0, r2; \ + pand r3, r0; pxor RNOT, r2; \ + pxor r4, r0; por r3, r4; \ + pxor r4, r2; + +#define SBOX5_INVERSE(r0, r1, r2, r3, r4) \ + pxor RNOT, r1; movdqa r3, r4; \ + pxor r1, r2; por r0, r3; \ + pxor r2, r3; por r1, r2; \ + pand r0, r2; pxor r3, r4; \ + pxor r4, r2; por r0, r4; \ + pxor r1, r4; pand r2, r1; \ + pxor r3, r1; pxor r2, r4; \ + pand r4, r3; pxor r1, r4; \ + pxor r4, r3; pxor RNOT, r4; \ + pxor r0, r3; + +#define SBOX6(r0, r1, r2, r3, r4) \ + pxor RNOT, r2; movdqa r3, r4; \ + pand r0, r3; pxor r4, r0; \ + pxor r2, r3; por r4, r2; \ + pxor r3, r1; pxor r0, r2; \ + por r1, r0; pxor r1, r2; \ + pxor r0, r4; por r3, r0; \ + pxor r2, r0; pxor r3, r4; \ + pxor r0, r4; pxor RNOT, r3; \ + pand r4, r2; \ + pxor r3, r2; + +#define SBOX6_INVERSE(r0, r1, r2, r3, r4) \ + pxor r2, r0; movdqa r2, r4; \ + pand r0, r2; pxor r3, r4; \ + pxor RNOT, r2; pxor r1, r3; \ + pxor r3, r2; por r0, r4; \ + pxor r2, r0; pxor r4, r3; \ + pxor r1, r4; pand r3, r1; \ + pxor r0, r1; pxor r3, r0; \ + por r2, r0; pxor r1, r3; \ + pxor r0, r4; + +#define SBOX7(r0, r1, r2, r3, r4) \ + movdqa r1, r4; por r2, r1; \ + pxor r3, r1; pxor r2, r4; \ + pxor r1, r2; por r4, r3; \ + pand r0, r3; pxor r2, r4; \ + pxor r1, r3; por r4, r1; \ + pxor r0, r1; por r4, r0; \ + pxor r2, r0; pxor r4, r1; \ + pxor r1, r2; pand r0, r1; \ + pxor r4, r1; pxor RNOT, r2; \ + por r0, r2; \ + pxor r2, r4; + +#define SBOX7_INVERSE(r0, r1, r2, r3, r4) \ + movdqa r2, r4; pxor r0, r2; \ + pand r3, r0; por r3, r4; \ + pxor RNOT, r2; pxor r1, r3; \ + por r0, r1; pxor r2, r0; \ + pand r4, r2; pand r4, r3; \ + pxor r2, r1; pxor r0, r2; \ + por r2, r0; pxor r1, r4; \ + pxor r3, r0; pxor r4, r3; \ + por r0, r4; pxor r2, r3; \ + pxor r2, r4; + +/* Apply SBOX number WHICH to to the block. */ +#define SBOX(which, r0, r1, r2, r3, r4) \ + SBOX##which (r0, r1, r2, r3, r4) + +/* Apply inverse SBOX number WHICH to to the block. */ +#define SBOX_INVERSE(which, r0, r1, r2, r3, r4) \ + SBOX##which##_INVERSE (r0, r1, r2, r3, r4) + +/* XOR round key into block state in r0,r1,r2,r3. r4 used as temporary. */ +#define BLOCK_XOR_KEY(r0, r1, r2, r3, r4, round) \ + pbroadcastd ((ctx_keys + (round) * 16 + 0 * 4)(CTX), r4); \ + pxor r4, r0; \ + pbroadcastd ((ctx_keys + (round) * 16 + 1 * 4)(CTX), r4); \ + pxor r4, r1; \ + pbroadcastd ((ctx_keys + (round) * 16 + 2 * 4)(CTX), r4); \ + pxor r4, r2; \ + pbroadcastd ((ctx_keys + (round) * 16 + 3 * 4)(CTX), r4); \ + pxor r4, r3; + +/* Apply the linear transformation to BLOCK. */ +#define LINEAR_TRANSFORMATION(r0, r1, r2, r3, r4) \ + vec_rol(r0, 13, r4); \ + vec_rol(r2, 3, r4); \ + pxor r0, r1; \ + pxor r2, r1; \ + movdqa r0, r4; \ + pslld $3, r4; \ + pxor r2, r3; \ + pxor r4, r3; \ + vec_rol(r1, 1, r4); \ + vec_rol(r3, 7, r4); \ + pxor r1, r0; \ + pxor r3, r0; \ + movdqa r1, r4; \ + pslld $7, r4; \ + pxor r3, r2; \ + pxor r4, r2; \ + vec_rol(r0, 5, r4); \ + vec_rol(r2, 22, r4); + +/* Apply the inverse linear transformation to BLOCK. */ +#define LINEAR_TRANSFORMATION_INVERSE(r0, r1, r2, r3, r4) \ + vec_ror(r2, 22, r4); \ + vec_ror(r0, 5, r4); \ + movdqa r1, r4; \ + pslld $7, r4; \ + pxor r3, r2; \ + pxor r4, r2; \ + pxor r1, r0; \ + pxor r3, r0; \ + vec_ror(r3, 7, r4); \ + vec_ror(r1, 1, r4); \ + movdqa r0, r4; \ + pslld $3, r4; \ + pxor r2, r3; \ + pxor r4, r3; \ + pxor r0, r1; \ + pxor r2, r1; \ + vec_ror(r2, 3, r4); \ + vec_ror(r0, 13, r4); + +/* Apply a Serpent round to eight parallel blocks. This macro increments + `round'. 
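(As in the AVX2 implementation above, `round' is a literal subkey index and the + macro does not actually modify it. One forward round is key mixing, S-box, then + the linear transformation, i.e. roughly: block ^= K[round]; + block = S[which] (block); block = LT (block); where `which' is round mod 8 and + the result is picked up through the renamed na0..na4/nb0..nb4 register + arguments.) 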
*/ +#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round); \ + SBOX (which, a0, a1, a2, a3, a4); \ + BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round); \ + SBOX (which, b0, b1, b2, b3, b4); \ + LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4); \ + LINEAR_TRANSFORMATION (nb0, nb1, nb2, nb3, nb4); + +/* Apply the last Serpent round to eight parallel blocks. This macro increments + `round'. */ +#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round); \ + SBOX (which, a0, a1, a2, a3, a4); \ + BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round); \ + SBOX (which, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, ((round) + 1)); \ + BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, ((round) + 1)); + +/* Apply an inverse Serpent round to eight parallel blocks. This macro + increments `round'. */ +#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \ + na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, \ + nb0, nb1, nb2, nb3, nb4) \ + LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4); \ + LINEAR_TRANSFORMATION_INVERSE (b0, b1, b2, b3, b4); \ + SBOX_INVERSE (which, a0, a1, a2, a3, a4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \ + SBOX_INVERSE (which, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round); + +/* Apply the first inverse Serpent round to eight parallel blocks. This macro + increments `round'. */ +#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \ + na0, na1, na2, na3, na4, \ + b0, b1, b2, b3, b4, \ + nb0, nb1, nb2, nb3, nb4) \ + BLOCK_XOR_KEY (a0, a1, a2, a3, a4, ((round) + 1)); \ + BLOCK_XOR_KEY (b0, b1, b2, b3, b4, ((round) + 1)); \ + SBOX_INVERSE (which, a0, a1, a2, a3, a4); \ + BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \ + SBOX_INVERSE (which, b0, b1, b2, b3, b4); \ + BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round); + +.text + +.align 8 +ELF(.type __serpent_enc_blk8,@function;) +__serpent_enc_blk8: + /* input: + * %rdi: ctx, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext + * blocks + * output: + * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel + * ciphertext blocks + */ + + pcmpeqd RNOT, RNOT; + + transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); + + ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, + RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); + ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, + RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); + ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2, + RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); + ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, + RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); + ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, + RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); + ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, + RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); + ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, + RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); + ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, + RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); + ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0, + RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0); + ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, 
RA2, RA1, RA4, RA3, RA0, + RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0); + ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2, + RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2); + ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4, + RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4); + ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0, + RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0); + ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0, + RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0); + ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3, + RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3); + ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0, + RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0); + ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4, + RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4); + ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4, + RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4); + ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2, + RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2); + ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3, + RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3); + ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4, + RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4); + ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4, + RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4); + ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0, + RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0); + ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4, + RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4); + ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, + RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); + ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, + RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); + ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2, + RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); + ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, + RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); + ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, + RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); + ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, + RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); + ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, + RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); + ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, + RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); + + transpose_4x4(RA4, RA1, RA2, RA0, RA3, RTMP0, RTMP1); + transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1); + + ret; +ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;) + +.align 8 +ELF(.type __serpent_dec_blk8,@function;) +__serpent_dec_blk8: + /* input: + * %rdi: ctx, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * ciphertext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext + * blocks + */ + + pcmpeqd RNOT, RNOT; + + transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); + + ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4, + RA3, RA0, RA1, RA4, RA2, + RB0, RB1, RB2, RB3, RB4, + RB3, RB0, RB1, RB4, RB2); + ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3, + RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, 
RB4, RB3); + ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0, + RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0); + ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3, + RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3); + ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3, + RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3); + ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4, + RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4); + ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3, + RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3); + ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1, + RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1); + ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2, + RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2); + ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0, + RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0); + ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4, + RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4); + ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0, + RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0); + ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0, + RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0); + ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1, + RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1); + ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0, + RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0); + ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3, + RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3); + ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2, + RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2); + ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4, + RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4); + ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1, + RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1); + ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4, + RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4); + ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4, + RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4); + ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3, + RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3); + ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4, + RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4); + ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0, + RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0); + ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2, + RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2); + ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1, + RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1); + ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3, + RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3); + ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1, + RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1); + ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1, + RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1); + ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0, + RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, 
RB0); + ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1, + RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1); + ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4, + RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4); + + transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); + + ret; +ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;) + +.align 8 +.globl _gcry_serpent_sse2_ctr_enc +ELF(.type _gcry_serpent_sse2_ctr_enc,@function;) +_gcry_serpent_sse2_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv (big endian, 128bit) + */ + + /* load IV and byteswap */ + movdqu (%rcx), RA0; + movdqa RA0, RTMP0; + pbswap(RTMP0, RTMP1); /* be => le */ + + pcmpeqd RNOT, RNOT; + psrldq $8, RNOT; /* low: -1, high: 0 */ + movdqa RNOT, RTMP2; + paddq RTMP2, RTMP2; /* low: -2, high: 0 */ + + /* construct IVs */ + movdqa RTMP0, RTMP1; + psubq RNOT, RTMP0; /* +1 */ + movdqa RTMP0, RA1; + psubq RTMP2, RTMP1; /* +2 */ + movdqa RTMP1, RA2; + psubq RTMP2, RTMP0; /* +3 */ + movdqa RTMP0, RA3; + psubq RTMP2, RTMP1; /* +4 */ + movdqa RTMP1, RB0; + psubq RTMP2, RTMP0; /* +5 */ + movdqa RTMP0, RB1; + psubq RTMP2, RTMP1; /* +6 */ + movdqa RTMP1, RB2; + psubq RTMP2, RTMP0; /* +7 */ + movdqa RTMP0, RB3; + psubq RTMP2, RTMP1; /* +8 */ + + /* check need for handling 64-bit overflow and carry */ + cmpl $0xffffffff, 8(%rcx); + jne .Lno_ctr_carry; + + movl 12(%rcx), %eax; + bswapl %eax; + cmpl $-8, %eax; + jb .Lno_ctr_carry; + pslldq $8, RNOT; /* low: 0, high: -1 */ + je .Lcarry_RTMP0; + + cmpl $-6, %eax; + jb .Lcarry_RB3; + je .Lcarry_RB2; + + cmpl $-4, %eax; + jb .Lcarry_RB1; + je .Lcarry_RB0; + + cmpl $-2, %eax; + jb .Lcarry_RA3; + je .Lcarry_RA2; + + psubq RNOT, RA1; +.Lcarry_RA2: + psubq RNOT, RA2; +.Lcarry_RA3: + psubq RNOT, RA3; +.Lcarry_RB0: + psubq RNOT, RB0; +.Lcarry_RB1: + psubq RNOT, RB1; +.Lcarry_RB2: + psubq RNOT, RB2; +.Lcarry_RB3: + psubq RNOT, RB3; +.Lcarry_RTMP0: + psubq RNOT, RTMP1; + +.Lno_ctr_carry: + /* le => be */ + pbswap(RA1, RTMP0); + pbswap(RA2, RTMP0); + pbswap(RA3, RTMP0); + pbswap(RB0, RTMP0); + pbswap(RB1, RTMP0); + pbswap(RB2, RTMP0); + pbswap(RB3, RTMP0); + pbswap(RTMP1, RTMP0); + /* store new IV */ + movdqu RTMP1, (%rcx); + + call __serpent_enc_blk8; + + pxor_u((0 * 16)(%rdx), RA4, RTMP0); + pxor_u((1 * 16)(%rdx), RA1, RTMP0); + pxor_u((2 * 16)(%rdx), RA2, RTMP0); + pxor_u((3 * 16)(%rdx), RA0, RTMP0); + pxor_u((4 * 16)(%rdx), RB4, RTMP0); + pxor_u((5 * 16)(%rdx), RB1, RTMP0); + pxor_u((6 * 16)(%rdx), RB2, RTMP0); + pxor_u((7 * 16)(%rdx), RB0, RTMP0); + + movdqu RA4, (0 * 16)(%rsi); + movdqu RA1, (1 * 16)(%rsi); + movdqu RA2, (2 * 16)(%rsi); + movdqu RA0, (3 * 16)(%rsi); + movdqu RB4, (4 * 16)(%rsi); + movdqu RB1, (5 * 16)(%rsi); + movdqu RB2, (6 * 16)(%rsi); + movdqu RB0, (7 * 16)(%rsi); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret +ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;) + +.align 8 +.globl _gcry_serpent_sse2_cbc_dec +ELF(.type _gcry_serpent_sse2_cbc_dec,@function;) +_gcry_serpent_sse2_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + + movdqu (0 * 16)(%rdx), RA0; + movdqu (1 * 16)(%rdx), RA1; + movdqu (2 * 16)(%rdx), 
RA2; + movdqu (3 * 16)(%rdx), RA3; + movdqu (4 * 16)(%rdx), RB0; + movdqu (5 * 16)(%rdx), RB1; + movdqu (6 * 16)(%rdx), RB2; + movdqu (7 * 16)(%rdx), RB3; + + call __serpent_dec_blk8; + + movdqu (7 * 16)(%rdx), RNOT; + pxor_u((%rcx), RA0, RTMP0); + pxor_u((0 * 16)(%rdx), RA1, RTMP0); + pxor_u((1 * 16)(%rdx), RA2, RTMP0); + pxor_u((2 * 16)(%rdx), RA3, RTMP0); + pxor_u((3 * 16)(%rdx), RB0, RTMP0); + pxor_u((4 * 16)(%rdx), RB1, RTMP0); + pxor_u((5 * 16)(%rdx), RB2, RTMP0); + pxor_u((6 * 16)(%rdx), RB3, RTMP0); + movdqu RNOT, (%rcx); /* store new IV */ + + movdqu RA0, (0 * 16)(%rsi); + movdqu RA1, (1 * 16)(%rsi); + movdqu RA2, (2 * 16)(%rsi); + movdqu RA3, (3 * 16)(%rsi); + movdqu RB0, (4 * 16)(%rsi); + movdqu RB1, (5 * 16)(%rsi); + movdqu RB2, (6 * 16)(%rsi); + movdqu RB3, (7 * 16)(%rsi); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret +ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;) + +.align 8 +.globl _gcry_serpent_sse2_cfb_dec +ELF(.type _gcry_serpent_sse2_cfb_dec,@function;) +_gcry_serpent_sse2_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + + /* Load input */ + movdqu (%rcx), RA0; + movdqu 0 * 16(%rdx), RA1; + movdqu 1 * 16(%rdx), RA2; + movdqu 2 * 16(%rdx), RA3; + movdqu 3 * 16(%rdx), RB0; + movdqu 4 * 16(%rdx), RB1; + movdqu 5 * 16(%rdx), RB2; + movdqu 6 * 16(%rdx), RB3; + + /* Update IV */ + movdqu 7 * 16(%rdx), RNOT; + movdqu RNOT, (%rcx); + + call __serpent_enc_blk8; + + pxor_u((0 * 16)(%rdx), RA4, RTMP0); + pxor_u((1 * 16)(%rdx), RA1, RTMP0); + pxor_u((2 * 16)(%rdx), RA2, RTMP0); + pxor_u((3 * 16)(%rdx), RA0, RTMP0); + pxor_u((4 * 16)(%rdx), RB4, RTMP0); + pxor_u((5 * 16)(%rdx), RB1, RTMP0); + pxor_u((6 * 16)(%rdx), RB2, RTMP0); + pxor_u((7 * 16)(%rdx), RB0, RTMP0); + + movdqu RA4, (0 * 16)(%rsi); + movdqu RA1, (1 * 16)(%rsi); + movdqu RA2, (2 * 16)(%rsi); + movdqu RA0, (3 * 16)(%rsi); + movdqu RB4, (4 * 16)(%rsi); + movdqu RB1, (5 * 16)(%rsi); + movdqu RB2, (6 * 16)(%rsi); + movdqu RB0, (7 * 16)(%rsi); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret +ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;) + +.align 8 +.globl _gcry_serpent_sse2_ocb_enc +ELF(.type _gcry_serpent_sse2_ocb_enc,@function;) + +_gcry_serpent_sse2_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[8]) + */ + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + movdqu (%rcx), RTMP0; + movdqu (%r8), RTMP1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + movdqu (n * 16)(%rdx), xreg; \ + movdqu (lreg), RNOT; \ + pxor RNOT, RTMP0; \ + pxor xreg, RTMP1; \ + pxor RTMP0, xreg; \ + movdqu RTMP0, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 
8)(%r9), %r13; + OCB_INPUT(0, %r10, RA0); + OCB_INPUT(1, %r11, RA1); + OCB_INPUT(2, %r12, RA2); + OCB_INPUT(3, %r13, RA3); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, RB0); + OCB_INPUT(5, %r11, RB1); + OCB_INPUT(6, %r12, RB2); + OCB_INPUT(7, %r13, RB3); +#undef OCB_INPUT + + movdqu RTMP0, (%rcx); + movdqu RTMP1, (%r8); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_enc_blk8; + + addq $(4 * 8), %rsp; + + pxor_u((0 * 16)(%rsi), RA4, RTMP0); + pxor_u((1 * 16)(%rsi), RA1, RTMP0); + pxor_u((2 * 16)(%rsi), RA2, RTMP0); + pxor_u((3 * 16)(%rsi), RA0, RTMP0); + pxor_u((4 * 16)(%rsi), RB4, RTMP0); + pxor_u((5 * 16)(%rsi), RB1, RTMP0); + pxor_u((6 * 16)(%rsi), RB2, RTMP0); + pxor_u((7 * 16)(%rsi), RB0, RTMP0); + + movdqu RA4, (0 * 16)(%rsi); + movdqu RA1, (1 * 16)(%rsi); + movdqu RA2, (2 * 16)(%rsi); + movdqu RA0, (3 * 16)(%rsi); + movdqu RB4, (4 * 16)(%rsi); + movdqu RB1, (5 * 16)(%rsi); + movdqu RB2, (6 * 16)(%rsi); + movdqu RB0, (7 * 16)(%rsi); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret; +ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;) + +.align 8 +.globl _gcry_serpent_sse2_ocb_dec +ELF(.type _gcry_serpent_sse2_ocb_dec,@function;) + +_gcry_serpent_sse2_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[8]) + */ + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + movdqu (%rcx), RTMP0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + movdqu (n * 16)(%rdx), xreg; \ + movdqu (lreg), RNOT; \ + pxor RNOT, RTMP0; \ + pxor RTMP0, xreg; \ + movdqu RTMP0, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, RA0); + OCB_INPUT(1, %r11, RA1); + OCB_INPUT(2, %r12, RA2); + OCB_INPUT(3, %r13, RA3); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, RB0); + OCB_INPUT(5, %r11, RB1); + OCB_INPUT(6, %r12, RB2); + OCB_INPUT(7, %r13, RB3); +#undef OCB_INPUT + + movdqu RTMP0, (%rcx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_dec_blk8; + + addq $(4 * 8), %rsp; + + movdqu (%r8), RTMP0; + + pxor_u((0 * 16)(%rsi), RA0, RTMP1); + pxor_u((1 * 16)(%rsi), RA1, RTMP1); + pxor_u((2 * 16)(%rsi), RA2, RTMP1); + pxor_u((3 * 16)(%rsi), RA3, RTMP1); + pxor_u((4 * 16)(%rsi), RB0, RTMP1); + pxor_u((5 * 16)(%rsi), RB1, RTMP1); + pxor_u((6 * 16)(%rsi), RB2, RTMP1); + pxor_u((7 * 16)(%rsi), RB3, RTMP1); + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + movdqu RA0, (0 * 16)(%rsi); + pxor RA0, RTMP0; + movdqu RA1, (1 * 16)(%rsi); + pxor RA1, RTMP0; + movdqu RA2, (2 * 16)(%rsi); + pxor RA2, RTMP0; + movdqu RA3, (3 * 16)(%rsi); + pxor RA3, RTMP0; + movdqu RB0, (4 * 16)(%rsi); + pxor RB0, RTMP0; + movdqu RB1, (5 * 16)(%rsi); + pxor RB1, RTMP0; + movdqu RB2, (6 * 
16)(%rsi); + pxor RB2, RTMP0; + movdqu RB3, (7 * 16)(%rsi); + pxor RB3, RTMP0; + + movdqu RTMP0, (%r8); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret; +ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;) + +.align 8 +.globl _gcry_serpent_sse2_ocb_auth +ELF(.type _gcry_serpent_sse2_ocb_auth,@function;) + +_gcry_serpent_sse2_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (8 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[8]) + */ + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + movdqu (%rdx), RTMP0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + movdqu (n * 16)(%rsi), xreg; \ + movdqu (lreg), RNOT; \ + pxor RNOT, RTMP0; \ + pxor RTMP0, xreg; + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, RA0); + OCB_INPUT(1, %r11, RA1); + OCB_INPUT(2, %r12, RA2); + OCB_INPUT(3, %r13, RA3); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, RB0); + OCB_INPUT(5, %r11, RB1); + OCB_INPUT(6, %r12, RB2); + OCB_INPUT(7, %r13, RB3); +#undef OCB_INPUT + + movdqu RTMP0, (%rdx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_enc_blk8; + + addq $(4 * 8), %rsp; + + movdqu (%rcx), RTMP0; + pxor RB4, RA4; + pxor RB1, RA1; + pxor RB2, RA2; + pxor RB0, RA0; + + pxor RTMP0, RA2; + pxor RA4, RA1; + pxor RA2, RA0; + + pxor RA1, RA0; + movdqu RA0, (%rcx); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret; +ELF(.size _gcry_serpent_sse2_ocb_auth,.-_gcry_serpent_sse2_ocb_auth;) + +#endif /*defined(USE_SERPENT)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/serpent.c b/libotr/libgcrypt-1.8.7/cipher/serpent.c new file mode 100644 index 0000000..ea4b8ed --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/serpent.c @@ -0,0 +1,1791 @@ +/* serpent.c - Implementation of the Serpent encryption algorithm. + * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */
+
+#include <config.h>
+
+#include <string.h>
+#include <stdio.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+
+/* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
+#undef USE_SSE2
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSE2 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# if defined(ENABLE_AVX2_SUPPORT)
+#  define USE_AVX2 1
+# endif
+#endif
+
+/* USE_NEON indicates whether to enable ARM NEON assembly code. */
+#undef USE_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_NEON 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+/* Number of rounds per Serpent encrypt/decrypt operation. */
+#define ROUNDS 32
+
+/* Magic number, used during generation of the subkeys. */
+#define PHI 0x9E3779B9
+
+/* Serpent works on 128-bit blocks. */
+typedef u32 serpent_block_t[4];
+
+/* Serpent key, provided by the user.  If the original key is shorter
+   than 256 bits, it is padded. */
+typedef u32 serpent_key_t[8];
+
+/* The key schedule consists of 33 128-bit subkeys. */
+typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
+
+/* A Serpent context. */
+typedef struct serpent_context
+{
+  serpent_subkeys_t keys;      /* Generated subkeys. */
+
+#ifdef USE_AVX2
+  int use_avx2;
+#endif
+#ifdef USE_NEON
+  int use_neon;
+#endif
+} serpent_context_t;
+
+
+/* Assembly implementations use the SystemV ABI; on Win64 an ABI
+ * conversion and additional stack space to store XMM6-XMM15 are
+ * needed. */
+#undef ASM_FUNC_ABI
+#if defined(USE_SSE2) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+#  define ASM_FUNC_ABI
+# endif
+#endif
+
+
+#ifdef USE_SSE2
+/* Assembler implementations of Serpent using SSE2.  Process 8 blocks in
+   parallel.
+ */
+extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx,
+                                        const unsigned char *abuf,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        const u64 Ls[8]) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AVX2
+/* Assembler implementations of Serpent using AVX2.  Process 16 blocks in
+   parallel.
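+
+   Like the SSE2 code, the assembly is word-sliced: each vector
+   register holds one 32-bit state word from several blocks, with two
+   interleaved register sets per call.  Callers therefore feed it full
+   16-block chunks first; a minimal sketch of the dispatch pattern
+   (the same shape as the bulk loops further below):
+
+     while (nblocks >= 16)
+       {
+         _gcry_serpent_avx2_ctr_enc (ctx, outbuf, inbuf, ctr);
+         outbuf  += 16 * sizeof (serpent_block_t);
+         inbuf   += 16 * sizeof (serpent_block_t);
+         nblocks -= 16;
+       }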
+ */
+extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx,
+                                        const unsigned char *abuf,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        const u64 Ls[16]) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_NEON
+/* Assembler implementations of Serpent using ARM NEON.  Process 8 blocks in
+   parallel.
+ */
+extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *ctr);
+
+extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *iv);
+
+extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *iv);
+
+extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const void *Ls[8]);
+
+extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const void *Ls[8]);
+
+extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx,
+                                        const unsigned char *abuf,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        const void *Ls[8]);
+#endif
+
+
+/* A prototype. */
+static const char *serpent_test (void);
+
+
+/*
+ * These are the S-Boxes of Serpent from the following research paper.
+ *
+ * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ * (New York, New York, USA), p. 317–329, National Institute of Standards and
+ * Technology, 2000.
+ * + * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf + * + */ + +#define SBOX0(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r3 ^= r0; r4 = r1; \ + r1 &= r3; r4 ^= r2; \ + r1 ^= r0; r0 |= r3; \ + r0 ^= r4; r4 ^= r3; \ + r3 ^= r2; r2 |= r1; \ + r2 ^= r4; r4 = ~r4; \ + r4 |= r1; r1 ^= r3; \ + r1 ^= r4; r3 |= r0; \ + r1 ^= r3; r4 ^= r3; \ + \ + w = r1; x = r4; y = r2; z = r0; \ + } + +#define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r2 = ~r2; r4 = r1; \ + r1 |= r0; r4 = ~r4; \ + r1 ^= r2; r2 |= r4; \ + r1 ^= r3; r0 ^= r4; \ + r2 ^= r0; r0 &= r3; \ + r4 ^= r0; r0 |= r1; \ + r0 ^= r2; r3 ^= r4; \ + r2 ^= r1; r3 ^= r0; \ + r3 ^= r1; \ + r2 &= r3; \ + r4 ^= r2; \ + \ + w = r0; x = r4; y = r1; z = r3; \ + } + +#define SBOX1(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r0 = ~r0; r2 = ~r2; \ + r4 = r0; r0 &= r1; \ + r2 ^= r0; r0 |= r3; \ + r3 ^= r2; r1 ^= r0; \ + r0 ^= r4; r4 |= r1; \ + r1 ^= r3; r2 |= r0; \ + r2 &= r4; r0 ^= r1; \ + r1 &= r2; \ + r1 ^= r0; r0 &= r2; \ + r0 ^= r4; \ + \ + w = r2; x = r0; y = r3; z = r1; \ + } + +#define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r4 = r1; r1 ^= r3; \ + r3 &= r1; r4 ^= r2; \ + r3 ^= r0; r0 |= r1; \ + r2 ^= r3; r0 ^= r4; \ + r0 |= r2; r1 ^= r3; \ + r0 ^= r1; r1 |= r3; \ + r1 ^= r0; r4 = ~r4; \ + r4 ^= r1; r1 |= r0; \ + r1 ^= r0; \ + r1 |= r4; \ + r3 ^= r1; \ + \ + w = r4; x = r0; y = r3; z = r2; \ + } + +#define SBOX2(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r4 = r0; r0 &= r2; \ + r0 ^= r3; r2 ^= r1; \ + r2 ^= r0; r3 |= r4; \ + r3 ^= r1; r4 ^= r2; \ + r1 = r3; r3 |= r4; \ + r3 ^= r0; r0 &= r1; \ + r4 ^= r0; r1 ^= r3; \ + r1 ^= r4; r4 = ~r4; \ + \ + w = r2; x = r3; y = r1; z = r4; \ + } + +#define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r2 ^= r3; r3 ^= r0; \ + r4 = r3; r3 &= r2; \ + r3 ^= r1; r1 |= r2; \ + r1 ^= r4; r4 &= r3; \ + r2 ^= r3; r4 &= r0; \ + r4 ^= r2; r2 &= r1; \ + r2 |= r0; r3 = ~r3; \ + r2 ^= r3; r0 ^= r3; \ + r0 &= r1; r3 ^= r4; \ + r3 ^= r0; \ + \ + w = r1; x = r4; y = r2; z = r3; \ + } + +#define SBOX3(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r4 = r0; r0 |= r3; \ + r3 ^= r1; r1 &= r4; \ + r4 ^= r2; r2 ^= r3; \ + r3 &= r0; r4 |= r1; \ + r3 ^= r4; r0 ^= r1; \ + r4 &= r0; r1 ^= r3; \ + r4 ^= r2; r1 |= r0; \ + r1 ^= r2; r0 ^= r3; \ + r2 = r1; r1 |= r3; \ + r1 ^= r0; \ + \ + w = r1; x = r2; y = r3; z = r4; \ + } + +#define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r4 = r2; r2 ^= r1; \ + r0 ^= r2; r4 &= r2; \ + r4 ^= r0; r0 &= r1; \ + r1 ^= r3; r3 |= r4; \ + r2 ^= r3; r0 ^= r3; \ + r1 ^= r4; r3 &= r2; \ + r3 ^= r1; r1 ^= r0; \ + r1 |= r2; r0 ^= r3; \ + r1 ^= r4; \ + r0 ^= r1; \ + \ + w = r2; x = r1; y = r3; z = r0; \ + } + +#define SBOX4(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r1 ^= r3; r3 = ~r3; \ + r2 ^= r3; r3 ^= r0; \ + r4 = r1; r1 &= r3; \ + r1 ^= r2; r4 ^= r3; \ + r0 ^= r4; r2 &= r4; \ + r2 ^= r0; r0 &= r1; \ + r3 ^= r0; r4 |= r1; \ + r4 ^= r0; r0 |= r3; \ + r0 ^= r2; r2 &= r3; \ + r0 = ~r0; r4 ^= r2; \ + \ + w = r1; x = r4; y = r0; z = r3; \ + } + +#define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r4 = r2; r2 &= r3; \ + r2 ^= r1; r1 |= r3; \ + r1 &= r0; r4 ^= r2; \ + r4 ^= r1; r1 &= r2; \ + r0 = ~r0; r3 ^= r4; \ + r1 ^= r3; r3 &= r0; \ + r3 ^= r2; r0 ^= r1; \ + r2 &= r0; r3 ^= r0; \ + r2 ^= r4; \ + r2 |= r3; r3 ^= r0; \ + r2 ^= r1; \ + \ + w = r0; x = r3; y = r2; z = r4; \ + } + +#define SBOX5(r0, r1, r2, r3, w, x, y, z) \ + { 
\ + u32 r4; \ + \ + r0 ^= r1; r1 ^= r3; \ + r3 = ~r3; r4 = r1; \ + r1 &= r0; r2 ^= r3; \ + r1 ^= r2; r2 |= r4; \ + r4 ^= r3; r3 &= r1; \ + r3 ^= r0; r4 ^= r1; \ + r4 ^= r2; r2 ^= r0; \ + r0 &= r3; r2 = ~r2; \ + r0 ^= r4; r4 |= r3; \ + r2 ^= r4; \ + \ + w = r1; x = r3; y = r0; z = r2; \ + } + +#define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r1 = ~r1; r4 = r3; \ + r2 ^= r1; r3 |= r0; \ + r3 ^= r2; r2 |= r1; \ + r2 &= r0; r4 ^= r3; \ + r2 ^= r4; r4 |= r0; \ + r4 ^= r1; r1 &= r2; \ + r1 ^= r3; r4 ^= r2; \ + r3 &= r4; r4 ^= r1; \ + r3 ^= r4; r4 = ~r4; \ + r3 ^= r0; \ + \ + w = r1; x = r4; y = r3; z = r2; \ + } + +#define SBOX6(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r2 = ~r2; r4 = r3; \ + r3 &= r0; r0 ^= r4; \ + r3 ^= r2; r2 |= r4; \ + r1 ^= r3; r2 ^= r0; \ + r0 |= r1; r2 ^= r1; \ + r4 ^= r0; r0 |= r3; \ + r0 ^= r2; r4 ^= r3; \ + r4 ^= r0; r3 = ~r3; \ + r2 &= r4; \ + r2 ^= r3; \ + \ + w = r0; x = r1; y = r4; z = r2; \ + } + +#define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r0 ^= r2; r4 = r2; \ + r2 &= r0; r4 ^= r3; \ + r2 = ~r2; r3 ^= r1; \ + r2 ^= r3; r4 |= r0; \ + r0 ^= r2; r3 ^= r4; \ + r4 ^= r1; r1 &= r3; \ + r1 ^= r0; r0 ^= r3; \ + r0 |= r2; r3 ^= r1; \ + r4 ^= r0; \ + \ + w = r1; x = r2; y = r4; z = r3; \ + } + +#define SBOX7(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r4 = r1; r1 |= r2; \ + r1 ^= r3; r4 ^= r2; \ + r2 ^= r1; r3 |= r4; \ + r3 &= r0; r4 ^= r2; \ + r3 ^= r1; r1 |= r4; \ + r1 ^= r0; r0 |= r4; \ + r0 ^= r2; r1 ^= r4; \ + r2 ^= r1; r1 &= r0; \ + r1 ^= r4; r2 = ~r2; \ + r2 |= r0; \ + r4 ^= r2; \ + \ + w = r4; x = r3; y = r1; z = r0; \ + } + +#define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \ + { \ + u32 r4; \ + \ + r4 = r2; r2 ^= r0; \ + r0 &= r3; r4 |= r3; \ + r2 = ~r2; r3 ^= r1; \ + r1 |= r0; r0 ^= r2; \ + r2 &= r4; r3 &= r4; \ + r1 ^= r2; r2 ^= r0; \ + r0 |= r2; r4 ^= r1; \ + r0 ^= r3; r3 ^= r4; \ + r4 |= r0; r3 ^= r2; \ + r4 ^= r2; \ + \ + w = r3; x = r0; y = r1; z = r4; \ + } + +/* XOR BLOCK1 into BLOCK0. */ +#define BLOCK_XOR(block0, block1) \ + { \ + block0[0] ^= block1[0]; \ + block0[1] ^= block1[1]; \ + block0[2] ^= block1[2]; \ + block0[3] ^= block1[3]; \ + } + +/* Copy BLOCK_SRC to BLOCK_DST. */ +#define BLOCK_COPY(block_dst, block_src) \ + { \ + block_dst[0] = block_src[0]; \ + block_dst[1] = block_src[1]; \ + block_dst[2] = block_src[2]; \ + block_dst[3] = block_src[3]; \ + } + +/* Apply SBOX number WHICH to to the block found in ARRAY0, writing + the output to the block found in ARRAY1. */ +#define SBOX(which, array0, array1) \ + SBOX##which (array0[0], array0[1], array0[2], array0[3], \ + array1[0], array1[1], array1[2], array1[3]); + +/* Apply inverse SBOX number WHICH to to the block found in ARRAY0, writing + the output to the block found in ARRAY1. */ +#define SBOX_INVERSE(which, array0, array1) \ + SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3], \ + array1[0], array1[1], array1[2], array1[3]); + +/* Apply the linear transformation to BLOCK. */ +#define LINEAR_TRANSFORMATION(block) \ + { \ + block[0] = rol (block[0], 13); \ + block[2] = rol (block[2], 3); \ + block[1] = block[1] ^ block[0] ^ block[2]; \ + block[3] = block[3] ^ block[2] ^ (block[0] << 3); \ + block[1] = rol (block[1], 1); \ + block[3] = rol (block[3], 7); \ + block[0] = block[0] ^ block[1] ^ block[3]; \ + block[2] = block[2] ^ block[3] ^ (block[1] << 7); \ + block[0] = rol (block[0], 5); \ + block[2] = rol (block[2], 22); \ + } + +/* Apply the inverse linear transformation to BLOCK. 
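+
+   The macro below undoes LINEAR_TRANSFORMATION by running the same
+   steps in reverse order with each rol replaced by the matching ror.
+   A minimal round-trip check (hypothetical test code, not part of
+   the library API):
+
+     u32 b[4] = { 1, 2, 3, 4 }, c[4];
+     int i;
+     BLOCK_COPY (c, b);
+     LINEAR_TRANSFORMATION (c);
+     LINEAR_TRANSFORMATION_INVERSE (c);
+     for (i = 0; i < 4; i++)
+       gcry_assert (c[i] == b[i]);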
+ */
+#define LINEAR_TRANSFORMATION_INVERSE(block) \
+  { \
+    block[2] = ror (block[2], 22); \
+    block[0] = ror (block[0], 5); \
+    block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
+    block[0] = block[0] ^ block[1] ^ block[3]; \
+    block[3] = ror (block[3], 7); \
+    block[1] = ror (block[1], 1); \
+    block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
+    block[1] = block[1] ^ block[0] ^ block[2]; \
+    block[2] = ror (block[2], 3); \
+    block[0] = ror (block[0], 13); \
+  }
+
+/* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the
+   subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary storage.
+   This macro increments `round'. */
+#define ROUND(which, subkeys, block, block_tmp) \
+  { \
+    BLOCK_XOR (block, subkeys[round]); \
+    round++; \
+    SBOX (which, block, block_tmp); \
+    LINEAR_TRANSFORMATION (block_tmp); \
+    BLOCK_COPY (block, block_tmp); \
+  }
+
+/* Apply the last Serpent round to BLOCK, using the SBOX number WHICH
+   and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary
+   storage.  The result will be stored in BLOCK_TMP.  This macro
+   increments `round' twice. */
+#define ROUND_LAST(which, subkeys, block, block_tmp) \
+  { \
+    BLOCK_XOR (block, subkeys[round]); \
+    round++; \
+    SBOX (which, block, block_tmp); \
+    BLOCK_XOR (block_tmp, subkeys[round]); \
+    round++; \
+  }
+
+/* Apply an inverse Serpent round to BLOCK, using the SBOX number
+   WHICH and the subkeys contained in SUBKEY.  Use BLOCK_TMP as
+   temporary storage.  This macro decrements `round'. */
+#define ROUND_INVERSE(which, subkey, block, block_tmp) \
+  { \
+    LINEAR_TRANSFORMATION_INVERSE (block); \
+    SBOX_INVERSE (which, block, block_tmp); \
+    BLOCK_XOR (block_tmp, subkey[round]); \
+    round--; \
+    BLOCK_COPY (block, block_tmp); \
+  }
+
+/* Apply the first Serpent round in inverse direction to BLOCK
+   (undoing the last encryption round), using the SBOX number WHICH
+   and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary
+   storage.  The result will be stored in BLOCK_TMP.  This macro
+   decrements `round' twice. */
+#define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \
+  { \
+    BLOCK_XOR (block, subkeys[round]); \
+    round--; \
+    SBOX_INVERSE (which, block, block_tmp); \
+    BLOCK_XOR (block_tmp, subkeys[round]); \
+    round--; \
+  }
+
+/* Convert the user provided key KEY of KEY_LENGTH bytes into the
+   internally used format. */
+static void
+serpent_key_prepare (const byte *key, unsigned int key_length,
+                     serpent_key_t key_prepared)
+{
+  int i;
+
+  /* Copy key. */
+  key_length /= 4;
+  for (i = 0; i < key_length; i++)
+    key_prepared[i] = buf_get_le32 (key + i * 4);
+
+  if (i < 8)
+    {
+      /* Key must be padded according to the Serpent
+         specification. */
+      key_prepared[i] = 0x00000001;
+
+      for (i++; i < 8; i++)
+        key_prepared[i] = 0;
+    }
+}
+
+/* Derive the 33 subkeys from KEY and store them in SUBKEYS. */
+static void
+serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
+{
+  u32 w[8];            /* The `prekey'. */
+  u32 ws[4];
+  u32 wt[4];
+
+  /* Initialize with key values. */
+  w[0] = key[0];
+  w[1] = key[1];
+  w[2] = key[2];
+  w[3] = key[3];
+  w[4] = key[4];
+  w[5] = key[5];
+  w[6] = key[6];
+  w[7] = key[7];
+
+  /* Expand to intermediate key using the affine recurrence.
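+
+     The prekey recurrence computed by EXPAND_KEY4 below is, for
+     i = 0..131,
+
+       w_i = (w_{i-8} ^ w_{i-5} ^ w_{i-3} ^ w_{i-1} ^ PHI ^ i) <<< 11
+
+     and only the last eight words are ever live, so they are kept in
+     the sliding window w[0..7], indexed modulo 8.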
*/ +#define EXPAND_KEY4(wo, r) \ + wo[0] = w[(r+0)%8] = \ + rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \ + wo[1] = w[(r+1)%8] = \ + rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \ + wo[2] = w[(r+2)%8] = \ + rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \ + wo[3] = w[(r+3)%8] = \ + rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11); + +#define EXPAND_KEY(r) \ + EXPAND_KEY4(ws, (r)); \ + EXPAND_KEY4(wt, (r + 4)); + + /* Calculate subkeys via S-Boxes, in bitslice mode. */ + EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]); + EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]); + EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]); + EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]); + EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]); + EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]); + EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]); + EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]); + EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]); + EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]); + EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]); + EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]); + EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]); + EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]); + EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]); + EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]); + EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]); + + wipememory (ws, sizeof (ws)); + wipememory (wt, sizeof (wt)); + wipememory (w, sizeof (w)); +} + +/* Initialize CONTEXT with the key KEY of KEY_LENGTH bits. */ +static void +serpent_setkey_internal (serpent_context_t *context, + const byte *key, unsigned int key_length) +{ + serpent_key_t key_prepared; + + serpent_key_prepare (key, key_length, key_prepared); + serpent_subkeys_generate (key_prepared, context->keys); + +#ifdef USE_AVX2 + context->use_avx2 = 0; + if ((_gcry_get_hw_features () & HWF_INTEL_AVX2)) + { + context->use_avx2 = 1; + } +#endif + +#ifdef USE_NEON + context->use_neon = 0; + if ((_gcry_get_hw_features () & HWF_ARM_NEON)) + { + context->use_neon = 1; + } +#endif + + wipememory (key_prepared, sizeof(key_prepared)); +} + +/* Initialize CTX with the key KEY of KEY_LENGTH bytes. */ +static gcry_err_code_t +serpent_setkey (void *ctx, + const byte *key, unsigned int key_length) +{ + serpent_context_t *context = ctx; + static const char *serpent_test_ret; + static int serpent_init_done; + gcry_err_code_t ret = GPG_ERR_NO_ERROR; + + if (! serpent_init_done) + { + /* Execute a self-test the first time, Serpent is used. 
*/ + serpent_init_done = 1; + serpent_test_ret = serpent_test (); + if (serpent_test_ret) + log_error ("Serpent test failure: %s\n", serpent_test_ret); + } + + if (serpent_test_ret) + ret = GPG_ERR_SELFTEST_FAILED; + else + serpent_setkey_internal (context, key, key_length); + + return ret; +} + +static void +serpent_encrypt_internal (serpent_context_t *context, + const byte *input, byte *output) +{ + serpent_block_t b, b_next; + int round = 0; + + b[0] = buf_get_le32 (input + 0); + b[1] = buf_get_le32 (input + 4); + b[2] = buf_get_le32 (input + 8); + b[3] = buf_get_le32 (input + 12); + + ROUND (0, context->keys, b, b_next); + ROUND (1, context->keys, b, b_next); + ROUND (2, context->keys, b, b_next); + ROUND (3, context->keys, b, b_next); + ROUND (4, context->keys, b, b_next); + ROUND (5, context->keys, b, b_next); + ROUND (6, context->keys, b, b_next); + ROUND (7, context->keys, b, b_next); + ROUND (0, context->keys, b, b_next); + ROUND (1, context->keys, b, b_next); + ROUND (2, context->keys, b, b_next); + ROUND (3, context->keys, b, b_next); + ROUND (4, context->keys, b, b_next); + ROUND (5, context->keys, b, b_next); + ROUND (6, context->keys, b, b_next); + ROUND (7, context->keys, b, b_next); + ROUND (0, context->keys, b, b_next); + ROUND (1, context->keys, b, b_next); + ROUND (2, context->keys, b, b_next); + ROUND (3, context->keys, b, b_next); + ROUND (4, context->keys, b, b_next); + ROUND (5, context->keys, b, b_next); + ROUND (6, context->keys, b, b_next); + ROUND (7, context->keys, b, b_next); + ROUND (0, context->keys, b, b_next); + ROUND (1, context->keys, b, b_next); + ROUND (2, context->keys, b, b_next); + ROUND (3, context->keys, b, b_next); + ROUND (4, context->keys, b, b_next); + ROUND (5, context->keys, b, b_next); + ROUND (6, context->keys, b, b_next); + + ROUND_LAST (7, context->keys, b, b_next); + + buf_put_le32 (output + 0, b_next[0]); + buf_put_le32 (output + 4, b_next[1]); + buf_put_le32 (output + 8, b_next[2]); + buf_put_le32 (output + 12, b_next[3]); +} + +static void +serpent_decrypt_internal (serpent_context_t *context, + const byte *input, byte *output) +{ + serpent_block_t b, b_next; + int round = ROUNDS; + + b_next[0] = buf_get_le32 (input + 0); + b_next[1] = buf_get_le32 (input + 4); + b_next[2] = buf_get_le32 (input + 8); + b_next[3] = buf_get_le32 (input + 12); + + ROUND_FIRST_INVERSE (7, context->keys, b_next, b); + + ROUND_INVERSE (6, context->keys, b, b_next); + ROUND_INVERSE (5, context->keys, b, b_next); + ROUND_INVERSE (4, context->keys, b, b_next); + ROUND_INVERSE (3, context->keys, b, b_next); + ROUND_INVERSE (2, context->keys, b, b_next); + ROUND_INVERSE (1, context->keys, b, b_next); + ROUND_INVERSE (0, context->keys, b, b_next); + ROUND_INVERSE (7, context->keys, b, b_next); + ROUND_INVERSE (6, context->keys, b, b_next); + ROUND_INVERSE (5, context->keys, b, b_next); + ROUND_INVERSE (4, context->keys, b, b_next); + ROUND_INVERSE (3, context->keys, b, b_next); + ROUND_INVERSE (2, context->keys, b, b_next); + ROUND_INVERSE (1, context->keys, b, b_next); + ROUND_INVERSE (0, context->keys, b, b_next); + ROUND_INVERSE (7, context->keys, b, b_next); + ROUND_INVERSE (6, context->keys, b, b_next); + ROUND_INVERSE (5, context->keys, b, b_next); + ROUND_INVERSE (4, context->keys, b, b_next); + ROUND_INVERSE (3, context->keys, b, b_next); + ROUND_INVERSE (2, context->keys, b, b_next); + ROUND_INVERSE (1, context->keys, b, b_next); + ROUND_INVERSE (0, context->keys, b, b_next); + ROUND_INVERSE (7, context->keys, b, b_next); + ROUND_INVERSE (6, 
context->keys, b, b_next); + ROUND_INVERSE (5, context->keys, b, b_next); + ROUND_INVERSE (4, context->keys, b, b_next); + ROUND_INVERSE (3, context->keys, b, b_next); + ROUND_INVERSE (2, context->keys, b, b_next); + ROUND_INVERSE (1, context->keys, b, b_next); + ROUND_INVERSE (0, context->keys, b, b_next); + + buf_put_le32 (output + 0, b_next[0]); + buf_put_le32 (output + 4, b_next[1]); + buf_put_le32 (output + 8, b_next[2]); + buf_put_le32 (output + 12, b_next[3]); +} + +static unsigned int +serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in) +{ + serpent_context_t *context = ctx; + + serpent_encrypt_internal (context, buffer_in, buffer_out); + return /*burn_stack*/ (2 * sizeof (serpent_block_t)); +} + +static unsigned int +serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in) +{ + serpent_context_t *context = ctx; + + serpent_decrypt_internal (context, buffer_in, buffer_out); + return /*burn_stack*/ (2 * sizeof (serpent_block_t)); +} + + + +/* Bulk encryption of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size sizeof(serpent_block_t). */ +void +_gcry_serpent_ctr_enc(void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + serpent_context_t *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[sizeof(serpent_block_t)]; + int burn_stack_depth = 2 * sizeof (serpent_block_t); + int i; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 16; + outbuf += 16 * sizeof(serpent_block_t); + inbuf += 16 * sizeof(serpent_block_t); + did_use_avx2 = 1; + } + + if (did_use_avx2) + { + /* serpent-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic/sse2 code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#endif + +#ifdef USE_SSE2 + { + int did_use_sse2 = 0; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_sse2 = 1; + } + + if (did_use_sse2) + { + /* serpent-sse2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#endif + +#ifdef USE_NEON + if (ctx->use_neon) + { + int did_use_neon = 0; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_neon = 1; + } + + if (did_use_neon) + { + /* serpent-neon assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + serpent_encrypt_internal(ctx, ctr, tmpbuf); + /* XOR the input with the encrypted counter and store in output. */ + buf_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t)); + outbuf += sizeof(serpent_block_t); + inbuf += sizeof(serpent_block_t); + /* Increment the counter. 
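+
+     The counter is treated as one 128-bit big-endian integer: the
+     loop below increments the last byte and propagates the carry
+     towards the front, stopping at the first byte that does not wrap
+     to zero.  Equivalently (sketch with the 16-byte size spelled out):
+
+       for (i = 15; i >= 0; i--)
+         if (++ctr[i] != 0)
+           break;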
*/ + for (i = sizeof(serpent_block_t); i > 0; i--) + { + ctr[i-1]++; + if (ctr[i-1]) + break; + } + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk decryption of complete blocks in CBC mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_serpent_cbc_dec(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + serpent_context_t *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char savebuf[sizeof(serpent_block_t)]; + int burn_stack_depth = 2 * sizeof (serpent_block_t); + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 16; + outbuf += 16 * sizeof(serpent_block_t); + inbuf += 16 * sizeof(serpent_block_t); + did_use_avx2 = 1; + } + + if (did_use_avx2) + { + /* serpent-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic/sse2 code to handle smaller chunks... */ + } +#endif + +#ifdef USE_SSE2 + { + int did_use_sse2 = 0; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_sse2 = 1; + } + + if (did_use_sse2) + { + /* serpent-sse2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_NEON + if (ctx->use_neon) + { + int did_use_neon = 0; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_neon = 1; + } + + if (did_use_neon) + { + /* serpent-neon assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + serpent_decrypt_internal (ctx, inbuf, savebuf); + + buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, sizeof(serpent_block_t)); + inbuf += sizeof(serpent_block_t); + outbuf += sizeof(serpent_block_t); + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_serpent_cfb_dec(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + serpent_context_t *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = 2 * sizeof (serpent_block_t); + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 16; + outbuf += 16 * sizeof(serpent_block_t); + inbuf += 16 * sizeof(serpent_block_t); + did_use_avx2 = 1; + } + + if (did_use_avx2) + { + /* serpent-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic/sse2 code to handle smaller chunks... 
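+
+     CFB decryption can be done in parallel because every keystream
+     block depends only on ciphertext that is already known:
+
+       P_i = C_i ^ ENCIPHER(K, C_{i-1})        (with C_0 = IV)
+
+     so the assembly encrypts eight or sixteen previous ciphertext
+     blocks at once; CFB encryption has no such parallel form.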
*/ + } +#endif + +#ifdef USE_SSE2 + { + int did_use_sse2 = 0; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_sse2 = 1; + } + + if (did_use_sse2) + { + /* serpent-sse2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_NEON + if (ctx->use_neon) + { + int did_use_neon = 0; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_neon = 1; + } + + if (did_use_neon) + { + /* serpent-neon assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + serpent_encrypt_internal(ctx, iv, iv); + buf_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t)); + outbuf += sizeof(serpent_block_t); + inbuf += sizeof(serpent_block_t); + } + + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +size_t +_gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ +#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) + serpent_context_t *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = 2 * sizeof (serpent_block_t); + u64 blkn = c->u_mode.ocb.data_nblocks; +#else + (void)c; + (void)outbuf_arg; + (void)inbuf_arg; + (void)encrypt; +#endif + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + u64 Ls[16]; + unsigned int n = 16 - (blkn % 16); + u64 *l; + int i; + + if (nblocks >= 16) + { + for (i = 0; i < 16; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(15 + n) % 16]; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + blkn += 16; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); + + if (encrypt) + _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 16; + outbuf += 16 * sizeof(serpent_block_t); + inbuf += 16 * sizeof(serpent_block_t); + did_use_avx2 = 1; + } + } + + if (did_use_avx2) + { + /* serpent-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... 
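+
+     For 16 consecutive block numbers the ntz() sequence used in
+     Offset_i = Offset_{i-1} xor L_{ntz(i)} is fixed except for the
+     final, 16-aligned block:
+
+       ntz: 0 1 0 2 0 1 0 3 0 1 0 2 0 1 0 >=4
+
+     which is why Ls[] is filled once up front and only the slot for
+     the last block (*l) is refreshed per chunk via ocb_get_l().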
*/ + } +#endif + +#ifdef USE_SSE2 + { + int did_use_sse2 = 0; + u64 Ls[8]; + unsigned int n = 8 - (blkn % 8); + u64 *l; + + if (nblocks >= 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + l = &Ls[(7 + n) % 8]; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + blkn += 8; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8); + + if (encrypt) + _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_sse2 = 1; + } + } + + if (did_use_sse2) + { + /* serpent-sse2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_NEON + if (ctx->use_neon) + { + int did_use_neon = 0; + const void *Ls[8]; + unsigned int n = 8 - (blkn % 8); + const void **l; + + if (nblocks >= 8) + { + Ls[(0 + n) % 8] = c->u_mode.ocb.L[0]; + Ls[(1 + n) % 8] = c->u_mode.ocb.L[1]; + Ls[(2 + n) % 8] = c->u_mode.ocb.L[0]; + Ls[(3 + n) % 8] = c->u_mode.ocb.L[2]; + Ls[(4 + n) % 8] = c->u_mode.ocb.L[0]; + Ls[(5 + n) % 8] = c->u_mode.ocb.L[1]; + Ls[(6 + n) % 8] = c->u_mode.ocb.L[0]; + l = &Ls[(7 + n) % 8]; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + blkn += 8; + *l = ocb_get_l(c, blkn - blkn % 8); + + if (encrypt) + _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_neon = 1; + } + } + + if (did_use_neon) + { + /* serpent-neon assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) + c->u_mode.ocb.data_nblocks = blkn; + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; +} + +/* Bulk authentication of complete blocks in OCB mode. */ +size_t +_gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ +#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) + serpent_context_t *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + int burn_stack_depth = 2 * sizeof(serpent_block_t); + u64 blkn = c->u_mode.ocb.aad_nblocks; +#else + (void)c; + (void)abuf_arg; +#endif + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + u64 Ls[16]; + unsigned int n = 16 - (blkn % 16); + u64 *l; + int i; + + if (nblocks >= 16) + { + for (i = 0; i < 16; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). 
+ */
+          Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+          Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+          Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+          Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+          Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+          Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+          Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+        }
+
+      Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+      l = &Ls[(15 + n) % 16];
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          blkn += 16;
+          *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+          _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                                      c->u_mode.ocb.aad_sum, Ls);
+
+          nblocks -= 16;
+          abuf += 16 * sizeof(serpent_block_t);
+          did_use_avx2 = 1;
+        }
+    }
+
+  if (did_use_avx2)
+    {
+      /* serpent-avx2 assembly code does not use stack */
+      if (nblocks == 0)
+        burn_stack_depth = 0;
+    }
+
+  /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#ifdef USE_SSE2
+  {
+    int did_use_sse2 = 0;
+    u64 Ls[8];
+    unsigned int n = 8 - (blkn % 8);
+    u64 *l;
+
+    if (nblocks >= 8)
+      {
+        /* Use u64 to store pointers for x32 support (assembly function
+         * assumes 64-bit pointers). */
+        Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+        Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+        Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+        Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+        Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+        Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+        Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+        l = &Ls[(7 + n) % 8];
+
+        /* Process data in 8 block chunks. */
+        while (nblocks >= 8)
+          {
+            blkn += 8;
+            *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+            _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                                        c->u_mode.ocb.aad_sum, Ls);
+
+            nblocks -= 8;
+            abuf += 8 * sizeof(serpent_block_t);
+            did_use_sse2 = 1;
+          }
+      }
+
+    if (did_use_sse2)
+      {
+        /* serpent-sse2 assembly code does not use stack */
+        if (nblocks == 0)
+          burn_stack_depth = 0;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#ifdef USE_NEON
+  if (ctx->use_neon)
+    {
+      int did_use_neon = 0;
+      const void *Ls[8];
+      unsigned int n = 8 - (blkn % 8);
+      const void **l;
+
+      if (nblocks >= 8)
+        {
+          Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+          Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+          Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+          Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+          Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+          Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+          Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+          l = &Ls[(7 + n) % 8];
+
+          /* Process data in 8 block chunks. */
+          while (nblocks >= 8)
+            {
+              blkn += 8;
+              *l = ocb_get_l(c, blkn - blkn % 8);
+
+              _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                                          c->u_mode.ocb.aad_sum, Ls);
+
+              nblocks -= 8;
+              abuf += 8 * sizeof(serpent_block_t);
+              did_use_neon = 1;
+            }
+        }
+
+      if (did_use_neon)
+        {
+          /* serpent-neon assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+  c->u_mode.ocb.aad_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+  return nblocks;
+}
+
+
+
+/* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR
+   encryption.
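+   nblocks is 16+8+1 so that a single run covers the AVX2 16-block
+   path, the SSE2/NEON 8-block path and the generic one-block
+   fallback.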
+   Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+  const int nblocks = 16+8+1;
+  const int blocksize = sizeof(serpent_block_t);
+  const int context_size = sizeof(serpent_context_t);
+
+  return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey,
+           &serpent_encrypt, &_gcry_serpent_ctr_enc, nblocks, blocksize,
+           context_size);
+}
+
+
+/* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+  const int nblocks = 16+8+2;
+  const int blocksize = sizeof(serpent_block_t);
+  const int context_size = sizeof(serpent_context_t);
+
+  return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey,
+           &serpent_encrypt, &_gcry_serpent_cbc_dec, nblocks, blocksize,
+           context_size);
+}
+
+
+/* Run the self-tests for SERPENT-CFB-128, tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+  const int nblocks = 16+8+2;
+  const int blocksize = sizeof(serpent_block_t);
+  const int context_size = sizeof(serpent_context_t);
+
+  return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey,
+           &serpent_encrypt, &_gcry_serpent_cfb_dec, nblocks, blocksize,
+           context_size);
+}
+
+
+/* Serpent test. */
+
+static const char *
+serpent_test (void)
+{
+  serpent_context_t context;
+  unsigned char scratch[16];
+  unsigned int i;
+  const char *r;
+
+  static struct test
+  {
+    int key_length;
+    unsigned char key[32];
+    unsigned char text_plain[16];
+    unsigned char text_cipher[16];
+  } test_data[] =
+    {
+      {
+        16,
+        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+        "\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E",
+        "\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D"
+      },
+      {
+        24,
+        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        "\x00\x00\x00\x00\x00\x00\x00\x00",
+        "\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E",
+        "\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9"
+      },
+      {
+        32,
+        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+        "\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E",
+        "\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B"
+      },
+      {
+        32,
+        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+        "\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00",
+        "\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C"
+      },
+      {
+        0
+      },
+    };
+
+  for (i = 0; test_data[i].key_length; i++)
+    {
+      serpent_setkey_internal (&context, test_data[i].key,
+                               test_data[i].key_length);
+      serpent_encrypt_internal (&context, test_data[i].text_plain, scratch);
+
+      if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t)))
+        switch (test_data[i].key_length)
+          {
+          case 16:
+            return "Serpent-128 test encryption failed.";
+          case 24:
+            return "Serpent-192 test encryption failed.";
+          case 32:
+            return "Serpent-256 test encryption failed.";
+          }
+
+      serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch);
+      if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t)))
+        switch (test_data[i].key_length)
+          {
+          case 16:
+            return "Serpent-128 test decryption failed.";
+          case 24:
+            return "Serpent-192 test decryption failed.";
+          case 32:
+            return "Serpent-256 test decryption failed.";
+          }
+    }
+
+  if ( (r = selftest_ctr_128 ()) )
+ return r; + + if ( (r = selftest_cbc_128 ()) ) + return r; + + if ( (r = selftest_cfb_128 ()) ) + return r; + + return NULL; +} + + +static gcry_cipher_oid_spec_t serpent128_oids[] = + { + {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB }, + {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC }, + {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB }, + {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +static gcry_cipher_oid_spec_t serpent192_oids[] = + { + {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB }, + {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC }, + {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB }, + {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +static gcry_cipher_oid_spec_t serpent256_oids[] = + { + {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB }, + {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC }, + {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB }, + {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +static const char *serpent128_aliases[] = + { + "SERPENT", + "SERPENT-128", + NULL + }; +static const char *serpent192_aliases[] = + { + "SERPENT-192", + NULL + }; +static const char *serpent256_aliases[] = + { + "SERPENT-256", + NULL + }; + +gcry_cipher_spec_t _gcry_cipher_spec_serpent128 = + { + GCRY_CIPHER_SERPENT128, {0, 0}, + "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128, + sizeof (serpent_context_t), + serpent_setkey, serpent_encrypt, serpent_decrypt + }; + +gcry_cipher_spec_t _gcry_cipher_spec_serpent192 = + { + GCRY_CIPHER_SERPENT192, {0, 0}, + "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192, + sizeof (serpent_context_t), + serpent_setkey, serpent_encrypt, serpent_decrypt + }; + +gcry_cipher_spec_t _gcry_cipher_spec_serpent256 = + { + GCRY_CIPHER_SERPENT256, {0, 0}, + "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256, + sizeof (serpent_context_t), + serpent_setkey, serpent_encrypt, serpent_decrypt + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/sha1-armv7-neon.S b/libotr/libgcrypt-1.8.7/cipher/sha1-armv7-neon.S new file mode 100644 index 0000000..61cc541 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha1-armv7-neon.S @@ -0,0 +1,526 @@ +/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function + * Copyright (C) 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * Based on sha1.c: + * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) && defined(USE_SHA1) + +.syntax unified +.fpu neon +.arm + +.text + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 + + +/* Constants */ + +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 +.align 4 +gcry_sha1_armv7_neon_K_VEC: +.LK_VEC: +.LK1: .long K1, K1, K1, K1 +.LK2: .long K2, K2, K2, K2 +.LK3: .long K3, K3, K3, K3 +.LK4: .long K4, K4, K4, K4 + + +/* Register macros */ + +#define RSTATE r0 +#define RDATA r1 +#define RNBLKS r2 +#define ROLDSTACK r3 +#define RWK lr + +#define _a r4 +#define _b r5 +#define _c r6 +#define _d r7 +#define _e r8 + +#define RT0 r9 +#define RT1 r10 +#define RT2 r11 +#define RT3 r12 + +#define W0 q0 +#define W1 q1 +#define W2 q2 +#define W3 q3 +#define W4 q4 +#define W5 q5 +#define W6 q6 +#define W7 q7 + +#define tmp0 q8 +#define tmp1 q9 +#define tmp2 q10 +#define tmp3 q11 + +#define qK1 q12 +#define qK2 q13 +#define qK3 q14 +#define qK4 q15 + + +/* Round function macros. */ + +#define WK_offs(i) (((i) & 15) * 4) + +#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + bic RT0, d, b; \ + add e, e, a, ror #(32 - 5); \ + and RT1, c, b; \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add RT0, RT0, RT3; \ + add e, e, RT1; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT0; + +#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + eor RT0, d, b; \ + add e, e, a, ror #(32 - 5); \ + eor RT0, RT0, c; \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT3; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT0; \ + +#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + eor RT0, b, c; \ + and RT1, b, c; \ + add e, e, a, ror #(32 - 5); \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + and RT0, RT0, d; \ + add RT1, RT1, RT3; \ + add e, e, RT0; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT1; + +#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define R(a,b,c,d,e,f,i) \ + _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define dummy(...) + + +/* Input expansion macros. 
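+
+   These implement the SHA-1 message schedule
+
+     W[t] = (W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) <<< 1
+
+   in four-word groups in NEON registers.  The rounds 16..31 macros
+   handle the overlap of a 4-lane step explicitly (W[t-3] may fall
+   inside the group being computed); from round 32 on the overlap-free
+   equivalent
+
+     W[t] = (W[t-6] ^ W[t-16] ^ W[t-28] ^ W[t-32]) <<< 2
+
+   is used instead.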
*/ + +/********* Precalc macros for rounds 0-15 *************************************/ + +#define W_PRECALC_00_15() \ + add RWK, sp, #(WK_offs(0)); \ + \ + vld1.32 {tmp0, tmp1}, [RDATA]!; \ + vrev32.8 W0, tmp0; /* big => little */ \ + vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vadd.u32 tmp0, W0, curK; \ + vrev32.8 W7, tmp1; /* big => little */ \ + vrev32.8 W6, tmp2; /* big => little */ \ + vadd.u32 tmp1, W7, curK; \ + vrev32.8 W5, tmp3; /* big => little */ \ + vadd.u32 tmp2, W6, curK; \ + vst1.32 {tmp0, tmp1}, [RWK]!; \ + vadd.u32 tmp3, W5, curK; \ + vst1.32 {tmp2, tmp3}, [RWK]; \ + +#define WPRECALC_00_15_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vld1.32 {tmp0, tmp1}, [RDATA]!; \ + +#define WPRECALC_00_15_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + add RWK, sp, #(WK_offs(0)); \ + +#define WPRECALC_00_15_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vrev32.8 W0, tmp0; /* big => little */ \ + +#define WPRECALC_00_15_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vld1.32 {tmp2, tmp3}, [RDATA]!; \ + +#define WPRECALC_00_15_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vadd.u32 tmp0, W0, curK; \ + +#define WPRECALC_00_15_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vrev32.8 W7, tmp1; /* big => little */ \ + +#define WPRECALC_00_15_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vrev32.8 W6, tmp2; /* big => little */ \ + +#define WPRECALC_00_15_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vadd.u32 tmp1, W7, curK; \ + +#define WPRECALC_00_15_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vrev32.8 W5, tmp3; /* big => little */ \ + +#define WPRECALC_00_15_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vadd.u32 tmp2, W6, curK; \ + +#define WPRECALC_00_15_10(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vst1.32 {tmp0, tmp1}, [RWK]!; \ + +#define WPRECALC_00_15_11(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vadd.u32 tmp3, W5, curK; \ + +#define WPRECALC_00_15_12(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vst1.32 {tmp2, tmp3}, [RWK]; \ + + +/********* Precalc macros for rounds 16-31 ************************************/ + +#define WPRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + veor tmp0, tmp0; \ + vext.8 W, W_m16, W_m12, #8; \ + +#define WPRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + add RWK, sp, #(WK_offs(i)); \ + vext.8 tmp0, W_m04, tmp0, #4; \ + +#define WPRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + veor tmp0, tmp0, W_m16; \ + veor.32 W, W, W_m08; \ + +#define WPRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + veor tmp1, tmp1; \ + veor W, W, tmp0; \ + +#define WPRECALC_16_31_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vshl.u32 tmp0, W, #1; \ + +#define WPRECALC_16_31_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vext.8 tmp1, tmp1, W, #(16-12); \ + vshr.u32 W, W, #31; \ + +#define WPRECALC_16_31_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vorr tmp0, tmp0, W; \ + vshr.u32 W, tmp1, #30; \ + +#define WPRECALC_16_31_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vshl.u32 tmp1, tmp1, #2; \ + +#define WPRECALC_16_31_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + veor tmp0, tmp0, W; \ + +#define WPRECALC_16_31_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + veor W, tmp0, tmp1; \ + +#define WPRECALC_16_31_10(i, W, W_m04, 
W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vadd.u32 tmp0, W, curK; \ + +#define WPRECALC_16_31_11(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vst1.32 {tmp0}, [RWK]; + + +/********* Precalc macros for rounds 32-79 ************************************/ + +#define WPRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + veor W, W_m28; \ + +#define WPRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vext.8 tmp0, W_m08, W_m04, #8; \ + +#define WPRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + veor W, W_m16; \ + +#define WPRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + veor W, tmp0; \ + +#define WPRECALC_32_79_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + add RWK, sp, #(WK_offs(i&~3)); \ + +#define WPRECALC_32_79_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vshl.u32 tmp1, W, #2; \ + +#define WPRECALC_32_79_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vshr.u32 tmp0, W, #30; \ + +#define WPRECALC_32_79_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vorr W, tmp0, tmp1; \ + +#define WPRECALC_32_79_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vadd.u32 tmp0, W, curK; \ + +#define WPRECALC_32_79_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \ + vst1.32 {tmp0}, [RWK]; + + +/* Other functional macros */ + +#define CLEAR_REG(reg) veor reg, reg; + + +/* + * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. + * + * unsigned int + * _gcry_sha1_transform_armv7_neon (void *ctx, const unsigned char *data, + * size_t nblks) + */ +.align 3 +.globl _gcry_sha1_transform_armv7_neon +.type _gcry_sha1_transform_armv7_neon,%function; +_gcry_sha1_transform_armv7_neon: + /* input: + * r0: ctx, CTX + * r1: data (64*nblks bytes) + * r2: nblks + */ + + cmp RNBLKS, #0; + beq .Ldo_nothing; + + push {r4-r12, lr}; + + GET_DATA_POINTER(RT3, .LK_VEC, _a); + vpush {q4-q7}; + + mov ROLDSTACK, sp; + + /* Align stack. */ + sub sp, #(16*4); + and sp, #(~(16-1)); + + vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ + + /* Get the values of the chaining variables. */ + ldm RSTATE, {_a-_e}; + + vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ + +#undef curK +#define curK qK1 + /* Precalc 0-15. */ + W_PRECALC_00_15(); + + b .Loop; + +.ltorg +.Loop: + /* Transform 0-15 + Precalc 16-31. 
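+ * Each _R() below retires one scalar round while up to three
+ * WPRECALC_16_31_* fragments run on the NEON unit; a group of four
+ * rounds completes fragments 0..11 and yields the next four schedule
+ * words.  A C reference for the recurrence (sketch only, not used by
+ * the assembly):
+ *   W[i] = rol32 (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
+ * vectorized four lanes at a time, hence the W4..W0 register rotation
+ * in the argument lists.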
*/ + _R( _a, _b, _c, _d, _e, F1, 0, WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, W4, W5, W6, W7, W0, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 1, WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, W4, W5, W6, W7, W0, _, _, _ ); + _R( _d, _e, _a, _b, _c, F1, 2, WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, W4, W5, W6, W7, W0, _, _, _ ); + _R( _c, _d, _e, _a, _b, F1, 3, WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, W4, W5, W6, W7, W0, _, _, _ ); + +#undef curK +#define curK qK2 + _R( _b, _c, _d, _e, _a, F1, 4, WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, W3, W4, W5, W6, W7, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 5, WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, W3, W4, W5, W6, W7, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 6, WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, W3, W4, W5, W6, W7, _, _, _ ); + _R( _d, _e, _a, _b, _c, F1, 7, WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, W3, W4, W5, W6, W7, _, _, _ ); + + _R( _c, _d, _e, _a, _b, F1, 8, WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, W2, W3, W4, W5, W6, _, _, _ ); + _R( _b, _c, _d, _e, _a, F1, 9, WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, W2, W3, W4, W5, W6, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 10, WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, W2, W3, W4, W5, W6, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 11, WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, W2, W3, W4, W5, W6, _, _, _ ); + + _R( _d, _e, _a, _b, _c, F1, 12, WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, W1, W2, W3, W4, W5, _, _, _ ); + _R( _c, _d, _e, _a, _b, F1, 13, WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, W1, W2, W3, W4, W5, _, _, _ ); + _R( _b, _c, _d, _e, _a, F1, 14, WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, W1, W2, W3, W4, W5, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 15, WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, W1, W2, W3, W4, W5, _, _, _ ); + + /* Transform 16-63 + Precalc 32-79. 
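+ * From round 32 on the code uses the equivalent "doubled" recurrence,
+ * which has no dependency inside a 4-word vector (C reference only):
+ *   W[i] = rol32 (W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32], 2);
+ * so each WPRECALC_32_79_* group needs only veor/vext plus a rotate
+ * by two, the curK addition and a store to the WK area.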
*/ + _R( _e, _a, _b, _c, _d, F1, 16, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, W0, W1, W2, W3, W4, W5, W6, W7); + _R( _d, _e, _a, _b, _c, F1, 17, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, W0, W1, W2, W3, W4, W5, W6, W7); + _R( _c, _d, _e, _a, _b, F1, 18, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, W0, W1, W2, W3, W4, W5, W6, W7); + _R( _b, _c, _d, _e, _a, F1, 19, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, W0, W1, W2, W3, W4, W5, W6, W7); + + _R( _a, _b, _c, _d, _e, F2, 20, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, W7, W0, W1, W2, W3, W4, W5, W6); + _R( _e, _a, _b, _c, _d, F2, 21, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, W7, W0, W1, W2, W3, W4, W5, W6); + _R( _d, _e, _a, _b, _c, F2, 22, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, W7, W0, W1, W2, W3, W4, W5, W6); + _R( _c, _d, _e, _a, _b, F2, 23, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, W7, W0, W1, W2, W3, W4, W5, W6); + +#undef curK +#define curK qK3 + _R( _b, _c, _d, _e, _a, F2, 24, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, W6, W7, W0, W1, W2, W3, W4, W5); + _R( _a, _b, _c, _d, _e, F2, 25, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, W6, W7, W0, W1, W2, W3, W4, W5); + _R( _e, _a, _b, _c, _d, F2, 26, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, W6, W7, W0, W1, W2, W3, W4, W5); + _R( _d, _e, _a, _b, _c, F2, 27, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, W6, W7, W0, W1, W2, W3, W4, W5); + + _R( _c, _d, _e, _a, _b, F2, 28, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, W5, W6, W7, W0, W1, W2, W3, W4); + _R( _b, _c, _d, _e, _a, F2, 29, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, W5, W6, W7, W0, W1, W2, W3, W4); + _R( _a, _b, _c, _d, _e, F2, 30, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, W5, W6, W7, W0, W1, W2, W3, W4); + _R( _e, _a, _b, _c, _d, F2, 31, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, W5, W6, W7, W0, W1, W2, W3, W4); + + _R( _d, _e, _a, _b, _c, F2, 32, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, W4, W5, W6, W7, W0, W1, W2, W3); + _R( _c, _d, _e, _a, _b, F2, 33, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, W4, W5, W6, W7, W0, W1, W2, W3); + _R( _b, _c, _d, _e, _a, F2, 34, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, W4, W5, W6, W7, W0, W1, W2, W3); + _R( _a, _b, _c, _d, _e, F2, 35, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, W4, W5, W6, W7, W0, W1, W2, W3); + + _R( _e, _a, _b, _c, _d, F2, 36, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, W3, W4, W5, W6, W7, W0, W1, W2); + _R( _d, _e, _a, _b, _c, F2, 37, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, W3, W4, W5, W6, W7, W0, W1, W2); + _R( _c, _d, _e, _a, _b, F2, 38, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, W3, W4, W5, W6, W7, W0, W1, W2); + _R( _b, _c, _d, _e, _a, F2, 39, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, W3, W4, W5, W6, W7, W0, W1, W2); + + _R( _a, _b, _c, _d, _e, F3, 40, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, W2, W3, W4, W5, W6, W7, W0, W1); + _R( _e, _a, _b, _c, _d, F3, 41, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, W2, W3, W4, W5, W6, W7, W0, W1); + _R( _d, _e, _a, _b, _c, F3, 42, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, W2, W3, W4, W5, W6, W7, W0, W1); + _R( _c, _d, _e, _a, _b, F3, 43, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, W2, W3, W4, W5, W6, W7, W0, W1); + +#undef curK +#define curK qK4 + _R( _b, _c, _d, _e, _a, F3, 44, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, W1, W2, W3, W4, W5, W6, W7, W0); + 
_R( _a, _b, _c, _d, _e, F3, 45, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, W1, W2, W3, W4, W5, W6, W7, W0); + _R( _e, _a, _b, _c, _d, F3, 46, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, W1, W2, W3, W4, W5, W6, W7, W0); + _R( _d, _e, _a, _b, _c, F3, 47, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, W1, W2, W3, W4, W5, W6, W7, W0); + + _R( _c, _d, _e, _a, _b, F3, 48, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, W0, W1, W2, W3, W4, W5, W6, W7); + _R( _b, _c, _d, _e, _a, F3, 49, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, W0, W1, W2, W3, W4, W5, W6, W7); + _R( _a, _b, _c, _d, _e, F3, 50, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, W0, W1, W2, W3, W4, W5, W6, W7); + _R( _e, _a, _b, _c, _d, F3, 51, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, W0, W1, W2, W3, W4, W5, W6, W7); + + _R( _d, _e, _a, _b, _c, F3, 52, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, W7, W0, W1, W2, W3, W4, W5, W6); + _R( _c, _d, _e, _a, _b, F3, 53, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, W7, W0, W1, W2, W3, W4, W5, W6); + _R( _b, _c, _d, _e, _a, F3, 54, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, W7, W0, W1, W2, W3, W4, W5, W6); + _R( _a, _b, _c, _d, _e, F3, 55, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, W7, W0, W1, W2, W3, W4, W5, W6); + + _R( _e, _a, _b, _c, _d, F3, 56, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, W6, W7, W0, W1, W2, W3, W4, W5); + _R( _d, _e, _a, _b, _c, F3, 57, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, W6, W7, W0, W1, W2, W3, W4, W5); + _R( _c, _d, _e, _a, _b, F3, 58, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, W6, W7, W0, W1, W2, W3, W4, W5); + _R( _b, _c, _d, _e, _a, F3, 59, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, W6, W7, W0, W1, W2, W3, W4, W5); + + subs RNBLKS, #1; + + _R( _a, _b, _c, _d, _e, F4, 60, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, W5, W6, W7, W0, W1, W2, W3, W4); + _R( _e, _a, _b, _c, _d, F4, 61, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, W5, W6, W7, W0, W1, W2, W3, W4); + _R( _d, _e, _a, _b, _c, F4, 62, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, W5, W6, W7, W0, W1, W2, W3, W4); + _R( _c, _d, _e, _a, _b, F4, 63, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, W5, W6, W7, W0, W1, W2, W3, W4); + + beq .Lend; + + /* Transform 64-79 + Precalc 0-15 of next block. 
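+ * The final sixteen rounds are interleaved with the loads and
+ * vrev32.8 byte swaps of the next block (WPRECALC_00_15_*), keeping
+ * the NEON unit busy across block boundaries.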
*/ +#undef curK +#define curK qK1 + _R( _b, _c, _d, _e, _a, F4, 64, WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 65, WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _e, _a, _b, _c, _d, F4, 66, WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _d, _e, _a, _b, _c, F4, 67, WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _c, _d, _e, _a, _b, F4, 68, dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 69, dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 70, WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _e, _a, _b, _c, _d, F4, 71, WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _d, _e, _a, _b, _c, F4, 72, dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _c, _d, _e, _a, _b, F4, 73, dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 74, WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 75, WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _e, _a, _b, _c, _d, F4, 76, WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _d, _e, _a, _b, _c, F4, 77, WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _c, _d, _e, _a, _b, F4, 78, WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 79, WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); + + /* Update the chaining variables. */ + ldm RSTATE, {RT0-RT3}; + add _a, RT0; + ldr RT0, [RSTATE, #state_h4]; + add _b, RT1; + add _c, RT2; + add _d, RT3; + add _e, RT0; + stm RSTATE, {_a-_e}; + + b .Loop; + +.ltorg +.Lend: + /* Transform 64-79 + Clear NEON registers. */ + R( _b, _c, _d, _e, _a, F4, 64 ); + R( _a, _b, _c, _d, _e, F4, 65 ); CLEAR_REG(tmp0); + R( _e, _a, _b, _c, _d, F4, 66 ); CLEAR_REG(tmp1); + R( _d, _e, _a, _b, _c, F4, 67 ); CLEAR_REG(W0); + R( _c, _d, _e, _a, _b, F4, 68 ); CLEAR_REG(W1); + R( _b, _c, _d, _e, _a, F4, 69 ); CLEAR_REG(W2); + R( _a, _b, _c, _d, _e, F4, 70 ); CLEAR_REG(W3); + R( _e, _a, _b, _c, _d, F4, 71 ); CLEAR_REG(W4); + R( _d, _e, _a, _b, _c, F4, 72 ); CLEAR_REG(W5); + R( _c, _d, _e, _a, _b, F4, 73 ); CLEAR_REG(W6); + R( _b, _c, _d, _e, _a, F4, 74 ); CLEAR_REG(W7); + R( _a, _b, _c, _d, _e, F4, 75 ); + R( _e, _a, _b, _c, _d, F4, 76 ); + R( _d, _e, _a, _b, _c, F4, 77 ); + R( _c, _d, _e, _a, _b, F4, 78 ); + R( _b, _c, _d, _e, _a, F4, 79 ); + + mov sp, ROLDSTACK; + + /* Update the chaining variables. */ + ldm RSTATE, {RT0-RT3}; + add _a, RT0; + ldr RT0, [RSTATE, #state_h4]; + add _b, RT1; + add _c, RT2; + add _d, RT3; + vpop {q4-q7}; + add _e, RT0; + stm RSTATE, {_a-_e}; + + /* burn_stack */ + mov r0, #(16*4 + 16*4 + 15); + + pop {r4-r12, pc}; + +.Ldo_nothing: + mov r0, #0; + bx lr; +.size _gcry_sha1_transform_armv7_neon,.-_gcry_sha1_transform_armv7_neon; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha1-armv8-aarch32-ce.S b/libotr/libgcrypt-1.8.7/cipher/sha1-armv8-aarch32-ce.S new file mode 100644 index 0000000..bf2b233 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha1-armv8-aarch32-ce.S @@ -0,0 +1,220 @@ +/* sha1-armv8-aarch32-ce.S - ARM/CE accelerated SHA-1 transform function + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt.
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) && defined(USE_SHA1) + +.syntax unified +.arch armv8-a +.fpu crypto-neon-fp-armv8 +.arm + +.text + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +/* Constants */ + +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 +.align 4 +gcry_sha1_aarch32_ce_K_VEC: +.LK_VEC: +.LK1: .long K1, K1, K1, K1 +.LK2: .long K2, K2, K2, K2 +.LK3: .long K3, K3, K3, K3 +.LK4: .long K4, K4, K4, K4 + + +/* Register macros */ + +#define qH4 q0 +#define sH4 s0 +#define qH0123 q1 + +#define qABCD q2 +#define qE0 q3 +#define qE1 q4 + +#define qT0 q5 +#define qT1 q6 + +#define qW0 q8 +#define qW1 q9 +#define qW2 q10 +#define qW3 q11 + +#define qK1 q12 +#define qK2 q13 +#define qK3 q14 +#define qK4 q15 + + +/* Round macros */ + +#define _(...) /*_*/ +#define do_add(dst, src0, src1) vadd.u32 dst, src0, src1; +#define do_sha1su0(w0,w1,w2) sha1su0.32 w0,w1,w2; +#define do_sha1su1(w0,w3) sha1su1.32 w0,w3; + +#define do_rounds(f, e0, e1, t, k, w0, w1, w2, w3, add_fn, sha1su0_fn, sha1su1_fn) \ + sha1su1_fn( w3, w2 ); \ + sha1h.32 e0, qABCD; \ + sha1##f.32 qABCD, e1, t; \ + add_fn( t, w2, k ); \ + sha1su0_fn( w0, w1, w2 ); + + +/* Other functional macros */ + +#define CLEAR_REG(reg) veor reg, reg; + + +/* + * unsigned int + * _gcry_sha1_transform_armv8_ce (void *ctx, const unsigned char *data, + * size_t nblks) + */ +.align 3 +.globl _gcry_sha1_transform_armv8_ce +.type _gcry_sha1_transform_armv8_ce,%function; +_gcry_sha1_transform_armv8_ce: + /* input: + * r0: ctx, CTX + * r1: data (64*nblks bytes) + * r2: nblks + */ + + cmp r2, #0; + push {r4,lr}; + beq .Ldo_nothing; + + vpush {q4-q7}; + + GET_DATA_POINTER(r4, .LK_VEC, lr); + + veor qH4, qH4 + vld1.32 {qH0123}, [r0] /* load h0,h1,h2,h3 */ + + vld1.32 {qK1-qK2}, [r4]! /* load K1,K2 */ + vldr sH4, [r0, #16] /* load h4 */ + vld1.32 {qK3-qK4}, [r4] /* load K3,K4 */ + + vld1.8 {qW0-qW1}, [r1]! + vmov qABCD, qH0123 + vld1.8 {qW2-qW3}, [r1]! 
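+
+	/* Each do_rounds() in the loop below performs four SHA-1 rounds
+	 * with the v8 Crypto Extensions: sha1h derives the next E operand
+	 * from the rotated 'a' lane, sha1c/sha1p/sha1m execute four rounds
+	 * using the choose/parity/majority functions, and sha1su0+sha1su1
+	 * advance the message schedule four words at a time. */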
+ + vrev32.8 qW0, qW0 + vrev32.8 qW1, qW1 + vrev32.8 qW2, qW2 + do_add(qT0, qW0, qK1) + vrev32.8 qW3, qW3 + do_add(qT1, qW1, qK1) + +.Loop: + do_rounds(c, qE1, qH4, qT0, qK1, qW0, qW1, qW2, qW3, do_add, do_sha1su0, _) + subs r2, r2, #1 + do_rounds(c, qE0, qE1, qT1, qK1, qW1, qW2, qW3, qW0, do_add, do_sha1su0, do_sha1su1) + do_rounds(c, qE1, qE0, qT0, qK1, qW2, qW3, qW0, qW1, do_add, do_sha1su0, do_sha1su1) + do_rounds(c, qE0, qE1, qT1, qK2, qW3, qW0, qW1, qW2, do_add, do_sha1su0, do_sha1su1) + do_rounds(c, qE1, qE0, qT0, qK2, qW0, qW1, qW2, qW3, do_add, do_sha1su0, do_sha1su1) + + do_rounds(p, qE0, qE1, qT1, qK2, qW1, qW2, qW3, qW0, do_add, do_sha1su0, do_sha1su1) + do_rounds(p, qE1, qE0, qT0, qK2, qW2, qW3, qW0, qW1, do_add, do_sha1su0, do_sha1su1) + do_rounds(p, qE0, qE1, qT1, qK2, qW3, qW0, qW1, qW2, do_add, do_sha1su0, do_sha1su1) + do_rounds(p, qE1, qE0, qT0, qK3, qW0, qW1, qW2, qW3, do_add, do_sha1su0, do_sha1su1) + do_rounds(p, qE0, qE1, qT1, qK3, qW1, qW2, qW3, qW0, do_add, do_sha1su0, do_sha1su1) + + do_rounds(m, qE1, qE0, qT0, qK3, qW2, qW3, qW0, qW1, do_add, do_sha1su0, do_sha1su1) + do_rounds(m, qE0, qE1, qT1, qK3, qW3, qW0, qW1, qW2, do_add, do_sha1su0, do_sha1su1) + do_rounds(m, qE1, qE0, qT0, qK3, qW0, qW1, qW2, qW3, do_add, do_sha1su0, do_sha1su1) + do_rounds(m, qE0, qE1, qT1, qK4, qW1, qW2, qW3, qW0, do_add, do_sha1su0, do_sha1su1) + do_rounds(m, qE1, qE0, qT0, qK4, qW2, qW3, qW0, qW1, do_add, do_sha1su0, do_sha1su1) + + do_rounds(p, qE0, qE1, qT1, qK4, qW3, qW0, qW1, qW2, do_add, do_sha1su0, do_sha1su1) + beq .Lend + + vld1.8 {qW0-qW1}, [r1]! /* preload */ + do_rounds(p, qE1, qE0, qT0, qK4, _ , _ , qW2, qW3, do_add, _, do_sha1su1) + vrev32.8 qW0, qW0 + vld1.8 {qW2}, [r1]! + vrev32.8 qW1, qW1 + do_rounds(p, qE0, qE1, qT1, qK4, _ , _ , qW3, _ , do_add, _, _) + vld1.8 {qW3}, [r1]! + vrev32.8 qW2, qW2 + do_rounds(p, qE1, qE0, qT0, _, _, _, _, _, _, _, _) + vrev32.8 qW3, qW3 + do_rounds(p, qE0, qE1, qT1, _, _, _, _, _, _, _, _) + + do_add(qT0, qW0, qK1) + vadd.u32 qH4, qE0 + vadd.u32 qABCD, qH0123 + do_add(qT1, qW1, qK1) + + vmov qH0123, qABCD + + b .Loop + +.Lend: + do_rounds(p, qE1, qE0, qT0, qK4, _ , _ , qW2, qW3, do_add, _, do_sha1su1) + do_rounds(p, qE0, qE1, qT1, qK4, _ , _ , qW3, _ , do_add, _, _) + do_rounds(p, qE1, qE0, qT0, _, _, _, _, _, _, _, _) + do_rounds(p, qE0, qE1, qT1, _, _, _, _, _, _, _, _) + + vadd.u32 qH4, qE0 + vadd.u32 qH0123, qABCD + + CLEAR_REG(qW0) + CLEAR_REG(qW1) + CLEAR_REG(qW2) + CLEAR_REG(qW3) + CLEAR_REG(qABCD) + CLEAR_REG(qE1) + CLEAR_REG(qE0) + + vstr sH4, [r0, #16] /* store h4 */ + vst1.32 {qH0123}, [r0] /* store h0,h1,h2,h3 */ + + CLEAR_REG(qH0123) + CLEAR_REG(qH4) + vpop {q4-q7} + +.Ldo_nothing: + mov r0, #0 + pop {r4,pc} +.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha1-armv8-aarch64-ce.S b/libotr/libgcrypt-1.8.7/cipher/sha1-armv8-aarch64-ce.S new file mode 100644 index 0000000..ec1810d --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha1-armv8-aarch64-ce.S @@ -0,0 +1,204 @@ +/* sha1-armv8-aarch64-ce.S - ARM/CE accelerated SHA-1 transform function + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && defined(USE_SHA1) + +.cpu generic+simd+crypto + +.text + + +#define GET_DATA_POINTER(reg, name) \ + adrp reg, :got:name ; \ + ldr reg, [reg, #:got_lo12:name] ; + + +/* Constants */ + +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 +.align 4 +gcry_sha1_aarch64_ce_K_VEC: +.LK_VEC: +.LK1: .long K1, K1, K1, K1 +.LK2: .long K2, K2, K2, K2 +.LK3: .long K3, K3, K3, K3 +.LK4: .long K4, K4, K4, K4 + + +/* Register macros */ + +#define sH4 s0 +#define vH4 v0 +#define vH0123 v1 + +#define qABCD q2 +#define sABCD s2 +#define vABCD v2 +#define sE0 s3 +#define vE0 v3 +#define sE1 s4 +#define vE1 v4 + +#define vT0 v5 +#define vT1 v6 + +#define vW0 v16 +#define vW1 v17 +#define vW2 v18 +#define vW3 v19 + +#define vK1 v20 +#define vK2 v21 +#define vK3 v22 +#define vK4 v23 + + +/* Round macros */ + +#define _(...) /*_*/ +#define do_add(dst, src0, src1) add dst.4s, src0.4s, src1.4s; +#define do_sha1su0(w0,w1,w2) sha1su0 w0.4s,w1.4s,w2.4s; +#define do_sha1su1(w0,w3) sha1su1 w0.4s,w3.4s; + +#define do_rounds(f, e0, e1, t, k, w0, w1, w2, w3, add_fn, sha1su0_fn, sha1su1_fn) \ + sha1su1_fn( v##w3, v##w2 ); \ + sha1h e0, sABCD; \ + sha1##f qABCD, e1, v##t.4s; \ + add_fn( v##t, v##w2, v##k ); \ + sha1su0_fn( v##w0, v##w1, v##w2 ); + + +/* Other functional macros */ + +#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; + + +/* + * unsigned int + * _gcry_sha1_transform_armv8_ce (void *ctx, const unsigned char *data, + * size_t nblks) + */ +.align 3 +.globl _gcry_sha1_transform_armv8_ce +.type _gcry_sha1_transform_armv8_ce,%function; +_gcry_sha1_transform_armv8_ce: + /* input: + * x0: ctx, CTX + * x1: data (64*nblks bytes) + * x2: nblks + */ + + cbz x2, .Ldo_nothing; + + GET_DATA_POINTER(x4, .LK_VEC); + + ld1 {vH0123.4s}, [x0] /* load h0,h1,h2,h3 */ + ld1 {vK1.4s-vK4.4s}, [x4] /* load K1,K2,K3,K4 */ + ldr sH4, [x0, #16] /* load h4 */ + + ld1 {vW0.16b-vW3.16b}, [x1], #64 + mov vABCD.16b, vH0123.16b + + rev32 vW0.16b, vW0.16b + rev32 vW1.16b, vW1.16b + rev32 vW2.16b, vW2.16b + do_add(vT0, vW0, vK1) + rev32 vW3.16b, vW3.16b + do_add(vT1, vW1, vK1) + +.Loop: + do_rounds(c, sE1, sH4, T0, K1, W0, W1, W2, W3, do_add, do_sha1su0, _) + sub x2, x2, #1 + do_rounds(c, sE0, sE1, T1, K1, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1) + do_rounds(c, sE1, sE0, T0, K1, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1) + do_rounds(c, sE0, sE1, T1, K2, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1) + do_rounds(c, sE1, sE0, T0, K2, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1) + + do_rounds(p, sE0, sE1, T1, K2, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1) + do_rounds(p, sE1, sE0, T0, K2, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1) + do_rounds(p, sE0, sE1, T1, K2, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1) + do_rounds(p, sE1, sE0, T0, K3, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1) + do_rounds(p, sE0, sE1, T1, K3, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1) + + do_rounds(m, sE1, sE0, T0, K3, W2, W3, W0, 
W1, do_add, do_sha1su0, do_sha1su1) + do_rounds(m, sE0, sE1, T1, K3, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1) + do_rounds(m, sE1, sE0, T0, K3, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1) + do_rounds(m, sE0, sE1, T1, K4, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1) + do_rounds(m, sE1, sE0, T0, K4, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1) + + do_rounds(p, sE0, sE1, T1, K4, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1) + cbz x2, .Lend + + ld1 {vW0.16b-vW1.16b}, [x1], #32 /* preload */ + do_rounds(p, sE1, sE0, T0, K4, _ , _ , W2, W3, do_add, _, do_sha1su1) + rev32 vW0.16b, vW0.16b + ld1 {vW2.16b}, [x1], #16 + rev32 vW1.16b, vW1.16b + do_rounds(p, sE0, sE1, T1, K4, _ , _ , W3, _ , do_add, _, _) + ld1 {vW3.16b}, [x1], #16 + rev32 vW2.16b, vW2.16b + do_rounds(p, sE1, sE0, T0, _, _, _, _, _, _, _, _) + rev32 vW3.16b, vW3.16b + do_rounds(p, sE0, sE1, T1, _, _, _, _, _, _, _, _) + + do_add(vT0, vW0, vK1) + add vH4.2s, vH4.2s, vE0.2s + add vABCD.4s, vABCD.4s, vH0123.4s + do_add(vT1, vW1, vK1) + + mov vH0123.16b, vABCD.16b + + b .Loop + +.Lend: + do_rounds(p, sE1, sE0, T0, K4, _ , _ , W2, W3, do_add, _, do_sha1su1) + do_rounds(p, sE0, sE1, T1, K4, _ , _ , W3, _ , do_add, _, _) + do_rounds(p, sE1, sE0, T0, _, _, _, _, _, _, _, _) + do_rounds(p, sE0, sE1, T1, _, _, _, _, _, _, _, _) + + add vH4.2s, vH4.2s, vE0.2s + add vH0123.4s, vH0123.4s, vABCD.4s + + CLEAR_REG(vW0) + CLEAR_REG(vW1) + CLEAR_REG(vW2) + CLEAR_REG(vW3) + CLEAR_REG(vABCD) + CLEAR_REG(vE1) + CLEAR_REG(vE0) + + str sH4, [x0, #16] /* store h4 */ + st1 {vH0123.4s}, [x0] /* store h0,h1,h2,h3 */ + + CLEAR_REG(vH0123) + CLEAR_REG(vH4) + +.Ldo_nothing: + mov x0, #0 + ret +.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha1-avx-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha1-avx-amd64.S new file mode 100644 index 0000000..b14603b --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha1-avx-amd64.S @@ -0,0 +1,426 @@ +/* sha1-avx-amd64.S - Intel AVX accelerated SHA-1 transform function + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * Based on sha1.c: + * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Intel SSSE3 accelerated SHA-1 implementation based on white paper: + * "Improving the Performance of the Secure Hash Algorithm (SHA-1)" + * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1 + */ + +#ifdef __x86_64__ +#include <config.h> + +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1) + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) 
__VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 + + +/* Constants */ + +.text +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 +.align 16 +.LK_XMM: +.LK1: .long K1, K1, K1, K1 +.LK2: .long K2, K2, K2, K2 +.LK3: .long K3, K3, K3, K3 +.LK4: .long K4, K4, K4, K4 + +.Lbswap_shufb_ctl: + .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f + + +/* Register macros */ + +#define RSTATE %r8 +#define RDATA %r9 +#define ROLDSTACK %r10 +#define RNBLKS %r11 + +#define a %eax +#define b %ebx +#define c %ecx +#define d %edx +#define e %edi + +#define RT0 %esi +#define RT1 %ebp + +#define Wtmp0 %xmm0 +#define Wtmp1 %xmm1 + +#define W0 %xmm2 +#define W1 %xmm3 +#define W2 %xmm4 +#define W3 %xmm5 +#define W4 %xmm6 +#define W5 %xmm7 +#define W6 %xmm8 +#define W7 %xmm9 + +#define BSWAP_REG %xmm10 + + +/* Round function macros. */ + +#define WK(i) (((i) & 15) * 4)(%rsp) + +#define R_F1(a,b,c,d,e,i) \ + movl c, RT0; \ + addl WK(i), e; \ + xorl d, RT0; \ + movl a, RT1; \ + andl b, RT0; \ + shldl $30, b, b; \ + xorl d, RT0; \ + leal (RT0,e), e; \ + shldl $5, RT1, RT1; \ + addl RT1, e; + +#define R_F2(a,b,c,d,e,i) \ + movl c, RT0; \ + addl WK(i), e; \ + xorl b, RT0; \ + shldl $30, b, b; \ + xorl d, RT0; \ + movl a, RT1; \ + leal (RT0,e), e; \ + shldl $5, RT1, RT1; \ + addl RT1, e; + +#define R_F3(a,b,c,d,e,i) \ + movl c, RT0; \ + movl b, RT1; \ + xorl b, RT0; \ + andl c, RT1; \ + andl d, RT0; \ + addl RT1, e; \ + addl WK(i), e; \ + shldl $30, b, b; \ + movl a, RT1; \ + leal (RT0,e), e; \ + shldl $5, RT1, RT1; \ + addl RT1, e; + +#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i) + +#define R(a,b,c,d,e,f,i) \ + R_##f(a,b,c,d,e,i) + + +/* Input expansion macros. */ + +#define W_PRECALC_00_15_0(i, W, tmp0) \ + vmovdqu (4*(i))(RDATA), tmp0; + +#define W_PRECALC_00_15_1(i, W, tmp0) \ + vpshufb BSWAP_REG, tmp0, W; + +#define W_PRECALC_00_15_2(i, W, tmp0) \ + vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; + +#define W_PRECALC_00_15_3(i, W, tmp0) \ + vmovdqa tmp0, WK(i&~3); + +#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + vpalignr $8, W_m16, W_m12, W; \ + vpsrldq $4, W_m04, tmp0; \ + vpxor W_m08, W, W; + +#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + vpxor W_m16, tmp0, tmp0; \ + vpxor tmp0, W, W; \ + vpslld $1, W, tmp0; \ + vpslldq $12, W, tmp1; \ + vpsrld $31, W, W; + +#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + vpor W, tmp0, tmp0; \ + vpsrld $30, tmp1, W; \ + vpslld $2, tmp1, tmp1; + +#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + vpxor W, tmp0, tmp0; \ + vpxor tmp1, tmp0, W; \ + vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; \ + vmovdqa tmp0, WK((i)&~3); + +#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + vpxor W_m28, W, W; \ + vpalignr $8, W_m08, W_m04, tmp0; + +#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + vpxor W_m16, W, W; \ + vpxor tmp0, W, W; + +#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + vpsrld $30, W, tmp0; \ + vpslld $2, W, W; + +#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + vpor W, tmp0, W; \ + vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; \ + vmovdqa tmp0, WK((i)&~3); + + +/* + * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. 
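+ *
+ * The return value is the number of stack bytes the caller should
+ * wipe ("burn"), since the expanded schedule is spilled to the stack.
+ * A hedged caller-side sketch in C (the names follow the generic hash
+ * machinery in sha1.c and are assumptions, not part of this file):
+ *
+ *   unsigned int nburn;
+ *   nburn = _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks);
+ *   if (nburn)
+ *     _gcry_burn_stack (nburn);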
+ * + * unsigned int + * _gcry_sha1_transform_amd64_avx (void *ctx, const unsigned char *data, + * size_t nblks) + */ +.globl _gcry_sha1_transform_amd64_avx +ELF(.type _gcry_sha1_transform_amd64_avx,@function) +.align 16 +_gcry_sha1_transform_amd64_avx: + /* input: + * %rdi: ctx, CTX + * %rsi: data (64*nblks bytes) + * %rdx: nblks + */ + + xorl %eax, %eax; + cmpq $0, %rdx; + jz .Lret; + + vzeroupper; + + movq %rdx, RNBLKS; + movq %rdi, RSTATE; + movq %rsi, RDATA; + pushq %rbx; + pushq %rbp; + + movq %rsp, ROLDSTACK; + + subq $(16*4), %rsp; + andq $(~31), %rsp; + + /* Get the values of the chaining variables. */ + movl state_h0(RSTATE), a; + movl state_h1(RSTATE), b; + movl state_h2(RSTATE), c; + movl state_h3(RSTATE), d; + movl state_h4(RSTATE), e; + + movdqa .Lbswap_shufb_ctl RIP, BSWAP_REG; + + /* Precalc 0-15. */ + W_PRECALC_00_15_0(0, W0, Wtmp0); + W_PRECALC_00_15_1(1, W0, Wtmp0); + W_PRECALC_00_15_2(2, W0, Wtmp0); + W_PRECALC_00_15_3(3, W0, Wtmp0); + W_PRECALC_00_15_0(4, W7, Wtmp0); + W_PRECALC_00_15_1(5, W7, Wtmp0); + W_PRECALC_00_15_2(6, W7, Wtmp0); + W_PRECALC_00_15_3(7, W7, Wtmp0); + W_PRECALC_00_15_0(8, W6, Wtmp0); + W_PRECALC_00_15_1(9, W6, Wtmp0); + W_PRECALC_00_15_2(10, W6, Wtmp0); + W_PRECALC_00_15_3(11, W6, Wtmp0); + W_PRECALC_00_15_0(12, W5, Wtmp0); + W_PRECALC_00_15_1(13, W5, Wtmp0); + W_PRECALC_00_15_2(14, W5, Wtmp0); + W_PRECALC_00_15_3(15, W5, Wtmp0); + +.align 8 +.Loop: + addq $64, RDATA; + + /* Transform 0-15 + Precalc 16-31. */ + R( a, b, c, d, e, F1, 0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + + /* Transform 16-63 + Precalc 32-79. 
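+ * Each group of four R() rounds also runs one W_PRECALC_32_79_0..3
+ * sequence for a future group, finishing with a vmovdqa of W+K into
+ * the WK((i)&~3) stack slot that the scalar rounds consume roughly
+ * sixteen rounds later.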
*/ + R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, 
W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + + decq RNBLKS; + jz .Lend; + + /* Transform 64-79 + Precalc 0-15 of next block. */ + R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0); + R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0); + R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0); + R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0); + R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0); + R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0); + R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0); + R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0); + R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0); + R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0); + R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0); + R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0); + R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0); + R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0); + R( c, d, e, a, b, F4, 78 ); + addl state_h0(RSTATE), a; W_PRECALC_00_15_2(14, W5, Wtmp0); + R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0); + + /* Update the chaining variables. */ + addl state_h3(RSTATE), d; + addl state_h2(RSTATE), c; + addl state_h1(RSTATE), b; + addl state_h4(RSTATE), e; + + movl d, state_h3(RSTATE); + movl c, state_h2(RSTATE); + movl b, state_h1(RSTATE); + movl a, state_h0(RSTATE); + movl e, state_h4(RSTATE); + + jmp .Loop; + +.align 16 +.Lend: + vzeroall; + + /* Transform 64-79. */ + R( b, c, d, e, a, F4, 64 ); + R( a, b, c, d, e, F4, 65 ); + R( e, a, b, c, d, F4, 66 ); + R( d, e, a, b, c, F4, 67 ); + R( c, d, e, a, b, F4, 68 ); + R( b, c, d, e, a, F4, 69 ); + R( a, b, c, d, e, F4, 70 ); + R( e, a, b, c, d, F4, 71 ); + R( d, e, a, b, c, F4, 72 ); + R( c, d, e, a, b, F4, 73 ); + R( b, c, d, e, a, F4, 74 ); + R( a, b, c, d, e, F4, 75 ); + R( e, a, b, c, d, F4, 76 ); + R( d, e, a, b, c, F4, 77 ); + R( c, d, e, a, b, F4, 78 ); + addl state_h0(RSTATE), a; + R( b, c, d, e, a, F4, 79 ); + + /* Update the chaining variables. 
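+ * (Davies-Meyer feed-forward, h[i] += working variable; note that 'a'
+ * was already folded in above, interleaved with round 79.)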
*/ + addl state_h3(RSTATE), d; + addl state_h2(RSTATE), c; + addl state_h1(RSTATE), b; + addl state_h4(RSTATE), e; + + movl d, state_h3(RSTATE); + movl c, state_h2(RSTATE); + movl b, state_h1(RSTATE); + movl a, state_h0(RSTATE); + movl e, state_h4(RSTATE); + + movq ROLDSTACK, %rsp; + + popq %rbp; + popq %rbx; + + /* burn_stack */ + movl $(16*4 + 2*8 + 31), %eax; + +.Lret: + ret; + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha1-avx-bmi2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha1-avx-bmi2-amd64.S new file mode 100644 index 0000000..b267693 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha1-avx-bmi2-amd64.S @@ -0,0 +1,424 @@ +/* sha1-avx-bmi2-amd64.S - Intel AVX/BMI2 accelerated SHA-1 transform function + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * Based on sha1.c: + * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Intel SSSE3 accelerated SHA-1 implementation based on white paper: + * "Improving the Performance of the Secure Hash Algorithm (SHA-1)" + * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1 + */ + +#ifdef __x86_64__ +#include <config.h> + +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_GCC_INLINE_ASM_BMI2) && \ + defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1) + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 + + +/* Constants */ + +.text +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 +.align 16 +.LK_XMM: +.LK1: .long K1, K1, K1, K1 +.LK2: .long K2, K2, K2, K2 +.LK3: .long K3, K3, K3, K3 +.LK4: .long K4, K4, K4, K4 + +.Lbswap_shufb_ctl: + .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f + + +/* Register macros */ + +#define RSTATE %r8 +#define RDATA %r9 +#define ROLDSTACK %r10 +#define RNBLKS %r11 + +#define a %eax +#define b %ebx +#define c %ecx +#define d %edx +#define e %edi + +#define RT0 %esi +#define RT1 %ebp + +#define Wtmp0 %xmm0 +#define Wtmp1 %xmm1 + +#define W0 %xmm2 +#define W1 %xmm3 +#define W2 %xmm4 +#define W3 %xmm5 +#define W4 %xmm6 +#define W5 %xmm7 +#define W6 %xmm8 +#define W7 %xmm9 + +#define BSWAP_REG %xmm10 + + +/* Round function macros. 
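+ *
+ * BMI2 variants of the round update: rorxl replaces the rotates so
+ * they neither read nor write EFLAGS, and andn supplies ~b & d for F1
+ * directly.  C reference forms (sketch only, not used by the code):
+ *
+ *   F1(b,c,d) = (b & c) | (~b & d);       /+ "choose"; andn gives ~b & d
+ *   F2(b,c,d) = b ^ c ^ d;                /+ parity; F4 == F2
+ *   F3(b,c,d) = (b & c) + ((b ^ c) & d);  /+ equals Maj(b,c,d)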
*/ + +#define WK(i) (((i) & 15) * 4)(%rsp) + +#define R_F1(a,b,c,d,e,i) \ + movl c, RT0; \ + andn d, b, RT1; \ + addl WK(i), e; \ + andl b, RT0; \ + rorxl $2, b, b; \ + addl RT1, e; \ + leal (RT0,e), e; \ + rorxl $27, a, RT1; \ + addl RT1, e; + +#define R_F2(a,b,c,d,e,i) \ + movl c, RT0; \ + addl WK(i), e; \ + xorl b, RT0; \ + rorxl $2, b, b; \ + xorl d, RT0; \ + leal (RT0,e), e; \ + rorxl $27, a, RT1; \ + addl RT1, e; + +#define R_F3(a,b,c,d,e,i) \ + movl c, RT0; \ + movl b, RT1; \ + xorl b, RT0; \ + andl c, RT1; \ + andl d, RT0; \ + addl RT1, e; \ + addl WK(i), e; \ + rorxl $2, b, b; \ + leal (RT0,e), e; \ + rorxl $27, a, RT1; \ + addl RT1, e; + +#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i) + +#define R(a,b,c,d,e,f,i) \ + R_##f(a,b,c,d,e,i) + + +/* Input expansion macros. */ + +#define W_PRECALC_00_15_0(i, W, tmp0) \ + vmovdqu (4*(i))(RDATA), tmp0; + +#define W_PRECALC_00_15_1(i, W, tmp0) \ + vpshufb BSWAP_REG, tmp0, W; + +#define W_PRECALC_00_15_2(i, W, tmp0) \ + vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; + +#define W_PRECALC_00_15_3(i, W, tmp0) \ + vmovdqa tmp0, WK(i&~3); + +#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + vpalignr $8, W_m16, W_m12, W; \ + vpsrldq $4, W_m04, tmp0; \ + vpxor W_m08, W, W; + +#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + vpxor W_m16, tmp0, tmp0; \ + vpxor tmp0, W, W; \ + vpslld $1, W, tmp0; \ + vpslldq $12, W, tmp1; \ + vpsrld $31, W, W; + +#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + vpor W, tmp0, tmp0; \ + vpsrld $30, tmp1, W; \ + vpslld $2, tmp1, tmp1; + +#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + vpxor W, tmp0, tmp0; \ + vpxor tmp1, tmp0, W; \ + vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; \ + vmovdqa tmp0, WK((i)&~3); + +#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + vpxor W_m28, W, W; \ + vpalignr $8, W_m08, W_m04, tmp0; + +#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + vpxor W_m16, W, W; \ + vpxor tmp0, W, W; + +#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + vpsrld $30, W, tmp0; \ + vpslld $2, W, W; + +#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + vpor W, tmp0, W; \ + vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; \ + vmovdqa tmp0, WK((i)&~3); + + +/* + * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. + * + * unsigned int + * _gcry_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data, + * size_t nblks) + */ +.globl _gcry_sha1_transform_amd64_avx_bmi2 +ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function) +.align 16 +_gcry_sha1_transform_amd64_avx_bmi2: + /* input: + * %rdi: ctx, CTX + * %rsi: data (64*nblks bytes) + * %rdx: nblks + */ + + xorl %eax, %eax; + cmpq $0, %rdx; + jz .Lret; + + vzeroupper; + + movq %rdx, RNBLKS; + movq %rdi, RSTATE; + movq %rsi, RDATA; + pushq %rbx; + pushq %rbp; + + movq %rsp, ROLDSTACK; + + subq $(16*4), %rsp; + andq $(~31), %rsp; + + /* Get the values of the chaining variables. */ + movl state_h0(RSTATE), a; + movl state_h1(RSTATE), b; + movl state_h2(RSTATE), c; + movl state_h3(RSTATE), d; + movl state_h4(RSTATE), e; + + movdqa .Lbswap_shufb_ctl RIP, BSWAP_REG; + + /* Precalc 0-15. 
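+ *
+ * Per xmm vector of four message words: load (W_PRECALC_00_15_0),
+ * byte-swap with vpshufb and .Lbswap_shufb_ctl (_1), add the round
+ * constant selected by ((i)/20)*16 (_2), and spill W+K to the stack
+ * (_3).  Per word, in C (reference only):
+ *   wk[i] = bswap32 (data[i]) + K1;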
*/ + W_PRECALC_00_15_0(0, W0, Wtmp0); + W_PRECALC_00_15_1(1, W0, Wtmp0); + W_PRECALC_00_15_2(2, W0, Wtmp0); + W_PRECALC_00_15_3(3, W0, Wtmp0); + W_PRECALC_00_15_0(4, W7, Wtmp0); + W_PRECALC_00_15_1(5, W7, Wtmp0); + W_PRECALC_00_15_2(6, W7, Wtmp0); + W_PRECALC_00_15_3(7, W7, Wtmp0); + W_PRECALC_00_15_0(8, W6, Wtmp0); + W_PRECALC_00_15_1(9, W6, Wtmp0); + W_PRECALC_00_15_2(10, W6, Wtmp0); + W_PRECALC_00_15_3(11, W6, Wtmp0); + W_PRECALC_00_15_0(12, W5, Wtmp0); + W_PRECALC_00_15_1(13, W5, Wtmp0); + W_PRECALC_00_15_2(14, W5, Wtmp0); + W_PRECALC_00_15_3(15, W5, Wtmp0); + +.align 8 +.Loop: + addq $64, RDATA; + + /* Transform 0-15 + Precalc 16-31. */ + R( a, b, c, d, e, F1, 0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + + /* Transform 16-63 + Precalc 32-79. 
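+ * The ((i)/20)*16 offset into .LK_XMM switches the round constant
+ * (K1..K4) at i = 20, 40 and 60, so one macro body serves all four
+ * round groups.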
*/ + R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, 
W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + + decq RNBLKS; + jz .Lend; + + /* Transform 64-79 + Precalc 0-15 of next block. */ + R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0); + R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0); + R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0); + R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0); + R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0); + R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0); + R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0); + R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0); + R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0); + R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0); + R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0); + R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0); + R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0); + R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0); + R( c, d, e, a, b, F4, 78 ); + addl state_h0(RSTATE), a; W_PRECALC_00_15_2(14, W5, Wtmp0); + R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0); + + /* Update the chaining variables. */ + addl state_h3(RSTATE), d; + addl state_h2(RSTATE), c; + addl state_h1(RSTATE), b; + addl state_h4(RSTATE), e; + + movl d, state_h3(RSTATE); + movl c, state_h2(RSTATE); + movl b, state_h1(RSTATE); + movl a, state_h0(RSTATE); + movl e, state_h4(RSTATE); + + jmp .Loop; + +.align 16 +.Lend: + vzeroall; + + /* Transform 64-79. */ + R( b, c, d, e, a, F4, 64 ); + R( a, b, c, d, e, F4, 65 ); + R( e, a, b, c, d, F4, 66 ); + R( d, e, a, b, c, F4, 67 ); + R( c, d, e, a, b, F4, 68 ); + R( b, c, d, e, a, F4, 69 ); + R( a, b, c, d, e, F4, 70 ); + R( e, a, b, c, d, F4, 71 ); + R( d, e, a, b, c, F4, 72 ); + R( c, d, e, a, b, F4, 73 ); + R( b, c, d, e, a, F4, 74 ); + R( a, b, c, d, e, F4, 75 ); + R( e, a, b, c, d, F4, 76 ); + R( d, e, a, b, c, F4, 77 ); + R( c, d, e, a, b, F4, 78 ); + addl state_h0(RSTATE), a; + R( b, c, d, e, a, F4, 79 ); + + /* Update the chaining variables. 
*/ + addl state_h3(RSTATE), d; + addl state_h2(RSTATE), c; + addl state_h1(RSTATE), b; + addl state_h4(RSTATE), e; + + movl d, state_h3(RSTATE); + movl c, state_h2(RSTATE); + movl b, state_h1(RSTATE); + movl a, state_h0(RSTATE); + movl e, state_h4(RSTATE); + + movq ROLDSTACK, %rsp; + + popq %rbp; + popq %rbx; + + /* burn_stack */ + movl $(16*4 + 2*8 + 31), %eax; + +.Lret: + ret; + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha1-ssse3-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha1-ssse3-amd64.S new file mode 100644 index 0000000..2b43947 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha1-ssse3-amd64.S @@ -0,0 +1,434 @@ +/* sha1-ssse3-amd64.S - Intel SSSE3 accelerated SHA-1 transform function + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * Based on sha1.c: + * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Intel SSSE3 accelerated SHA-1 implementation based on white paper: + * "Improving the Performance of the Secure Hash Algorithm (SHA-1)" + * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1 + */ + +#ifdef __x86_64__ +#include <config.h> + +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1) + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 + + +/* Constants */ + +.text +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 +.align 16 +.LK_XMM: +.LK1: .long K1, K1, K1, K1 +.LK2: .long K2, K2, K2, K2 +.LK3: .long K3, K3, K3, K3 +.LK4: .long K4, K4, K4, K4 + +.Lbswap_shufb_ctl: + .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f + + +/* Register macros */ + +#define RSTATE %r8 +#define RDATA %r9 +#define ROLDSTACK %r10 +#define RNBLKS %r11 + +#define a %eax +#define b %ebx +#define c %ecx +#define d %edx +#define e %edi + +#define RT0 %esi +#define RT1 %ebp + +#define Wtmp0 %xmm0 +#define Wtmp1 %xmm1 + +#define W0 %xmm2 +#define W1 %xmm3 +#define W2 %xmm4 +#define W3 %xmm5 +#define W4 %xmm6 +#define W5 %xmm7 +#define W6 %xmm8 +#define W7 %xmm9 + +#define BSWAP_REG %xmm10 + + +/* Round function macros. 
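+ *
+ * C reference forms for the four round types (sketch only; note F1 is
+ * computed here as d ^ (b & (c ^ d)) rather than via andn as in the
+ * BMI2 variant):
+ *
+ *   F1(b,c,d) = d ^ (b & (c ^ d));        /+ "choose"
+ *   F2(b,c,d) = b ^ c ^ d;                /+ parity; F4 == F2
+ *   F3(b,c,d) = (b & c) + ((b ^ c) & d);  /+ equals Maj(b,c,d)
+ *
+ * Each R_F* also rotates b left by 30 and adds rol(a,5) plus the
+ * precomputed W+K word fetched from the stack via WK(i).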
*/ + +#define WK(i) (((i) & 15) * 4)(%rsp) + +#define R_F1(a,b,c,d,e,i) \ + movl c, RT0; \ + addl WK(i), e; \ + xorl d, RT0; \ + movl a, RT1; \ + andl b, RT0; \ + roll $30, b; \ + xorl d, RT0; \ + leal (RT0,e), e; \ + roll $5, RT1; \ + addl RT1, e; + +#define R_F2(a,b,c,d,e,i) \ + movl c, RT0; \ + addl WK(i), e; \ + xorl b, RT0; \ + roll $30, b; \ + xorl d, RT0; \ + movl a, RT1; \ + leal (RT0,e), e; \ + roll $5, RT1; \ + addl RT1, e; + +#define R_F3(a,b,c,d,e,i) \ + movl c, RT0; \ + movl b, RT1; \ + xorl b, RT0; \ + andl c, RT1; \ + andl d, RT0; \ + addl RT1, e; \ + addl WK(i), e; \ + roll $30, b; \ + movl a, RT1; \ + leal (RT0,e), e; \ + roll $5, RT1; \ + addl RT1, e; + +#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i) + +#define R(a,b,c,d,e,f,i) \ + R_##f(a,b,c,d,e,i) + + +/* Input expansion macros. */ + +#define W_PRECALC_00_15_0(i, W, tmp0) \ + movdqu (4*(i))(RDATA), tmp0; + +#define W_PRECALC_00_15_1(i, W, tmp0) \ + pshufb BSWAP_REG, tmp0; \ + movdqa tmp0, W; + +#define W_PRECALC_00_15_2(i, W, tmp0) \ + paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; + +#define W_PRECALC_00_15_3(i, W, tmp0) \ + movdqa tmp0, WK(i&~3); + +#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + movdqa W_m12, W; \ + palignr $8, W_m16, W; \ + movdqa W_m04, tmp0; \ + psrldq $4, tmp0; \ + pxor W_m08, W; + +#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + pxor W_m16, tmp0; \ + pxor tmp0, W; \ + movdqa W, tmp1; \ + movdqa W, tmp0; \ + pslldq $12, tmp1; + +#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + psrld $31, W; \ + pslld $1, tmp0; \ + por W, tmp0; \ + movdqa tmp1, W; \ + psrld $30, tmp1; \ + pslld $2, W; + +#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ + pxor W, tmp0; \ + pxor tmp1, tmp0; \ + movdqa tmp0, W; \ + paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; \ + movdqa tmp0, WK((i)&~3); + +#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + movdqa W_m04, tmp0; \ + pxor W_m28, W; \ + palignr $8, W_m08, tmp0; + +#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + pxor W_m16, W; \ + pxor tmp0, W; \ + movdqa W, tmp0; + +#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + psrld $30, W; \ + pslld $2, tmp0; \ + por W, tmp0; + +#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ + movdqa tmp0, W; \ + paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; \ + movdqa tmp0, WK((i)&~3); + +#define CLEAR_REG(reg) pxor reg, reg; + + +/* + * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. + * + * unsigned int + * _gcry_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data, + * size_t nblks) + */ +.globl _gcry_sha1_transform_amd64_ssse3 +ELF(.type _gcry_sha1_transform_amd64_ssse3,@function) +.align 16 +_gcry_sha1_transform_amd64_ssse3: + /* input: + * %rdi: ctx, CTX + * %rsi: data (64*nblks bytes) + * %rdx: nblks + */ + + xorl %eax, %eax; + cmpq $0, %rdx; + jz .Lret; + + movq %rdx, RNBLKS; + movq %rdi, RSTATE; + movq %rsi, RDATA; + pushq %rbx; + pushq %rbp; + + movq %rsp, ROLDSTACK; + + subq $(16*4), %rsp; + andq $(~31), %rsp; + + /* Get the values of the chaining variables. */ + movl state_h0(RSTATE), a; + movl state_h1(RSTATE), b; + movl state_h2(RSTATE), c; + movl state_h3(RSTATE), d; + movl state_h4(RSTATE), e; + + movdqa .Lbswap_shufb_ctl RIP, BSWAP_REG; + + /* Precalc 0-15. 
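+ * + * (Editorial note: each group of four W_PRECALC_00_15_* steps loads one + * 16-byte vector, byte-swaps it and pre-adds the round constant; roughly, + * in C: + * + *   for (i = 0; i < 16; i += 4) + *     WK[i..i+3] = be32(data[i..i+3]) + K1; + * + * splitting the load/shuffle/add/store over four macros lets the steps be + * interleaved with round computations inside the main loop.)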
*/ + W_PRECALC_00_15_0(0, W0, Wtmp0); + W_PRECALC_00_15_1(1, W0, Wtmp0); + W_PRECALC_00_15_2(2, W0, Wtmp0); + W_PRECALC_00_15_3(3, W0, Wtmp0); + W_PRECALC_00_15_0(4, W7, Wtmp0); + W_PRECALC_00_15_1(5, W7, Wtmp0); + W_PRECALC_00_15_2(6, W7, Wtmp0); + W_PRECALC_00_15_3(7, W7, Wtmp0); + W_PRECALC_00_15_0(8, W6, Wtmp0); + W_PRECALC_00_15_1(9, W6, Wtmp0); + W_PRECALC_00_15_2(10, W6, Wtmp0); + W_PRECALC_00_15_3(11, W6, Wtmp0); + W_PRECALC_00_15_0(12, W5, Wtmp0); + W_PRECALC_00_15_1(13, W5, Wtmp0); + W_PRECALC_00_15_2(14, W5, Wtmp0); + W_PRECALC_00_15_3(15, W5, Wtmp0); + +.align 8 +.Loop: + addq $64, RDATA; + + /* Transform 0-15 + Precalc 16-31. */ + R( a, b, c, d, e, F1, 0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1); + R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1); + + /* Transform 16-63 + Precalc 32-79. 
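+ * + * (Editorial note: the W_PRECALC_32_79_* macros use the vectorized + * recurrence + * + *   W[i] = rol(W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32], 2) + * + * which, for i >= 32, is equivalent to applying the standard + *   W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1) + * twice. The rewritten form has no dependency within a 4-word vector, so + * four expanded words are produced per macro group.)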
*/ + R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0); + R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0); + R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0); + R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0); + R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0); + R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, 
W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0); + R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0); + R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0); + + decq RNBLKS; + jz .Lend; + + /* Transform 64-79 + Precalc 0-15 of next block. */ + R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0); + R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0); + R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0); + R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0); + R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0); + R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0); + R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0); + R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0); + R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0); + R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0); + R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0); + R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0); + R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0); + R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0); + R( c, d, e, a, b, F4, 78 ); + addl state_h0(RSTATE), a; W_PRECALC_00_15_2(14, W5, Wtmp0); + R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0); + + /* Update the chaining variables. */ + addl state_h3(RSTATE), d; + addl state_h2(RSTATE), c; + addl state_h1(RSTATE), b; + addl state_h4(RSTATE), e; + + movl d, state_h3(RSTATE); + movl c, state_h2(RSTATE); + movl b, state_h1(RSTATE); + movl a, state_h0(RSTATE); + movl e, state_h4(RSTATE); + + jmp .Loop; + +.align 16 +.Lend: + /* Transform 64-79 + Clear XMM registers. */ + R( b, c, d, e, a, F4, 64 ); CLEAR_REG(BSWAP_REG); + R( a, b, c, d, e, F4, 65 ); CLEAR_REG(Wtmp0); + R( e, a, b, c, d, F4, 66 ); CLEAR_REG(Wtmp1); + R( d, e, a, b, c, F4, 67 ); CLEAR_REG(W0); + R( c, d, e, a, b, F4, 68 ); CLEAR_REG(W1); + R( b, c, d, e, a, F4, 69 ); CLEAR_REG(W2); + R( a, b, c, d, e, F4, 70 ); CLEAR_REG(W3); + R( e, a, b, c, d, F4, 71 ); CLEAR_REG(W4); + R( d, e, a, b, c, F4, 72 ); CLEAR_REG(W5); + R( c, d, e, a, b, F4, 73 ); CLEAR_REG(W6); + R( b, c, d, e, a, F4, 74 ); CLEAR_REG(W7); + R( a, b, c, d, e, F4, 75 ); + R( e, a, b, c, d, F4, 76 ); + R( d, e, a, b, c, F4, 77 ); + R( c, d, e, a, b, F4, 78 ); + addl state_h0(RSTATE), a; + R( b, c, d, e, a, F4, 79 ); + + /* Update the chaining variables. 
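+ * (Editorial note: unlike the AVX variant, which wipes the vector state + * with a single vzeroall, this SSSE3 path cleared each XMM register + * individually via CLEAR_REG/pxor during the final rounds above, so no + * message-schedule data is left behind in vector registers.)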
*/ + addl state_h3(RSTATE), d; + addl state_h2(RSTATE), c; + addl state_h1(RSTATE), b; + addl state_h4(RSTATE), e; + + movl d, state_h3(RSTATE); + movl c, state_h2(RSTATE); + movl b, state_h1(RSTATE); + movl a, state_h0(RSTATE); + movl e, state_h4(RSTATE); + + movq ROLDSTACK, %rsp; + + popq %rbp; + popq %rbx; + + /* burn_stack */ + movl $(16*4 + 2*8 + 31), %eax; + +.Lret: + ret; + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha1.c b/libotr/libgcrypt-1.8.7/cipher/sha1.c new file mode 100644 index 0000000..78b172f --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha1.c @@ -0,0 +1,626 @@ +/* sha1.c - SHA1 hash function + * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + +/* Test vectors: + * + * "abc" + * A999 3E36 4706 816A BA3E 2571 7850 C26C 9CD0 D89D + * + * "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + * 8498 3E44 1C3B D26E BAAE 4AA1 F951 29E5 E546 70F1 + */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifdef HAVE_STDINT_H +# include <stdint.h> +#endif + +#include "g10lib.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher.h" +#include "sha1.h" + + +/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ +#undef USE_SSSE3 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_SSSE3 1 +#endif + +/* USE_AVX indicates whether to compile with Intel AVX code. */ +#undef USE_AVX +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AVX 1 +#endif + +/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */ +#undef USE_BMI2 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ + defined(HAVE_GCC_INLINE_ASM_BMI2) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_BMI2 1 +#endif + +/* USE_NEON indicates whether to enable ARM NEON assembly code. */ +#undef USE_NEON +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_NEON 1 +# endif +#endif + +/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly + * code. 
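+ * (Note that these USE_* macros only gate what gets compiled in; which + * implementation actually runs is decided at run time from the detected + * hardware features, in sha1_init below.)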
*/ +#undef USE_ARM_CE +#ifdef ENABLE_ARM_CRYPTO_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) +# define USE_ARM_CE 1 +# elif defined(__AARCH64EL__) \ + && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) +# define USE_ARM_CE 1 +# endif +#endif + +/* A macro to test whether P is properly aligned for an u32 type. + Note that config.h provides a suitable replacement for uintptr_t if + it does not exist in stdint.h. */ +/* #if __GNUC__ >= 2 */ +/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % __alignof__ (u32))) */ +/* #else */ +/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % sizeof (u32))) */ +/* #endif */ + + +static unsigned int +transform (void *c, const unsigned char *data, size_t nblks); + + +static void +sha1_init (void *context, unsigned int flags) +{ + SHA1_CONTEXT *hd = context; + unsigned int features = _gcry_get_hw_features (); + + (void)flags; + + hd->h0 = 0x67452301; + hd->h1 = 0xefcdab89; + hd->h2 = 0x98badcfe; + hd->h3 = 0x10325476; + hd->h4 = 0xc3d2e1f0; + + hd->bctx.nblocks = 0; + hd->bctx.nblocks_high = 0; + hd->bctx.count = 0; + hd->bctx.blocksize = 64; + hd->bctx.bwrite = transform; + +#ifdef USE_SSSE3 + hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; +#endif +#ifdef USE_AVX + /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs. + * Therefore use this implementation on Intel CPUs only. */ + hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); +#endif +#ifdef USE_BMI2 + hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2); +#endif +#ifdef USE_NEON + hd->use_neon = (features & HWF_ARM_NEON) != 0; +#endif +#ifdef USE_ARM_CE + hd->use_arm_ce = (features & HWF_ARM_SHA1) != 0; +#endif + (void)features; +} + +/* + * Initialize the context HD. This is used to prepare the use of + * _gcry_sha1_mixblock. WARNING: This is a special purpose function + * for exclusive use by random-csprng.c. + */ +void +_gcry_sha1_mixblock_init (SHA1_CONTEXT *hd) +{ + sha1_init (hd, 0); +} + + +/* Round function macros. */ +#define K1 0x5A827999L +#define K2 0x6ED9EBA1L +#define K3 0x8F1BBCDCL +#define K4 0xCA62C1D6L +#define F1(x,y,z) ( z ^ ( x & ( y ^ z ) ) ) +#define F2(x,y,z) ( x ^ y ^ z ) +#define F3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) ) +#define F4(x,y,z) ( x ^ y ^ z ) +#define M(i) ( tm = x[ i &0x0f] \ + ^ x[(i-14)&0x0f] \ + ^ x[(i-8) &0x0f] \ + ^ x[(i-3) &0x0f], \ + (x[i&0x0f] = rol(tm, 1))) +#define R(a,b,c,d,e,f,k,m) do { e += rol( a, 5 ) \ + + f( b, c, d ) \ + + k \ + + m; \ + b = rol( b, 30 ); \ + } while(0) + + +#ifdef USE_NEON +unsigned int +_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data, + size_t nblks); +#endif + +#ifdef USE_ARM_CE +unsigned int +_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data, + size_t nblks); +#endif + +/* + * Transform NBLOCKS of each 64 bytes (16 32-bit words) at DATA. + */ +static unsigned int +transform_blk (void *ctx, const unsigned char *data) +{ + SHA1_CONTEXT *hd = ctx; + const u32 *idata = (const void *)data; + register u32 a, b, c, d, e; /* Local copies of the chaining variables. */ + register u32 tm; /* Helper. */ + u32 x[16]; /* The array we work on. */ + +#define I(i) (x[i] = buf_get_be32(idata + i)) + + /* Get the values of the chaining variables. */ + a = hd->h0; + b = hd->h1; + c = hd->h2; + d = hd->h3; + e = hd->h4; + + /* Transform. 
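+ * The 80 rounds below are grouped by round function and constant: + * F1/K1 for rounds 0-19, F2/K2 for 20-39, F3/K3 for 40-59 and F4/K4 + * for 60-79. I(i) fetches message word i big-endian; M(i) computes + * the rolled message expansion in place.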
*/ + R( a, b, c, d, e, F1, K1, I( 0) ); + R( e, a, b, c, d, F1, K1, I( 1) ); + R( d, e, a, b, c, F1, K1, I( 2) ); + R( c, d, e, a, b, F1, K1, I( 3) ); + R( b, c, d, e, a, F1, K1, I( 4) ); + R( a, b, c, d, e, F1, K1, I( 5) ); + R( e, a, b, c, d, F1, K1, I( 6) ); + R( d, e, a, b, c, F1, K1, I( 7) ); + R( c, d, e, a, b, F1, K1, I( 8) ); + R( b, c, d, e, a, F1, K1, I( 9) ); + R( a, b, c, d, e, F1, K1, I(10) ); + R( e, a, b, c, d, F1, K1, I(11) ); + R( d, e, a, b, c, F1, K1, I(12) ); + R( c, d, e, a, b, F1, K1, I(13) ); + R( b, c, d, e, a, F1, K1, I(14) ); + R( a, b, c, d, e, F1, K1, I(15) ); + R( e, a, b, c, d, F1, K1, M(16) ); + R( d, e, a, b, c, F1, K1, M(17) ); + R( c, d, e, a, b, F1, K1, M(18) ); + R( b, c, d, e, a, F1, K1, M(19) ); + R( a, b, c, d, e, F2, K2, M(20) ); + R( e, a, b, c, d, F2, K2, M(21) ); + R( d, e, a, b, c, F2, K2, M(22) ); + R( c, d, e, a, b, F2, K2, M(23) ); + R( b, c, d, e, a, F2, K2, M(24) ); + R( a, b, c, d, e, F2, K2, M(25) ); + R( e, a, b, c, d, F2, K2, M(26) ); + R( d, e, a, b, c, F2, K2, M(27) ); + R( c, d, e, a, b, F2, K2, M(28) ); + R( b, c, d, e, a, F2, K2, M(29) ); + R( a, b, c, d, e, F2, K2, M(30) ); + R( e, a, b, c, d, F2, K2, M(31) ); + R( d, e, a, b, c, F2, K2, M(32) ); + R( c, d, e, a, b, F2, K2, M(33) ); + R( b, c, d, e, a, F2, K2, M(34) ); + R( a, b, c, d, e, F2, K2, M(35) ); + R( e, a, b, c, d, F2, K2, M(36) ); + R( d, e, a, b, c, F2, K2, M(37) ); + R( c, d, e, a, b, F2, K2, M(38) ); + R( b, c, d, e, a, F2, K2, M(39) ); + R( a, b, c, d, e, F3, K3, M(40) ); + R( e, a, b, c, d, F3, K3, M(41) ); + R( d, e, a, b, c, F3, K3, M(42) ); + R( c, d, e, a, b, F3, K3, M(43) ); + R( b, c, d, e, a, F3, K3, M(44) ); + R( a, b, c, d, e, F3, K3, M(45) ); + R( e, a, b, c, d, F3, K3, M(46) ); + R( d, e, a, b, c, F3, K3, M(47) ); + R( c, d, e, a, b, F3, K3, M(48) ); + R( b, c, d, e, a, F3, K3, M(49) ); + R( a, b, c, d, e, F3, K3, M(50) ); + R( e, a, b, c, d, F3, K3, M(51) ); + R( d, e, a, b, c, F3, K3, M(52) ); + R( c, d, e, a, b, F3, K3, M(53) ); + R( b, c, d, e, a, F3, K3, M(54) ); + R( a, b, c, d, e, F3, K3, M(55) ); + R( e, a, b, c, d, F3, K3, M(56) ); + R( d, e, a, b, c, F3, K3, M(57) ); + R( c, d, e, a, b, F3, K3, M(58) ); + R( b, c, d, e, a, F3, K3, M(59) ); + R( a, b, c, d, e, F4, K4, M(60) ); + R( e, a, b, c, d, F4, K4, M(61) ); + R( d, e, a, b, c, F4, K4, M(62) ); + R( c, d, e, a, b, F4, K4, M(63) ); + R( b, c, d, e, a, F4, K4, M(64) ); + R( a, b, c, d, e, F4, K4, M(65) ); + R( e, a, b, c, d, F4, K4, M(66) ); + R( d, e, a, b, c, F4, K4, M(67) ); + R( c, d, e, a, b, F4, K4, M(68) ); + R( b, c, d, e, a, F4, K4, M(69) ); + R( a, b, c, d, e, F4, K4, M(70) ); + R( e, a, b, c, d, F4, K4, M(71) ); + R( d, e, a, b, c, F4, K4, M(72) ); + R( c, d, e, a, b, F4, K4, M(73) ); + R( b, c, d, e, a, F4, K4, M(74) ); + R( a, b, c, d, e, F4, K4, M(75) ); + R( e, a, b, c, d, F4, K4, M(76) ); + R( d, e, a, b, c, F4, K4, M(77) ); + R( c, d, e, a, b, F4, K4, M(78) ); + R( b, c, d, e, a, F4, K4, M(79) ); + + /* Update the chaining variables. */ + hd->h0 += a; + hd->h1 += b; + hd->h2 += c; + hd->h3 += d; + hd->h4 += e; + + return /* burn_stack */ 88+4*sizeof(void*); +} + + +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. 
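+ * (On Win64 the __attribute__((sysv_abi)) marker below makes GCC emit the + * ABI-conversion wrapper; ASM_EXTRA_STACK then accounts for the ten + * 16-byte registers, XMM6-XMM15, that the wrapper must spill.)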
*/ +#undef ASM_FUNC_ABI +#undef ASM_EXTRA_STACK +#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) +# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_EXTRA_STACK (10 * 16) +# else +# define ASM_FUNC_ABI +# define ASM_EXTRA_STACK 0 +# endif +#endif + + +#ifdef USE_SSSE3 +unsigned int +_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data, + size_t nblks) ASM_FUNC_ABI; +#endif + +#ifdef USE_AVX +unsigned int +_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data, + size_t nblks) ASM_FUNC_ABI; +#endif + +#ifdef USE_BMI2 +unsigned int +_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data, + size_t nblks) ASM_FUNC_ABI; +#endif + + +static unsigned int +transform (void *ctx, const unsigned char *data, size_t nblks) +{ + SHA1_CONTEXT *hd = ctx; + unsigned int burn; + +#ifdef USE_BMI2 + if (hd->use_bmi2) + return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks) + + 4 * sizeof(void*) + ASM_EXTRA_STACK; +#endif +#ifdef USE_AVX + if (hd->use_avx) + return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks) + + 4 * sizeof(void*) + ASM_EXTRA_STACK; +#endif +#ifdef USE_SSSE3 + if (hd->use_ssse3) + return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks) + + 4 * sizeof(void*) + ASM_EXTRA_STACK; +#endif +#ifdef USE_ARM_CE + if (hd->use_arm_ce) + return _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks); +#endif +#ifdef USE_NEON + if (hd->use_neon) + return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks) + + 4 * sizeof(void*); +#endif + + do + { + burn = transform_blk (hd, data); + data += 64; + } + while (--nblks); + +#ifdef ASM_EXTRA_STACK + /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored in + * the prologue of this function. Therefore we need to add ASM_EXTRA_STACK + * here too. + */ + burn += ASM_EXTRA_STACK; +#endif + + return burn; +} + + +/* + * Apply the SHA-1 transform function on the buffer BLOCKOF64BYTE + * which must have a length of 64 bytes. BLOCKOF64BYTE must be 32-bit + * aligned. Updates the first 20 bytes of BLOCKOF64BYTE with the mixed + * content. Returns the number of bytes which should be burned on the + * stack. You need to use _gcry_sha1_mixblock_init to initialize the + * context. + * WARNING: This is a special purpose function for exclusive use by + * random-csprng.c. + */ +unsigned int +_gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte) +{ + u32 *p = blockof64byte; + unsigned int nburn; + + nburn = transform (hd, blockof64byte, 1); + p[0] = hd->h0; + p[1] = hd->h1; + p[2] = hd->h2; + p[3] = hd->h3; + p[4] = hd->h4; + + return nburn; +} + + +/* The final routine terminates the computation and + * returns the digest. + * The handle is prepared for a new cycle, but adding bytes to the + * handle will destroy the returned buffer. + * Returns: 20 bytes representing the digest.
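+ * + * (Editorial sketch, not in the original source: the finalization below is + * standard MD-strengthening, in outline + * + *   buf[count++] = 0x80;                       // one 1-bit, then zeros + *   while (count % 64 != 56) buf[count++] = 0; + *   put_be64(buf + 56, nbits);                 // bit length, big-endian + *   transform(hd, buf, 1); + * + * with a second compression call when fewer than 8 bytes of the current + * block remain. put_be64 is a hypothetical helper; the code below uses two + * buf_put_be32 stores.)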
+ */ + +static void +sha1_final(void *context) +{ + SHA1_CONTEXT *hd = context; + u32 t, th, msb, lsb; + unsigned char *p; + unsigned int burn; + + _gcry_md_block_write (hd, NULL, 0); /* flush */ + + t = hd->bctx.nblocks; + if (sizeof t == sizeof hd->bctx.nblocks) + th = hd->bctx.nblocks_high; + else + th = hd->bctx.nblocks >> 32; + + /* multiply by 64 to make a byte count */ + lsb = t << 6; + msb = (th << 6) | (t >> 26); + /* add the count */ + t = lsb; + if( (lsb += hd->bctx.count) < t ) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 29; + + if( hd->bctx.count < 56 ) /* enough room */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ + while( hd->bctx.count < 56 ) + hd->bctx.buf[hd->bctx.count++] = 0; /* pad */ + } + else /* need one extra block */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ + while( hd->bctx.count < 64 ) + hd->bctx.buf[hd->bctx.count++] = 0; + _gcry_md_block_write(hd, NULL, 0); /* flush */ + memset(hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */ + } + /* append the 64 bit count */ + buf_put_be32(hd->bctx.buf + 56, msb); + buf_put_be32(hd->bctx.buf + 60, lsb); + burn = transform( hd, hd->bctx.buf, 1 ); + _gcry_burn_stack (burn); + + p = hd->bctx.buf; +#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0) + X(0); + X(1); + X(2); + X(3); + X(4); +#undef X + +} + +static unsigned char * +sha1_read( void *context ) +{ + SHA1_CONTEXT *hd = context; + + return hd->bctx.buf; +} + +/**************** + * Shortcut function which puts the hash value of the supplied buffer + * into outbuf, which must have a size of 20 bytes. + */ +void +_gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + SHA1_CONTEXT hd; + + sha1_init (&hd, 0); + _gcry_md_block_write (&hd, buffer, length); + sha1_final (&hd); + memcpy (outbuf, hd.bctx.buf, 20); +} + + +/* Variant of the above shortcut function using multiple buffers. */ +void +_gcry_sha1_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt) +{ + SHA1_CONTEXT hd; + + sha1_init (&hd, 0); + for (;iovcnt > 0; iov++, iovcnt--) + _gcry_md_block_write (&hd, + (const char*)iov[0].data + iov[0].off, iov[0].len); + sha1_final (&hd); + memcpy (outbuf, hd.bctx.buf, 20); +} + + + +/* + Self-test section. + */ + + +static gpg_err_code_t +selftests_sha1 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + what = "short string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA1, 0, + "abc", 3, + "\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E" + "\x25\x71\x78\x50\xC2\x6C\x9C\xD0\xD8\x9D", 20); + if (errtxt) + goto failed; + + if (extended) + { + what = "long string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA1, 0, + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56, + "\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE" + "\x4A\xA1\xF9\x51\x29\xE5\xE5\x46\x70\xF1", 20); + if (errtxt) + goto failed; + + what = "one million \"a\""; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA1, 1, + NULL, 0, + "\x34\xAA\x97\x3C\xD4\xC4\xDA\xA4\xF6\x1E" + "\xEB\x2B\xDB\xAD\x27\x31\x65\x34\x01\x6F", 20); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("digest", GCRY_MD_SHA1, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success.
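+ * (This entry point is wired into the run_selftests field of + * _gcry_digest_spec_sha1 below; extended mode adds the 56-byte and the + * one-million-"a" test vectors on top of the basic "abc" check.)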
*/ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_MD_SHA1: + ec = selftests_sha1 (extended, report); + break; + default: + ec = GPG_ERR_DIGEST_ALGO; + break; + + } + return ec; +} + + + + +static unsigned char asn[15] = /* Object ID is 1.3.14.3.2.26 */ + { 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03, + 0x02, 0x1a, 0x05, 0x00, 0x04, 0x14 }; + +static gcry_md_oid_spec_t oid_spec_sha1[] = + { + /* iso.member-body.us.rsadsi.pkcs.pkcs-1.5 (sha1WithRSAEncryption) */ + { "1.2.840.113549.1.1.5" }, + /* iso.member-body.us.x9-57.x9cm.3 (dsaWithSha1)*/ + { "1.2.840.10040.4.3" }, + /* from NIST's OIW (sha1) */ + { "1.3.14.3.2.26" }, + /* from NIST OIW (sha-1WithRSAEncryption) */ + { "1.3.14.3.2.29" }, + /* iso.member-body.us.ansi-x9-62.signatures.ecdsa-with-sha1 */ + { "1.2.840.10045.4.1" }, + { NULL }, + }; + +gcry_md_spec_t _gcry_digest_spec_sha1 = + { + GCRY_MD_SHA1, {0, 1}, + "SHA1", asn, DIM (asn), oid_spec_sha1, 20, + sha1_init, _gcry_md_block_write, sha1_final, sha1_read, NULL, + sizeof (SHA1_CONTEXT), + run_selftests + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/sha1.h b/libotr/libgcrypt-1.8.7/cipher/sha1.h new file mode 100644 index 0000000..d448fca --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha1.h @@ -0,0 +1,40 @@ +/* sha1.h - SHA-1 context definition + * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef GCRY_SHA1_H +#define GCRY_SHA1_H + +#include "hash-common.h" + +/* We need this here for direct use by random-csprng.c. */ +typedef struct +{ + gcry_md_block_ctx_t bctx; + u32 h0,h1,h2,h3,h4; + unsigned int use_ssse3:1; + unsigned int use_avx:1; + unsigned int use_bmi2:1; + unsigned int use_neon:1; + unsigned int use_arm_ce:1; +} SHA1_CONTEXT; + + +void _gcry_sha1_mixblock_init (SHA1_CONTEXT *hd); +unsigned int _gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte); + +#endif /*GCRY_SHA1_H*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/sha256-armv8-aarch32-ce.S b/libotr/libgcrypt-1.8.7/cipher/sha256-armv8-aarch32-ce.S new file mode 100644 index 0000000..2b17ab1 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha256-armv8-aarch32-ce.S @@ -0,0 +1,231 @@ +/* sha256-armv8-aarch32-ce.S - ARM/CE accelerated SHA-256 transform function + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) && defined(USE_SHA256) + +.syntax unified +.arch armv8-a +.fpu crypto-neon-fp-armv8 +.arm + +.text + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +/* Constants */ + +.align 4 +gcry_sha256_aarch32_ce_K: +.LK: + .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + + +/* Register macros */ + +#define qH0123 q0 +#define qH4567 q1 + +#define qABCD0 q2 +#define qABCD1 q3 +#define qEFGH q4 + +#define qT0 q5 +#define qT1 q6 + +#define qW0 q8 +#define qW1 q9 +#define qW2 q10 +#define qW3 q11 + +#define qK0 q12 +#define qK1 q13 +#define qK2 q14 +#define qK3 q15 + + +/* Round macros */ + +#define _(...) /*_*/ + +#define do_loadk(nk0, nk1) vld1.32 {nk0-nk1},[lr]!; +#define do_add(a, b) vadd.u32 a, a, b; +#define do_sha256su0(w0, w1) sha256su0.32 w0, w1; +#define do_sha256su1(w0, w2, w3) sha256su1.32 w0, w2, w3; + +#define do_rounds(k, nk0, nk1, w0, w1, w2, w3, loadk_fn, add_fn, su0_fn, su1_fn) \ + loadk_fn( nk0, nk1 ); \ + su0_fn( w0, w1 ); \ + vmov qABCD1, qABCD0; \ + sha256h.32 qABCD0, qEFGH, k; \ + sha256h2.32 qEFGH, qABCD1, k; \ + add_fn( nk0, w2 ); \ + su1_fn( w0, w2, w3 ); + + +/* Other functional macros */ + +#define CLEAR_REG(reg) veor reg, reg; + + +/* + * unsigned int + * _gcry_sha256_transform_armv8_ce (u32 state[8], const void *input_data, + * size_t num_blks) + */ +.align 3 +.globl _gcry_sha256_transform_armv8_ce +.type _gcry_sha256_transform_armv8_ce,%function; +_gcry_sha256_transform_armv8_ce: + /* input: + * r0: ctx, CTX + * r1: data (64*nblks bytes) + * r2: nblks + */ + + cmp r2, #0; + push {r4,lr}; + beq .Ldo_nothing; + + vpush {q4-q7}; + + GET_DATA_POINTER(r4, .LK, lr); + mov lr, r4 + + vld1.32 {qH0123-qH4567}, [r0] /* load state */ + + vld1.8 {qW0-qW1}, [r1]! + do_loadk(qK0, qK1) + vld1.8 {qW2-qW3}, [r1]! 
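+ + /* Editorial note: the hash state is now in qH0123/qH4567 and the first + * 64-byte block has been loaded into qW0-qW3 (still in memory byte + * order; the rev32 instructions below fix the endianness). The loop + * keeps two round-constant vectors in flight, pre-added into the + * message words, so loads overlap the sha256h/sha256h2 operations. */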
+ vmov qABCD0, qH0123 + vmov qEFGH, qH4567 + + vrev32.8 qW0, qW0 + vrev32.8 qW1, qW1 + vrev32.8 qW2, qW2 + do_add(qK0, qW0) + vrev32.8 qW3, qW3 + do_add(qK1, qW1) + +.Loop: + do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, do_sha256su1) + subs r2,r2,#1 + do_rounds(qK1, qK3, _ , qW1, qW2, qW3, qW0, _ , do_add, do_sha256su0, do_sha256su1) + do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(qK3, qK1, _ , qW3, qW0, qW1, qW2, _ , do_add, do_sha256su0, do_sha256su1) + + do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(qK1, qK3, _ , qW1, qW2, qW3, qW0, _ , do_add, do_sha256su0, do_sha256su1) + do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(qK3, qK1, _ , qW3, qW0, qW1, qW2, _ , do_add, do_sha256su0, do_sha256su1) + + do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(qK1, qK3, _ , qW1, qW2, qW3, qW0, _ , do_add, do_sha256su0, do_sha256su1) + do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(qK3, qK1, _ , qW3, qW0, qW1, qW2, _ , do_add, do_sha256su0, do_sha256su1) + + beq .Lend + + do_rounds(qK0, qK2, qK3, qW0, _ , qW2, qW3, do_loadk, do_add, _, _) + vld1.8 {qW0}, [r1]! + mov lr, r4 + do_rounds(qK1, qK3, _ , qW1, _ , qW3, _ , _ , do_add, _, _) + vld1.8 {qW1}, [r1]! + vrev32.8 qW0, qW0 + do_rounds(qK2, qK0, qK1, qW2, _ , qW0, _ , do_loadk, do_add, _, _) + vrev32.8 qW1, qW1 + vld1.8 {qW2}, [r1]! + do_rounds(qK3, qK1, _ , qW3, _ , qW1, _ , _ , do_add, _, _) + vld1.8 {qW3}, [r1]! + + vadd.u32 qH0123, qABCD0 + vadd.u32 qH4567, qEFGH + + vrev32.8 qW2, qW2 + vmov qABCD0, qH0123 + vrev32.8 qW3, qW3 + vmov qEFGH, qH4567 + + b .Loop + +.Lend: + + do_rounds(qK0, qK2, qK3, qW0, _ , qW2, qW3, do_loadk, do_add, _, _) + do_rounds(qK1, qK3, _ , qW1, _ , qW3, _ , _ , do_add, _, _) + do_rounds(qK2, _ , _ , qW2, _ , _ , _ , _ , _, _, _) + do_rounds(qK3, _ , _ , qW3, _ , _ , _ , _ , _, _, _) + + CLEAR_REG(qW0) + CLEAR_REG(qW1) + CLEAR_REG(qW2) + CLEAR_REG(qW3) + CLEAR_REG(qK0) + CLEAR_REG(qK1) + CLEAR_REG(qK2) + CLEAR_REG(qK3) + + vadd.u32 qH0123, qABCD0 + vadd.u32 qH4567, qEFGH + + CLEAR_REG(qABCD0) + CLEAR_REG(qABCD1) + CLEAR_REG(qEFGH) + + vst1.32 {qH0123-qH4567}, [r0] /* store state */ + + CLEAR_REG(qH0123) + CLEAR_REG(qH4567) + vpop {q4-q7} + +.Ldo_nothing: + mov r0, #0 + pop {r4,pc} +.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha256-armv8-aarch64-ce.S b/libotr/libgcrypt-1.8.7/cipher/sha256-armv8-aarch64-ce.S new file mode 100644 index 0000000..a4575da --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha256-armv8-aarch64-ce.S @@ -0,0 +1,218 @@ +/* sha256-armv8-aarch64-ce.S - ARM/CE accelerated SHA-256 transform function + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && defined(USE_SHA256) + +.cpu generic+simd+crypto + +.text + + +#define GET_DATA_POINTER(reg, name) \ + adrp reg, :got:name ; \ + ldr reg, [reg, #:got_lo12:name] ; + + +/* Constants */ + +.align 4 +gcry_sha256_aarch64_ce_K: +.LK: + .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + + +/* Register macros */ + +#define vH0123 v0 +#define vH4567 v1 + +#define vABCD0 v2 +#define qABCD0 q2 +#define vABCD1 v3 +#define qABCD1 q3 +#define vEFGH v4 +#define qEFGH q4 + +#define vT0 v5 +#define vT1 v6 + +#define vW0 v16 +#define vW1 v17 +#define vW2 v18 +#define vW3 v19 + +#define vK0 v20 +#define vK1 v21 +#define vK2 v22 +#define vK3 v23 + + +/* Round macros */ + +#define _(...) 
/*_*/ + +#define do_loadk(nk0, nk1) ld1 {nk0.16b-nk1.16b},[x3],#32; +#define do_add(a, b) add a.4s, a.4s, b.4s; +#define do_sha256su0(w0, w1) sha256su0 w0.4s, w1.4s; +#define do_sha256su1(w0, w2, w3) sha256su1 w0.4s, w2.4s, w3.4s; + +#define do_rounds(k, nk0, nk1, w0, w1, w2, w3, loadk_fn, add_fn, su0_fn, su1_fn) \ + loadk_fn( v##nk0, v##nk1 ); \ + su0_fn( v##w0, v##w1 ); \ + mov vABCD1.16b, vABCD0.16b; \ + sha256h qABCD0, qEFGH, v##k.4s; \ + sha256h2 qEFGH, qABCD1, v##k.4s; \ + add_fn( v##nk0, v##w2 ); \ + su1_fn( v##w0, v##w2, v##w3 ); + + +/* Other functional macros */ + +#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; + + +/* + * unsigned int + * _gcry_sha256_transform_armv8_ce (u32 state[8], const void *input_data, + * size_t num_blks) + */ +.align 3 +.globl _gcry_sha256_transform_armv8_ce +.type _gcry_sha256_transform_armv8_ce,%function; +_gcry_sha256_transform_armv8_ce: + /* input: + * r0: ctx, CTX + * r1: data (64*nblks bytes) + * r2: nblks + */ + + cbz x2, .Ldo_nothing; + + GET_DATA_POINTER(x3, .LK); + mov x4, x3 + + ld1 {vH0123.4s-vH4567.4s}, [x0] /* load state */ + + ld1 {vW0.16b-vW1.16b}, [x1], #32 + do_loadk(vK0, vK1) + ld1 {vW2.16b-vW3.16b}, [x1], #32 + mov vABCD0.16b, vH0123.16b + mov vEFGH.16b, vH4567.16b + + rev32 vW0.16b, vW0.16b + rev32 vW1.16b, vW1.16b + rev32 vW2.16b, vW2.16b + do_add(vK0, vW0) + rev32 vW3.16b, vW3.16b + do_add(vK1, vW1) + +.Loop: + do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1) + sub x2,x2,#1 + do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1) + do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, do_sha256su1) + + do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1) + do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, do_sha256su1) + + do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1) + do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1) + do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, do_sha256su1) + + cbz x2, .Lend + + do_rounds(K0, K2, K3, W0, _ , W2, W3, do_loadk, do_add, _, _) + ld1 {vW0.16b}, [x1], #16 + mov x3, x4 + do_rounds(K1, K3, _ , W1, _ , W3, _ , _ , do_add, _, _) + ld1 {vW1.16b}, [x1], #16 + rev32 vW0.16b, vW0.16b + do_rounds(K2, K0, K1, W2, _ , W0, _ , do_loadk, do_add, _, _) + rev32 vW1.16b, vW1.16b + ld1 {vW2.16b}, [x1], #16 + do_rounds(K3, K1, _ , W3, _ , W1, _ , _ , do_add, _, _) + ld1 {vW3.16b}, [x1], #16 + + do_add(vH0123, vABCD0) + do_add(vH4567, vEFGH) + + rev32 vW2.16b, vW2.16b + mov vABCD0.16b, vH0123.16b + rev32 vW3.16b, vW3.16b + mov vEFGH.16b, vH4567.16b + + b .Loop + +.Lend: + + do_rounds(K0, K2, K3, W0, _ , W2, W3, do_loadk, do_add, _, _) + do_rounds(K1, K3, _ , W1, _ , W3, _ , _ , do_add, _, _) + do_rounds(K2, _ , _ , W2, _ , _ , _ , _ , _, _, _) + do_rounds(K3, _ , _ , W3, _ , _ , _ , _ , _, _, _) + + CLEAR_REG(vW0) + CLEAR_REG(vW1) + CLEAR_REG(vW2) + CLEAR_REG(vW3) + CLEAR_REG(vK0) + CLEAR_REG(vK1) + CLEAR_REG(vK2) + CLEAR_REG(vK3) + + do_add(vH0123, vABCD0) + do_add(vH4567, vEFGH) + + CLEAR_REG(vABCD0) + CLEAR_REG(vABCD1) + CLEAR_REG(vEFGH) + + st1 {vH0123.4s-vH4567.4s}, [x0] /* store 
state */ + + CLEAR_REG(vH0123) + CLEAR_REG(vH4567) + +.Ldo_nothing: + mov x0, #0 + ret +.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha256-avx-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha256-avx-amd64.S new file mode 100644 index 0000000..6953855 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha256-avx-amd64.S @@ -0,0 +1,527 @@ +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright (c) 2012, Intel Corporation +; +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; +; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY +; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; This code is described in an Intel White-Paper: +; "Fast SHA-256 Implementations on Intel Architecture Processors" +; +; To find it, surf to http://www.intel.com/p/en_US/embedded +; and search for that title. +; The paper is expected to be released roughly at the end of April, 2012 +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; This code schedules 1 blocks at a time, with 4 lanes per block +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +*/ +/* + * Conversion to GAS assembly and integration to libgcrypt + * by Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * Note: Based on the SSSE3 implementation. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA256) + +#ifdef __PIC__ +# define ADD_RIP +rip +#else +# define ADD_RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +.intel_syntax noprefix + +#define VMOVDQ vmovdqu /* assume buffers not aligned */ + +.macro ROR p1 p2 + /* shld is faster than ror on Intel Sandybridge */ + shld \p1, \p1, (32 - \p2) +.endm + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros*/ + +/* addm [mem], reg + * Add reg to mem using reg-mem add and store */ +.macro addm p1 p2 + add \p2, \p1 + mov \p1, \p2 +.endm + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + +/* COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask + * Load xmm with mem and byte swap each dword */ +.macro COPY_XMM_AND_BSWAP p1 p2 p3 + VMOVDQ \p1, \p2 + vpshufb \p1, \p1, \p3 +.endm + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + +X0 = xmm4 +X1 = xmm5 +X2 = xmm6 +X3 = xmm7 + +XTMP0 = xmm0 +XTMP1 = xmm1 +XTMP2 = xmm2 +XTMP3 = xmm3 +XTMP4 = xmm8 +XFER = xmm9 + +SHUF_00BA = xmm10 /* shuffle xBxA -> 00BA */ +SHUF_DC00 = xmm11 /* shuffle xDxC -> DC00 */ +BYTE_FLIP_MASK = xmm12 + +NUM_BLKS = rdx /* 3rd arg */ +CTX = rsi /* 2nd arg */ +INP = rdi /* 1st arg */ + +SRND = rdi /* clobbers INP */ +c = ecx +d = r8d +e = edx + +TBL = rbp +a = eax +b = ebx + +f = r9d +g = r10d +h = r11d + +y0 = r13d +y1 = r14d +y2 = r15d + + + +#define _INP_END_SIZE 8 +#define _INP_SIZE 8 +#define _XFER_SIZE 8 +#define _XMM_SAVE_SIZE 0 +/* STACK_SIZE plus pushes must be an odd multiple of 8 */ +#define _ALIGN_SIZE 8 + +#define _INP_END 0 +#define _INP (_INP_END + _INP_END_SIZE) +#define _XFER (_INP + _INP_SIZE) +#define _XMM_SAVE (_XFER + _XFER_SIZE + _ALIGN_SIZE) +#define STACK_SIZE (_XMM_SAVE + _XMM_SAVE_SIZE) + +/* rotate_Xs + * Rotate values of symbols X0...X3 */ +.macro rotate_Xs +X_ = X0 +X0 = X1 +X1 = X2 +X2 = X3 +X3 = X_ +.endm + +/* ROTATE_ARGS + * Rotate values of symbols a...h */ +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +.macro FOUR_ROUNDS_AND_SCHED + /* compute s0 four at a time and s1 two at a time + * compute W[-16] + W[-7] 4 at a time */ + mov y0, e /* y0 = e */ + ROR y0, (25-11) /* y0 = e >> (25-11) */ + mov y1, a /* y1 = a */ + vpalignr XTMP0, X3, X2, 4 /* XTMP0 = W[-7] */ + ROR y1, (22-13) /* y1 = a >> (22-13) */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + mov y2, f /* y2 = f */ + ROR y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + xor y1, a /* y1 = a ^ (a >> (22-13) */ + xor y2, g /* y2 = f^g */ + vpaddd XTMP0, XTMP0, X0 /* XTMP0 = W[-7] + W[-16] */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + and y2, e /* y2 = (f^g)&e */ + ROR y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + /* compute s0 */ + vpalignr XTMP1, X1, X0, 4 /* XTMP1 = W[-15] */ + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROR y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + ROR y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + add y2, y0 /* y2 = S1 + CH */ + add y2, [rsp + _XFER + 0*4] /* y2 = k + w + S1 + CH */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + vpslld XTMP2, XTMP1, (32-7) + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + vpsrld XTMP3, XTMP1, 7 + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + vpor XTMP3, XTMP3, XTMP2 /* XTMP1 = W[-15] ror 7 */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + +ROTATE_ARGS + mov y0, e /* y0 = e */ + mov y1, a /* y1 = a */ + ROR y0, (25-11) /* y0 = e >> (25-11) */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + mov y2, f /* y2 = f */ + ROR y1, (22-13) /* y1 = a >> 
(22-13) */ + vpslld XTMP2, XTMP1, (32-18) + xor y1, a /* y1 = a ^ (a >> (22-13) */ + ROR y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + xor y2, g /* y2 = f^g */ + vpsrld XTMP4, XTMP1, 18 + ROR y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + and y2, e /* y2 = (f^g)&e */ + ROR y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + vpxor XTMP4, XTMP4, XTMP3 + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + vpsrld XTMP1, XTMP1, 3 /* XTMP4 = W[-15] >> 3 */ + add y2, y0 /* y2 = S1 + CH */ + add y2, [rsp + _XFER + 1*4] /* y2 = k + w + S1 + CH */ + ROR y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + vpxor XTMP1, XTMP1, XTMP2 /* XTMP1 = W[-15] ror 7 ^ W[-15] ror 18 */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + vpxor XTMP1, XTMP1, XTMP4 /* XTMP1 = s0 */ + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + /* compute low s1 */ + vpshufd XTMP2, X3, 0b11111010 /* XTMP2 = W[-2] {BBAA} */ + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + vpaddd XTMP0, XTMP0, XTMP1 /* XTMP0 = W[-16] + W[-7] + s0 */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + +ROTATE_ARGS + mov y0, e /* y0 = e */ + mov y1, a /* y1 = a */ + ROR y0, (25-11) /* y0 = e >> (25-11) */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + ROR y1, (22-13) /* y1 = a >> (22-13) */ + mov y2, f /* y2 = f */ + xor y1, a /* y1 = a ^ (a >> (22-13) */ + ROR y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + vpsrlq XTMP3, XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xBxA} */ + xor y2, g /* y2 = f^g */ + vpsrlq XTMP4, XTMP2, 19 /* XTMP3 = W[-2] ror 19 {xBxA} */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + and y2, e /* y2 = (f^g)&e */ + vpsrld XTMP2, XTMP2, 10 /* XTMP4 = W[-2] >> 10 {BBAA} */ + ROR y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + ROR y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + vpxor XTMP2, XTMP2, XTMP3 + add y2, y0 /* y2 = S1 + CH */ + ROR y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + add y2, [rsp + _XFER + 2*4] /* y2 = k + w + S1 + CH */ + vpxor XTMP4, XTMP4, XTMP2 /* XTMP4 = s1 {xBxA} */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + vpshufb XTMP4, XTMP4, SHUF_00BA /* XTMP4 = s1 {00BA} */ + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + vpaddd XTMP0, XTMP0, XTMP4 /* XTMP0 = {..., ..., W[1], W[0]} */ + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + /* compute high s1 */ + vpshufd XTMP2, XTMP0, 0b01010000 /* XTMP2 = W[-2] {DDCC} */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + +ROTATE_ARGS + mov y0, e /* y0 = e */ + ROR y0, (25-11) /* y0 = e >> (25-11) */ + mov y1, a /* y1 = a */ + ROR y1, (22-13) /* y1 = a >> (22-13) */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + mov y2, f /* y2 = f */ + ROR y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + vpsrlq XTMP3, XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xDxC} */ + xor y1, a /* y1 = a ^ (a >> (22-13) */ + xor y2, g /* y2 = f^g */ + vpsrlq X0, XTMP2, 19 /* XTMP3 = W[-2] ror 19 {xDxC} */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + and y2, e /* y2 = (f^g)&e */ + ROR y1, (13-2) /* y1 = (a >> 
(13-2)) ^ (a >> (22-2)) */ + vpsrld XTMP2, XTMP2, 10 /* X0 = W[-2] >> 10 {DDCC} */ + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROR y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + vpxor XTMP2, XTMP2, XTMP3 + ROR y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + add y2, y0 /* y2 = S1 + CH */ + add y2, [rsp + _XFER + 3*4] /* y2 = k + w + S1 + CH */ + vpxor X0, X0, XTMP2 /* X0 = s1 {xDxC} */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + vpshufb X0, X0, SHUF_DC00 /* X0 = s1 {DC00} */ + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + vpaddd X0, X0, XTMP0 /* X0 = {W[3], W[2], W[1], W[0]} */ + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + +ROTATE_ARGS +rotate_Xs +.endm + +/* input is [rsp + _XFER + %1 * 4] */ +.macro DO_ROUND i1 + mov y0, e /* y0 = e */ + ROR y0, (25-11) /* y0 = e >> (25-11) */ + mov y1, a /* y1 = a */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + ROR y1, (22-13) /* y1 = a >> (22-13) */ + mov y2, f /* y2 = f */ + xor y1, a /* y1 = a ^ (a >> (22-13) */ + ROR y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + xor y2, g /* y2 = f^g */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ROR y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + and y2, e /* y2 = (f^g)&e */ + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROR y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + add y2, y0 /* y2 = S1 + CH */ + ROR y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + add y2, [rsp + _XFER + \i1 * 4] /* y2 = k + w + S1 + CH */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + ROTATE_ARGS +.endm + +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; void sha256_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) +;; arg 1 : pointer to input data +;; arg 2 : pointer to digest +;; arg 3 : Num blocks +*/ +.text +.globl _gcry_sha256_transform_amd64_avx +ELF(.type _gcry_sha256_transform_amd64_avx,@function;) +.align 16 +_gcry_sha256_transform_amd64_avx: + vzeroupper + + push rbx + push rbp + push r13 + push r14 + push r15 + + sub rsp, STACK_SIZE + + shl NUM_BLKS, 6 /* convert to bytes */ + jz .Ldone_hash + add NUM_BLKS, INP /* pointer to end of data */ + mov [rsp + _INP_END], NUM_BLKS + + /* load initial digest */ + mov a,[4*0 + CTX] + mov b,[4*1 + CTX] + mov c,[4*2 + CTX] + mov d,[4*3 + CTX] + mov e,[4*4 + CTX] + mov f,[4*5 + CTX] + mov g,[4*6 + CTX] + mov h,[4*7 + CTX] + + vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP] + vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP] + vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP] + +.Loop0: + lea TBL, [.LK256 ADD_RIP] + + /* byte swap first 16 dwords */ + COPY_XMM_AND_BSWAP X0, [INP + 0*16], BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X1, [INP + 1*16], BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X2, [INP + 2*16], BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X3, [INP + 3*16], BYTE_FLIP_MASK + + mov [rsp + _INP], INP + + 
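+ /* Editorial note: X0-X3 now hold the byte-swapped words W[0..15] of + * this block; INP had to be spilled because SRND reuses the same + * register (rdi). */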
/* schedule 48 input dwords, by doing 3 rounds of 16 each */ + mov SRND, 3 +.align 16 +.Loop1: + vpaddd XFER, X0, [TBL + 0*16] + vmovdqa [rsp + _XFER], XFER + FOUR_ROUNDS_AND_SCHED + + vpaddd XFER, X0, [TBL + 1*16] + vmovdqa [rsp + _XFER], XFER + FOUR_ROUNDS_AND_SCHED + + vpaddd XFER, X0, [TBL + 2*16] + vmovdqa [rsp + _XFER], XFER + FOUR_ROUNDS_AND_SCHED + + vpaddd XFER, X0, [TBL + 3*16] + vmovdqa [rsp + _XFER], XFER + add TBL, 4*16 + FOUR_ROUNDS_AND_SCHED + + sub SRND, 1 + jne .Loop1 + + mov SRND, 2 +.Loop2: + vpaddd X0, X0, [TBL + 0*16] + vmovdqa [rsp + _XFER], X0 + DO_ROUND 0 + DO_ROUND 1 + DO_ROUND 2 + DO_ROUND 3 + vpaddd X1, X1, [TBL + 1*16] + vmovdqa [rsp + _XFER], X1 + add TBL, 2*16 + DO_ROUND 0 + DO_ROUND 1 + DO_ROUND 2 + DO_ROUND 3 + + vmovdqa X0, X2 + vmovdqa X1, X3 + + sub SRND, 1 + jne .Loop2 + + addm [4*0 + CTX],a + addm [4*1 + CTX],b + addm [4*2 + CTX],c + addm [4*3 + CTX],d + addm [4*4 + CTX],e + addm [4*5 + CTX],f + addm [4*6 + CTX],g + addm [4*7 + CTX],h + + mov INP, [rsp + _INP] + add INP, 64 + cmp INP, [rsp + _INP_END] + jne .Loop0 + + vzeroall + +.Ldone_hash: + add rsp, STACK_SIZE + + pop r15 + pop r14 + pop r13 + pop rbp + pop rbx + + mov eax, STACK_SIZE + 5*8 + + ret + + +.align 16 +.LK256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 + +/* shuffle xBxA -> 00BA */ +.L_SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 + +/* shuffle xDxC -> DC00 */ +.L_SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha256-avx2-bmi2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha256-avx2-bmi2-amd64.S new file mode 100644 index 0000000..85e663f --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha256-avx2-bmi2-amd64.S @@ -0,0 +1,813 @@ +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright (c) 2012, Intel Corporation +; +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. 
+; +; +; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY +; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; This code is described in an Intel White-Paper: +; "Fast SHA-256 Implementations on Intel Architecture Processors" +; +; To find it, surf to http://www.intel.com/p/en_US/embedded +; and search for that title. +; The paper is expected to be released roughly at the end of April, 2012 +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; This code schedules 2 blocks at a time, with 4 lanes per block +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +*/ +/* + * Conversion to GAS assembly and integration to libgcrypt + * by Jussi Kivilinna <jussi.kivilinna@iki.fi> + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \ + defined(USE_SHA256) + +#ifdef __PIC__ +# define ADD_RIP +rip +#else +# define ADD_RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +.intel_syntax noprefix + +#define VMOVDQ vmovdqu /* ; assume buffers not aligned */ + +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros */ + +/* addm [mem], reg */ +/* Add reg to mem using reg-mem add and store */ +.macro addm p1 p2 + add \p2, \p1 + mov \p1, \p2 +.endm + +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + +X0 = ymm4 +X1 = ymm5 +X2 = ymm6 +X3 = ymm7 + +/* XMM versions of above */ +XWORD0 = xmm4 +XWORD1 = xmm5 +XWORD2 = xmm6 +XWORD3 = xmm7 + +XTMP0 = ymm0 +XTMP1 = ymm1 +XTMP2 = ymm2 +XTMP3 = ymm3 +XTMP4 = ymm8 +XFER = ymm9 +XTMP5 = ymm11 + +SHUF_00BA = ymm10 /* shuffle xBxA -> 00BA */ +SHUF_DC00 = ymm12 /* shuffle xDxC -> DC00 */ +BYTE_FLIP_MASK = ymm13 + +X_BYTE_FLIP_MASK = xmm13 /* XMM version of BYTE_FLIP_MASK */ + +NUM_BLKS = rdx /* 3rd arg */ +CTX = rsi /* 2nd arg */ +INP = rdi /* 1st arg */ +c = ecx +d = r8d +e = edx /* clobbers NUM_BLKS */ +y3 = edi /* clobbers INP */ + +TBL = rbp +SRND = CTX /* SRND is same register as CTX */ + +a = eax +b = ebx +f = r9d +g = r10d +h = r11d +old_h = r11d + +T1 = r12d +y0 = r13d +y1 = r14d +y2 = r15d + + +_XFER_SIZE = 2*64*4 /* 2 blocks, 64 rounds, 4 bytes/round */ +_XMM_SAVE_SIZE = 0 +_INP_END_SIZE = 8 +_INP_SIZE = 8 +_CTX_SIZE = 8 +_RSP_SIZE = 8 + +_XFER = 0 +_XMM_SAVE = _XFER + _XFER_SIZE +_INP_END = _XMM_SAVE + _XMM_SAVE_SIZE +_INP = _INP_END + _INP_END_SIZE +_CTX = _INP + _INP_SIZE +_RSP = _CTX + _CTX_SIZE +STACK_SIZE = _RSP + _RSP_SIZE + +/* rotate_Xs */ +/* Rotate values of symbols X0...X3 */ +.macro rotate_Xs +X_ = X0 +X0 = X1 +X1 = X2 +X2 = X3 +X3 = X_ +.endm + +/* ROTATE_ARGS */ +/* Rotate values of symbols a...h */ +.macro ROTATE_ARGS +old_h = h +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +.macro FOUR_ROUNDS_AND_SCHED XFER +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + mov y3, a /* y3 = a ; MAJA */ + rorx y0, e, 25 /* y0 = e >> 25 ; S1A */ + rorx y1, e, 11 /* y1 = e >> 11 ; S1B */ + + add h, [\XFER+0*4] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + vpalignr XTMP0, X3, X2, 4 /* XTMP0 = W[-7] */ + mov y2, f /* y2 = f ; CH */ + rorx T1, a, 13 /* T1 = a >> 13 ; S0B */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ; S1 */ + xor y2, g /* y2 = f^g ; CH */ + vpaddd XTMP0, XTMP0, X0 /* XTMP0 = W[-7] + W[-16]; y1 = (e >> 6) ; S1 */ + rorx y1, e, 6 /* y1 = (e >> 6) ; S1 */ + + and y2, e /* y2 = (f^g)&e ; CH */ + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 */ + rorx y1, a, 22 /* y1 = a >> 22 ; S0A */ + add d, h /* d = k + w + h + d ; -- */ + + and y3, b /* y3 = (a|c)&b ; MAJA */ + vpalignr XTMP1, X1, X0, 4 /* XTMP1 = W[-15] */ + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ; S0 */ + rorx T1, a, 2 /* T1 = (a >> 2) ; S0 */ + + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + vpsrld XTMP2, XTMP1, 7 + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and T1, c /* T1 = a&c ; MAJB */ + + add y2, y0 /* y2 = S1 + CH ; -- */ + vpslld XTMP3, XTMP1, (32-7) + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + vpor XTMP3, XTMP3, XTMP2 /* XTMP3 = W[-15] ror 7 */ + + vpsrld XTMP2, XTMP1,18 + add h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + lea h, [h + y3] /* h = t1 + S0 + MAJ ; -- */ + + +ROTATE_ARGS + +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + + mov y3, a /* y3 = a ; MAJA */ + rorx y0, e, 25 /* y0 = e >> 25 ; S1A */ + rorx y1, e, 11 /* y1 = e >> 11 ; S1B */ + add h, [\XFER+1*4] 
/* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + + vpsrld XTMP4, XTMP1, 3 /* XTMP4 = W[-15] >> 3 */ + mov y2, f /* y2 = f ; CH */ + rorx T1, a, 13 /* T1 = a >> 13 ; S0B */ + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ; S1 */ + xor y2, g /* y2 = f^g ; CH */ + + + rorx y1, e, 6 /* y1 = (e >> 6) ; S1 */ + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 */ + rorx y1, a, 22 /* y1 = a >> 22 ; S0A */ + and y2, e /* y2 = (f^g)&e ; CH */ + add d, h /* d = k + w + h + d ; -- */ + + vpslld XTMP1, XTMP1, (32-18) + and y3, b /* y3 = (a|c)&b ; MAJA */ + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ; S0 */ + + vpxor XTMP3, XTMP3, XTMP1 + rorx T1, a, 2 /* T1 = (a >> 2) ; S0 */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + + vpxor XTMP3, XTMP3, XTMP2 /* XTMP3 = W[-15] ror 7 ^ W[-15] ror 18 */ + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and T1, c /* T1 = a&c ; MAJB */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + vpxor XTMP1, XTMP3, XTMP4 /* XTMP1 = s0 */ + vpshufd XTMP2, X3, 0b11111010 /* XTMP2 = W[-2] {BBAA} */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + vpaddd XTMP0, XTMP0, XTMP1 /* XTMP0 = W[-16] + W[-7] + s0 */ + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + add h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + lea h, [h + y3] /* h = t1 + S0 + MAJ ; -- */ + + vpsrld XTMP4, XTMP2, 10 /* XTMP4 = W[-2] >> 10 {BBAA} */ + + +ROTATE_ARGS + +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + mov y3, a /* y3 = a ; MAJA */ + rorx y0, e, 25 /* y0 = e >> 25 ; S1A */ + add h, [\XFER+2*4] /* h = k + w + h ; -- */ + + vpsrlq XTMP3, XTMP2, 19 /* XTMP3 = W[-2] ror 19 {xBxA} */ + rorx y1, e, 11 /* y1 = e >> 11 ; S1B */ + or y3, c /* y3 = a|c ; MAJA */ + mov y2, f /* y2 = f ; CH */ + xor y2, g /* y2 = f^g ; CH */ + + rorx T1, a, 13 /* T1 = a >> 13 ; S0B */ + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ; S1 */ + vpsrlq XTMP2, XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xBxA} */ + and y2, e /* y2 = (f^g)&e ; CH */ + + rorx y1, e, 6 /* y1 = (e >> 6) ; S1 */ + vpxor XTMP2, XTMP2, XTMP3 + add d, h /* d = k + w + h + d ; -- */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 */ + rorx y1, a, 22 /* y1 = a >> 22 ; S0A */ + vpxor XTMP4, XTMP4, XTMP2 /* XTMP4 = s1 {xBxA} */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + + vpshufb XTMP4, XTMP4, SHUF_00BA /* XTMP4 = s1 {00BA} */ + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ; S0 */ + rorx T1, a, 2 /* T1 = (a >> 2) ; S0 */ + vpaddd XTMP0, XTMP0, XTMP4 /* XTMP0 = {..., ..., W[1], W[0]} */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and T1, c /* T1 = a&c ; MAJB */ + add y2, y0 /* y2 = S1 + CH ; -- */ + vpshufd XTMP2, XTMP0, 0b1010000 /* XTMP2 = W[-2] {DDCC} */ + + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + add h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + + lea h, [h + y3] /* h = t1 + S0 + MAJ ; -- */ + + +ROTATE_ARGS + +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + mov y3, a /* y3 = a ; MAJA */ + rorx y0, e, 25 /* y0 = e >> 25 ; S1A */ + rorx y1, e, 11 /* y1 = e >> 11 ; S1B */ + add h, [\XFER+3*4] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + + vpsrld XTMP5, XTMP2, 10 /* XTMP5 = W[-2] >> 10 {DDCC} */ + mov y2, f /* y2 = f ; CH */ + rorx T1, a, 13 /* T1 = a >> 13 ; S0B */ + xor y0, y1 /* y0 = 
(e>>25) ^ (e>>11) ; S1 */ + xor y2, g /* y2 = f^g ; CH */ + + + vpsrlq XTMP3, XTMP2, 19 /* XTMP3 = W[-2] ror 19 {xDxC} */ + rorx y1, e, 6 /* y1 = (e >> 6) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + add d, h /* d = k + w + h + d ; -- */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + + vpsrlq XTMP2, XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xDxC} */ + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + + vpxor XTMP2, XTMP2, XTMP3 + rorx y1, a, 22 /* y1 = a >> 22 ; S0A */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + vpxor XTMP5, XTMP5, XTMP2 /* XTMP5 = s1 {xDxC} */ + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ; S0 */ + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + rorx T1, a, 2 /* T1 = (a >> 2) ; S0 */ + vpshufb XTMP5, XTMP5, SHUF_DC00 /* XTMP5 = s1 {DC00} */ + + vpaddd X0, XTMP5, XTMP0 /* X0 = {W[3], W[2], W[1], W[0]} */ + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and T1, c /* T1 = a&c ; MAJB */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + + add h, y1 /* h = k + w + h + S0 ; -- */ + add h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + lea h, [h + y3] /* h = t1 + S0 + MAJ ; -- */ + +ROTATE_ARGS +rotate_Xs +.endm + +.macro DO_4ROUNDS XFER +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + mov y2, f /* y2 = f ; CH */ + rorx y0, e, 25 /* y0 = e >> 25 ; S1A */ + rorx y1, e, 11 /* y1 = e >> 11 ; S1B */ + xor y2, g /* y2 = f^g ; CH */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ; S1 */ + rorx y1, e, 6 /* y1 = (e >> 6) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 */ + rorx T1, a, 13 /* T1 = a >> 13 ; S0B */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + rorx y1, a, 22 /* y1 = a >> 22 ; S0A */ + mov y3, a /* y3 = a ; MAJA */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ; S0 */ + rorx T1, a, 2 /* T1 = (a >> 2) ; S0 */ + add h, [\XFER + 4*0] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + and T1, c /* T1 = a&c ; MAJB */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + + add d, h /* d = k + w + h + d ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + + /* add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + + /* lea h, [h + y3] ; h = t1 + S0 + MAJ ; -- */ + + ROTATE_ARGS + +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + add old_h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + mov y2, f /* y2 = f ; CH */ + rorx y0, e, 25 /* y0 = e >> 25 ; S1A */ + rorx y1, e, 11 /* y1 = e >> 11 ; S1B */ + xor y2, g /* y2 = f^g ; CH */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ; S1 */ + rorx y1, e, 6 /* y1 = (e >> 6) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + add old_h, y3 /* h = t1 + S0 + MAJ ; -- */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 */ + rorx T1, a, 13 /* T1 = a >> 13 ; S0B */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + rorx y1, a, 22 /* y1 = a >> 22 ; S0A */ + mov y3, a /* y3 = a ; MAJA */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ; S0 */ + rorx T1, a, 2 /* T1 = (a >> 2) ; S0 */ + add h, [\XFER + 4*1] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + and T1, c /* T1 = a&c ; MAJB */ + 
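+	/* Note: the final two updates of h for this round ("add h, y2" and
+	 * "lea h, [h + y3]", left commented out below) are deferred into the
+	 * top of the following round via old_h, apparently to shorten the
+	 * dependency chain between consecutive rounds. */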
add y2, y0 /* y2 = S1 + CH ; -- */ + + + add d, h /* d = k + w + h + d ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + + /* add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + + /* lea h, [h + y3] ; h = t1 + S0 + MAJ ; -- */ + + ROTATE_ARGS + +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + add old_h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + mov y2, f /* y2 = f ; CH */ + rorx y0, e, 25 /* y0 = e >> 25 ; S1A */ + rorx y1, e, 11 /* y1 = e >> 11 ; S1B */ + xor y2, g /* y2 = f^g ; CH */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ; S1 */ + rorx y1, e, 6 /* y1 = (e >> 6) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + add old_h, y3 /* h = t1 + S0 + MAJ ; -- */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 */ + rorx T1, a, 13 /* T1 = a >> 13 ; S0B */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + rorx y1, a, 22 /* y1 = a >> 22 ; S0A */ + mov y3, a /* y3 = a ; MAJA */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ; S0 */ + rorx T1, a, 2 /* T1 = (a >> 2) ; S0 */ + add h, [\XFER + 4*2] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + and T1, c /* T1 = a&c ; MAJB */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + + add d, h /* d = k + w + h + d ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + + /* add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + + /* lea h, [h + y3] ; h = t1 + S0 + MAJ ; -- */ + + ROTATE_ARGS + +/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + add old_h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + mov y2, f /* y2 = f ; CH */ + rorx y0, e, 25 /* y0 = e >> 25 ; S1A */ + rorx y1, e, 11 /* y1 = e >> 11 ; S1B */ + xor y2, g /* y2 = f^g ; CH */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ; S1 */ + rorx y1, e, 6 /* y1 = (e >> 6) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + add old_h, y3 /* h = t1 + S0 + MAJ ; -- */ + + xor y0, y1 /* y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 */ + rorx T1, a, 13 /* T1 = a >> 13 ; S0B */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + rorx y1, a, 22 /* y1 = a >> 22 ; S0A */ + mov y3, a /* y3 = a ; MAJA */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ; S0 */ + rorx T1, a, 2 /* T1 = (a >> 2) ; S0 */ + add h, [\XFER + 4*3] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + xor y1, T1 /* y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + and T1, c /* T1 = a&c ; MAJB */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + + add d, h /* d = k + w + h + d ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + + add h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0; -- */ + + lea h, [h + y3] /* h = t1 + S0 + MAJ ; -- */ + + ROTATE_ARGS +.endm + +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; void sha256_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) +;; arg 1 : pointer to input data +;; arg 2 : pointer to digest +;; arg 3 : Num blocks +*/ +.text +.globl _gcry_sha256_transform_amd64_avx2 +ELF(.type 
_gcry_sha256_transform_amd64_avx2,@function) +.align 32 +_gcry_sha256_transform_amd64_avx2: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + vzeroupper + + mov rax, rsp + sub rsp, STACK_SIZE + and rsp, -32 + mov [rsp + _RSP], rax + + shl NUM_BLKS, 6 /* convert to bytes */ + jz .Ldone_hash + lea NUM_BLKS, [NUM_BLKS + INP - 64] /* pointer to last block */ + mov [rsp + _INP_END], NUM_BLKS + + cmp INP, NUM_BLKS + je .Lonly_one_block + + /* ; load initial digest */ + mov a,[4*0 + CTX] + mov b,[4*1 + CTX] + mov c,[4*2 + CTX] + mov d,[4*3 + CTX] + mov e,[4*4 + CTX] + mov f,[4*5 + CTX] + mov g,[4*6 + CTX] + mov h,[4*7 + CTX] + + vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP] + vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP] + vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP] + + mov [rsp + _CTX], CTX + +.Loop0: + lea TBL, [.LK256 ADD_RIP] + + /* ; Load first 16 dwords from two blocks */ + VMOVDQ XTMP0, [INP + 0*32] + VMOVDQ XTMP1, [INP + 1*32] + VMOVDQ XTMP2, [INP + 2*32] + VMOVDQ XTMP3, [INP + 3*32] + + /* ; byte swap data */ + vpshufb XTMP0, XTMP0, BYTE_FLIP_MASK + vpshufb XTMP1, XTMP1, BYTE_FLIP_MASK + vpshufb XTMP2, XTMP2, BYTE_FLIP_MASK + vpshufb XTMP3, XTMP3, BYTE_FLIP_MASK + + /* ; transpose data into high/low halves */ + vperm2i128 X0, XTMP0, XTMP2, 0x20 + vperm2i128 X1, XTMP0, XTMP2, 0x31 + vperm2i128 X2, XTMP1, XTMP3, 0x20 + vperm2i128 X3, XTMP1, XTMP3, 0x31 + +.Last_block_enter: + add INP, 64 + mov [rsp + _INP], INP + + /* ; schedule 48 input dwords, by doing 3 rounds of 12 each */ + xor SRND, SRND + +.align 16 +.Loop1: + vpaddd XFER, X0, [TBL + SRND + 0*32] + vmovdqa [rsp + _XFER + SRND + 0*32], XFER + FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 0*32 + + vpaddd XFER, X0, [TBL + SRND + 1*32] + vmovdqa [rsp + _XFER + SRND + 1*32], XFER + FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 1*32 + + vpaddd XFER, X0, [TBL + SRND + 2*32] + vmovdqa [rsp + _XFER + SRND + 2*32], XFER + FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 2*32 + + vpaddd XFER, X0, [TBL + SRND + 3*32] + vmovdqa [rsp + _XFER + SRND + 3*32], XFER + FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 3*32 + + add SRND, 4*32 + cmp SRND, 3 * 4*32 + jb .Loop1 + +.Loop2: + /* ; Do last 16 rounds with no scheduling */ + vpaddd XFER, X0, [TBL + SRND + 0*32] + vmovdqa [rsp + _XFER + SRND + 0*32], XFER + DO_4ROUNDS rsp + _XFER + SRND + 0*32 + vpaddd XFER, X1, [TBL + SRND + 1*32] + vmovdqa [rsp + _XFER + SRND + 1*32], XFER + DO_4ROUNDS rsp + _XFER + SRND + 1*32 + add SRND, 2*32 + + vmovdqa X0, X2 + vmovdqa X1, X3 + + cmp SRND, 4 * 4*32 + jb .Loop2 + + mov CTX, [rsp + _CTX] + mov INP, [rsp + _INP] + + addm [4*0 + CTX],a + addm [4*1 + CTX],b + addm [4*2 + CTX],c + addm [4*3 + CTX],d + addm [4*4 + CTX],e + addm [4*5 + CTX],f + addm [4*6 + CTX],g + addm [4*7 + CTX],h + + cmp INP, [rsp + _INP_END] + ja .Ldone_hash + + /* ;;; Do second block using previously scheduled results */ + xor SRND, SRND +.align 16 +.Loop3: + DO_4ROUNDS rsp + _XFER + SRND + 0*32 + 16 + DO_4ROUNDS rsp + _XFER + SRND + 1*32 + 16 + add SRND, 2*32 + cmp SRND, 4 * 4*32 + jb .Loop3 + + mov CTX, [rsp + _CTX] + mov INP, [rsp + _INP] + add INP, 64 + + addm [4*0 + CTX],a + addm [4*1 + CTX],b + addm [4*2 + CTX],c + addm [4*3 + CTX],d + addm [4*4 + CTX],e + addm [4*5 + CTX],f + addm [4*6 + CTX],g + addm [4*7 + CTX],h + + cmp INP, [rsp + _INP_END] + jb .Loop0 + ja .Ldone_hash + +.Ldo_last_block: + /* ;;; do last block */ + lea TBL, [.LK256 ADD_RIP] + + VMOVDQ XWORD0, [INP + 0*16] + VMOVDQ XWORD1, [INP + 1*16] + VMOVDQ XWORD2, [INP + 2*16] + VMOVDQ XWORD3, [INP + 3*16] + + 
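+	/* Only one 64-byte block remains here, so it is processed through
+	 * the 128-bit XMM aliases of X0..X3 (XWORD0..XWORD3): byte-swap the
+	 * data below, then re-enter the common round code at
+	 * .Last_block_enter. */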
vpshufb XWORD0, XWORD0, X_BYTE_FLIP_MASK + vpshufb XWORD1, XWORD1, X_BYTE_FLIP_MASK + vpshufb XWORD2, XWORD2, X_BYTE_FLIP_MASK + vpshufb XWORD3, XWORD3, X_BYTE_FLIP_MASK + + jmp .Last_block_enter + +.Lonly_one_block: + + /* ; load initial digest */ + mov a,[4*0 + CTX] + mov b,[4*1 + CTX] + mov c,[4*2 + CTX] + mov d,[4*3 + CTX] + mov e,[4*4 + CTX] + mov f,[4*5 + CTX] + mov g,[4*6 + CTX] + mov h,[4*7 + CTX] + + vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP] + vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP] + vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP] + + mov [rsp + _CTX], CTX + jmp .Ldo_last_block + +.Ldone_hash: + mov rsp, [rsp + _RSP] + + vzeroall + + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + + /* stack burn depth */ + mov eax, STACK_SIZE + 6*8 + 31 + + ret + +.align 64 +.LK256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.LPSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203 + +/* shuffle xBxA -> 00BA */ +.L_SHUF_00BA: + .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100 + +/* shuffle xDxC -> DC00 */ +.L_SHUF_DC00: + .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha256-ssse3-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha256-ssse3-amd64.S new file mode 100644 index 0000000..a9213e4 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha256-ssse3-amd64.S @@ -0,0 +1,547 @@ +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright (c) 2012, Intel Corporation +; +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; +; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY +; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; This code is described in an Intel White-Paper: +; "Fast SHA-256 Implementations on Intel Architecture Processors" +; +; To find it, surf to http://www.intel.com/p/en_US/embedded +; and search for that title. +; The paper is expected to be released roughly at the end of April, 2012 +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; This code schedules 1 blocks at a time, with 4 lanes per block +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +*/ +/* + * Conversion to GAS assembly and integration to libgcrypt + * by Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * Note: original implementation was named as SHA256-SSE4. However, only SSSE3 + * is required. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA256) + +#ifdef __PIC__ +# define ADD_RIP +rip +#else +# define ADD_RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +.intel_syntax noprefix + +#define MOVDQ movdqu /* assume buffers not aligned */ + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros*/ + +/* addm [mem], reg + * Add reg to mem using reg-mem add and store */ +.macro addm p1 p2 + add \p2, \p1 + mov \p1, \p2 +.endm + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + +/* COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask + * Load xmm with mem and byte swap each dword */ +.macro COPY_XMM_AND_BSWAP p1 p2 p3 + MOVDQ \p1, \p2 + pshufb \p1, \p3 +.endm + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + +X0 = xmm4 +X1 = xmm5 +X2 = xmm6 +X3 = xmm7 + +XTMP0 = xmm0 +XTMP1 = xmm1 +XTMP2 = xmm2 +XTMP3 = xmm3 +XTMP4 = xmm8 +XFER = xmm9 + +SHUF_00BA = xmm10 /* shuffle xBxA -> 00BA */ +SHUF_DC00 = xmm11 /* shuffle xDxC -> DC00 */ +BYTE_FLIP_MASK = xmm12 + +NUM_BLKS = rdx /* 3rd arg */ +CTX = rsi /* 2nd arg */ +INP = rdi /* 1st arg */ + +SRND = rdi /* clobbers INP */ +c = ecx +d = r8d +e = edx + +TBL = rbp +a = eax +b = ebx + +f = r9d +g = r10d +h = r11d + +y0 = r13d +y1 = r14d +y2 = r15d + + + +#define _INP_END_SIZE 8 +#define _INP_SIZE 8 +#define _XFER_SIZE 8 +#define _XMM_SAVE_SIZE 0 +/* STACK_SIZE plus pushes must be an odd multiple of 8 */ +#define _ALIGN_SIZE 8 + +#define _INP_END 0 +#define _INP (_INP_END + _INP_END_SIZE) +#define _XFER (_INP + _INP_SIZE) +#define _XMM_SAVE (_XFER + _XFER_SIZE + _ALIGN_SIZE) +#define STACK_SIZE (_XMM_SAVE + _XMM_SAVE_SIZE) + +/* rotate_Xs + * Rotate values of symbols X0...X3 */ +.macro rotate_Xs +X_ = X0 +X0 = X1 +X1 = X2 +X2 = X3 +X3 = X_ +.endm + +/* ROTATE_ARGS + * Rotate values of symbols a...h */ +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +.macro FOUR_ROUNDS_AND_SCHED + /* compute s0 four at a time and s1 two at a time + * compute W[-16] + W[-7] 4 at a time */ + movdqa XTMP0, X3 + mov y0, e /* y0 = e */ + ror y0, (25-11) /* y0 = e >> (25-11) */ + mov y1, a /* y1 = a */ + palignr XTMP0, X2, 4 /* XTMP0 = W[-7] */ + ror y1, (22-13) /* y1 = a >> (22-13) */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + mov y2, f /* y2 = f */ + ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + movdqa XTMP1, X1 + xor y1, a /* y1 = a ^ (a >> (22-13) */ + xor y2, g /* y2 = f^g */ + paddd XTMP0, X0 /* XTMP0 = W[-7] + W[-16] */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + and y2, e /* y2 = (f^g)&e */ + ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + /* compute s0 */ + palignr XTMP1, X0, 4 /* XTMP1 = W[-15] */ + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ror y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + movdqa XTMP2, XTMP1 /* XTMP2 = W[-15] */ + ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + add y2, y0 /* y2 = S1 + CH */ + add y2, [rsp + _XFER + 0*4] /* y2 = k + w + S1 + CH */ + movdqa XTMP3, XTMP1 /* XTMP3 = W[-15] */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + pslld XTMP1, (32-7) + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + psrld XTMP2, 7 + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + por XTMP1, XTMP2 /* XTMP1 = W[-15] ror 7 */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + +ROTATE_ARGS + movdqa XTMP2, XTMP3 /* XTMP2 = W[-15] */ + mov y0, e /* y0 = e */ + mov y1, a /* y1 = a */ + movdqa XTMP4, XTMP3 /* XTMP4 = W[-15] */ + ror y0, (25-11) /* y0 = e >> (25-11) */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + 
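+	/* SSE2 has no per-dword rotate instruction, so the W[-15] rotations
+	 * are built from shift pairs: pslld/psrld combined with por (or pxor)
+	 * yield "ror 7" and "ror 18", and together with XTMP4 = W[-15] >> 3
+	 * this forms s0 = (x ror 7) ^ (x ror 18) ^ (x >> 3). */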
mov y2, f /* y2 = f */ + ror y1, (22-13) /* y1 = a >> (22-13) */ + pslld XTMP3, (32-18) + xor y1, a /* y1 = a ^ (a >> (22-13) */ + ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + xor y2, g /* y2 = f^g */ + psrld XTMP2, 18 + ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + and y2, e /* y2 = (f^g)&e */ + ror y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + pxor XTMP1, XTMP3 + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + psrld XTMP4, 3 /* XTMP4 = W[-15] >> 3 */ + add y2, y0 /* y2 = S1 + CH */ + add y2, [rsp + _XFER + 1*4] /* y2 = k + w + S1 + CH */ + ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + pxor XTMP1, XTMP2 /* XTMP1 = W[-15] ror 7 ^ W[-15] ror 18 */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + pxor XTMP1, XTMP4 /* XTMP1 = s0 */ + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + /* compute low s1 */ + pshufd XTMP2, X3, 0b11111010 /* XTMP2 = W[-2] {BBAA} */ + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + paddd XTMP0, XTMP1 /* XTMP0 = W[-16] + W[-7] + s0 */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + +ROTATE_ARGS + movdqa XTMP3, XTMP2 /* XTMP3 = W[-2] {BBAA} */ + mov y0, e /* y0 = e */ + mov y1, a /* y1 = a */ + ror y0, (25-11) /* y0 = e >> (25-11) */ + movdqa XTMP4, XTMP2 /* XTMP4 = W[-2] {BBAA} */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + ror y1, (22-13) /* y1 = a >> (22-13) */ + mov y2, f /* y2 = f */ + xor y1, a /* y1 = a ^ (a >> (22-13) */ + ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + psrlq XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xBxA} */ + xor y2, g /* y2 = f^g */ + psrlq XTMP3, 19 /* XTMP3 = W[-2] ror 19 {xBxA} */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + and y2, e /* y2 = (f^g)&e */ + psrld XTMP4, 10 /* XTMP4 = W[-2] >> 10 {BBAA} */ + ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + ror y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + pxor XTMP2, XTMP3 + add y2, y0 /* y2 = S1 + CH */ + ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + add y2, [rsp + _XFER + 2*4] /* y2 = k + w + S1 + CH */ + pxor XTMP4, XTMP2 /* XTMP4 = s1 {xBxA} */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + pshufb XTMP4, SHUF_00BA /* XTMP4 = s1 {00BA} */ + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + paddd XTMP0, XTMP4 /* XTMP0 = {..., ..., W[1], W[0]} */ + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + /* compute high s1 */ + pshufd XTMP2, XTMP0, 0b01010000 /* XTMP2 = W[-2] {DDCC} */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + +ROTATE_ARGS + movdqa XTMP3, XTMP2 /* XTMP3 = W[-2] {DDCC} */ + mov y0, e /* y0 = e */ + ror y0, (25-11) /* y0 = e >> (25-11) */ + mov y1, a /* y1 = a */ + movdqa X0, XTMP2 /* X0 = W[-2] {DDCC} */ + ror y1, (22-13) /* y1 = a >> (22-13) */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + mov y2, f /* y2 = f */ + ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + psrlq XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xDxC} */ + xor y1, a /* y1 = a ^ (a >> (22-13) */ + xor y2, g /* y2 = f^g */ + psrlq XTMP3, 19 /* XTMP3 = W[-2] ror 19 {xDxC} */ + xor 
y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + and y2, e /* y2 = (f^g)&e */ + ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + psrld X0, 10 /* X0 = W[-2] >> 10 {DDCC} */ + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ror y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + pxor XTMP2, XTMP3 + ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + add y2, y0 /* y2 = S1 + CH */ + add y2, [rsp + _XFER + 3*4] /* y2 = k + w + S1 + CH */ + pxor X0, XTMP2 /* X0 = s1 {xDxC} */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + pshufb X0, SHUF_DC00 /* X0 = s1 {DC00} */ + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + paddd X0, XTMP0 /* X0 = {W[3], W[2], W[1], W[0]} */ + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + +ROTATE_ARGS +rotate_Xs +.endm + +/* input is [rsp + _XFER + %1 * 4] */ +.macro DO_ROUND i1 + mov y0, e /* y0 = e */ + ror y0, (25-11) /* y0 = e >> (25-11) */ + mov y1, a /* y1 = a */ + xor y0, e /* y0 = e ^ (e >> (25-11)) */ + ror y1, (22-13) /* y1 = a >> (22-13) */ + mov y2, f /* y2 = f */ + xor y1, a /* y1 = a ^ (a >> (22-13) */ + ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + xor y2, g /* y2 = f^g */ + xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + and y2, e /* y2 = (f^g)&e */ + xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ror y0, 6 /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + xor y2, g /* y2 = CH = ((f^g)&e)^g */ + add y2, y0 /* y2 = S1 + CH */ + ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + add y2, [rsp + _XFER + \i1 * 4] /* y2 = k + w + S1 + CH */ + mov y0, a /* y0 = a */ + add h, y2 /* h = h + S1 + CH + k + w */ + mov y2, a /* y2 = a */ + or y0, c /* y0 = a|c */ + add d, h /* d = d + h + S1 + CH + k + w */ + and y2, c /* y2 = a&c */ + and y0, b /* y0 = (a|c)&b */ + add h, y1 /* h = h + S1 + CH + k + w + S0 */ + or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */ + lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */ + ROTATE_ARGS +.endm + +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; void sha256_sse4(void *input_data, UINT32 digest[8], UINT64 num_blks) +;; arg 1 : pointer to input data +;; arg 2 : pointer to digest +;; arg 3 : Num blocks +*/ +.text +.globl _gcry_sha256_transform_amd64_ssse3 +ELF(.type _gcry_sha256_transform_amd64_ssse3,@function;) +.align 16 +_gcry_sha256_transform_amd64_ssse3: + push rbx + push rbp + push r13 + push r14 + push r15 + + sub rsp, STACK_SIZE + + shl NUM_BLKS, 6 /* convert to bytes */ + jz .Ldone_hash + add NUM_BLKS, INP /* pointer to end of data */ + mov [rsp + _INP_END], NUM_BLKS + + /* load initial digest */ + mov a,[4*0 + CTX] + mov b,[4*1 + CTX] + mov c,[4*2 + CTX] + mov d,[4*3 + CTX] + mov e,[4*4 + CTX] + mov f,[4*5 + CTX] + mov g,[4*6 + CTX] + mov h,[4*7 + CTX] + + movdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP] + movdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP] + movdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP] + +.Loop0: + lea TBL, [.LK256 ADD_RIP] + + /* byte swap first 16 dwords */ + COPY_XMM_AND_BSWAP X0, [INP + 0*16], BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X1, [INP + 1*16], BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X2, [INP + 2*16], BYTE_FLIP_MASK + 
COPY_XMM_AND_BSWAP X3, [INP + 3*16], BYTE_FLIP_MASK + + mov [rsp + _INP], INP + + /* schedule 48 input dwords, by doing 3 rounds of 16 each */ + mov SRND, 3 +.align 16 +.Loop1: + movdqa XFER, [TBL + 0*16] + paddd XFER, X0 + movdqa [rsp + _XFER], XFER + FOUR_ROUNDS_AND_SCHED + + movdqa XFER, [TBL + 1*16] + paddd XFER, X0 + movdqa [rsp + _XFER], XFER + FOUR_ROUNDS_AND_SCHED + + movdqa XFER, [TBL + 2*16] + paddd XFER, X0 + movdqa [rsp + _XFER], XFER + FOUR_ROUNDS_AND_SCHED + + movdqa XFER, [TBL + 3*16] + paddd XFER, X0 + movdqa [rsp + _XFER], XFER + add TBL, 4*16 + FOUR_ROUNDS_AND_SCHED + + sub SRND, 1 + jne .Loop1 + + mov SRND, 2 +.Loop2: + paddd X0, [TBL + 0*16] + movdqa [rsp + _XFER], X0 + DO_ROUND 0 + DO_ROUND 1 + DO_ROUND 2 + DO_ROUND 3 + paddd X1, [TBL + 1*16] + movdqa [rsp + _XFER], X1 + add TBL, 2*16 + DO_ROUND 0 + DO_ROUND 1 + DO_ROUND 2 + DO_ROUND 3 + + movdqa X0, X2 + movdqa X1, X3 + + sub SRND, 1 + jne .Loop2 + + addm [4*0 + CTX],a + addm [4*1 + CTX],b + addm [4*2 + CTX],c + addm [4*3 + CTX],d + addm [4*4 + CTX],e + addm [4*5 + CTX],f + addm [4*6 + CTX],g + addm [4*7 + CTX],h + + mov INP, [rsp + _INP] + add INP, 64 + cmp INP, [rsp + _INP_END] + jne .Loop0 + + pxor xmm0, xmm0 + pxor xmm1, xmm1 + pxor xmm2, xmm2 + pxor xmm3, xmm3 + pxor xmm4, xmm4 + pxor xmm5, xmm5 + pxor xmm6, xmm6 + pxor xmm7, xmm7 + pxor xmm8, xmm8 + pxor xmm9, xmm9 + pxor xmm10, xmm10 + pxor xmm11, xmm11 + pxor xmm12, xmm12 + +.Ldone_hash: + add rsp, STACK_SIZE + + pop r15 + pop r14 + pop r13 + pop rbp + pop rbx + + mov eax, STACK_SIZE + 5*8 + + ret + + +.align 16 +.LK256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 + +/* shuffle xBxA -> 00BA */ +.L_SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 + +/* shuffle xDxC -> DC00 */ +.L_SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha256.c b/libotr/libgcrypt-1.8.7/cipher/sha256.c new file mode 100644 index 0000000..d174321 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha256.c @@ -0,0 +1,707 @@ +/* sha256.c - SHA256 hash function + * Copyright (C) 2003, 2006, 2008, 2009 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + +/* Test vectors: + + "abc" + SHA224: 23097d22 3405d822 8642a477 bda255b3 2aadbce4 bda0b3f7 e36c9da7 + SHA256: ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad + + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + SHA224: 75388b16 512776cc 5dba5da1 fd890150 b0c6455c b4f58b19 52522525 + SHA256: 248d6a61 d20638b8 e5c02693 0c3e6039 a33ce459 64ff2167 f6ecedd4 19db06c1 + + "a" one million times + SHA224: 20794655 980c91d8 bbb4c1ea 97618a4b f03f4258 1948b2ee 4ee7ad67 + SHA256: cdc76e5c 9914fb92 81a1c7e2 84d73e67 f1809a48 a497200e 046d39cc c7112cd0 + + */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher.h" +#include "hash-common.h" + + +/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ +#undef USE_SSSE3 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_SSSE3 1 +#endif + +/* USE_AVX indicates whether to compile with Intel AVX code. */ +#undef USE_AVX +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AVX 1 +#endif + +/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */ +#undef USE_AVX2 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ + defined(HAVE_GCC_INLINE_ASM_BMI2) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AVX2 1 +#endif + +/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly + * code. 
*/ +#undef USE_ARM_CE +#ifdef ENABLE_ARM_CRYPTO_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) +# define USE_ARM_CE 1 +# elif defined(__AARCH64EL__) \ + && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) +# define USE_ARM_CE 1 +# endif +#endif + + +typedef struct { + gcry_md_block_ctx_t bctx; + u32 h0,h1,h2,h3,h4,h5,h6,h7; +#ifdef USE_SSSE3 + unsigned int use_ssse3:1; +#endif +#ifdef USE_AVX + unsigned int use_avx:1; +#endif +#ifdef USE_AVX2 + unsigned int use_avx2:1; +#endif +#ifdef USE_ARM_CE + unsigned int use_arm_ce:1; +#endif +} SHA256_CONTEXT; + + +static unsigned int +transform (void *c, const unsigned char *data, size_t nblks); + + +static void +sha256_init (void *context, unsigned int flags) +{ + SHA256_CONTEXT *hd = context; + unsigned int features = _gcry_get_hw_features (); + + (void)flags; + + hd->h0 = 0x6a09e667; + hd->h1 = 0xbb67ae85; + hd->h2 = 0x3c6ef372; + hd->h3 = 0xa54ff53a; + hd->h4 = 0x510e527f; + hd->h5 = 0x9b05688c; + hd->h6 = 0x1f83d9ab; + hd->h7 = 0x5be0cd19; + + hd->bctx.nblocks = 0; + hd->bctx.nblocks_high = 0; + hd->bctx.count = 0; + hd->bctx.blocksize = 64; + hd->bctx.bwrite = transform; + +#ifdef USE_SSSE3 + hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; +#endif +#ifdef USE_AVX + /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs. + * Therefore use this implementation on Intel CPUs only. */ + hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); +#endif +#ifdef USE_AVX2 + hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2); +#endif +#ifdef USE_ARM_CE + hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0; +#endif + (void)features; +} + + +static void +sha224_init (void *context, unsigned int flags) +{ + SHA256_CONTEXT *hd = context; + unsigned int features = _gcry_get_hw_features (); + + (void)flags; + + hd->h0 = 0xc1059ed8; + hd->h1 = 0x367cd507; + hd->h2 = 0x3070dd17; + hd->h3 = 0xf70e5939; + hd->h4 = 0xffc00b31; + hd->h5 = 0x68581511; + hd->h6 = 0x64f98fa7; + hd->h7 = 0xbefa4fa4; + + hd->bctx.nblocks = 0; + hd->bctx.nblocks_high = 0; + hd->bctx.count = 0; + hd->bctx.blocksize = 64; + hd->bctx.bwrite = transform; + +#ifdef USE_SSSE3 + hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; +#endif +#ifdef USE_AVX + /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs. + * Therefore use this implementation on Intel CPUs only. */ + hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); +#endif +#ifdef USE_AVX2 + hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2); +#endif +#ifdef USE_ARM_CE + hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0; +#endif + (void)features; +} + + +/* + Transform the message X which consists of 16 32-bit-words. See FIPS + 180-2 for details. */ +#define R(a,b,c,d,e,f,g,h,k,w) do \ + { \ + t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + (k) + (w); \ + t2 = Sum0((a)) + Maj((a),(b),(c)); \ + d += t1; \ + h = t1 + t2; \ + } while (0) + +/* (4.2) same as SHA-1's F1. 
*/ +#define Cho(x, y, z) (z ^ (x & (y ^ z))) + +/* (4.3) same as SHA-1's F3 */ +#define Maj(x, y, z) ((x & y) + (z & (x ^ y))) + +/* (4.4) */ +#define Sum0(x) (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22)) + +/* (4.5) */ +#define Sum1(x) (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25)) + +/* Message expansion */ +#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3)) /* (4.6) */ +#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10)) /* (4.7) */ +#define I(i) ( w[i] = buf_get_be32(data + i * 4) ) +#define W(i) ( w[i&0x0f] = S1(w[(i-2) &0x0f]) \ + + w[(i-7) &0x0f] \ + + S0(w[(i-15)&0x0f]) \ + + w[(i-16)&0x0f] ) + +static unsigned int +transform_blk (void *ctx, const unsigned char *data) +{ + SHA256_CONTEXT *hd = ctx; + static const u32 K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + }; + + u32 a,b,c,d,e,f,g,h,t1,t2; + u32 w[16]; + + a = hd->h0; + b = hd->h1; + c = hd->h2; + d = hd->h3; + e = hd->h4; + f = hd->h5; + g = hd->h6; + h = hd->h7; + + R(a, b, c, d, e, f, g, h, K[0], I(0)); + R(h, a, b, c, d, e, f, g, K[1], I(1)); + R(g, h, a, b, c, d, e, f, K[2], I(2)); + R(f, g, h, a, b, c, d, e, K[3], I(3)); + R(e, f, g, h, a, b, c, d, K[4], I(4)); + R(d, e, f, g, h, a, b, c, K[5], I(5)); + R(c, d, e, f, g, h, a, b, K[6], I(6)); + R(b, c, d, e, f, g, h, a, K[7], I(7)); + R(a, b, c, d, e, f, g, h, K[8], I(8)); + R(h, a, b, c, d, e, f, g, K[9], I(9)); + R(g, h, a, b, c, d, e, f, K[10], I(10)); + R(f, g, h, a, b, c, d, e, K[11], I(11)); + R(e, f, g, h, a, b, c, d, K[12], I(12)); + R(d, e, f, g, h, a, b, c, K[13], I(13)); + R(c, d, e, f, g, h, a, b, K[14], I(14)); + R(b, c, d, e, f, g, h, a, K[15], I(15)); + + R(a, b, c, d, e, f, g, h, K[16], W(16)); + R(h, a, b, c, d, e, f, g, K[17], W(17)); + R(g, h, a, b, c, d, e, f, K[18], W(18)); + R(f, g, h, a, b, c, d, e, K[19], W(19)); + R(e, f, g, h, a, b, c, d, K[20], W(20)); + R(d, e, f, g, h, a, b, c, K[21], W(21)); + R(c, d, e, f, g, h, a, b, K[22], W(22)); + R(b, c, d, e, f, g, h, a, K[23], W(23)); + R(a, b, c, d, e, f, g, h, K[24], W(24)); + R(h, a, b, c, d, e, f, g, K[25], W(25)); + R(g, h, a, b, c, d, e, f, K[26], W(26)); + R(f, g, h, a, b, c, d, e, K[27], W(27)); + R(e, f, g, h, a, b, c, d, K[28], W(28)); + R(d, e, f, g, h, a, b, c, K[29], W(29)); + R(c, d, e, f, g, h, a, b, K[30], W(30)); + R(b, c, d, e, f, g, h, a, K[31], W(31)); + + R(a, b, c, d, e, f, g, h, K[32], W(32)); + R(h, a, b, c, d, e, f, g, K[33], W(33)); + R(g, h, a, b, c, d, e, f, K[34], W(34)); + R(f, g, h, a, b, c, d, e, K[35], W(35)); + R(e, f, g, h, a, b, c, d, K[36], W(36)); + R(d, e, f, g, h, a, b, c, K[37], W(37)); + R(c, d, e, f, g, h, a, b, K[38], W(38)); + R(b, c, d, e, f, g, h, a, K[39], W(39)); + R(a, b, c, d, e, f, g, h, K[40], W(40)); + R(h, a, b, c, d, e, f, g, K[41], W(41)); + R(g, h, a, b, c, d, e, f, K[42], W(42)); + R(f, 
g, h, a, b, c, d, e, K[43], W(43));
+  R(e, f, g, h, a, b, c, d, K[44], W(44));
+  R(d, e, f, g, h, a, b, c, K[45], W(45));
+  R(c, d, e, f, g, h, a, b, K[46], W(46));
+  R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+  R(a, b, c, d, e, f, g, h, K[48], W(48));
+  R(h, a, b, c, d, e, f, g, K[49], W(49));
+  R(g, h, a, b, c, d, e, f, K[50], W(50));
+  R(f, g, h, a, b, c, d, e, K[51], W(51));
+  R(e, f, g, h, a, b, c, d, K[52], W(52));
+  R(d, e, f, g, h, a, b, c, K[53], W(53));
+  R(c, d, e, f, g, h, a, b, K[54], W(54));
+  R(b, c, d, e, f, g, h, a, K[55], W(55));
+  R(a, b, c, d, e, f, g, h, K[56], W(56));
+  R(h, a, b, c, d, e, f, g, K[57], W(57));
+  R(g, h, a, b, c, d, e, f, K[58], W(58));
+  R(f, g, h, a, b, c, d, e, K[59], W(59));
+  R(e, f, g, h, a, b, c, d, K[60], W(60));
+  R(d, e, f, g, h, a, b, c, K[61], W(61));
+  R(c, d, e, f, g, h, a, b, K[62], W(62));
+  R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+  hd->h0 += a;
+  hd->h1 += b;
+  hd->h2 += c;
+  hd->h3 += d;
+  hd->h4 += e;
+  hd->h5 += f;
+  hd->h6 += g;
+  hd->h7 += h;
+
+  return /*burn_stack*/ 26*4+32;
+}
+#undef S0
+#undef S1
+#undef R
+
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_SSSE3
+unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
+                                                u32 state[8],
+                                                size_t num_blks) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AVX
+unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
+                                              u32 state[8],
+                                              size_t num_blks) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AVX2
+unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
+                                               u32 state[8],
+                                               size_t num_blks) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_ARM_CE
+unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
+                                             const void *input_data,
+                                             size_t num_blks);
+#endif
+
+static unsigned int
+transform (void *ctx, const unsigned char *data, size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  unsigned int burn;
+
+#ifdef USE_AVX2
+  if (hd->use_avx2)
+    return _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks)
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+#endif
+
+#ifdef USE_AVX
+  if (hd->use_avx)
+    return _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks)
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+#endif
+
+#ifdef USE_SSSE3
+  if (hd->use_ssse3)
+    return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks)
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+#endif
+
+#ifdef USE_ARM_CE
+  if (hd->use_arm_ce)
+    return _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
+#endif
+
+  do
+    {
+      burn = transform_blk (hd, data);
+      data += 64;
+    }
+  while (--nblks);
+
+#ifdef ASM_EXTRA_STACK
+  /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
+   * the prologue of this function.  We therefore need to add
+   * ASM_EXTRA_STACK here too.
+   */
+  burn += ASM_EXTRA_STACK;
+#endif
+
+  return burn;
+}
+
+
+/*
+   This routine terminates the computation and returns the digest.
+   The handle is prepared for a new cycle, but adding bytes to the
+   handle will destroy the returned buffer.  Returns: 32 bytes with
+   the message digest.
*/ +static void +sha256_final(void *context) +{ + SHA256_CONTEXT *hd = context; + u32 t, th, msb, lsb; + byte *p; + unsigned int burn; + + _gcry_md_block_write (hd, NULL, 0); /* flush */; + + t = hd->bctx.nblocks; + if (sizeof t == sizeof hd->bctx.nblocks) + th = hd->bctx.nblocks_high; + else + th = hd->bctx.nblocks >> 32; + + /* multiply by 64 to make a byte count */ + lsb = t << 6; + msb = (th << 6) | (t >> 26); + /* add the count */ + t = lsb; + if ((lsb += hd->bctx.count) < t) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 29; + + if (hd->bctx.count < 56) + { /* enough room */ + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ + while (hd->bctx.count < 56) + hd->bctx.buf[hd->bctx.count++] = 0; /* pad */ + } + else + { /* need one extra block */ + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ + while (hd->bctx.count < 64) + hd->bctx.buf[hd->bctx.count++] = 0; + _gcry_md_block_write (hd, NULL, 0); /* flush */; + memset (hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */ + } + /* append the 64 bit count */ + buf_put_be32(hd->bctx.buf + 56, msb); + buf_put_be32(hd->bctx.buf + 60, lsb); + burn = transform (hd, hd->bctx.buf, 1); + _gcry_burn_stack (burn); + + p = hd->bctx.buf; +#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0) + X(0); + X(1); + X(2); + X(3); + X(4); + X(5); + X(6); + X(7); +#undef X +} + +static byte * +sha256_read (void *context) +{ + SHA256_CONTEXT *hd = context; + + return hd->bctx.buf; +} + + +/* Shortcut functions which puts the hash value of the supplied buffer + * into outbuf which must have a size of 32 bytes. */ +void +_gcry_sha256_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + SHA256_CONTEXT hd; + + sha256_init (&hd, 0); + _gcry_md_block_write (&hd, buffer, length); + sha256_final (&hd); + memcpy (outbuf, hd.bctx.buf, 32); +} + + +/* Variant of the above shortcut function using multiple buffers. */ +void +_gcry_sha256_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt) +{ + SHA256_CONTEXT hd; + + sha256_init (&hd, 0); + for (;iovcnt > 0; iov++, iovcnt--) + _gcry_md_block_write (&hd, + (const char*)iov[0].data + iov[0].off, iov[0].len); + sha256_final (&hd); + memcpy (outbuf, hd.bctx.buf, 32); +} + + + +/* + Self-test section. + */ + + +static gpg_err_code_t +selftests_sha224 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + what = "short string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA224, 0, + "abc", 3, + "\x23\x09\x7d\x22\x34\x05\xd8\x22\x86\x42\xa4\x77\xbd\xa2\x55\xb3" + "\x2a\xad\xbc\xe4\xbd\xa0\xb3\xf7\xe3\x6c\x9d\xa7", 28); + if (errtxt) + goto failed; + + if (extended) + { + what = "long string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA224, 0, + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56, + "\x75\x38\x8b\x16\x51\x27\x76\xcc\x5d\xba\x5d\xa1\xfd\x89\x01\x50" + "\xb0\xc6\x45\x5c\xb4\xf5\x8b\x19\x52\x52\x25\x25", 28); + if (errtxt) + goto failed; + + what = "one million \"a\""; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA224, 1, + NULL, 0, + "\x20\x79\x46\x55\x98\x0c\x91\xd8\xbb\xb4\xc1\xea\x97\x61\x8a\x4b" + "\xf0\x3f\x42\x58\x19\x48\xb2\xee\x4e\xe7\xad\x67", 28); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. 
*/ + + failed: + if (report) + report ("digest", GCRY_MD_SHA224, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + +static gpg_err_code_t +selftests_sha256 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + what = "short string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA256, 0, + "abc", 3, + "\xba\x78\x16\xbf\x8f\x01\xcf\xea\x41\x41\x40\xde\x5d\xae\x22\x23" + "\xb0\x03\x61\xa3\x96\x17\x7a\x9c\xb4\x10\xff\x61\xf2\x00\x15\xad", 32); + if (errtxt) + goto failed; + + if (extended) + { + what = "long string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA256, 0, + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56, + "\x24\x8d\x6a\x61\xd2\x06\x38\xb8\xe5\xc0\x26\x93\x0c\x3e\x60\x39" + "\xa3\x3c\xe4\x59\x64\xff\x21\x67\xf6\xec\xed\xd4\x19\xdb\x06\xc1", + 32); + if (errtxt) + goto failed; + + what = "one million \"a\""; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA256, 1, + NULL, 0, + "\xcd\xc7\x6e\x5c\x99\x14\xfb\x92\x81\xa1\xc7\xe2\x84\xd7\x3e\x67" + "\xf1\x80\x9a\x48\xa4\x97\x20\x0e\x04\x6d\x39\xcc\xc7\x11\x2c\xd0", + 32); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("digest", GCRY_MD_SHA256, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_MD_SHA224: + ec = selftests_sha224 (extended, report); + break; + case GCRY_MD_SHA256: + ec = selftests_sha256 (extended, report); + break; + default: + ec = GPG_ERR_DIGEST_ALGO; + break; + + } + return ec; +} + + + + +static byte asn224[19] = /* Object ID is 2.16.840.1.101.3.4.2.4 */ + { 0x30, 0x2D, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48, + 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x05, 0x00, 0x04, + 0x1C + }; + +static gcry_md_oid_spec_t oid_spec_sha224[] = + { + /* From RFC3874, Section 4 */ + { "2.16.840.1.101.3.4.2.4" }, + { NULL }, + }; + +static byte asn256[19] = /* Object ID is 2.16.840.1.101.3.4.2.1 */ + { 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, + 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05, + 0x00, 0x04, 0x20 }; + +static gcry_md_oid_spec_t oid_spec_sha256[] = + { + /* According to the OpenPGP draft rfc2440-bis06 */ + { "2.16.840.1.101.3.4.2.1" }, + /* PKCS#1 sha256WithRSAEncryption */ + { "1.2.840.113549.1.1.11" }, + + { NULL }, + }; + +gcry_md_spec_t _gcry_digest_spec_sha224 = + { + GCRY_MD_SHA224, {0, 1}, + "SHA224", asn224, DIM (asn224), oid_spec_sha224, 28, + sha224_init, _gcry_md_block_write, sha256_final, sha256_read, NULL, + sizeof (SHA256_CONTEXT), + run_selftests + }; + +gcry_md_spec_t _gcry_digest_spec_sha256 = + { + GCRY_MD_SHA256, {0, 1}, + "SHA256", asn256, DIM (asn256), oid_spec_sha256, 32, + sha256_init, _gcry_md_block_write, sha256_final, sha256_read, NULL, + sizeof (SHA256_CONTEXT), + run_selftests + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/sha512-arm.S b/libotr/libgcrypt-1.8.7/cipher/sha512-arm.S new file mode 100644 index 0000000..94ec014 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha512-arm.S @@ -0,0 +1,464 @@ +/* sha512-arm.S - ARM assembly implementation of SHA-512 transform + * + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ +#include <config.h> + +#if defined(__ARMEL__) +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +.text + +.syntax unified +.arm + +/* structure of SHA512_CONTEXT */ +#define hd_a 0 +#define hd_b ((hd_a) + 8) +#define hd_c ((hd_b) + 8) +#define hd_d ((hd_c) + 8) +#define hd_e ((hd_d) + 8) +#define hd_f ((hd_e) + 8) +#define hd_g ((hd_f) + 8) +#define hd_h ((hd_g) + 8) + +/* register macros */ +#define RK %r2 + +#define RElo %r0 +#define REhi %r1 + +#define RT1lo %r3 +#define RT1hi %r4 +#define RT2lo %r5 +#define RT2hi %r6 +#define RWlo %r7 +#define RWhi %r8 +#define RT3lo %r9 +#define RT3hi %r10 +#define RT4lo %r11 +#define RT4hi %ip + +#define RRND %lr + +/* variable offsets in stack */ +#define ctx (0) +#define data ((ctx) + 4) +#define nblks ((data) + 4) +#define _a ((nblks) + 4) +#define _b ((_a) + 8) +#define _c ((_b) + 8) +#define _d ((_c) + 8) +#define _e ((_d) + 8) +#define _f ((_e) + 8) +#define _g ((_f) + 8) +#define _h ((_g) + 8) + +#define w(i) ((_h) + 8 + ((i) % 16) * 8) + +#define STACK_MAX (w(15) + 8) + +/* helper macros */ +#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 3)]; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 0)]; \ + orr rout, rout, rtmp, lsl #24; + +#ifdef __ARMEL__ + /* bswap on little-endian */ +#ifdef HAVE_ARM_ARCH_V6 + #define be_to_host(reg, rtmp) \ + rev reg, reg; +#else + #define be_to_host(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; +#endif +#else + /* nop on big-endian */ + #define be_to_host(reg, rtmp) /*_*/ +#endif + +#define host_to_host(x, y) /*_*/ + +#define read_u64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, convert, rtmp) \ + ldr lo0, [rin, #((offs) + 0 * 8 + 4)]; \ + ldr hi0, [rin, #((offs) + 0 * 8 + 0)]; \ + ldr lo1, [rin, #((offs) + 1 * 8 + 4)]; \ + ldr hi1, [rin, #((offs) + 1 * 8 + 0)]; \ + ldr lo2, [rin, #((offs) + 2 * 8 + 4)]; \ + convert(lo0, rtmp); \ + ldr hi2, [rin, #((offs) + 2 * 8 + 0)]; \ + convert(hi0, rtmp); \ + ldr lo3, [rin, #((offs) + 3 * 8 + 4)]; \ + convert(lo1, rtmp); \ + ldr hi3, [rin, #((offs) + 3 * 8 + 0)]; \ + convert(hi1, rtmp); \ + convert(lo2, rtmp); \ + convert(hi2, rtmp); \ + convert(lo3, rtmp); \ + convert(hi3, rtmp); + +#define read_be64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, rtmp0) \ + read_u64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, be_to_host, rtmp0) + +/* need to handle unaligned reads by byte reads */ +#define read_be64_unaligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, rtmp0) \ + ldr_unaligned_be(lo0, rin, (offs) + 0 * 8 + 4, rtmp0); \ + ldr_unaligned_be(hi0, rin, (offs) + 0 * 8 + 0, rtmp0); \ + ldr_unaligned_be(lo1, rin, 
(offs) + 1 * 8 + 4, rtmp0); \ + ldr_unaligned_be(hi1, rin, (offs) + 1 * 8 + 0, rtmp0); \ + ldr_unaligned_be(lo2, rin, (offs) + 2 * 8 + 4, rtmp0); \ + ldr_unaligned_be(hi2, rin, (offs) + 2 * 8 + 0, rtmp0); \ + ldr_unaligned_be(lo3, rin, (offs) + 3 * 8 + 4, rtmp0); \ + ldr_unaligned_be(hi3, rin, (offs) + 3 * 8 + 0, rtmp0); + +/*********************************************************************** + * ARM assembly implementation of sha512 transform + ***********************************************************************/ + +/* Round function */ + +#define R(_a,_b,_c,_d,_e,_f,_g,_h,W,wi) \ + /* Message expansion, t1 = _h + w[i] */ \ + W(_a,_h,wi); \ + \ + /* w = Sum1(_e) */ \ + mov RWlo, RElo, lsr#14; \ + ldm RK!, {RT2lo-RT2hi}; \ + mov RWhi, REhi, lsr#14; \ + eor RWlo, RWlo, RElo, lsr#18; \ + eor RWhi, RWhi, REhi, lsr#18; \ + ldr RT3lo, [%sp, #(_f)]; \ + adds RT1lo, RT2lo; /* t1 += K */ \ + ldr RT3hi, [%sp, #(_f) + 4]; \ + adc RT1hi, RT2hi; \ + ldr RT4lo, [%sp, #(_g)]; \ + eor RWlo, RWlo, RElo, lsl#23; \ + ldr RT4hi, [%sp, #(_g) + 4]; \ + eor RWhi, RWhi, REhi, lsl#23; \ + eor RWlo, RWlo, REhi, lsl#18; \ + eor RWhi, RWhi, RElo, lsl#18; \ + eor RWlo, RWlo, REhi, lsl#14; \ + eor RWhi, RWhi, RElo, lsl#14; \ + eor RWlo, RWlo, REhi, lsr#9; \ + eor RWhi, RWhi, RElo, lsr#9; \ + \ + /* Cho(_e,_f,_g) => (_e & _f) ^ (~_e & _g) */ \ + adds RT1lo, RWlo; /* t1 += Sum1(_e) */ \ + and RT3lo, RT3lo, RElo; \ + adc RT1hi, RWhi; \ + and RT3hi, RT3hi, REhi; \ + bic RT4lo, RT4lo, RElo; \ + bic RT4hi, RT4hi, REhi; \ + eor RT3lo, RT3lo, RT4lo; \ + eor RT3hi, RT3hi, RT4hi; \ + \ + /* Load D */ \ + /* t1 += Cho(_e,_f,_g) */ \ + ldr RElo, [%sp, #(_d)]; \ + adds RT1lo, RT3lo; \ + ldr REhi, [%sp, #(_d) + 4]; \ + adc RT1hi, RT3hi; \ + \ + /* Load A */ \ + ldr RT3lo, [%sp, #(_a)]; \ + \ + /* _d += t1 */ \ + adds RElo, RT1lo; \ + ldr RT3hi, [%sp, #(_a) + 4]; \ + adc REhi, RT1hi; \ + \ + /* Store D */ \ + str RElo, [%sp, #(_d)]; \ + \ + /* t2 = Sum0(_a) */ \ + mov RT2lo, RT3lo, lsr#28; \ + str REhi, [%sp, #(_d) + 4]; \ + mov RT2hi, RT3hi, lsr#28; \ + ldr RWlo, [%sp, #(_b)]; \ + eor RT2lo, RT2lo, RT3lo, lsl#30; \ + ldr RWhi, [%sp, #(_b) + 4]; \ + eor RT2hi, RT2hi, RT3hi, lsl#30; \ + eor RT2lo, RT2lo, RT3lo, lsl#25; \ + eor RT2hi, RT2hi, RT3hi, lsl#25; \ + eor RT2lo, RT2lo, RT3hi, lsl#4; \ + eor RT2hi, RT2hi, RT3lo, lsl#4; \ + eor RT2lo, RT2lo, RT3hi, lsr#2; \ + eor RT2hi, RT2hi, RT3lo, lsr#2; \ + eor RT2lo, RT2lo, RT3hi, lsr#7; \ + eor RT2hi, RT2hi, RT3lo, lsr#7; \ + \ + /* t2 += t1 */ \ + adds RT2lo, RT1lo; \ + ldr RT1lo, [%sp, #(_c)]; \ + adc RT2hi, RT1hi; \ + \ + /* Maj(_a,_b,_c) => ((_a & _b) ^ (_c & (_a ^ _b))) */ \ + ldr RT1hi, [%sp, #(_c) + 4]; \ + and RT4lo, RWlo, RT3lo; \ + and RT4hi, RWhi, RT3hi; \ + eor RWlo, RWlo, RT3lo; \ + eor RWhi, RWhi, RT3hi; \ + and RWlo, RWlo, RT1lo; \ + and RWhi, RWhi, RT1hi; \ + eor RWlo, RWlo, RT4lo; \ + eor RWhi, RWhi, RT4hi; \ + +/* Message expansion */ + +#define W_0_63(_a,_h,i) \ + ldr RT3lo, [%sp, #(w(i-2))]; \ + adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \ + ldr RT3hi, [%sp, #(w(i-2)) + 4]; \ + adc RT2hi, RWhi; \ + /* nw = S1(w[i-2]) */ \ + ldr RT1lo, [%sp, #(_h)]; /* Load H */ \ + mov RWlo, RT3lo, lsr#19; \ + str RT2lo, [%sp, #(_a)]; \ + eor RWlo, RWlo, RT3lo, lsl#3; \ + ldr RT1hi, [%sp, #(_h) + 4]; \ + mov RWhi, RT3hi, lsr#19; \ + ldr RT2lo, [%sp, #(w(i-7))]; \ + eor RWhi, RWhi, RT3hi, lsl#3; \ + str RT2hi, [%sp, #(_a) + 4]; \ + eor RWlo, RWlo, RT3lo, lsr#6; \ + ldr RT2hi, [%sp, #(w(i-7)) + 4]; \ + eor RWhi, RWhi, RT3hi, lsr#6; \ + eor RWlo, RWlo, RT3hi, lsl#13; \ + eor 
RWhi, RWhi, RT3lo, lsl#13; \ + eor RWlo, RWlo, RT3hi, lsr#29; \ + eor RWhi, RWhi, RT3lo, lsr#29; \ + ldr RT3lo, [%sp, #(w(i-15))]; \ + eor RWlo, RWlo, RT3hi, lsl#26; \ + ldr RT3hi, [%sp, #(w(i-15)) + 4]; \ + \ + adds RT2lo, RWlo; /* nw += w[i-7] */ \ + ldr RWlo, [%sp, #(w(i-16))]; \ + adc RT2hi, RWhi; \ + mov RT4lo, RT3lo, lsr#1; /* S0(w[i-15]) */ \ + ldr RWhi, [%sp, #(w(i-16)) + 4]; \ + mov RT4hi, RT3hi, lsr#1; \ + adds RT2lo, RWlo; /* nw += w[i-16] */ \ + eor RT4lo, RT4lo, RT3lo, lsr#8; \ + eor RT4hi, RT4hi, RT3hi, lsr#8; \ + eor RT4lo, RT4lo, RT3lo, lsr#7; \ + eor RT4hi, RT4hi, RT3hi, lsr#7; \ + eor RT4lo, RT4lo, RT3hi, lsl#31; \ + eor RT4hi, RT4hi, RT3lo, lsl#31; \ + eor RT4lo, RT4lo, RT3hi, lsl#24; \ + eor RT4hi, RT4hi, RT3lo, lsl#24; \ + eor RT4lo, RT4lo, RT3hi, lsl#25; \ + adc RT2hi, RWhi; \ + \ + /* nw += S0(w[i-15]) */ \ + adds RT2lo, RT4lo; \ + adc RT2hi, RT4hi; \ + \ + /* w[0] = nw */ \ + str RT2lo, [%sp, #(w(i))]; \ + adds RT1lo, RWlo; \ + str RT2hi, [%sp, #(w(i)) + 4]; \ + adc RT1hi, RWhi; + +#define W_64_79(_a,_h,i) \ + adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \ + ldr RWlo, [%sp, #(w(i-16))]; \ + adc RT2hi, RWhi; \ + ldr RWhi, [%sp, #(w(i-16)) + 4]; \ + ldr RT1lo, [%sp, #(_h)]; /* Load H */ \ + ldr RT1hi, [%sp, #(_h) + 4]; \ + str RT2lo, [%sp, #(_a)]; \ + str RT2hi, [%sp, #(_a) + 4]; \ + adds RT1lo, RWlo; \ + adc RT1hi, RWhi; + +.align 3 +.globl _gcry_sha512_transform_arm +.type _gcry_sha512_transform_arm,%function; + +_gcry_sha512_transform_arm: + /* Input: + * %r0: SHA512_CONTEXT + * %r1: data + * %r2: u64 k[] constants + * %r3: nblks + */ + push {%r4-%r11, %ip, %lr}; + sub %sp, %sp, #STACK_MAX; + movs RWlo, %r3; + str %r0, [%sp, #(ctx)]; + + beq .Ldone; + +.Loop_blocks: + str RWlo, [%sp, #nblks]; + + /* Load context to stack */ + add RWhi, %sp, #(_a); + ldm %r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + ldm %r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + + /* Load input to w[16] */ + + /* test if data is unaligned */ + tst %r1, #3; + beq 1f; + + /* unaligned load */ + add RWhi, %sp, #(w(0)); + read_be64_unaligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + + read_be64_unaligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + + read_be64_unaligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + + read_be64_unaligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + b 2f; +1: + /* aligned load */ + add RWhi, %sp, #(w(0)); + read_be64_aligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + + read_be64_aligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + + read_be64_aligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + + read_be64_aligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); +2: + add %r1, #(16 * 8); + stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + str %r1, [%sp, #(data)]; + + /* 
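+	   For reference, the round primitives that the R macro above
+	   implements with 32-bit register pairs, written as plain C (a
+	   sketch; the u64 type and ROTR helper are assumed here, they
+	   are not defined in this file):
+
+	     static inline u64 Ch  (u64 e, u64 f, u64 g) { return (e & f) ^ (~e & g); }
+	     static inline u64 Maj (u64 a, u64 b, u64 c) { return (a & b) ^ (c & (a ^ b)); }
+	     static inline u64 Sum0(u64 a) { return ROTR(a,28) ^ ROTR(a,34) ^ ROTR(a,39); }
+	     static inline u64 Sum1(u64 e) { return ROTR(e,14) ^ ROTR(e,18) ^ ROTR(e,41); }
+
+	   with ROTR(x,n) a 64-bit right-rotate by n bits.  Below we first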
preload E & A */ + ldr RElo, [%sp, #(_e)]; + ldr REhi, [%sp, #(_e) + 4]; + mov RWlo, #0; + ldr RT2lo, [%sp, #(_a)]; + mov RRND, #(80-16); + ldr RT2hi, [%sp, #(_a) + 4]; + mov RWhi, #0; + +.Loop_rounds: + R(_a, _b, _c, _d, _e, _f, _g, _h, W_0_63, 16); + R(_h, _a, _b, _c, _d, _e, _f, _g, W_0_63, 17); + R(_g, _h, _a, _b, _c, _d, _e, _f, W_0_63, 18); + R(_f, _g, _h, _a, _b, _c, _d, _e, W_0_63, 19); + R(_e, _f, _g, _h, _a, _b, _c, _d, W_0_63, 20); + R(_d, _e, _f, _g, _h, _a, _b, _c, W_0_63, 21); + R(_c, _d, _e, _f, _g, _h, _a, _b, W_0_63, 22); + R(_b, _c, _d, _e, _f, _g, _h, _a, W_0_63, 23); + R(_a, _b, _c, _d, _e, _f, _g, _h, W_0_63, 24); + R(_h, _a, _b, _c, _d, _e, _f, _g, W_0_63, 25); + R(_g, _h, _a, _b, _c, _d, _e, _f, W_0_63, 26); + R(_f, _g, _h, _a, _b, _c, _d, _e, W_0_63, 27); + R(_e, _f, _g, _h, _a, _b, _c, _d, W_0_63, 28); + R(_d, _e, _f, _g, _h, _a, _b, _c, W_0_63, 29); + R(_c, _d, _e, _f, _g, _h, _a, _b, W_0_63, 30); + R(_b, _c, _d, _e, _f, _g, _h, _a, W_0_63, 31); + + subs RRND, #16; + bne .Loop_rounds; + + R(_a, _b, _c, _d, _e, _f, _g, _h, W_64_79, 16); + R(_h, _a, _b, _c, _d, _e, _f, _g, W_64_79, 17); + R(_g, _h, _a, _b, _c, _d, _e, _f, W_64_79, 18); + R(_f, _g, _h, _a, _b, _c, _d, _e, W_64_79, 19); + R(_e, _f, _g, _h, _a, _b, _c, _d, W_64_79, 20); + R(_d, _e, _f, _g, _h, _a, _b, _c, W_64_79, 21); + R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 22); + R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 23); + R(_a, _b, _c, _d, _e, _f, _g, _h, W_64_79, 24); + R(_h, _a, _b, _c, _d, _e, _f, _g, W_64_79, 25); + R(_g, _h, _a, _b, _c, _d, _e, _f, W_64_79, 26); + R(_f, _g, _h, _a, _b, _c, _d, _e, W_64_79, 27); + R(_e, _f, _g, _h, _a, _b, _c, _d, W_64_79, 28); + R(_d, _e, _f, _g, _h, _a, _b, _c, W_64_79, 29); + R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 30); + R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 31); + + ldr %r0, [%sp, #(ctx)]; + adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ + ldr %r1, [%sp, #(data)]; + adc RT2hi, RWhi; + + ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + adds RT1lo, RT2lo; + ldr RT2lo, [%sp, #(_b + 0)]; + adc RT1hi, RT2hi; + ldr RT2hi, [%sp, #(_b + 4)]; + adds RWlo, RT2lo; + ldr RT2lo, [%sp, #(_c + 0)]; + adc RWhi, RT2hi; + ldr RT2hi, [%sp, #(_c + 4)]; + adds RT3lo, RT2lo; + ldr RT2lo, [%sp, #(_d + 0)]; + adc RT3hi, RT2hi; + ldr RT2hi, [%sp, #(_d + 4)]; + adds RT4lo, RT2lo; + ldr RT2lo, [%sp, #(_e + 0)]; + adc RT4hi, RT2hi; + stm %r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + + ldr RT2hi, [%sp, #(_e + 4)]; + ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + adds RT1lo, RT2lo; + ldr RT2lo, [%sp, #(_f + 0)]; + adc RT1hi, RT2hi; + ldr RT2hi, [%sp, #(_f + 4)]; + adds RWlo, RT2lo; + ldr RT2lo, [%sp, #(_g + 0)]; + adc RWhi, RT2hi; + ldr RT2hi, [%sp, #(_g + 4)]; + adds RT3lo, RT2lo; + ldr RT2lo, [%sp, #(_h + 0)]; + adc RT3hi, RT2hi; + ldr RT2hi, [%sp, #(_h + 4)]; + adds RT4lo, RT2lo; + adc RT4hi, RT2hi; + stm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + sub %r0, %r0, #(4 * 8); + ldr RWlo, [%sp, #nblks]; + + sub RK, #(80 * 8); + subs RWlo, #1; + bne .Loop_blocks; + +.Ldone: + mov %r0, #STACK_MAX; +__out: + add %sp, %sp, #STACK_MAX; + pop {%r4-%r11, %ip, %pc}; +.size _gcry_sha512_transform_arm,.-_gcry_sha512_transform_arm; + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha512-armv7-neon.S b/libotr/libgcrypt-1.8.7/cipher/sha512-armv7-neon.S new file mode 100644 index 0000000..a9d1272 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha512-armv7-neon.S @@ -0,0 +1,449 @@ +/* sha512-armv7-neon.S - ARM/NEON assembly implementation of 
SHA-512 transform + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) + +.text + +.syntax unified +.fpu neon +.arm + +/* structure of SHA512_CONTEXT */ +#define hd_a 0 +#define hd_b ((hd_a) + 8) +#define hd_c ((hd_b) + 8) +#define hd_d ((hd_c) + 8) +#define hd_e ((hd_d) + 8) +#define hd_f ((hd_e) + 8) +#define hd_g ((hd_f) + 8) + +/* register macros */ +#define RK %r2 + +#define RA d0 +#define RB d1 +#define RC d2 +#define RD d3 +#define RE d4 +#define RF d5 +#define RG d6 +#define RH d7 + +#define RT0 d8 +#define RT1 d9 +#define RT2 d10 +#define RT3 d11 +#define RT4 d12 +#define RT5 d13 +#define RT6 d14 +#define RT7 d15 + +#define RT01q q4 +#define RT23q q5 +#define RT45q q6 +#define RT67q q7 + +#define RW0 d16 +#define RW1 d17 +#define RW2 d18 +#define RW3 d19 +#define RW4 d20 +#define RW5 d21 +#define RW6 d22 +#define RW7 d23 +#define RW8 d24 +#define RW9 d25 +#define RW10 d26 +#define RW11 d27 +#define RW12 d28 +#define RW13 d29 +#define RW14 d30 +#define RW15 d31 + +#define RW01q q8 +#define RW23q q9 +#define RW45q q10 +#define RW67q q11 +#define RW89q q12 +#define RW1011q q13 +#define RW1213q q14 +#define RW1415q q15 + +/*********************************************************************** + * ARM assembly implementation of sha512 transform + ***********************************************************************/ +#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \ + /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ + vshr.u64 RT2, re, #14; \ + vshl.u64 RT3, re, #64 - 14; \ + interleave_op(arg1); \ + vshr.u64 RT4, re, #18; \ + vshl.u64 RT5, re, #64 - 18; \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, re, #41; \ + vshl.u64 RT5, re, #64 - 41; \ + vadd.u64 RT0, RT0, rw0; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, re; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, rf, rg; \ + \ + vadd.u64 RT1, RT1, rh; \ + vshr.u64 RT2, ra, #28; \ + vshl.u64 RT3, ra, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, ra, #34; \ + vshl.u64 RT5, ra, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* h = Sum0 (a) + Maj (a, b, c); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, ra, #39; \ + vshl.u64 RT5, ra, #64 - 39; \ + veor.64 RT0, ra, rb; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rc, rb; \ + vadd.u64 rd, rd, RT1; /* d+=t1; */ \ + veor.64 rh, RT2, RT3; \ + \ + /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ + vshr.u64 RT2, rd, #14; \ + vshl.u64 RT3, rd, #64 - 14; \ + vadd.u64 rh, rh, RT0; \ + vshr.u64 RT4, rd, #18; \ + vshl.u64 RT5, rd, #64 - 18; \ + vadd.u64 rh, rh, RT1; /* 
h+=t1; */ \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rd, #41; \ + vshl.u64 RT5, rd, #64 - 41; \ + vadd.u64 RT0, RT0, rw1; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, rd; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, re, rf; \ + \ + vadd.u64 RT1, RT1, rg; \ + vshr.u64 RT2, rh, #28; \ + vshl.u64 RT3, rh, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, rh, #34; \ + vshl.u64 RT5, rh, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* g = Sum0 (h) + Maj (h, a, b); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rh, #39; \ + vshl.u64 RT5, rh, #64 - 39; \ + veor.64 RT0, rh, ra; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rb, ra; \ + vadd.u64 rc, rc, RT1; /* c+=t1; */ \ + veor.64 rg, RT2, RT3; \ + \ + /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \ + /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \ + \ + /**** S0(w[1:2]) */ \ + \ + /* w[0:1] += w[9:10] */ \ + /* RT23q = rw1:rw2 */ \ + vext.u64 RT23q, rw01q, rw23q, #1; \ + vadd.u64 rw0, rw9; \ + vadd.u64 rg, rg, RT0; \ + vadd.u64 rw1, rw10;\ + vadd.u64 rg, rg, RT1; /* g+=t1; */ \ + \ + vshr.u64 RT45q, RT23q, #1; \ + vshl.u64 RT67q, RT23q, #64 - 1; \ + vshr.u64 RT01q, RT23q, #8; \ + veor.u64 RT45q, RT45q, RT67q; \ + vshl.u64 RT67q, RT23q, #64 - 8; \ + veor.u64 RT45q, RT45q, RT01q; \ + vshr.u64 RT01q, RT23q, #7; \ + veor.u64 RT45q, RT45q, RT67q; \ + \ + /**** S1(w[14:15]) */ \ + vshr.u64 RT23q, rw1415q, #6; \ + veor.u64 RT01q, RT01q, RT45q; \ + vshr.u64 RT45q, rw1415q, #19; \ + vshl.u64 RT67q, rw1415q, #64 - 19; \ + veor.u64 RT23q, RT23q, RT45q; \ + vshr.u64 RT45q, rw1415q, #61; \ + veor.u64 RT23q, RT23q, RT67q; \ + vshl.u64 RT67q, rw1415q, #64 - 61; \ + veor.u64 RT23q, RT23q, RT45q; \ + vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \ + veor.u64 RT01q, RT23q, RT67q; +#define vadd_RT01q(rw01q) \ + /* w[0:1] += S(w[14:15]) */ \ + vadd.u64 rw01q, RT01q; + +#define dummy(_) /*_*/ + +#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, interleave_op1, arg1, interleave_op2, arg2) \ + /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ + vshr.u64 RT2, re, #14; \ + vshl.u64 RT3, re, #64 - 14; \ + interleave_op1(arg1); \ + vshr.u64 RT4, re, #18; \ + vshl.u64 RT5, re, #64 - 18; \ + interleave_op2(arg2); \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, re, #41; \ + vshl.u64 RT5, re, #64 - 41; \ + vadd.u64 RT0, RT0, rw0; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, re; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, rf, rg; \ + \ + vadd.u64 RT1, RT1, rh; \ + vshr.u64 RT2, ra, #28; \ + vshl.u64 RT3, ra, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, ra, #34; \ + vshl.u64 RT5, ra, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* h = Sum0 (a) + Maj (a, b, c); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, ra, #39; \ + vshl.u64 RT5, ra, #64 - 39; \ + veor.64 RT0, ra, rb; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rc, rb; \ + vadd.u64 rd, rd, RT1; /* d+=t1; */ \ + veor.64 rh, RT2, RT3; \ + \ + /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ + vshr.u64 RT2, rd, #14; \ + vshl.u64 RT3, rd, #64 - 14; \ + vadd.u64 rh, rh, RT0; \ + vshr.u64 RT4, rd, #18; \ + vshl.u64 RT5, rd, #64 - 18; \ + vadd.u64 rh, rh, RT1; /* h+=t1; */ \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rd, #41; \ + vshl.u64 RT5, rd, #64 - 41; \ + vadd.u64 RT0, RT0, rw1; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, rd; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, re, rf; \ + \ + vadd.u64 RT1, RT1, rg; \ + vshr.u64 RT2, rh, #28; \ + vshl.u64 
RT3, rh, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, rh, #34; \ + vshl.u64 RT5, rh, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* g = Sum0 (h) + Maj (h, a, b); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rh, #39; \ + vshl.u64 RT5, rh, #64 - 39; \ + veor.64 RT0, rh, ra; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rb, ra; \ + vadd.u64 rc, rc, RT1; /* c+=t1; */ \ + veor.64 rg, RT2, RT3; +#define vadd_rg_RT0(rg) \ + vadd.u64 rg, rg, RT0; +#define vadd_rg_RT1(rg) \ + vadd.u64 rg, rg, RT1; /* g+=t1; */ + +.align 3 +.globl _gcry_sha512_transform_armv7_neon +.type _gcry_sha512_transform_armv7_neon,%function; + +_gcry_sha512_transform_armv7_neon: + /* Input: + * %r0: SHA512_CONTEXT + * %r1: data + * %r2: u64 k[] constants + * %r3: nblks + */ + push {%lr}; + + mov %lr, #0; + + /* Load context to d0-d7 */ + vld1.64 {RA-RD}, [%r0]!; + vld1.64 {RE-RH}, [%r0]; + sub %r0, #(4*8); + + /* Load input to w[16], d16-d31 */ + /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */ + vld1.64 {RW0-RW3}, [%r1]!; + vld1.64 {RW4-RW7}, [%r1]!; + vld1.64 {RW8-RW11}, [%r1]!; + vld1.64 {RW12-RW15}, [%r1]!; +#ifdef __ARMEL__ + /* byteswap */ + vrev64.8 RW01q, RW01q; + vrev64.8 RW23q, RW23q; + vrev64.8 RW45q, RW45q; + vrev64.8 RW67q, RW67q; + vrev64.8 RW89q, RW89q; + vrev64.8 RW1011q, RW1011q; + vrev64.8 RW1213q, RW1213q; + vrev64.8 RW1415q, RW1415q; +#endif + + /* EABI says that d8-d15 must be preserved by callee. */ + vpush {RT0-RT7}; + +.Loop: + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, dummy, _); + b .Lenter_rounds; + +.Loop_rounds: + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q); +.Lenter_rounds: + rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q); + rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q); + rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q); + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q); + rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q); + add %lr, #16; + rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q); + cmp %lr, #64; + rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q); + bne .Loop_rounds; + + subs %r3, #1; + + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, vadd_RT01q, RW1415q, dummy, _); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, vadd_rg_RT0, RG, vadd_rg_RT1, RG); + beq .Lhandle_tail; + vld1.64 {RW0-RW3}, [%r1]!; + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC); +#ifdef __ARMEL__ + vrev64.8 RW01q, RW01q; + vrev64.8 RW23q, RW23q; +#endif + vld1.64 {RW4-RW7}, [%r1]!; + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG); +#ifdef __ARMEL__ + vrev64.8 RW45q, RW45q; + vrev64.8 RW67q, RW67q; +#endif + vld1.64 {RW8-RW11}, [%r1]!; + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, 
RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC); +#ifdef __ARMEL__ + vrev64.8 RW89q, RW89q; + vrev64.8 RW1011q, RW1011q; +#endif + vld1.64 {RW12-RW15}, [%r1]!; + vadd_rg_RT0(RA); + vadd_rg_RT1(RA); + + /* Load context */ + vld1.64 {RT0-RT3}, [%r0]!; + vld1.64 {RT4-RT7}, [%r0]; + sub %r0, #(4*8); + +#ifdef __ARMEL__ + vrev64.8 RW1213q, RW1213q; + vrev64.8 RW1415q, RW1415q; +#endif + + vadd.u64 RA, RT0; + vadd.u64 RB, RT1; + vadd.u64 RC, RT2; + vadd.u64 RD, RT3; + vadd.u64 RE, RT4; + vadd.u64 RF, RT5; + vadd.u64 RG, RT6; + vadd.u64 RH, RT7; + + /* Store the first half of context */ + vst1.64 {RA-RD}, [%r0]!; + sub RK, $(8*80); + vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ + mov %lr, #0; + sub %r0, #(4*8); + + b .Loop; +.ltorg + +.Lhandle_tail: + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC); + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG); + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC); + + /* Load context to d16-d23 */ + vld1.64 {RW0-RW3}, [%r0]!; + vadd_rg_RT0(RA); + vld1.64 {RW4-RW7}, [%r0]; + vadd_rg_RT1(RA); + sub %r0, #(4*8); + + vadd.u64 RA, RW0; + vadd.u64 RB, RW1; + vadd.u64 RC, RW2; + vadd.u64 RD, RW3; + vadd.u64 RE, RW4; + vadd.u64 RF, RW5; + vadd.u64 RG, RW6; + vadd.u64 RH, RW7; + + /* Store the first half of context */ + vst1.64 {RA-RD}, [%r0]!; + + /* Clear used registers */ + /* d16-d31 */ + veor.u64 RW01q, RW01q; + veor.u64 RW23q, RW23q; + veor.u64 RW45q, RW45q; + veor.u64 RW67q, RW67q; + vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ + veor.u64 RW89q, RW89q; + veor.u64 RW1011q, RW1011q; + veor.u64 RW1213q, RW1213q; + veor.u64 RW1415q, RW1415q; + /* d8-d15 */ + vpop {RT0-RT7}; + /* d0-d7 (q0-q3) */ + veor.u64 %q0, %q0; + veor.u64 %q1, %q1; + veor.u64 %q2, %q2; + veor.u64 %q3, %q3; + + pop {%pc}; +.size _gcry_sha512_transform_armv7_neon,.-_gcry_sha512_transform_armv7_neon; + +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha512-avx-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha512-avx-amd64.S new file mode 100644 index 0000000..446a8b4 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha512-avx-amd64.S @@ -0,0 +1,421 @@ +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright (c) 2012, Intel Corporation +; +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. 
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA512)
+
+#ifdef __PIC__
+# define ADD_RIP +rip
+#else
+# define ADD_RIP
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+msg = rdi /* ARG1 */
+digest = rsi /* ARG2 */
+msglen = rdx /* ARG3 */
+T1 = rcx
+T2 = r8
+a_64 = r9
+b_64 = r10
+c_64 = r11
+d_64 = r12
+e_64 = r13
+f_64 = r14
+g_64 = r15
+h_64 = rbx
+tmp0 = rax
+
+/*
+; Local variables (stack frame)
+; Note: frame_size must be an odd multiple of 8 bytes to XMM align RSP
+*/
+frame_W = 0 /* Message Schedule */
+frame_W_size = (80 * 8)
+frame_WK = ((frame_W) + (frame_W_size)) /* W[t] + K[t] | W[t+1] + K[t+1] */
+frame_WK_size = (2 * 8)
+frame_GPRSAVE = ((frame_WK) + (frame_WK_size))
+frame_GPRSAVE_size = (5 * 8)
+frame_size = ((frame_GPRSAVE) + (frame_GPRSAVE_size))
+
+
+/* Useful QWORD "arrays" for simpler memory references */
+#define MSG(i) msg + 8*(i) /* Input message (arg1) */
+#define DIGEST(i) digest + 8*(i) /* Output Digest (arg2) */
+#define K_t(i) .LK512 + 8*(i) ADD_RIP /* SHA Constants (static mem) */
+#define W_t(i) rsp + frame_W + 8*(i) /* Message Schedule (stack frame) */
+#define WK_2(i) rsp + frame_WK + 8*((i) % 2) /* W[t]+K[t] (stack frame) */
+/* MSG, DIGEST, K_t, W_t are arrays */
+/* WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even */
+
+.macro RotateState
+	/* Rotate symbols a..h right */
+	__TMP = h_64
+	h_64 = g_64
+	g_64 = f_64
+	f_64 = e_64
+	e_64 = d_64
+	d_64 = c_64
+	c_64 = b_64
+	b_64 = a_64
+	a_64 = __TMP
+.endm
+
+.macro RORQ p1 p2
+	/* shld is faster than ror on Intel Sandybridge */
+	shld	\p1, \p1, (64 - \p2)
+.endm
+
+.macro SHA512_Round t
+	/* Compute Round %%t */
+	mov	T1, f_64 /* T1 = f */
+	mov	tmp0, e_64 /* tmp = e */
+	xor	T1, g_64 /* T1 = f ^ g */
+	RORQ	tmp0, 23 /* 41 ; tmp = e ror 23 */
+	and	T1, e_64 /* T1 = (f ^ g) & e */
+	xor	tmp0, e_64 /* tmp = (e ror 23) ^ e */
+	xor	T1, g_64 /* T1 = ((f ^ g) & e) ^ g = CH(e,f,g) */
+	add	T1, [WK_2(\t)] /* W[t] + K[t] from message scheduler */
+	RORQ	tmp0, 4 /* 18 ; tmp = ((e ror 23) ^ e) ror 4 */
+	xor	tmp0, e_64 /* tmp = (((e ror 23) ^ e) ror 4) ^ e */
+	mov	T2, a_64 /* T2 = a */
+	add	T1, h_64 /* T1 = CH(e,f,g) + W[t] + K[t] + h */
+	RORQ	tmp0, 14 /* 14 ; tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) */
+	add	T1, tmp0 /* T1 = CH(e,f,g) + W[t] + K[t] + S1(e) */
+	mov	tmp0, a_64 /* tmp = a */
+	xor	T2, c_64 /* T2 = a ^ c */
+	and	tmp0, c_64 /* tmp = a & c */
+	and	T2, b_64 /* T2 = (a ^ c) & b */
+	xor	T2, tmp0 /* T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) */
+	mov	tmp0, a_64 /* tmp = a */
+	RORQ	tmp0, 5 /* 39 ; tmp = a ror 5 */
+	xor	tmp0, a_64 /* tmp = (a ror 5) ^ a */
+	add	d_64, T1 /* e(next_state) = d + T1 */
+	RORQ	tmp0, 6 /* 34 ; tmp = ((a ror 5) ^ a) ror 6 */
+	xor	tmp0, a_64 /* tmp = (((a ror 5) ^ a) ror 6) ^ a */
+	lea	h_64, [T1 + T2] /* a(next_state) = T1 + Maj(a,b,c) */
+	RORQ	tmp0, 28 /* 28 ; tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) */
+	add	h_64, tmp0 /* a(next_state) = T1 + Maj(a,b,c) S0(a) */
+	RotateState
+.endm
+
+.macro SHA512_2Sched_2Round_avx t
+/* ; Compute rounds %%t-2 and %%t-1
+   ; Compute message schedule QWORDS %%t and %%t+1
+
+   ; Two rounds are computed based on the values for K[t-2]+W[t-2] and
+   ; K[t-1]+W[t-1] which were previously stored at WK_2 by the message
+   ; scheduler.
+   ; The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)].
+   ; They are then added to their respective SHA512 constants at
+   ; [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)]
+   ; For brevity, the comments following vectored instructions only refer to
+   ; the first of a pair of QWORDS.
+   ; Eg. XMM4=W[t-2] really means XMM4={W[t-2]|W[t-1]}
+   ; The computation of the message schedule and the rounds are tightly
+   ; stitched to take advantage of instruction-level parallelism.
+   ; For clarity, integer instructions (for the rounds calculation) are indented
+   ; by one tab. Vectored instructions (for the message scheduler) are indented
+   ; by two tabs. */
+
+		vmovdqa	xmm4, [W_t(\t-2)] /* XMM4 = W[t-2] */
+		vmovdqu	xmm5, [W_t(\t-15)] /* XMM5 = W[t-15] */
+	mov	T1, f_64
+		vpsrlq	xmm0, xmm4, 61 /* XMM0 = W[t-2]>>61 */
+	mov	tmp0, e_64
+		vpsrlq	xmm6, xmm5, 1 /* XMM6 = W[t-15]>>1 */
+	xor	T1, g_64
+	RORQ	tmp0, 23 /* 41 */
+		vpsrlq	xmm1, xmm4, 19 /* XMM1 = W[t-2]>>19 */
+	and	T1, e_64
+	xor	tmp0, e_64
+		vpxor	xmm0, xmm0, xmm1 /* XMM0 = W[t-2]>>61 ^ W[t-2]>>19 */
+	xor	T1, g_64
+	add	T1, [WK_2(\t)];
+		vpsrlq	xmm7, xmm5, 8 /* XMM7 = W[t-15]>>8 */
+	RORQ	tmp0, 4 /* 18 */
+		vpsrlq	xmm2, xmm4, 6 /* XMM2 = W[t-2]>>6 */
+	xor	tmp0, e_64
+	mov	T2, a_64
+	add	T1, h_64
+		vpxor	xmm6, xmm6, xmm7 /* XMM6 = W[t-15]>>1 ^ W[t-15]>>8 */
+	RORQ	tmp0, 14 /* 14 */
+	add	T1, tmp0
+		vpsrlq	xmm8, xmm5, 7 /* XMM8 = W[t-15]>>7 */
+	mov	tmp0, a_64
+	xor	T2, c_64
+		vpsllq	xmm3, xmm4, (64-61) /* XMM3 = W[t-2]<<3 */
+	and	tmp0, c_64
+	and	T2, b_64
+		vpxor	xmm2, xmm2, xmm3 /* XMM2 = W[t-2]>>6 ^ W[t-2]<<3 */
+	xor	T2, tmp0
+	mov	tmp0, a_64
+		vpsllq	xmm9, xmm5, (64-1) /* XMM9 = W[t-15]<<63 */
+	RORQ	tmp0, 5 /* 39 */
+		vpxor	xmm8, xmm8, xmm9 /* XMM8 = W[t-15]>>7 ^ W[t-15]<<63 */
+	xor	tmp0, a_64
+	add	d_64, T1
+	RORQ	tmp0, 6 /* 34 */
+	xor	tmp0, a_64
+		vpxor	xmm6, xmm6, xmm8 /* XMM6 = W[t-15]>>1 ^ W[t-15]>>8 ^ W[t-15]>>7 ^ W[t-15]<<63 */
+	lea	h_64, [T1 + T2]
+	RORQ	tmp0, 28 /* 28 */
+		vpsllq	xmm4, xmm4, (64-19) /* XMM4 = W[t-2]<<25 */
+	add	h_64, tmp0
+	RotateState
+		vpxor	xmm0, xmm0, xmm4 /* XMM0 = W[t-2]>>61 ^ W[t-2]>>19 ^ W[t-2]<<25 */
+	mov	T1, f_64
+		vpxor	xmm0, xmm0, xmm2 /* XMM0 = s1(W[t-2]) */
+	mov	tmp0, e_64
+	xor	T1, g_64
+		vpaddq	xmm0, xmm0, [W_t(\t-16)] /* XMM0 = s1(W[t-2]) + W[t-16] */
+		vmovdqu	xmm1, [W_t(\t- 7)] /* XMM1 = W[t-7] */
+	RORQ	tmp0, 23 /* 41 */
+	and	T1, e_64
+	xor	tmp0, e_64
+	xor	T1, g_64
+		vpsllq	xmm5, xmm5, (64-8) /* XMM5 = W[t-15]<<56 */
+	add	T1, [WK_2(\t+1)]
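+		/* AVX has no 64-bit vector rotate, so the vpsrlq/vpsllq/vpxor
+		 * groups in this macro emulate the rotations of the schedule
+		 * sigma functions:
+		 *   s0(x) = (x ror 1)  ^ (x ror 8)  ^ (x >> 7)
+		 *   s1(x) = (x ror 19) ^ (x ror 61) ^ (x >> 6)
+		 * each rotate being built from a right shift and the
+		 * complementary left shift. */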
vpxor xmm6, xmm6, xmm5 /* XMM6 = s0(W[t-15]) */ + RORQ tmp0, 4 /* 18 */ + vpaddq xmm0, xmm0, xmm6 /* XMM0 = s1(W[t-2]) + W[t-16] + s0(W[t-15]) */ + xor tmp0, e_64 + vpaddq xmm0, xmm0, xmm1 /* XMM0 = W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] */ + mov T2, a_64 + add T1, h_64 + RORQ tmp0, 14 /* 14 */ + add T1, tmp0 + vmovdqa [W_t(\t)], xmm0 /* Store W[t] */ + vpaddq xmm0, xmm0, [K_t(t)] /* Compute W[t]+K[t] */ + vmovdqa [WK_2(t)], xmm0 /* Store W[t]+K[t] for next rounds */ + mov tmp0, a_64 + xor T2, c_64 + and tmp0, c_64 + and T2, b_64 + xor T2, tmp0 + mov tmp0, a_64 + RORQ tmp0, 5 /* 39 */ + xor tmp0, a_64 + add d_64, T1 + RORQ tmp0, 6 /* 34 */ + xor tmp0, a_64 + lea h_64, [T1 + T2] + RORQ tmp0, 28 /* 28 */ + add h_64, tmp0 + RotateState +.endm + +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; void sha512_avx(const void* M, void* D, uint64_t L); +; Purpose: Updates the SHA512 digest stored at D with the message stored in M. +; The size of the message pointed to by M must be an integer multiple of SHA512 +; message blocks. +; L is the message length in SHA512 blocks +*/ +.globl _gcry_sha512_transform_amd64_avx +ELF(.type _gcry_sha512_transform_amd64_avx,@function;) +.align 16 +_gcry_sha512_transform_amd64_avx: + xor eax, eax + + cmp msglen, 0 + je .Lnowork + + vzeroupper + + /* Allocate Stack Space */ + sub rsp, frame_size + + /* Save GPRs */ + mov [rsp + frame_GPRSAVE + 8 * 0], rbx + mov [rsp + frame_GPRSAVE + 8 * 1], r12 + mov [rsp + frame_GPRSAVE + 8 * 2], r13 + mov [rsp + frame_GPRSAVE + 8 * 3], r14 + mov [rsp + frame_GPRSAVE + 8 * 4], r15 + +.Lupdateblock: + + /* Load state variables */ + mov a_64, [DIGEST(0)] + mov b_64, [DIGEST(1)] + mov c_64, [DIGEST(2)] + mov d_64, [DIGEST(3)] + mov e_64, [DIGEST(4)] + mov f_64, [DIGEST(5)] + mov g_64, [DIGEST(6)] + mov h_64, [DIGEST(7)] + + t = 0 + .rept 80/2 + 1 + /* (80 rounds) / (2 rounds/iteration) + (1 iteration) */ + /* +1 iteration because the scheduler leads hashing by 1 iteration */ + .if t < 2 + /* BSWAP 2 QWORDS */ + vmovdqa xmm1, [.LXMM_QWORD_BSWAP ADD_RIP] + vmovdqu xmm0, [MSG(t)] + vpshufb xmm0, xmm0, xmm1 /* BSWAP */ + vmovdqa [W_t(t)], xmm0 /* Store Scheduled Pair */ + vpaddq xmm0, xmm0, [K_t(t)] /* Compute W[t]+K[t] */ + vmovdqa [WK_2(t)], xmm0 /* Store into WK for rounds */ + .elseif t < 16 + /* BSWAP 2 QWORDS, Compute 2 Rounds */ + vmovdqu xmm0, [MSG(t)] + vpshufb xmm0, xmm0, xmm1 /* BSWAP */ + SHA512_Round (t - 2) /* Round t-2 */ + vmovdqa [W_t(t)], xmm0 /* Store Scheduled Pair */ + vpaddq xmm0, xmm0, [K_t(t)] /* Compute W[t]+K[t] */ + SHA512_Round (t - 1) /* Round t-1 */ + vmovdqa [WK_2(t)], xmm0 /* W[t]+K[t] into WK */ + .elseif t < 79 + /* Schedule 2 QWORDS; Compute 2 Rounds */ + SHA512_2Sched_2Round_avx t + .else + /* Compute 2 Rounds */ + SHA512_Round (t - 2) + SHA512_Round (t - 1) + .endif + t = ((t)+2) + .endr + + /* Update digest */ + add [DIGEST(0)], a_64 + add [DIGEST(1)], b_64 + add [DIGEST(2)], c_64 + add [DIGEST(3)], d_64 + add [DIGEST(4)], e_64 + add [DIGEST(5)], f_64 + add [DIGEST(6)], g_64 + add [DIGEST(7)], h_64 + + /* Advance to next message block */ + add msg, 16*8 + dec msglen + jnz .Lupdateblock + + /* Restore GPRs */ + mov rbx, [rsp + frame_GPRSAVE + 8 * 0] + mov r12, [rsp + frame_GPRSAVE + 8 * 1] + mov r13, [rsp + frame_GPRSAVE + 8 * 2] + mov r14, [rsp + frame_GPRSAVE + 8 * 3] + mov r15, [rsp + frame_GPRSAVE + 8 * 4] + + /* Restore Stack Pointer */ + add rsp, frame_size + + vzeroall + + /* Return stack burn depth */ + mov rax, frame_size + +.Lnowork: + ret + +/* 
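+A sketch of how the C dispatcher is expected to call the routine
+above, mirroring the transform() pattern shown for SHA-256 earlier
+in this patch (the SHA512_CONTEXT field names hd->use_avx and
+hd->state.h0 are assumptions here, as is the 4 * sizeof(void*)
+call-overhead term):
+
+  if (hd->use_avx)
+    return _gcry_sha512_transform_amd64_avx (data, &hd->state.h0, nblks)
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+
+The frame_size value returned in rax is the stack burn depth that the
+caller passes to _gcry_burn_stack to wipe key-dependent data.
+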
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Binary Data +*/ + +.align 16 + +/* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */ +.LXMM_QWORD_BSWAP: + .octa 0x08090a0b0c0d0e0f0001020304050607 + +/* K[t] used in SHA512 hashing */ +.LK512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha512-avx2-bmi2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha512-avx2-bmi2-amd64.S new file mode 100644 index 0000000..05bef64 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha512-avx2-bmi2-amd64.S @@ -0,0 +1,792 @@ +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright (c) 2012, Intel Corporation +; +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; +; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY +; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +; PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 1 block at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(USE_SHA512)
+
+#ifdef __PIC__
+# define ADD_RIP +rip
+#else
+# define ADD_RIP
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+Y_0 = ymm4
+Y_1 = ymm5
+Y_2 = ymm6
+Y_3 = ymm7
+
+YTMP0 = ymm0
+YTMP1 = ymm1
+YTMP2 = ymm2
+YTMP3 = ymm3
+YTMP4 = ymm8
+XFER = YTMP0
+
+BYTE_FLIP_MASK = ymm9
+
+INP = rdi /* 1st arg */
+CTX = rsi /* 2nd arg */
+NUM_BLKS = rdx /* 3rd arg */
+c = rcx
+d = r8
+e = rdx
+y3 = rdi
+
+TBL = rbp
+
+a = rax
+b = rbx
+
+f = r9
+g = r10
+h = r11
+old_h = r11
+
+T1 = r12
+y0 = r13
+y1 = r14
+y2 = r15
+
+y4 = r12
+
+/* Local variables (stack frame) */
+#define frame_XFER 0
+#define frame_XFER_size (4*8)
+#define frame_SRND (frame_XFER + frame_XFER_size)
+#define frame_SRND_size (1*8)
+#define frame_INP (frame_SRND + frame_SRND_size)
+#define frame_INP_size (1*8)
+#define frame_INPEND (frame_INP + frame_INP_size)
+#define frame_INPEND_size (1*8)
+#define frame_RSPSAVE (frame_INPEND + frame_INPEND_size)
+#define frame_RSPSAVE_size (1*8)
+#define frame_GPRSAVE (frame_RSPSAVE + frame_RSPSAVE_size)
+#define frame_GPRSAVE_size (6*8)
+#define frame_size (frame_GPRSAVE + frame_GPRSAVE_size)
+
+#define	VMOVDQ vmovdqu /*; assume buffers not aligned */
+
+/* addm [mem], reg */
+/* Add reg to mem using reg-mem add and store */
+.macro addm p1 p2
+	add	\p2, \p1
+	mov	\p1, \p2
+.endm
+
+
+/* COPY_YMM_AND_BSWAP ymm, [mem], byte_flip_mask */
+/* Load ymm with mem and byte swap each qword */
+.macro COPY_YMM_AND_BSWAP p1 p2 p3
+	VMOVDQ \p1, \p2
+	vpshufb \p1, \p1, \p3
+.endm
+/* rotate_Ys */
+/* Rotate values of symbols Y0...Y3 */
+.macro rotate_Ys
+	__Y_ = Y_0
+	Y_0 = Y_1
+	Y_1 = Y_2
+	Y_2 = Y_3
+	Y_3 = __Y_
+.endm
+
+/* RotateState */
+.macro RotateState
+	/* Rotate symbols a..h right */
+	old_h = h
+	__TMP_ = h
+	h = g
+	g = f
+	f = e
+	e = d
+	d = c
+	c = b
+	b = a
+	a = __TMP_
+.endm
+
+/* %macro MY_VPALIGNR YDST, YSRC1, YSRC2, RVAL */
+/* YDST = {YSRC1, YSRC2} >> RVAL*8 */
+.macro MY_VPALIGNR YDST, YSRC1, YSRC2, RVAL
+	vperm2f128 \YDST, \YSRC1, \YSRC2, 0x3 /* YDST = {YS1_LO, YS2_HI} */
+	vpalignr   \YDST, \YDST, \YSRC2, \RVAL /* YDST = {YDS1, YS2} >> RVAL*8 */
+.endm
+
+.macro FOUR_ROUNDS_AND_SCHED
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
+
+	/* Extract w[t-7] */
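+	/* Reference for the schedule arithmetic folded into the four
+	 * rounds of this macro (standard SHA-512 message expansion; the
+	 * MY_VPALIGNR below performs the w[t-7] extraction announced
+	 * above):
+	 *   w[t]  = w[t-16] + s0(w[t-15]) + w[t-7] + s1(w[t-2])
+	 *   s0(x) = (x ror 1)  ^ (x ror 8)  ^ (x >> 7)
+	 *   s1(x) = (x ror 19) ^ (x ror 61) ^ (x >> 6)
+	 * Each invocation computes w[16..19] for the next iteration
+	 * while retiring four rounds. */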
MY_VPALIGNR YTMP0, Y_3, Y_2, 8 /* YTMP0 = W[-7] */ + /* Calculate w[t-16] + w[t-7] */ + vpaddq YTMP0, YTMP0, Y_0 /* YTMP0 = W[-7] + W[-16] */ + /* Extract w[t-15] */ + MY_VPALIGNR YTMP1, Y_1, Y_0, 8 /* YTMP1 = W[-15] */ + + /* Calculate sigma0 */ + + /* Calculate w[t-15] ror 1 */ + vpsrlq YTMP2, YTMP1, 1 + vpsllq YTMP3, YTMP1, (64-1) + vpor YTMP3, YTMP3, YTMP2 /* YTMP3 = W[-15] ror 1 */ + /* Calculate w[t-15] shr 7 */ + vpsrlq YTMP4, YTMP1, 7 /* YTMP4 = W[-15] >> 7 */ + + mov y3, a /* y3 = a ; MAJA */ + rorx y0, e, 41 /* y0 = e >> 41 ; S1A */ + rorx y1, e, 18 /* y1 = e >> 18 ; S1B */ + + add h, [rsp+frame_XFER+0*8] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + mov y2, f /* y2 = f ; CH */ + rorx T1, a, 34 /* T1 = a >> 34 ; S0B */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ; S1 */ + xor y2, g /* y2 = f^g ; CH */ + rorx y1, e, 14 /* y1 = (e >> 14) ; S1 */ + + and y2, e /* y2 = (f^g)&e ; CH */ + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ^ (e>>14) ; S1 */ + rorx y1, a, 39 /* y1 = a >> 39 ; S0A */ + add d, h /* d = k + w + h + d ; -- */ + + and y3, b /* y3 = (a|c)&b ; MAJA */ + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ; S0 */ + rorx T1, a, 28 /* T1 = (a >> 28) ; S0 */ + + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ^ (a>>28) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and T1, c /* T1 = a&c ; MAJB */ + + add y2, y0 /* y2 = S1 + CH ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + add h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + add h, y3 /* h = t1 + S0 + MAJ ; -- */ + +RotateState + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + +/*;;;;;;;;;;;;;;;;;;;;;;;;; */ + + /* Calculate w[t-15] ror 8 */ + vpsrlq YTMP2, YTMP1, 8 + vpsllq YTMP1, YTMP1, (64-8) + vpor YTMP1, YTMP1, YTMP2 /* YTMP1 = W[-15] ror 8 */ + /* XOR the three components */ + vpxor YTMP3, YTMP3, YTMP4 /* YTMP3 = W[-15] ror 1 ^ W[-15] >> 7 */ + vpxor YTMP1, YTMP3, YTMP1 /* YTMP1 = s0 */ + + + /* Add three components, w[t-16], w[t-7] and sigma0 */ + vpaddq YTMP0, YTMP0, YTMP1 /* YTMP0 = W[-16] + W[-7] + s0 */ + /* Move to appropriate lanes for calculating w[16] and w[17] */ + vperm2f128 Y_0, YTMP0, YTMP0, 0x0 /* Y_0 = W[-16] + W[-7] + s0 {BABA} */ + /* Move to appropriate lanes for calculating w[18] and w[19] */ + vpand YTMP0, YTMP0, [.LMASK_YMM_LO ADD_RIP] /* YTMP0 = W[-16] + W[-7] + s0 {DC00} */ + + /* Calculate w[16] and w[17] in both 128 bit lanes */ + + /* Calculate sigma1 for w[16] and w[17] on both 128 bit lanes */ + vperm2f128 YTMP2, Y_3, Y_3, 0x11 /* YTMP2 = W[-2] {BABA} */ + vpsrlq YTMP4, YTMP2, 6 /* YTMP4 = W[-2] >> 6 {BABA} */ + + + mov y3, a /* y3 = a ; MAJA */ + rorx y0, e, 41 /* y0 = e >> 41 ; S1A */ + rorx y1, e, 18 /* y1 = e >> 18 ; S1B */ + add h, [rsp+frame_XFER+1*8] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + + mov y2, f /* y2 = f ; CH */ + rorx T1, a, 34 /* T1 = a >> 34 ; S0B */ + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ; S1 */ + xor y2, g /* y2 = f^g ; CH */ + + + rorx y1, e, 14 /* y1 = (e >> 14) ; S1 */ + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ^ (e>>14) ; S1 */ + rorx y1, a, 39 /* y1 = a >> 39 ; S0A */ + and y2, e /* y2 = (f^g)&e ; CH */ + add d, h /* d = k + w + h + d ; -- */ + + and y3, b /* y3 = (a|c)&b ; MAJA */ + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ; S0 */ + + rorx T1, a, 28 /* T1 = (a >> 28) ; S0 */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ^ 
(a>>28) ; S0 */
+	mov	T1, a /* T1 = a ; MAJB */
+	and	T1, c /* T1 = a&c ; MAJB */
+	add	y2, y0 /* y2 = S1 + CH ; -- */
+
+	or	y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */
+	add	h, y1 /* h = k + w + h + S0 ; -- */
+
+	add	d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */
+	add	h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */
+	add	h, y3 /* h = t1 + S0 + MAJ ; -- */
+
+RotateState
+
+
+
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;; */
+
+
+		vpsrlq	YTMP3, YTMP2, 19 /* YTMP3 = W[-2] >> 19 {BABA} */
+		vpsllq	YTMP1, YTMP2, (64-19) /* YTMP1 = W[-2] << 19 {BABA} */
+		vpor	YTMP3, YTMP3, YTMP1 /* YTMP3 = W[-2] ror 19 {BABA} */
+		vpxor	YTMP4, YTMP4, YTMP3 /* YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {BABA} */
+		vpsrlq	YTMP3, YTMP2, 61 /* YTMP3 = W[-2] >> 61 {BABA} */
+		vpsllq	YTMP1, YTMP2, (64-61) /* YTMP1 = W[-2] << 61 {BABA} */
+		vpor	YTMP3, YTMP3, YTMP1 /* YTMP3 = W[-2] ror 61 {BABA} */
+		vpxor	YTMP4, YTMP4, YTMP3 /* YTMP4 = s1 = (W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) {BABA} */
+
+		/* Add sigma1 to the other components to get w[16] and w[17] */
+		vpaddq	Y_0, Y_0, YTMP4 /* Y_0 = {W[1], W[0], W[1], W[0]} */
+
+		/* Calculate sigma1 for w[18] and w[19] for upper 128 bit lane */
+		vpsrlq	YTMP4, Y_0, 6 /* YTMP4 = W[-2] >> 6 {DC--} */
+
+	mov	y3, a /* y3 = a ; MAJA */
+	rorx	y0, e, 41 /* y0 = e >> 41 ; S1A */
+	add	h, [rsp+frame_XFER+2*8] /* h = k + w + h ; -- */
+
+	rorx	y1, e, 18 /* y1 = e >> 18 ; S1B */
+	or	y3, c /* y3 = a|c ; MAJA */
+	mov	y2, f /* y2 = f ; CH */
+	xor	y2, g /* y2 = f^g ; CH */
+
+	rorx	T1, a, 34 /* T1 = a >> 34 ; S0B */
+	xor	y0, y1 /* y0 = (e>>41) ^ (e>>18) ; S1 */
+	and	y2, e /* y2 = (f^g)&e ; CH */
+
+	rorx	y1, e, 14 /* y1 = (e >> 14) ; S1 */
+	add	d, h /* d = k + w + h + d ; -- */
+	and	y3, b /* y3 = (a|c)&b ; MAJA */
+
+	xor	y0, y1 /* y0 = (e>>41) ^ (e>>18) ^ (e>>14) ; S1 */
+	rorx	y1, a, 39 /* y1 = a >> 39 ; S0A */
+	xor	y2, g /* y2 = CH = ((f^g)&e)^g ; CH */
+
+	xor	y1, T1 /* y1 = (a>>39) ^ (a>>34) ; S0 */
+	rorx	T1, a, 28 /* T1 = (a >> 28) ; S0 */
+
+	xor	y1, T1 /* y1 = (a>>39) ^ (a>>34) ^ (a>>28) ; S0 */
+	mov	T1, a /* T1 = a ; MAJB */
+	and	T1, c /* T1 = a&c ; MAJB */
+	add	y2, y0 /* y2 = S1 + CH ; -- */
+
+	or	y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */
+	add	h, y1 /* h = k + w + h + S0 ; -- */
+	add	d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */
+	add	h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */
+
+	add	h, y3 /* h = t1 + S0 + MAJ ; -- */
+
+RotateState
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;; */
+
+		vpsrlq	YTMP3, Y_0, 19 /* YTMP3 = W[-2] >> 19 {DC--} */
+		vpsllq	YTMP1, Y_0, (64-19) /* YTMP1 = W[-2] << 19 {DC--} */
+		vpor	YTMP3, YTMP3, YTMP1 /* YTMP3 = W[-2] ror 19 {DC--} */
+		vpxor	YTMP4, YTMP4, YTMP3 /* YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {DC--} */
+		vpsrlq	YTMP3, Y_0, 61 /* YTMP3 = W[-2] >> 61 {DC--} */
+		vpsllq	YTMP1, Y_0, (64-61) /* YTMP1 = W[-2] << 61 {DC--} */
+		vpor	YTMP3, YTMP3, YTMP1 /* YTMP3 = W[-2] ror 61 {DC--} */
+		vpxor	YTMP4, YTMP4, YTMP3 /* YTMP4 = s1 = (W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) {DC--} */
+
+		/* Add the sigma0 + w[t-7] + w[t-16] for w[18] and w[19] to newly calculated sigma1 to get w[18] and w[19] */
+		vpaddq	YTMP2, YTMP0, YTMP4 /* YTMP2 = {W[3], W[2], --, --} */
+
+		/* Form w[19], w[18], w[17], w[16] */
+		vpblendd Y_0, Y_0, YTMP2, 0xF0 /* Y_0 = {W[3], W[2], W[1], W[0]} */
+/*		vperm2f128 Y_0, Y_0, YTMP2, 0x30 */
+
+	mov	y3, a /* y3 = a ; MAJA */
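+	/* rorx (BMI2) writes its result without reading or modifying
+	 * flags, so these rotations interleave freely with the
+	 * flag-writing add/and/or instructions around them. */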
+ rorx y0, e, 41 /* y0 = e >> 41 ; S1A */ + rorx y1, e, 18 /* y1 = e >> 18 ; S1B */ + add h, [rsp+frame_XFER+3*8] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + + mov y2, f /* y2 = f ; CH */ + rorx T1, a, 34 /* T1 = a >> 34 ; S0B */ + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ; S1 */ + xor y2, g /* y2 = f^g ; CH */ + + + rorx y1, e, 14 /* y1 = (e >> 14) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + add d, h /* d = k + w + h + d ; -- */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ^ (e>>14) ; S1 */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + + rorx y1, a, 39 /* y1 = a >> 39 ; S0A */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ; S0 */ + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + rorx T1, a, 28 /* T1 = (a >> 28) ; S0 */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ^ (a>>28) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and T1, c /* T1 = a&c ; MAJB */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + + add h, y1 /* h = k + w + h + S0 ; -- */ + add h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + add h, y3 /* h = t1 + S0 + MAJ ; -- */ + +RotateState + +rotate_Ys +.endm + +.macro DO_4ROUNDS + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + mov y2, f /* y2 = f ; CH */ + rorx y0, e, 41 /* y0 = e >> 41 ; S1A */ + rorx y1, e, 18 /* y1 = e >> 18 ; S1B */ + xor y2, g /* y2 = f^g ; CH */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ; S1 */ + rorx y1, e, 14 /* y1 = (e >> 14) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ^ (e>>14) ; S1 */ + rorx T1, a, 34 /* T1 = a >> 34 ; S0B */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + rorx y1, a, 39 /* y1 = a >> 39 ; S0A */ + mov y3, a /* y3 = a ; MAJA */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ; S0 */ + rorx T1, a, 28 /* T1 = (a >> 28) ; S0 */ + add h, [rsp + frame_XFER + 8*0] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ^ (a>>28) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + and T1, c /* T1 = a&c ; MAJB */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + + add d, h /* d = k + w + h + d ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + + /*add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + + /*add h, y3 ; h = t1 + S0 + MAJ ; -- */ + + RotateState + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + add old_h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + mov y2, f /* y2 = f ; CH */ + rorx y0, e, 41 /* y0 = e >> 41 ; S1A */ + rorx y1, e, 18 /* y1 = e >> 18 ; S1B */ + xor y2, g /* y2 = f^g ; CH */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ; S1 */ + rorx y1, e, 14 /* y1 = (e >> 14) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + add old_h, y3 /* h = t1 + S0 + MAJ ; -- */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ^ (e>>14) ; S1 */ + rorx T1, a, 34 /* T1 = a >> 34 ; S0B */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + rorx y1, a, 39 /* y1 = a >> 39 ; S0A */ + mov y3, a /* y3 = a ; MAJA */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ; S0 */ + rorx T1, a, 28 /* T1 = (a >> 28) ; S0 */ + add h, [rsp + frame_XFER + 8*1] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ^ (a>>28) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + and T1, c /* T1 = a&c ; MAJB */ 
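+	/* The final two additions into h for each round of DO_4ROUNDS
+	 * are deferred: they are issued near the top of the next round
+	 * against old_h (see the commented-out "add h, y2 / add h, y3"
+	 * lines), hiding their latency behind independent work. */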
+ add y2, y0 /* y2 = S1 + CH ; -- */ + + + add d, h /* d = k + w + h + d ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + + /*add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + + /*add h, y3 ; h = t1 + S0 + MAJ ; -- */ + + RotateState + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + add old_h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + mov y2, f /* y2 = f ; CH */ + rorx y0, e, 41 /* y0 = e >> 41 ; S1A */ + rorx y1, e, 18 /* y1 = e >> 18 ; S1B */ + xor y2, g /* y2 = f^g ; CH */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ; S1 */ + rorx y1, e, 14 /* y1 = (e >> 14) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + add old_h, y3 /* h = t1 + S0 + MAJ ; -- */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ^ (e>>14) ; S1 */ + rorx T1, a, 34 /* T1 = a >> 34 ; S0B */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + rorx y1, a, 39 /* y1 = a >> 39 ; S0A */ + mov y3, a /* y3 = a ; MAJA */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ; S0 */ + rorx T1, a, 28 /* T1 = (a >> 28) ; S0 */ + add h, [rsp + frame_XFER + 8*2] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ^ (a>>28) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + and T1, c /* T1 = a&c ; MAJB */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + + add d, h /* d = k + w + h + d ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + + /*add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + + /*add h, y3 ; h = t1 + S0 + MAJ ; -- */ + + RotateState + +/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ + + add old_h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + mov y2, f /* y2 = f ; CH */ + rorx y0, e, 41 /* y0 = e >> 41 ; S1A */ + rorx y1, e, 18 /* y1 = e >> 18 ; S1B */ + xor y2, g /* y2 = f^g ; CH */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ; S1 */ + rorx y1, e, 14 /* y1 = (e >> 14) ; S1 */ + and y2, e /* y2 = (f^g)&e ; CH */ + add old_h, y3 /* h = t1 + S0 + MAJ ; -- */ + + xor y0, y1 /* y0 = (e>>41) ^ (e>>18) ^ (e>>14) ; S1 */ + rorx T1, a, 34 /* T1 = a >> 34 ; S0B */ + xor y2, g /* y2 = CH = ((f^g)&e)^g ; CH */ + rorx y1, a, 39 /* y1 = a >> 39 ; S0A */ + mov y3, a /* y3 = a ; MAJA */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ; S0 */ + rorx T1, a, 28 /* T1 = (a >> 28) ; S0 */ + add h, [rsp + frame_XFER + 8*3] /* h = k + w + h ; -- */ + or y3, c /* y3 = a|c ; MAJA */ + + xor y1, T1 /* y1 = (a>>39) ^ (a>>34) ^ (a>>28) ; S0 */ + mov T1, a /* T1 = a ; MAJB */ + and y3, b /* y3 = (a|c)&b ; MAJA */ + and T1, c /* T1 = a&c ; MAJB */ + add y2, y0 /* y2 = S1 + CH ; -- */ + + + add d, h /* d = k + w + h + d ; -- */ + or y3, T1 /* y3 = MAJ = (a|c)&b)|(a&c) ; MAJ */ + add h, y1 /* h = k + w + h + S0 ; -- */ + + add d, y2 /* d = k + w + h + d + S1 + CH = d + t1 ; -- */ + + + add h, y2 /* h = k + w + h + S0 + S1 + CH = t1 + S0 ; -- */ + + add h, y3 /* h = t1 + S0 + MAJ ; -- */ + + RotateState + +.endm + +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; void sha512_rorx(const void* M, void* D, uint64_t L); +; Purpose: Updates the SHA512 digest stored at D with the message stored in M. 
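+; (The entry point below is exported as _gcry_sha512_transform_amd64_avx2;
+;  "sha512_rorx" is the prototype name kept from the original Intel code,
+;  after the BMI2 rotate instruction used in the round code.  A sketch of
+;  the call as made by the C dispatcher in sha512.c, where nblks is L:
+;      burn = _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks);
+;  The function returns its stack-burn depth in eax.)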
+; The size of the message pointed to by M must be an integer multiple of SHA512
+; message blocks.
+; L is the message length in SHA512 blocks
+*/
+.globl _gcry_sha512_transform_amd64_avx2
+ELF(.type _gcry_sha512_transform_amd64_avx2,@function;)
+.align 16
+_gcry_sha512_transform_amd64_avx2:
+	xor eax, eax
+
+	cmp rdx, 0
+	je .Lnowork
+
+	vzeroupper
+
+	/* Allocate Stack Space */
+	mov rax, rsp
+	sub rsp, frame_size
+	and rsp, ~(0x20 - 1)
+	mov [rsp + frame_RSPSAVE], rax
+
+	/* Save GPRs */
+	mov [rsp + frame_GPRSAVE + 8 * 0], rbp
+	mov [rsp + frame_GPRSAVE + 8 * 1], rbx
+	mov [rsp + frame_GPRSAVE + 8 * 2], r12
+	mov [rsp + frame_GPRSAVE + 8 * 3], r13
+	mov [rsp + frame_GPRSAVE + 8 * 4], r14
+	mov [rsp + frame_GPRSAVE + 8 * 5], r15
+
+	vpblendd xmm0, xmm0, xmm1, 0xf0
+	vpblendd ymm0, ymm0, ymm1, 0xf0
+
+	shl NUM_BLKS, 7 /* convert to bytes */
+	jz .Ldone_hash
+	add NUM_BLKS, INP /* pointer to end of data */
+	mov [rsp + frame_INPEND], NUM_BLKS
+
+	/*; load initial digest */
+	mov a,[8*0 + CTX]
+	mov b,[8*1 + CTX]
+	mov c,[8*2 + CTX]
+	mov d,[8*3 + CTX]
+	mov e,[8*4 + CTX]
+	mov f,[8*5 + CTX]
+	mov g,[8*6 + CTX]
+	mov h,[8*7 + CTX]
+
+	vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+
+.Loop0:
+	lea TBL,[.LK512 ADD_RIP]
+
+	/*; byte swap first 16 qwords */
+	COPY_YMM_AND_BSWAP Y_0, [INP + 0*32], BYTE_FLIP_MASK
+	COPY_YMM_AND_BSWAP Y_1, [INP + 1*32], BYTE_FLIP_MASK
+	COPY_YMM_AND_BSWAP Y_2, [INP + 2*32], BYTE_FLIP_MASK
+	COPY_YMM_AND_BSWAP Y_3, [INP + 3*32], BYTE_FLIP_MASK
+
+	mov [rsp + frame_INP], INP
+
+	/*; schedule 64 input qwords, by doing 4 iterations of 16 rounds */
+	movq [rsp + frame_SRND],4
+
+.align 16
+.Loop1:
+	vpaddq XFER, Y_0, [TBL + 0*32]
+	vmovdqa [rsp + frame_XFER], XFER
+	FOUR_ROUNDS_AND_SCHED
+
+	vpaddq XFER, Y_0, [TBL + 1*32]
+	vmovdqa [rsp + frame_XFER], XFER
+	FOUR_ROUNDS_AND_SCHED
+
+	vpaddq XFER, Y_0, [TBL + 2*32]
+	vmovdqa [rsp + frame_XFER], XFER
+	FOUR_ROUNDS_AND_SCHED
+
+	vpaddq XFER, Y_0, [TBL + 3*32]
+	vmovdqa [rsp + frame_XFER], XFER
+	add TBL, 4*32
+	FOUR_ROUNDS_AND_SCHED
+
+	subq [rsp + frame_SRND], 1
+	jne .Loop1
+
+	movq [rsp + frame_SRND], 2
+.Loop2:
+	vpaddq XFER, Y_0, [TBL + 0*32]
+	vmovdqa [rsp + frame_XFER], XFER
+	DO_4ROUNDS
+	vpaddq XFER, Y_1, [TBL + 1*32]
+	vmovdqa [rsp + frame_XFER], XFER
+	add TBL, 2*32
+	DO_4ROUNDS
+
+	vmovdqa Y_0, Y_2
+	vmovdqa Y_1, Y_3
+
+	subq [rsp + frame_SRND], 1
+	jne .Loop2
+
+	addm [8*0 + CTX],a
+	addm [8*1 + CTX],b
+	addm [8*2 + CTX],c
+	addm [8*3 + CTX],d
+	addm [8*4 + CTX],e
+	addm [8*5 + CTX],f
+	addm [8*6 + CTX],g
+	addm [8*7 + CTX],h
+
+	mov INP, [rsp + frame_INP]
+	add INP, 128
+	cmp INP, [rsp + frame_INPEND]
+	jne .Loop0
+
+.Ldone_hash:
+
+	/* Restore GPRs */
+	mov rbp, [rsp + frame_GPRSAVE + 8 * 0]
+	mov rbx, [rsp + frame_GPRSAVE + 8 * 1]
+	mov r12, [rsp + frame_GPRSAVE + 8 * 2]
+	mov r13, [rsp + frame_GPRSAVE + 8 * 3]
+	mov r14, [rsp + frame_GPRSAVE + 8 * 4]
+	mov r15, [rsp + frame_GPRSAVE + 8 * 5]
+
+	/* Restore Stack Pointer */
+	mov rsp, [rsp + frame_RSPSAVE]
+
+	vzeroall
+
+	mov eax, frame_size + 31
+.Lnowork:
+	ret
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
+/*;; Binary Data */
+
+.align 64
+/* K[t] used in SHA512 hashing */
+.LK512:
+	.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+	.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+	.quad 0x3956c25bf348b538,0x59f111f1b605d019
+	.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+	.quad 0xd807aa98a3030242,0x12835b0145706fbe
+	.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+	.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+	.quad
0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.align 32 + +/* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */ +.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 + .octa 0x18191a1b1c1d1e1f1011121314151617 + +.LMASK_YMM_LO: .octa 0x00000000000000000000000000000000 + .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha512-ssse3-amd64.S b/libotr/libgcrypt-1.8.7/cipher/sha512-ssse3-amd64.S new file mode 100644 index 0000000..51193b3 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha512-ssse3-amd64.S @@ -0,0 +1,426 @@ +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright (c) 2012, Intel Corporation +; +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; +; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY +; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +; PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ *  by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Note: original implementation was named SHA512-SSE4. However, only SSSE3
+ *       is required.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA512)
+
+#ifdef __PIC__
+#  define ADD_RIP +rip
+#else
+#  define ADD_RIP
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+msg = rdi /* ARG1 */
+digest = rsi /* ARG2 */
+msglen = rdx /* ARG3 */
+T1 = rcx
+T2 = r8
+a_64 = r9
+b_64 = r10
+c_64 = r11
+d_64 = r12
+e_64 = r13
+f_64 = r14
+g_64 = r15
+h_64 = rbx
+tmp0 = rax
+
+/*
+; Local variables (stack frame)
+; Note: frame_size must be an odd multiple of 8 bytes to XMM align RSP
+*/
+frame_W = 0 /* Message Schedule */
+frame_W_size = (80 * 8)
+frame_WK = ((frame_W) + (frame_W_size)) /* W[t] + K[t] | W[t+1] + K[t+1] */
+frame_WK_size = (2 * 8)
+frame_GPRSAVE = ((frame_WK) + (frame_WK_size))
+frame_GPRSAVE_size = (5 * 8)
+frame_size = ((frame_GPRSAVE) + (frame_GPRSAVE_size))
+
+
+/* Useful QWORD "arrays" for simpler memory references */
+#define MSG(i) msg + 8*(i) /* Input message (arg1) */
+#define DIGEST(i) digest + 8*(i) /* Output Digest (arg2) */
+#define K_t(i) .LK512 + 8*(i) ADD_RIP /* SHA Constants (static mem) */
+#define W_t(i) rsp + frame_W + 8*(i) /* Message Schedule (stack frame) */
+#define WK_2(i) rsp + frame_WK + 8*((i) % 2) /* W[t]+K[t] (stack frame) */
+/* MSG, DIGEST, K_t, W_t are arrays */
+/* WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even */
+
+.macro RotateState
+	/* Rotate symbols a..h right */
+	__TMP = h_64
+	h_64 = g_64
+	g_64 = f_64
+	f_64 = e_64
+	e_64 = d_64
+	d_64 = c_64
+	c_64 = b_64
+	b_64 = a_64
+	a_64 = __TMP
+.endm
+
+.macro SHA512_Round t
+	/* Compute Round %%t */
+	mov T1, f_64 /* T1 = f */
+	mov tmp0, e_64 /* tmp = e */
+	xor T1, g_64 /* T1 = f ^ g */
+	ror tmp0, 23 /* 41 ; tmp = e ror 23 */
+	and T1, e_64 /* T1 = (f ^ g) & e */
+	xor tmp0, e_64 /* tmp = (e ror 23) ^ e */
+	xor T1, g_64 /* T1 = ((f ^ g) & e) ^ g = CH(e,f,g) */
+	add T1, [WK_2(\t)] /* W[t] + K[t] from message scheduler */
+	ror tmp0, 4 /* 18 ; tmp = ((e ror 23) ^ e) ror 4 */
+	xor tmp0, e_64 /* tmp = (((e ror 23) ^ e) ror 4) ^ e */
+	mov T2, a_64 /* T2 = a */
+	add T1, h_64 /* T1 = CH(e,f,g) + W[t] + K[t] + h */
+	ror tmp0, 14 /* 14 ; tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) */
+	add T1, tmp0 /* T1 = CH(e,f,g) + W[t] + K[t] + S1(e) */
+	mov tmp0, a_64 /* tmp = a */
+	xor T2, c_64 /* T2 = a ^ c */
+	and tmp0, c_64 /* tmp = a & c */
+	and T2, b_64 /* T2 = (a ^ c) & b */
+	xor T2, tmp0 /* T2 = ((a ^ c) & b) ^ (a & c)
= Maj(a,b,c) */ + mov tmp0, a_64 /* tmp = a */ + ror tmp0, 5 /* 39 ; tmp = a ror 5 */ + xor tmp0, a_64 /* tmp = (a ror 5) ^ a */ + add d_64, T1 /* e(next_state) = d + T1 */ + ror tmp0, 6 /* 34 ; tmp = ((a ror 5) ^ a) ror 6 */ + xor tmp0, a_64 /* tmp = (((a ror 5) ^ a) ror 6) ^ a */ + lea h_64, [T1 + T2] /* a(next_state) = T1 + Maj(a,b,c) */ + ror tmp0, 28 /* 28 ; tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) */ + add h_64, tmp0 /* a(next_state) = T1 + Maj(a,b,c) S0(a) */ + RotateState +.endm + +.macro SHA512_2Sched_2Round_sse t +/* ; Compute rounds %%t-2 and %%t-1 + ; Compute message schedule QWORDS %%t and %%t+1 + + ; Two rounds are computed based on the values for K[t-2]+W[t-2] and + ; K[t-1]+W[t-1] which were previously stored at WK_2 by the message + ; scheduler. + ; The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)]. + ; They are then added to their respective SHA512 constants at + ; [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] + ; For brievity, the comments following vectored instructions only refer to + ; the first of a pair of QWORDS. + ; Eg. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]} + ; The computation of the message schedule and the rounds are tightly + ; stitched to take advantage of instruction-level parallelism. + ; For clarity, integer instructions (for the rounds calculation) are indented + ; by one tab. Vectored instructions (for the message scheduler) are indented + ; by two tabs. */ + + mov T1, f_64 + movdqa xmm2, [W_t(\t-2)] /* XMM2 = W[t-2] */ + xor T1, g_64 + and T1, e_64 + movdqa xmm0, xmm2 /* XMM0 = W[t-2] */ + xor T1, g_64 + add T1, [WK_2(\t)] + movdqu xmm5, [W_t(\t-15)] /* XMM5 = W[t-15] */ + mov tmp0, e_64 + ror tmp0, 23 /* 41 */ + movdqa xmm3, xmm5 /* XMM3 = W[t-15] */ + xor tmp0, e_64 + ror tmp0, 4 /* 18 */ + psrlq xmm0, 61 - 19 /* XMM0 = W[t-2] >> 42 */ + xor tmp0, e_64 + ror tmp0, 14 /* 14 */ + psrlq xmm3, (8 - 7) /* XMM3 = W[t-15] >> 1 */ + add T1, tmp0 + add T1, h_64 + pxor xmm0, xmm2 /* XMM0 = (W[t-2] >> 42) ^ W[t-2] */ + mov T2, a_64 + xor T2, c_64 + pxor xmm3, xmm5 /* XMM3 = (W[t-15] >> 1) ^ W[t-15] */ + and T2, b_64 + mov tmp0, a_64 + psrlq xmm0, 19 - 6 /* XMM0 = ((W[t-2]>>42)^W[t-2])>>13 */ + and tmp0, c_64 + xor T2, tmp0 + psrlq xmm3, (7 - 1) /* XMM3 = ((W[t-15]>>1)^W[t-15])>>6 */ + mov tmp0, a_64 + ror tmp0, 5 /* 39 */ + pxor xmm0, xmm2 /* XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2] */ + xor tmp0, a_64 + ror tmp0, 6 /* 34 */ + pxor xmm3, xmm5 /* XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15] */ + xor tmp0, a_64 + ror tmp0, 28 /* 28 */ + psrlq xmm0, 6 /* XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6 */ + add T2, tmp0 + add d_64, T1 + psrlq xmm3, 1 /* XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]>>1 */ + lea h_64, [T1 + T2] + RotateState + movdqa xmm1, xmm2 /* XMM1 = W[t-2] */ + mov T1, f_64 + xor T1, g_64 + movdqa xmm4, xmm5 /* XMM4 = W[t-15] */ + and T1, e_64 + xor T1, g_64 + psllq xmm1, (64 - 19) - (64 - 61) /* XMM1 = W[t-2] << 42 */ + add T1, [WK_2(\t+1)] + mov tmp0, e_64 + psllq xmm4, (64 - 1) - (64 - 8) /* XMM4 = W[t-15] << 7 */ + ror tmp0, 23 /* 41 */ + xor tmp0, e_64 + pxor xmm1, xmm2 /* XMM1 = (W[t-2] << 42)^W[t-2] */ + ror tmp0, 4 /* 18 */ + xor tmp0, e_64 + pxor xmm4, xmm5 /* XMM4 = (W[t-15]<<7)^W[t-15] */ + ror tmp0, 14 /* 14 */ + add T1, tmp0 + psllq xmm1, (64 - 61) /* XMM1 = ((W[t-2] << 42)^W[t-2])<<3 */ + add T1, h_64 + mov T2, a_64 + psllq xmm4, (64 - 8) /* XMM4 = ((W[t-15]<<7)^W[t-15])<<56 */ + xor T2, c_64 + and T2, b_64 + pxor xmm0, xmm1 /* XMM0 = s1(W[t-2]) */ + mov tmp0, a_64 + and tmp0, c_64 + movdqu 
xmm1, [W_t(\t- 7)] /* XMM1 = W[t-7] */ + xor T2, tmp0 + pxor xmm3, xmm4 /* XMM3 = s0(W[t-15]) */ + mov tmp0, a_64 + paddq xmm0, xmm3 /* XMM0 = s1(W[t-2]) + s0(W[t-15]) */ + ror tmp0, 5 /* 39 */ + paddq xmm0, [W_t(\t-16)] /* XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16] */ + xor tmp0, a_64 + paddq xmm0, xmm1 /* XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] */ + ror tmp0, 6 /* 34 */ + movdqa [W_t(\t)], xmm0 /* Store scheduled qwords */ + xor tmp0, a_64 + paddq xmm0, [K_t(t)] /* Compute W[t]+K[t] */ + ror tmp0, 28 /* 28 */ + movdqa [WK_2(t)], xmm0 /* Store W[t]+K[t] for next rounds */ + add T2, tmp0 + add d_64, T1 + lea h_64, [T1 + T2] + RotateState +.endm + +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; void sha512_sse4(const void* M, void* D, uint64_t L); +; Purpose: Updates the SHA512 digest stored at D with the message stored in M. +; The size of the message pointed to by M must be an integer multiple of SHA512 +; message blocks. +; L is the message length in SHA512 blocks. +*/ +.globl _gcry_sha512_transform_amd64_ssse3 +ELF(.type _gcry_sha512_transform_amd64_ssse3,@function;) +.align 16 +_gcry_sha512_transform_amd64_ssse3: + xor eax, eax + + cmp msglen, 0 + je .Lnowork + + /* Allocate Stack Space */ + sub rsp, frame_size + + /* Save GPRs */ + mov [rsp + frame_GPRSAVE + 8 * 0], rbx + mov [rsp + frame_GPRSAVE + 8 * 1], r12 + mov [rsp + frame_GPRSAVE + 8 * 2], r13 + mov [rsp + frame_GPRSAVE + 8 * 3], r14 + mov [rsp + frame_GPRSAVE + 8 * 4], r15 + +.Lupdateblock: + + /* Load state variables */ + mov a_64, [DIGEST(0)] + mov b_64, [DIGEST(1)] + mov c_64, [DIGEST(2)] + mov d_64, [DIGEST(3)] + mov e_64, [DIGEST(4)] + mov f_64, [DIGEST(5)] + mov g_64, [DIGEST(6)] + mov h_64, [DIGEST(7)] + + t = 0 + .rept 80/2 + 1 + /* (80 rounds) / (2 rounds/iteration) + (1 iteration) */ + /* +1 iteration because the scheduler leads hashing by 1 iteration */ + .if t < 2 + /* BSWAP 2 QWORDS */ + movdqa xmm1, [.LXMM_QWORD_BSWAP ADD_RIP] + movdqu xmm0, [MSG(t)] + pshufb xmm0, xmm1 /* BSWAP */ + movdqa [W_t(t)], xmm0 /* Store Scheduled Pair */ + paddq xmm0, [K_t(t)] /* Compute W[t]+K[t] */ + movdqa [WK_2(t)], xmm0 /* Store into WK for rounds */ + .elseif t < 16 + /* BSWAP 2 QWORDS; Compute 2 Rounds */ + movdqu xmm0, [MSG(t)] + pshufb xmm0, xmm1 /* BSWAP */ + SHA512_Round (t - 2) /* Round t-2 */ + movdqa [W_t(t)], xmm0 /* Store Scheduled Pair */ + paddq xmm0, [K_t(t)] /* Compute W[t]+K[t] */ + SHA512_Round (t - 1) /* Round t-1 */ + movdqa [WK_2(t)], xmm0 /* Store W[t]+K[t] into WK */ + .elseif t < 79 + /* Schedule 2 QWORDS; Compute 2 Rounds */ + SHA512_2Sched_2Round_sse t + .else + /* Compute 2 Rounds */ + SHA512_Round (t - 2) + SHA512_Round (t - 1) + .endif + t = (t)+2 + .endr + + /* Update digest */ + add [DIGEST(0)], a_64 + add [DIGEST(1)], b_64 + add [DIGEST(2)], c_64 + add [DIGEST(3)], d_64 + add [DIGEST(4)], e_64 + add [DIGEST(5)], f_64 + add [DIGEST(6)], g_64 + add [DIGEST(7)], h_64 + + /* Advance to next message block */ + add msg, 16*8 + dec msglen + jnz .Lupdateblock + + /* Restore GPRs */ + mov rbx, [rsp + frame_GPRSAVE + 8 * 0] + mov r12, [rsp + frame_GPRSAVE + 8 * 1] + mov r13, [rsp + frame_GPRSAVE + 8 * 2] + mov r14, [rsp + frame_GPRSAVE + 8 * 3] + mov r15, [rsp + frame_GPRSAVE + 8 * 4] + + /* Restore Stack Pointer */ + add rsp, frame_size + + pxor xmm0, xmm0 + pxor xmm1, xmm1 + pxor xmm2, xmm2 + pxor xmm3, xmm3 + pxor xmm4, xmm4 + pxor xmm5, xmm5 + + /* Return stack burn depth */ + mov rax, frame_size + +.Lnowork: + ret + +/* 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Binary Data +*/ + +.align 16 + +/* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */ +.LXMM_QWORD_BSWAP: + .octa 0x08090a0b0c0d0e0f0001020304050607 + +/* K[t] used in SHA512 hashing */ +.LK512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +#endif +#endif diff --git a/libotr/libgcrypt-1.8.7/cipher/sha512.c b/libotr/libgcrypt-1.8.7/cipher/sha512.c new file mode 100644 index 0000000..06e8a2b --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/sha512.c @@ -0,0 +1,959 @@ +/* sha512.c - SHA384 and SHA512 hash functions + * Copyright (C) 2003, 2008, 2009 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + + +/* Test vectors from FIPS-180-2: + * + * "abc" + * 384: + * CB00753F 45A35E8B B5A03D69 9AC65007 272C32AB 0EDED163 + * 1A8B605A 43FF5BED 8086072B A1E7CC23 58BAECA1 34C825A7 + * 512: + * DDAF35A1 93617ABA CC417349 AE204131 12E6FA4E 89A97EA2 0A9EEEE6 4B55D39A + * 2192992A 274FC1A8 36BA3C23 A3FEEBBD 454D4423 643CE80E 2A9AC94F A54CA49F + * + * "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu" + * 384: + * 09330C33 F71147E8 3D192FC7 82CD1B47 53111B17 3B3B05D2 + * 2FA08086 E3B0F712 FCC7C71A 557E2DB9 66C3E9FA 91746039 + * 512: + * 8E959B75 DAE313DA 8CF4F728 14FC143F 8F7779C6 EB9F7FA1 7299AEAD B6889018 + * 501D289E 4900F7E4 331B99DE C4B5433A C7D329EE B6DD2654 5E96E55B 874BE909 + * + * "a" x 1000000 + * 384: + * 9D0E1809 716474CB 086E834E 310A4A1C ED149E9C 00F24852 + * 7972CEC5 704C2A5B 07B8B3DC 38ECC4EB AE97DDD8 7F3D8985 + * 512: + * E718483D 0CE76964 4E2E42C7 BC15B463 8E1F98B1 3B204428 5632A803 AFA973EB + * DE0FF244 877EA60A 4CB0432C E577C31B EB009C5C 2C49AA2E 4EADB217 AD8CC09B + */ + + +#include <config.h> +#include <string.h> +#include "g10lib.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher.h" +#include "hash-common.h" + + +/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */ +#undef USE_ARM_NEON_ASM +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_ARM_NEON_ASM 1 +# endif +#endif /*ENABLE_NEON_SUPPORT*/ + + +/* USE_ARM_ASM indicates whether to enable ARM assembly code. */ +#undef USE_ARM_ASM +#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) +# define USE_ARM_ASM 1 +#endif + + +/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ +#undef USE_SSSE3 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_SSSE3 1 +#endif + + +/* USE_AVX indicates whether to compile with Intel AVX code. */ +#undef USE_AVX +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AVX 1 +#endif + + +/* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. 
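+   The AVX2 implementation uses the BMI2 rorx instruction for its rotates,
+   which is why the condition below also requires HAVE_GCC_INLINE_ASM_BMI2
+   and why the init code only sets use_avx2 when both HWF_INTEL_AVX2 and
+   HWF_INTEL_BMI2 are reported at run time.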
*/ +#undef USE_AVX2 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ + defined(HAVE_GCC_INLINE_ASM_BMI2) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AVX2 1 +#endif + + +typedef struct +{ + u64 h0, h1, h2, h3, h4, h5, h6, h7; +} SHA512_STATE; + +typedef struct +{ + gcry_md_block_ctx_t bctx; + SHA512_STATE state; +#ifdef USE_ARM_NEON_ASM + unsigned int use_neon:1; +#endif +#ifdef USE_SSSE3 + unsigned int use_ssse3:1; +#endif +#ifdef USE_AVX + unsigned int use_avx:1; +#endif +#ifdef USE_AVX2 + unsigned int use_avx2:1; +#endif +} SHA512_CONTEXT; + +static unsigned int +transform (void *context, const unsigned char *data, size_t nblks); + +static void +sha512_init (void *context, unsigned int flags) +{ + SHA512_CONTEXT *ctx = context; + SHA512_STATE *hd = &ctx->state; + unsigned int features = _gcry_get_hw_features (); + + (void)flags; + + hd->h0 = U64_C(0x6a09e667f3bcc908); + hd->h1 = U64_C(0xbb67ae8584caa73b); + hd->h2 = U64_C(0x3c6ef372fe94f82b); + hd->h3 = U64_C(0xa54ff53a5f1d36f1); + hd->h4 = U64_C(0x510e527fade682d1); + hd->h5 = U64_C(0x9b05688c2b3e6c1f); + hd->h6 = U64_C(0x1f83d9abfb41bd6b); + hd->h7 = U64_C(0x5be0cd19137e2179); + + ctx->bctx.nblocks = 0; + ctx->bctx.nblocks_high = 0; + ctx->bctx.count = 0; + ctx->bctx.blocksize = 128; + ctx->bctx.bwrite = transform; + +#ifdef USE_ARM_NEON_ASM + ctx->use_neon = (features & HWF_ARM_NEON) != 0; +#endif +#ifdef USE_SSSE3 + ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; +#endif +#ifdef USE_AVX + ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); +#endif +#ifdef USE_AVX2 + ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2); +#endif + + (void)features; +} + +static void +sha384_init (void *context, unsigned int flags) +{ + SHA512_CONTEXT *ctx = context; + SHA512_STATE *hd = &ctx->state; + unsigned int features = _gcry_get_hw_features (); + + (void)flags; + + hd->h0 = U64_C(0xcbbb9d5dc1059ed8); + hd->h1 = U64_C(0x629a292a367cd507); + hd->h2 = U64_C(0x9159015a3070dd17); + hd->h3 = U64_C(0x152fecd8f70e5939); + hd->h4 = U64_C(0x67332667ffc00b31); + hd->h5 = U64_C(0x8eb44a8768581511); + hd->h6 = U64_C(0xdb0c2e0d64f98fa7); + hd->h7 = U64_C(0x47b5481dbefa4fa4); + + ctx->bctx.nblocks = 0; + ctx->bctx.nblocks_high = 0; + ctx->bctx.count = 0; + ctx->bctx.blocksize = 128; + ctx->bctx.bwrite = transform; + +#ifdef USE_ARM_NEON_ASM + ctx->use_neon = (features & HWF_ARM_NEON) != 0; +#endif +#ifdef USE_SSSE3 + ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; +#endif +#ifdef USE_AVX + ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); +#endif +#ifdef USE_AVX2 + ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2); +#endif + + (void)features; +} + + +static const u64 k[] = + { + U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd), + U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc), + U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019), + U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118), + U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe), + U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2), + U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1), + U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694), + U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3), + U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65), + U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483), + U64_C(0x5cb0a9dcbd41fbd4), 
U64_C(0x76f988da831153b5), + U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210), + U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4), + U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725), + U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70), + U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926), + U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df), + U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8), + U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b), + U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001), + U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30), + U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910), + U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8), + U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53), + U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8), + U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb), + U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3), + U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60), + U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec), + U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9), + U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b), + U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207), + U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178), + U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6), + U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b), + U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493), + U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c), + U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a), + U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817) + }; + +#ifndef USE_ARM_ASM + +static inline u64 +ROTR (u64 x, u64 n) +{ + return ((x >> n) | (x << (64 - n))); +} + +static inline u64 +Ch (u64 x, u64 y, u64 z) +{ + return ((x & y) ^ ( ~x & z)); +} + +static inline u64 +Maj (u64 x, u64 y, u64 z) +{ + return ((x & y) ^ (x & z) ^ (y & z)); +} + +static inline u64 +Sum0 (u64 x) +{ + return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39)); +} + +static inline u64 +Sum1 (u64 x) +{ + return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41)); +} + +/**************** + * Transform the message W which consists of 16 64-bit-words + */ +static unsigned int +transform_blk (SHA512_STATE *hd, const unsigned char *data) +{ + u64 a, b, c, d, e, f, g, h; + u64 w[16]; + int t; + + /* get values from the chaining vars */ + a = hd->h0; + b = hd->h1; + c = hd->h2; + d = hd->h3; + e = hd->h4; + f = hd->h5; + g = hd->h6; + h = hd->h7; + + for ( t = 0; t < 16; t++ ) + w[t] = buf_get_be64(data + t * 8); + +#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) +#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + + for (t = 0; t < 80 - 16; ) + { + u64 t1, t2; + + /* Performance on a AMD Athlon(tm) Dual Core Processor 4050e + with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes + initialized to 0,1,2,3...255,0,... and 1000 iterations: + + Not unrolled with macros: 440ms + Unrolled with macros: 350ms + Unrolled with inline: 330ms + */ +#if 0 /* Not unrolled. */ + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16]; + w[t%16] += S1 (w[(t - 2)%16]) + w[(t - 7)%16] + S0 (w[(t - 15)%16]); + t2 = Sum0 (a) + Maj (a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + t++; +#else /* Unrolled to interweave the chain variables. 
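+         Each of the 16 steps below updates only the two chain variables
+         that actually change ("d += t1; h = t1 + t2;") and renames a..h
+         between steps instead of moving data, so the next step computes
+         Sum1(d) and Ch(d,e,f) where this one used e and (e,f,g).  The
+         in-place update "w[0] += S1(w[14]) + w[9] + S0(w[1])" folds the
+         old w[0] (= W[t-16]) into the new W[t], which is why sixteen
+         w[] slots suffice for the whole schedule.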
*/ + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0]; + w[0] += S1 (w[14]) + w[9] + S0 (w[1]); + t2 = Sum0 (a) + Maj (a, b, c); + d += t1; + h = t1 + t2; + + t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1]; + w[1] += S1 (w[15]) + w[10] + S0 (w[2]); + t2 = Sum0 (h) + Maj (h, a, b); + c += t1; + g = t1 + t2; + + t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2]; + w[2] += S1 (w[0]) + w[11] + S0 (w[3]); + t2 = Sum0 (g) + Maj (g, h, a); + b += t1; + f = t1 + t2; + + t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3]; + w[3] += S1 (w[1]) + w[12] + S0 (w[4]); + t2 = Sum0 (f) + Maj (f, g, h); + a += t1; + e = t1 + t2; + + t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4]; + w[4] += S1 (w[2]) + w[13] + S0 (w[5]); + t2 = Sum0 (e) + Maj (e, f, g); + h += t1; + d = t1 + t2; + + t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5]; + w[5] += S1 (w[3]) + w[14] + S0 (w[6]); + t2 = Sum0 (d) + Maj (d, e, f); + g += t1; + c = t1 + t2; + + t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6]; + w[6] += S1 (w[4]) + w[15] + S0 (w[7]); + t2 = Sum0 (c) + Maj (c, d, e); + f += t1; + b = t1 + t2; + + t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7]; + w[7] += S1 (w[5]) + w[0] + S0 (w[8]); + t2 = Sum0 (b) + Maj (b, c, d); + e += t1; + a = t1 + t2; + + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8]; + w[8] += S1 (w[6]) + w[1] + S0 (w[9]); + t2 = Sum0 (a) + Maj (a, b, c); + d += t1; + h = t1 + t2; + + t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9]; + w[9] += S1 (w[7]) + w[2] + S0 (w[10]); + t2 = Sum0 (h) + Maj (h, a, b); + c += t1; + g = t1 + t2; + + t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10]; + w[10] += S1 (w[8]) + w[3] + S0 (w[11]); + t2 = Sum0 (g) + Maj (g, h, a); + b += t1; + f = t1 + t2; + + t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11]; + w[11] += S1 (w[9]) + w[4] + S0 (w[12]); + t2 = Sum0 (f) + Maj (f, g, h); + a += t1; + e = t1 + t2; + + t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12]; + w[12] += S1 (w[10]) + w[5] + S0 (w[13]); + t2 = Sum0 (e) + Maj (e, f, g); + h += t1; + d = t1 + t2; + + t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13]; + w[13] += S1 (w[11]) + w[6] + S0 (w[14]); + t2 = Sum0 (d) + Maj (d, e, f); + g += t1; + c = t1 + t2; + + t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14]; + w[14] += S1 (w[12]) + w[7] + S0 (w[15]); + t2 = Sum0 (c) + Maj (c, d, e); + f += t1; + b = t1 + t2; + + t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15]; + w[15] += S1 (w[13]) + w[8] + S0 (w[0]); + t2 = Sum0 (b) + Maj (b, c, d); + e += t1; + a = t1 + t2; + + t += 16; +#endif + } + + for (; t < 80; ) + { + u64 t1, t2; + +#if 0 /* Not unrolled. */ + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16]; + t2 = Sum0 (a) + Maj (a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + t++; +#else /* Unrolled to interweave the chain variables. 
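+         Same register-renaming scheme as the first loop, but rounds
+         64..79 consume the last scheduled w[] values without computing
+         any new ones, so the schedule updates are dropped here.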
*/ + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0]; + t2 = Sum0 (a) + Maj (a, b, c); + d += t1; + h = t1 + t2; + + t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1]; + t2 = Sum0 (h) + Maj (h, a, b); + c += t1; + g = t1 + t2; + + t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2]; + t2 = Sum0 (g) + Maj (g, h, a); + b += t1; + f = t1 + t2; + + t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3]; + t2 = Sum0 (f) + Maj (f, g, h); + a += t1; + e = t1 + t2; + + t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4]; + t2 = Sum0 (e) + Maj (e, f, g); + h += t1; + d = t1 + t2; + + t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5]; + t2 = Sum0 (d) + Maj (d, e, f); + g += t1; + c = t1 + t2; + + t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6]; + t2 = Sum0 (c) + Maj (c, d, e); + f += t1; + b = t1 + t2; + + t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7]; + t2 = Sum0 (b) + Maj (b, c, d); + e += t1; + a = t1 + t2; + + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8]; + t2 = Sum0 (a) + Maj (a, b, c); + d += t1; + h = t1 + t2; + + t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9]; + t2 = Sum0 (h) + Maj (h, a, b); + c += t1; + g = t1 + t2; + + t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10]; + t2 = Sum0 (g) + Maj (g, h, a); + b += t1; + f = t1 + t2; + + t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11]; + t2 = Sum0 (f) + Maj (f, g, h); + a += t1; + e = t1 + t2; + + t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12]; + t2 = Sum0 (e) + Maj (e, f, g); + h += t1; + d = t1 + t2; + + t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13]; + t2 = Sum0 (d) + Maj (d, e, f); + g += t1; + c = t1 + t2; + + t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14]; + t2 = Sum0 (c) + Maj (c, d, e); + f += t1; + b = t1 + t2; + + t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15]; + t2 = Sum0 (b) + Maj (b, c, d); + e += t1; + a = t1 + t2; + + t += 16; +#endif + } + + /* Update chaining vars. */ + hd->h0 += a; + hd->h1 += b; + hd->h2 += c; + hd->h3 += d; + hd->h4 += e; + hd->h5 += f; + hd->h6 += g; + hd->h7 += h; + + return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) + + 3 * sizeof(void*); +} +#endif /*!USE_ARM_ASM*/ + +/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. 
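+ * Concretely, ASM_FUNC_ABI expands to __attribute__((sysv_abi)) on Win64,
+ * so the compiled caller must spill the XMM6-XMM15 registers (10 * 16
+ * bytes) around the call; ASM_EXTRA_STACK accounts for that extra stack
+ * use when the burn depth is computed in transform () below.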
*/
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_ARM_NEON_ASM
+void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
+                                        const unsigned char *data,
+                                        const u64 k[], size_t num_blks);
+#endif
+
+#ifdef USE_ARM_ASM
+unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
+                                         const unsigned char *data,
+                                         const u64 k[], size_t num_blks);
+#endif
+
+#ifdef USE_SSSE3
+unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
+                                                void *state,
+                                                size_t num_blks) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AVX
+unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
+                                              void *state,
+                                              size_t num_blks) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AVX2
+unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
+                                               void *state,
+                                               size_t num_blks) ASM_FUNC_ABI;
+#endif
+
+
+static unsigned int
+transform (void *context, const unsigned char *data, size_t nblks)
+{
+  SHA512_CONTEXT *ctx = context;
+  unsigned int burn;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    return _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks)
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+#endif
+
+#ifdef USE_AVX
+  if (ctx->use_avx)
+    return _gcry_sha512_transform_amd64_avx (data, &ctx->state, nblks)
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+#endif
+
+#ifdef USE_SSSE3
+  if (ctx->use_ssse3)
+    return _gcry_sha512_transform_amd64_ssse3 (data, &ctx->state, nblks)
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+#endif
+
+#ifdef USE_ARM_NEON_ASM
+  if (ctx->use_neon)
+    {
+      _gcry_sha512_transform_armv7_neon (&ctx->state, data, k, nblks);
+
+      /* _gcry_sha512_transform_armv7_neon does not store sensitive data
+       * to stack.  */
+      return /* no burn_stack */ 0;
+    }
+#endif
+
+#ifdef USE_ARM_ASM
+  burn = _gcry_sha512_transform_arm (&ctx->state, data, k, nblks);
+#else
+  do
+    {
+      burn = transform_blk (&ctx->state, data) + 3 * sizeof(void*);
+      data += 128;
+    }
+  while (--nblks);
+
+#ifdef ASM_EXTRA_STACK
+  /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored in
+   * the prologue of this function.  Therefore we need to add
+   * ASM_EXTRA_STACK here too.
+   */
+  burn += ASM_EXTRA_STACK;
+#endif
+#endif
+
+  return burn;
+}
+
+
+/* The routine sha512_final terminates the computation and
+ * returns the digest.
+ * The handle is prepared for a new cycle, but adding bytes to the
+ * handle will destroy the returned buffer.
+ * Returns: 64 bytes representing the digest.  When used for sha384,
+ * we take the leftmost 48 of those bytes.
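+ *
+ * The length encoding below builds the mandatory 128-bit big-endian bit
+ * count: nblocks is converted to bytes with a shift by 7 (one block is
+ * 128 bytes, the bits shifted out of t going into msb), the count of
+ * buffered bytes is added with carry, and a further shift by 3 converts
+ * bytes to bits.  The result lands at buf[112..127], after the 0x80 pad
+ * byte and the zero padding.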
+ */ + +static void +sha512_final (void *context) +{ + SHA512_CONTEXT *hd = context; + unsigned int stack_burn_depth; + u64 t, th, msb, lsb; + byte *p; + + _gcry_md_block_write (context, NULL, 0); /* flush */ ; + + t = hd->bctx.nblocks; + /* if (sizeof t == sizeof hd->bctx.nblocks) */ + th = hd->bctx.nblocks_high; + /* else */ + /* th = hd->bctx.nblocks >> 64; In case we ever use u128 */ + + /* multiply by 128 to make a byte count */ + lsb = t << 7; + msb = (th << 7) | (t >> 57); + /* add the count */ + t = lsb; + if ((lsb += hd->bctx.count) < t) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 61; + + if (hd->bctx.count < 112) + { /* enough room */ + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ + while (hd->bctx.count < 112) + hd->bctx.buf[hd->bctx.count++] = 0; /* pad */ + } + else + { /* need one extra block */ + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ + while (hd->bctx.count < 128) + hd->bctx.buf[hd->bctx.count++] = 0; + _gcry_md_block_write (context, NULL, 0); /* flush */ ; + memset (hd->bctx.buf, 0, 112); /* fill next block with zeroes */ + } + /* append the 128 bit count */ + buf_put_be64(hd->bctx.buf + 112, msb); + buf_put_be64(hd->bctx.buf + 120, lsb); + stack_burn_depth = transform (hd, hd->bctx.buf, 1); + _gcry_burn_stack (stack_burn_depth); + + p = hd->bctx.buf; +#define X(a) do { buf_put_be64(p, hd->state.h##a); p += 8; } while (0) + X (0); + X (1); + X (2); + X (3); + X (4); + X (5); + /* Note that these last two chunks are included even for SHA384. + We just ignore them. */ + X (6); + X (7); +#undef X +} + +static byte * +sha512_read (void *context) +{ + SHA512_CONTEXT *hd = (SHA512_CONTEXT *) context; + return hd->bctx.buf; +} + + +/* Shortcut functions which puts the hash value of the supplied buffer + * into outbuf which must have a size of 64 bytes. */ +void +_gcry_sha512_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + SHA512_CONTEXT hd; + + sha512_init (&hd, 0); + _gcry_md_block_write (&hd, buffer, length); + sha512_final (&hd); + memcpy (outbuf, hd.bctx.buf, 64); +} + + +/* Variant of the above shortcut function using multiple buffers. */ +void +_gcry_sha512_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt) +{ + SHA512_CONTEXT hd; + + sha512_init (&hd, 0); + for (;iovcnt > 0; iov++, iovcnt--) + _gcry_md_block_write (&hd, + (const char*)iov[0].data + iov[0].off, iov[0].len); + sha512_final (&hd); + memcpy (outbuf, hd.bctx.buf, 64); +} + + + +/* + Self-test section. 
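+   The vectors checked below are the FIPS-180-2 examples quoted at the
+   top of this file: "abc", the 112-byte "abcdefgh..." message and, in
+   extended mode, one million repetitions of "a".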
+ */ + + +static gpg_err_code_t +selftests_sha384 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + what = "short string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA384, 0, + "abc", 3, + "\xcb\x00\x75\x3f\x45\xa3\x5e\x8b\xb5\xa0\x3d\x69\x9a\xc6\x50\x07" + "\x27\x2c\x32\xab\x0e\xde\xd1\x63\x1a\x8b\x60\x5a\x43\xff\x5b\xed" + "\x80\x86\x07\x2b\xa1\xe7\xcc\x23\x58\xba\xec\xa1\x34\xc8\x25\xa7", 48); + if (errtxt) + goto failed; + + if (extended) + { + what = "long string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA384, 0, + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112, + "\x09\x33\x0C\x33\xF7\x11\x47\xE8\x3D\x19\x2F\xC7\x82\xCD\x1B\x47" + "\x53\x11\x1B\x17\x3B\x3B\x05\xD2\x2F\xA0\x80\x86\xE3\xB0\xF7\x12" + "\xFC\xC7\xC7\x1A\x55\x7E\x2D\xB9\x66\xC3\xE9\xFA\x91\x74\x60\x39", + 48); + if (errtxt) + goto failed; + + what = "one million \"a\""; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA384, 1, + NULL, 0, + "\x9D\x0E\x18\x09\x71\x64\x74\xCB\x08\x6E\x83\x4E\x31\x0A\x4A\x1C" + "\xED\x14\x9E\x9C\x00\xF2\x48\x52\x79\x72\xCE\xC5\x70\x4C\x2A\x5B" + "\x07\xB8\xB3\xDC\x38\xEC\xC4\xEB\xAE\x97\xDD\xD8\x7F\x3D\x89\x85", + 48); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("digest", GCRY_MD_SHA384, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + +static gpg_err_code_t +selftests_sha512 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + what = "short string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA512, 0, + "abc", 3, + "\xDD\xAF\x35\xA1\x93\x61\x7A\xBA\xCC\x41\x73\x49\xAE\x20\x41\x31" + "\x12\xE6\xFA\x4E\x89\xA9\x7E\xA2\x0A\x9E\xEE\xE6\x4B\x55\xD3\x9A" + "\x21\x92\x99\x2A\x27\x4F\xC1\xA8\x36\xBA\x3C\x23\xA3\xFE\xEB\xBD" + "\x45\x4D\x44\x23\x64\x3C\xE8\x0E\x2A\x9A\xC9\x4F\xA5\x4C\xA4\x9F", 64); + if (errtxt) + goto failed; + + if (extended) + { + what = "long string"; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA512, 0, + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112, + "\x8E\x95\x9B\x75\xDA\xE3\x13\xDA\x8C\xF4\xF7\x28\x14\xFC\x14\x3F" + "\x8F\x77\x79\xC6\xEB\x9F\x7F\xA1\x72\x99\xAE\xAD\xB6\x88\x90\x18" + "\x50\x1D\x28\x9E\x49\x00\xF7\xE4\x33\x1B\x99\xDE\xC4\xB5\x43\x3A" + "\xC7\xD3\x29\xEE\xB6\xDD\x26\x54\x5E\x96\xE5\x5B\x87\x4B\xE9\x09", + 64); + if (errtxt) + goto failed; + + what = "one million \"a\""; + errtxt = _gcry_hash_selftest_check_one + (GCRY_MD_SHA512, 1, + NULL, 0, + "\xE7\x18\x48\x3D\x0C\xE7\x69\x64\x4E\x2E\x42\xC7\xBC\x15\xB4\x63" + "\x8E\x1F\x98\xB1\x3B\x20\x44\x28\x56\x32\xA8\x03\xAF\xA9\x73\xEB" + "\xDE\x0F\xF2\x44\x87\x7E\xA6\x0A\x4C\xB0\x43\x2C\xE5\x77\xC3\x1B" + "\xEB\x00\x9C\x5C\x2C\x49\xAA\x2E\x4E\xAD\xB2\x17\xAD\x8C\xC0\x9B", + 64); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("digest", GCRY_MD_SHA512, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. 
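+   EXTENDED also enables the long-running cases (the 112-byte message and
+   the one-million-"a" test); REPORT, if not NULL, is called with a
+   description of the first failing check.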
*/ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_MD_SHA384: + ec = selftests_sha384 (extended, report); + break; + case GCRY_MD_SHA512: + ec = selftests_sha512 (extended, report); + break; + default: + ec = GPG_ERR_DIGEST_ALGO; + break; + + } + return ec; +} + + + + +static byte sha512_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.3 */ + { + 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, + 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x05, + 0x00, 0x04, 0x40 + }; + +static gcry_md_oid_spec_t oid_spec_sha512[] = + { + { "2.16.840.1.101.3.4.2.3" }, + + /* PKCS#1 sha512WithRSAEncryption */ + { "1.2.840.113549.1.1.13" }, + + { NULL } + }; + +gcry_md_spec_t _gcry_digest_spec_sha512 = + { + GCRY_MD_SHA512, {0, 1}, + "SHA512", sha512_asn, DIM (sha512_asn), oid_spec_sha512, 64, + sha512_init, _gcry_md_block_write, sha512_final, sha512_read, NULL, + sizeof (SHA512_CONTEXT), + run_selftests + }; + +static byte sha384_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.2 */ + { + 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, + 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, 0x05, + 0x00, 0x04, 0x30 + }; + +static gcry_md_oid_spec_t oid_spec_sha384[] = + { + { "2.16.840.1.101.3.4.2.2" }, + + /* PKCS#1 sha384WithRSAEncryption */ + { "1.2.840.113549.1.1.12" }, + + /* SHA384WithECDSA: RFC 7427 (A.3.3.) */ + { "1.2.840.10045.4.3.3" }, + + { NULL }, + }; + +gcry_md_spec_t _gcry_digest_spec_sha384 = + { + GCRY_MD_SHA384, {0, 1}, + "SHA384", sha384_asn, DIM (sha384_asn), oid_spec_sha384, 48, + sha384_init, _gcry_md_block_write, sha512_final, sha512_read, NULL, + sizeof (SHA512_CONTEXT), + run_selftests + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/stribog.c b/libotr/libgcrypt-1.8.7/cipher/stribog.c new file mode 100644 index 0000000..7b6e330 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/stribog.c @@ -0,0 +1,1358 @@ +/* stribog.c - GOST R 34.11-2012 (Stribog) hash function + * Copyright (C) 2013 Dmitry Eremin-Solenikov + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "g10lib.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher.h" +#include "hash-common.h" + + +typedef struct +{ + gcry_md_block_ctx_t bctx; + union + { + u64 h[8]; + unsigned char result[64]; + }; + u64 N[8]; + u64 Sigma[8]; +} STRIBOG_CONTEXT; + + +/* Pre-computed results of multiplication of bytes on A and reordered with + Pi[]. 
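+   Each of the eight 256-entry tables therefore gives, for one byte
+   position, the 64-bit result of the Pi substitution followed by the
+   linear transformation A, so one output qword of the LPS round
+   function can be computed as eight table lookups XORed together.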
*/ +static const u64 stribog_table[8][256] = +{ + /* 0 */ + { U64_C(0xd01f715b5c7ef8e6), U64_C(0x16fa240980778325), + U64_C(0xa8a42e857ee049c8), U64_C(0x6ac1068fa186465b), + U64_C(0x6e417bd7a2e9320b), U64_C(0x665c8167a437daab), + U64_C(0x7666681aa89617f6), U64_C(0x4b959163700bdcf5), + U64_C(0xf14be6b78df36248), U64_C(0xc585bd689a625cff), + U64_C(0x9557d7fca67d82cb), U64_C(0x89f0b969af6dd366), + U64_C(0xb0833d48749f6c35), U64_C(0xa1998c23b1ecbc7c), + U64_C(0x8d70c431ac02a736), U64_C(0xd6dfbc2fd0a8b69e), + U64_C(0x37aeb3e551fa198b), U64_C(0x0b7d128a40b5cf9c), + U64_C(0x5a8f2008b5780cbc), U64_C(0xedec882284e333e5), + U64_C(0xd25fc177d3c7c2ce), U64_C(0x5e0f5d50b61778ec), + U64_C(0x1d873683c0c24cb9), U64_C(0xad040bcbb45d208c), + U64_C(0x2f89a0285b853c76), U64_C(0x5732fff6791b8d58), + U64_C(0x3e9311439ef6ec3f), U64_C(0xc9183a809fd3c00f), + U64_C(0x83adf3f5260a01ee), U64_C(0xa6791941f4e8ef10), + U64_C(0x103ae97d0ca1cd5d), U64_C(0x2ce948121dee1b4a), + U64_C(0x39738421dbf2bf53), U64_C(0x093da2a6cf0cf5b4), + U64_C(0xcd9847d89cbcb45f), U64_C(0xf9561c078b2d8ae8), + U64_C(0x9c6a755a6971777f), U64_C(0xbc1ebaa0712ef0c5), + U64_C(0x72e61542abf963a6), U64_C(0x78bb5fde229eb12e), + U64_C(0x14ba94250fceb90d), U64_C(0x844d6697630e5282), + U64_C(0x98ea08026a1e032f), U64_C(0xf06bbea144217f5c), + U64_C(0xdb6263d11ccb377a), U64_C(0x641c314b2b8ee083), + U64_C(0x320e96ab9b4770cf), U64_C(0x1ee7deb986a96b85), + U64_C(0xe96cf57a878c47b5), U64_C(0xfdd6615f8842feb8), + U64_C(0xc83862965601dd1b), U64_C(0x2ea9f83e92572162), + U64_C(0xf876441142ff97fc), U64_C(0xeb2c455608357d9d), + U64_C(0x5612a7e0b0c9904c), U64_C(0x6c01cbfb2d500823), + U64_C(0x4548a6a7fa037a2d), U64_C(0xabc4c6bf388b6ef4), + U64_C(0xbade77d4fdf8bebd), U64_C(0x799b07c8eb4cac3a), + U64_C(0x0c9d87e805b19cf0), U64_C(0xcb588aac106afa27), + U64_C(0xea0c1d40c1e76089), U64_C(0x2869354a1e816f1a), + U64_C(0xff96d17307fbc490), U64_C(0x9f0a9d602f1a5043), + U64_C(0x96373fc6e016a5f7), U64_C(0x5292dab8b3a6e41c), + U64_C(0x9b8ae0382c752413), U64_C(0x4f15ec3b7364a8a5), + U64_C(0x3fb349555724f12b), U64_C(0xc7c50d4415db66d7), + U64_C(0x92b7429ee379d1a7), U64_C(0xd37f99611a15dfda), + U64_C(0x231427c05e34a086), U64_C(0xa439a96d7b51d538), + U64_C(0xb403401077f01865), U64_C(0xdda2aea5901d7902), + U64_C(0x0a5d4a9c8967d288), U64_C(0xc265280adf660f93), + U64_C(0x8bb0094520d4e94e), U64_C(0x2a29856691385532), + U64_C(0x42a833c5bf072941), U64_C(0x73c64d54622b7eb2), + U64_C(0x07e095624504536c), U64_C(0x8a905153e906f45a), + U64_C(0x6f6123c16b3b2f1f), U64_C(0xc6e55552dc097bc3), + U64_C(0x4468feb133d16739), U64_C(0xe211e7f0c7398829), + U64_C(0xa2f96419f7879b40), U64_C(0x19074bdbc3ad38e9), + U64_C(0xf4ebc3f9474e0b0c), U64_C(0x43886bd376d53455), + U64_C(0xd8028beb5aa01046), U64_C(0x51f23282f5cdc320), + U64_C(0xe7b1c2be0d84e16d), U64_C(0x081dfab006dee8a0), + U64_C(0x3b33340d544b857b), U64_C(0x7f5bcabc679ae242), + U64_C(0x0edd37c48a08a6d8), U64_C(0x81ed43d9a9b33bc6), + U64_C(0xb1a3655ebd4d7121), U64_C(0x69a1eeb5e7ed6167), + U64_C(0xf6ab73d5c8f73124), U64_C(0x1a67a3e185c61fd5), + U64_C(0x2dc91004d43c065e), U64_C(0x0240b02c8fb93a28), + U64_C(0x90f7f2b26cc0eb8f), U64_C(0x3cd3a16f114fd617), + U64_C(0xaae49ea9f15973e0), U64_C(0x06c0cd748cd64e78), + U64_C(0xda423bc7d5192a6e), U64_C(0xc345701c16b41287), + U64_C(0x6d2193ede4821537), U64_C(0xfcf639494190e3ac), + U64_C(0x7c3b228621f1c57e), U64_C(0xfb16ac2b0494b0c0), + U64_C(0xbf7e529a3745d7f9), U64_C(0x6881b6a32e3f7c73), + U64_C(0xca78d2bad9b8e733), U64_C(0xbbfe2fc2342aa3a9), + U64_C(0x0dbddffecc6381e4), U64_C(0x70a6a56e2440598e), + 
U64_C(0xe4d12a844befc651), U64_C(0x8c509c2765d0ba22), + U64_C(0xee8c6018c28814d9), U64_C(0x17da7c1f49a59e31), + U64_C(0x609c4c1328e194d3), U64_C(0xb3e3d57232f44b09), + U64_C(0x91d7aaa4a512f69b), U64_C(0x0ffd6fd243dabbcc), + U64_C(0x50d26a943c1fde34), U64_C(0x6be15e9968545b4f), + U64_C(0x94778fea6faf9fdf), U64_C(0x2b09dd7058ea4826), + U64_C(0x677cd9716de5c7bf), U64_C(0x49d5214fffb2e6dd), + U64_C(0x0360e83a466b273c), U64_C(0x1fc786af4f7b7691), + U64_C(0xa0b9d435783ea168), U64_C(0xd49f0c035f118cb6), + U64_C(0x01205816c9d21d14), U64_C(0xac2453dd7d8f3d98), + U64_C(0x545217cc3f70aa64), U64_C(0x26b4028e9489c9c2), + U64_C(0xdec2469fd6765e3e), U64_C(0x04807d58036f7450), + U64_C(0xe5f17292823ddb45), U64_C(0xf30b569b024a5860), + U64_C(0x62dcfc3fa758aefb), U64_C(0xe84cad6c4e5e5aa1), + U64_C(0xccb81fce556ea94b), U64_C(0x53b282ae7a74f908), + U64_C(0x1b47fbf74c1402c1), U64_C(0x368eebf39828049f), + U64_C(0x7afbeff2ad278b06), U64_C(0xbe5e0a8cfe97caed), + U64_C(0xcfd8f7f413058e77), U64_C(0xf78b2bc301252c30), + U64_C(0x4d555c17fcdd928d), U64_C(0x5f2f05467fc565f8), + U64_C(0x24f4b2a21b30f3ea), U64_C(0x860dd6bbecb768aa), + U64_C(0x4c750401350f8f99), U64_C(0x0000000000000000), + U64_C(0xecccd0344d312ef1), U64_C(0xb5231806be220571), + U64_C(0xc105c030990d28af), U64_C(0x653c695de25cfd97), + U64_C(0x159acc33c61ca419), U64_C(0xb89ec7f872418495), + U64_C(0xa9847693b73254dc), U64_C(0x58cf90243ac13694), + U64_C(0x59efc832f3132b80), U64_C(0x5c4fed7c39ae42c4), + U64_C(0x828dabe3efd81cfa), U64_C(0xd13f294d95ace5f2), + U64_C(0x7d1b7a90e823d86a), U64_C(0xb643f03cf849224d), + U64_C(0x3df3f979d89dcb03), U64_C(0x7426d836272f2dde), + U64_C(0xdfe21e891fa4432a), U64_C(0x3a136c1b9d99986f), + U64_C(0xfa36f43dcd46add4), U64_C(0xc025982650df35bb), + U64_C(0x856d3e81aadc4f96), U64_C(0xc4a5e57e53b041eb), + U64_C(0x4708168b75ba4005), U64_C(0xaf44bbe73be41aa4), + U64_C(0x971767d029c4b8e3), U64_C(0xb9be9feebb939981), + U64_C(0x215497ecd18d9aae), U64_C(0x316e7e91dd2c57f3), + U64_C(0xcef8afe2dad79363), U64_C(0x3853dc371220a247), + U64_C(0x35ee03c9de4323a3), U64_C(0xe6919aa8c456fc79), + U64_C(0xe05157dc4880b201), U64_C(0x7bdbb7e464f59612), + U64_C(0x127a59518318f775), U64_C(0x332ecebd52956ddb), + U64_C(0x8f30741d23bb9d1e), U64_C(0xd922d3fd93720d52), + U64_C(0x7746300c61440ae2), U64_C(0x25d4eab4d2e2eefe), + U64_C(0x75068020eefd30ca), U64_C(0x135a01474acaea61), + U64_C(0x304e268714fe4ae7), U64_C(0xa519f17bb283c82c), + U64_C(0xdc82f6b359cf6416), U64_C(0x5baf781e7caa11a8), + U64_C(0xb2c38d64fb26561d), U64_C(0x34ce5bdf17913eb7), + U64_C(0x5d6fb56af07c5fd0), U64_C(0x182713cd0a7f25fd), + U64_C(0x9e2ac576e6c84d57), U64_C(0x9aaab82ee5a73907), + U64_C(0xa3d93c0f3e558654), U64_C(0x7e7b92aaae48ff56), + U64_C(0x872d8ead256575be), U64_C(0x41c8dbfff96c0e7d), + U64_C(0x99ca5014a3cc1e3b), U64_C(0x40e883e930be1369), + U64_C(0x1ca76e95091051ad), U64_C(0x4e35b42dbab6b5b1), + U64_C(0x05a0254ecabd6944), U64_C(0xe1710fca8152af15), + U64_C(0xf22b0e8dcb984574), U64_C(0xb763a82a319b3f59), + U64_C(0x63fca4296e8ab3ef), U64_C(0x9d4a2d4ca0a36a6b), + U64_C(0xe331bfe60eeb953d), U64_C(0xd5bf541596c391a2), + U64_C(0xf5cb9bef8e9c1618), U64_C(0x46284e9dbc685d11), + U64_C(0x2074cffa185f87ba), U64_C(0xbd3ee2b6b8fcedd1), + U64_C(0xae64e3f1f23607b0), U64_C(0xfeb68965ce29d984), + U64_C(0x55724fdaf6a2b770), U64_C(0x29496d5cd753720e), + U64_C(0xa75941573d3af204), U64_C(0x8e102c0bea69800a), + U64_C(0x111ab16bc573d049), U64_C(0xd7ffe439197aab8a), + U64_C(0xefac380e0b5a09cd), U64_C(0x48f579593660fbc9), + U64_C(0x22347fd697e6bd92), U64_C(0x61bc1405e13389c7), + U64_C(0x4ab5c975b9d9c1e1), 
U64_C(0x80cd1bcf606126d2), + U64_C(0x7186fd78ed92449a), U64_C(0x93971a882aabccb3), + U64_C(0x88d0e17f66bfce72), U64_C(0x27945a985d5bd4d6) }, + /* 1 */ + { U64_C(0xde553f8c05a811c8), U64_C(0x1906b59631b4f565), + U64_C(0x436e70d6b1964ff7), U64_C(0x36d343cb8b1e9d85), + U64_C(0x843dfacc858aab5a), U64_C(0xfdfc95c299bfc7f9), + U64_C(0x0f634bdea1d51fa2), U64_C(0x6d458b3b76efb3cd), + U64_C(0x85c3f77cf8593f80), U64_C(0x3c91315fbe737cb2), + U64_C(0x2148b03366ace398), U64_C(0x18f8b8264c6761bf), + U64_C(0xc830c1c495c9fb0f), U64_C(0x981a76102086a0aa), + U64_C(0xaa16012142f35760), U64_C(0x35cc54060c763cf6), + U64_C(0x42907d66cc45db2d), U64_C(0x8203d44b965af4bc), + U64_C(0x3d6f3cefc3a0e868), U64_C(0xbc73ff69d292bda7), + U64_C(0x8722ed0102e20a29), U64_C(0x8f8185e8cd34deb7), + U64_C(0x9b0561dda7ee01d9), U64_C(0x5335a0193227fad6), + U64_C(0xc9cecc74e81a6fd5), U64_C(0x54f5832e5c2431ea), + U64_C(0x99e47ba05d553470), U64_C(0xf7bee756acd226ce), + U64_C(0x384e05a5571816fd), U64_C(0xd1367452a47d0e6a), + U64_C(0xf29fde1c386ad85b), U64_C(0x320c77316275f7ca), + U64_C(0xd0c879e2d9ae9ab0), U64_C(0xdb7406c69110ef5d), + U64_C(0x45505e51a2461011), U64_C(0xfc029872e46c5323), + U64_C(0xfa3cb6f5f7bc0cc5), U64_C(0x031f17cd8768a173), + U64_C(0xbd8df2d9af41297d), U64_C(0x9d3b4f5ab43e5e3f), + U64_C(0x4071671b36feee84), U64_C(0x716207e7d3e3b83d), + U64_C(0x48d20ff2f9283a1a), U64_C(0x27769eb4757cbc7e), + U64_C(0x5c56ebc793f2e574), U64_C(0xa48b474f9ef5dc18), + U64_C(0x52cbada94ff46e0c), U64_C(0x60c7da982d8199c6), + U64_C(0x0e9d466edc068b78), U64_C(0x4eec2175eaf865fc), + U64_C(0x550b8e9e21f7a530), U64_C(0x6b7ba5bc653fec2b), + U64_C(0x5eb7f1ba6949d0dd), U64_C(0x57ea94e3db4c9099), + U64_C(0xf640eae6d101b214), U64_C(0xdd4a284182c0b0bb), + U64_C(0xff1d8fbf6304f250), U64_C(0xb8accb933bf9d7e8), + U64_C(0xe8867c478eb68c4d), U64_C(0x3f8e2692391bddc1), + U64_C(0xcb2fd60912a15a7c), U64_C(0xaec935dbab983d2f), + U64_C(0xf55ffd2b56691367), U64_C(0x80e2ce366ce1c115), + U64_C(0x179bf3f8edb27e1d), U64_C(0x01fe0db07dd394da), + U64_C(0xda8a0b76ecc37b87), U64_C(0x44ae53e1df9584cb), + U64_C(0xb310b4b77347a205), U64_C(0xdfab323c787b8512), + U64_C(0x3b511268d070b78e), U64_C(0x65e6e3d2b9396753), + U64_C(0x6864b271e2574d58), U64_C(0x259784c98fc789d7), + U64_C(0x02e11a7dfabb35a9), U64_C(0x8841a6dfa337158b), + U64_C(0x7ade78c39b5dcdd0), U64_C(0xb7cf804d9a2cc84a), + U64_C(0x20b6bd831b7f7742), U64_C(0x75bd331d3a88d272), + U64_C(0x418f6aab4b2d7a5e), U64_C(0xd9951cbb6babdaf4), + U64_C(0xb6318dfde7ff5c90), U64_C(0x1f389b112264aa83), + U64_C(0x492c024284fbaec0), U64_C(0xe33a0363c608f9a0), + U64_C(0x2688930408af28a4), U64_C(0xc7538a1a341ce4ad), + U64_C(0x5da8e677ee2171ae), U64_C(0x8c9e92254a5c7fc4), + U64_C(0x63d8cd55aae938b5), U64_C(0x29ebd8daa97a3706), + U64_C(0x959827b37be88aa1), U64_C(0x1484e4356adadf6e), + U64_C(0xa7945082199d7d6b), U64_C(0xbf6ce8a455fa1cd4), + U64_C(0x9cc542eac9edcae5), U64_C(0x79c16f0e1c356ca3), + U64_C(0x89bfab6fdee48151), U64_C(0xd4174d1830c5f0ff), + U64_C(0x9258048415eb419d), U64_C(0x6139d72850520d1c), + U64_C(0x6a85a80c18ec78f1), U64_C(0xcd11f88e0171059a), + U64_C(0xcceff53e7ca29140), U64_C(0xd229639f2315af19), + U64_C(0x90b91ef9ef507434), U64_C(0x5977d28d074a1be1), + U64_C(0x311360fce51d56b9), U64_C(0xc093a92d5a1f2f91), + U64_C(0x1a19a25bb6dc5416), U64_C(0xeb996b8a09de2d3e), + U64_C(0xfee3820f1ed7668a), U64_C(0xd7085ad5b7ad518c), + U64_C(0x7fff41890fe53345), U64_C(0xec5948bd67dde602), + U64_C(0x2fd5f65dbaaa68e0), U64_C(0xa5754affe32648c2), + U64_C(0xf8ddac880d07396c), U64_C(0x6fa491468c548664), + U64_C(0x0c7c5c1326bdbed1), 
U64_C(0x4a33158f03930fb3), + U64_C(0x699abfc19f84d982), U64_C(0xe4fa2054a80b329c), + U64_C(0x6707f9af438252fa), U64_C(0x08a368e9cfd6d49e), + U64_C(0x47b1442c58fd25b8), U64_C(0xbbb3dc5ebc91769b), + U64_C(0x1665fe489061eac7), U64_C(0x33f27a811fa66310), + U64_C(0x93a609346838d547), U64_C(0x30ed6d4c98cec263), + U64_C(0x1dd9816cd8df9f2a), U64_C(0x94662a03063b1e7b), + U64_C(0x83fdd9fbeb896066), U64_C(0x7b207573e68e590a), + U64_C(0x5f49fc0a149a4407), U64_C(0x343259b671a5a82c), + U64_C(0xfbc2bb458a6f981f), U64_C(0xc272b350a0a41a38), + U64_C(0x3aaf1fd8ada32354), U64_C(0x6cbb868b0b3c2717), + U64_C(0xa2b569c88d2583fe), U64_C(0xf180c9d1bf027928), + U64_C(0xaf37386bd64ba9f5), U64_C(0x12bacab2790a8088), + U64_C(0x4c0d3b0810435055), U64_C(0xb2eeb9070e9436df), + U64_C(0xc5b29067cea7d104), U64_C(0xdcb425f1ff132461), + U64_C(0x4f122cc5972bf126), U64_C(0xac282fa651230886), + U64_C(0xe7e537992f6393ef), U64_C(0xe61b3a2952b00735), + U64_C(0x709c0a57ae302ce7), U64_C(0xe02514ae416058d3), + U64_C(0xc44c9dd7b37445de), U64_C(0x5a68c5408022ba92), + U64_C(0x1c278cdca50c0bf0), U64_C(0x6e5a9cf6f18712be), + U64_C(0x86dce0b17f319ef3), U64_C(0x2d34ec2040115d49), + U64_C(0x4bcd183f7e409b69), U64_C(0x2815d56ad4a9a3dc), + U64_C(0x24698979f2141d0d), U64_C(0x0000000000000000), + U64_C(0x1ec696a15fb73e59), U64_C(0xd86b110b16784e2e), + U64_C(0x8e7f8858b0e74a6d), U64_C(0x063e2e8713d05fe6), + U64_C(0xe2c40ed3bbdb6d7a), U64_C(0xb1f1aeca89fc97ac), + U64_C(0xe1db191e3cb3cc09), U64_C(0x6418ee62c4eaf389), + U64_C(0xc6ad87aa49cf7077), U64_C(0xd6f65765ca7ec556), + U64_C(0x9afb6c6dda3d9503), U64_C(0x7ce05644888d9236), + U64_C(0x8d609f95378feb1e), U64_C(0x23a9aa4e9c17d631), + U64_C(0x6226c0e5d73aac6f), U64_C(0x56149953a69f0443), + U64_C(0xeeb852c09d66d3ab), U64_C(0x2b0ac2a753c102af), + U64_C(0x07c023376e03cb3c), U64_C(0x2ccae1903dc2c993), + U64_C(0xd3d76e2f5ec63bc3), U64_C(0x9e2458973356ff4c), + U64_C(0xa66a5d32644ee9b1), U64_C(0x0a427294356de137), + U64_C(0x783f62be61e6f879), U64_C(0x1344c70204d91452), + U64_C(0x5b96c8f0fdf12e48), U64_C(0xa90916ecc59bf613), + U64_C(0xbe92e5142829880e), U64_C(0x727d102a548b194e), + U64_C(0x1be7afebcb0fc0cc), U64_C(0x3e702b2244c8491b), + U64_C(0xd5e940a84d166425), U64_C(0x66f9f41f3e51c620), + U64_C(0xabe80c913f20c3ba), U64_C(0xf07ec461c2d1edf2), + U64_C(0xf361d3ac45b94c81), U64_C(0x0521394a94b8fe95), + U64_C(0xadd622162cf09c5c), U64_C(0xe97871f7f3651897), + U64_C(0xf4a1f09b2bba87bd), U64_C(0x095d6559b2054044), + U64_C(0x0bbc7f2448be75ed), U64_C(0x2af4cf172e129675), + U64_C(0x157ae98517094bb4), U64_C(0x9fda55274e856b96), + U64_C(0x914713499283e0ee), U64_C(0xb952c623462a4332), + U64_C(0x74433ead475b46a8), U64_C(0x8b5eb112245fb4f8), + U64_C(0xa34b6478f0f61724), U64_C(0x11a5dd7ffe6221fb), + U64_C(0xc16da49d27ccbb4b), U64_C(0x76a224d0bde07301), + U64_C(0x8aa0bca2598c2022), U64_C(0x4df336b86d90c48f), + U64_C(0xea67663a740db9e4), U64_C(0xef465f70e0b54771), + U64_C(0x39b008152acb8227), U64_C(0x7d1e5bf4f55e06ec), + U64_C(0x105bd0cf83b1b521), U64_C(0x775c2960c033e7db), + U64_C(0x7e014c397236a79f), U64_C(0x811cc386113255cf), + U64_C(0xeda7450d1a0e72d8), U64_C(0x5889df3d7a998f3b), + U64_C(0x2e2bfbedc779fc3a), U64_C(0xce0eef438619a4e9), + U64_C(0x372d4e7bf6cd095f), U64_C(0x04df34fae96b6a4f), + U64_C(0xf923a13870d4adb6), U64_C(0xa1aa7e050a4d228d), + U64_C(0xa8f71b5cb84862c9), U64_C(0xb52e9a306097fde3), + U64_C(0x0d8251a35b6e2a0b), U64_C(0x2257a7fee1c442eb), + U64_C(0x73831d9a29588d94), U64_C(0x51d4ba64c89ccf7f), + U64_C(0x502ab7d4b54f5ba5), U64_C(0x97793dce8153bf08), + U64_C(0xe5042de4d5d8a646), U64_C(0x9687307efc802bd2), 
+ U64_C(0xa05473b5779eb657), U64_C(0xb4d097801d446939), + U64_C(0xcff0e2f3fbca3033), U64_C(0xc38cbee0dd778ee2), + U64_C(0x464f499c252eb162), U64_C(0xcad1dbb96f72cea6), + U64_C(0xba4dd1eec142e241), U64_C(0xb00fa37af42f0376) }, + /* 2 */ + { U64_C(0xcce4cd3aa968b245), U64_C(0x089d5484e80b7faf), + U64_C(0x638246c1b3548304), U64_C(0xd2fe0ec8c2355492), + U64_C(0xa7fbdf7ff2374eee), U64_C(0x4df1600c92337a16), + U64_C(0x84e503ea523b12fb), U64_C(0x0790bbfd53ab0c4a), + U64_C(0x198a780f38f6ea9d), U64_C(0x2ab30c8f55ec48cb), + U64_C(0xe0f7fed6b2c49db5), U64_C(0xb6ecf3f422cadbdc), + U64_C(0x409c9a541358df11), U64_C(0xd3ce8a56dfde3fe3), + U64_C(0xc3e9224312c8c1a0), U64_C(0x0d6dfa58816ba507), + U64_C(0xddf3e1b179952777), U64_C(0x04c02a42748bb1d9), + U64_C(0x94c2abff9f2decb8), U64_C(0x4f91752da8f8acf4), + U64_C(0x78682befb169bf7b), U64_C(0xe1c77a48af2ff6c4), + U64_C(0x0c5d7ec69c80ce76), U64_C(0x4cc1e4928fd81167), + U64_C(0xfeed3d24d9997b62), U64_C(0x518bb6dfc3a54a23), + U64_C(0x6dbf2d26151f9b90), U64_C(0xb5bc624b05ea664f), + U64_C(0xe86aaa525acfe21a), U64_C(0x4801ced0fb53a0be), + U64_C(0xc91463e6c00868ed), U64_C(0x1027a815cd16fe43), + U64_C(0xf67069a0319204cd), U64_C(0xb04ccc976c8abce7), + U64_C(0xc0b9b3fc35e87c33), U64_C(0xf380c77c58f2de65), + U64_C(0x50bb3241de4e2152), U64_C(0xdf93f490435ef195), + U64_C(0xf1e0d25d62390887), U64_C(0xaf668bfb1a3c3141), + U64_C(0xbc11b251f00a7291), U64_C(0x73a5eed47e427d47), + U64_C(0x25bee3f6ee4c3b2e), U64_C(0x43cc0beb34786282), + U64_C(0xc824e778dde3039c), U64_C(0xf97d86d98a327728), + U64_C(0xf2b043e24519b514), U64_C(0xe297ebf7880f4b57), + U64_C(0x3a94a49a98fab688), U64_C(0x868516cb68f0c419), + U64_C(0xeffa11af0964ee50), U64_C(0xa4ab4ec0d517f37d), + U64_C(0xa9c6b498547c567a), U64_C(0x8e18424f80fbbbb6), + U64_C(0x0bcdc53bcf2bc23c), U64_C(0x137739aaea3643d0), + U64_C(0x2c1333ec1bac2ff0), U64_C(0x8d48d3f0a7db0625), + U64_C(0x1e1ac3f26b5de6d7), U64_C(0xf520f81f16b2b95e), + U64_C(0x9f0f6ec450062e84), U64_C(0x0130849e1deb6b71), + U64_C(0xd45e31ab8c7533a9), U64_C(0x652279a2fd14e43f), + U64_C(0x3209f01e70f1c927), U64_C(0xbe71a770cac1a473), + U64_C(0x0e3d6be7a64b1894), U64_C(0x7ec8148cff29d840), + U64_C(0xcb7476c7fac3be0f), U64_C(0x72956a4a63a91636), + U64_C(0x37f95ec21991138f), U64_C(0x9e3fea5a4ded45f5), + U64_C(0x7b38ba50964902e8), U64_C(0x222e580bbde73764), + U64_C(0x61e253e0899f55e6), U64_C(0xfc8d2805e352ad80), + U64_C(0x35994be3235ac56d), U64_C(0x09add01af5e014de), + U64_C(0x5e8659a6780539c6), U64_C(0xb17c48097161d796), + U64_C(0x026015213acbd6e2), U64_C(0xd1ae9f77e515e901), + U64_C(0xb7dc776a3f21b0ad), U64_C(0xaba6a1b96eb78098), + U64_C(0x9bcf4486248d9f5d), U64_C(0x582666c536455efd), + U64_C(0xfdbdac9bfeb9c6f1), U64_C(0xc47999be4163cdea), + U64_C(0x765540081722a7ef), U64_C(0x3e548ed8ec710751), + U64_C(0x3d041f67cb51bac2), U64_C(0x7958af71ac82d40a), + U64_C(0x36c9da5c047a78fe), U64_C(0xed9a048e33af38b2), + U64_C(0x26ee7249c96c86bd), U64_C(0x900281bdeba65d61), + U64_C(0x11172c8bd0fd9532), U64_C(0xea0abf73600434f8), + U64_C(0x42fc8f75299309f3), U64_C(0x34a9cf7d3eb1ae1c), + U64_C(0x2b838811480723ba), U64_C(0x5ce64c8742ceef24), + U64_C(0x1adae9b01fd6570e), U64_C(0x3c349bf9d6bad1b3), + U64_C(0x82453c891c7b75c0), U64_C(0x97923a40b80d512b), + U64_C(0x4a61dbf1c198765c), U64_C(0xb48ce6d518010d3e), + U64_C(0xcfb45c858e480fd6), U64_C(0xd933cbf30d1e96ae), + U64_C(0xd70ea014ab558e3a), U64_C(0xc189376228031742), + U64_C(0x9262949cd16d8b83), U64_C(0xeb3a3bed7def5f89), + U64_C(0x49314a4ee6b8cbcf), U64_C(0xdcc3652f647e4c06), + U64_C(0xda635a4c2a3e2b3d), U64_C(0x470c21a940f3d35b), + 
U64_C(0x315961a157d174b4), U64_C(0x6672e81dda3459ac), + U64_C(0x5b76f77a1165e36e), U64_C(0x445cb01667d36ec8), + U64_C(0xc5491d205c88a69b), U64_C(0x456c34887a3805b9), + U64_C(0xffddb9bac4721013), U64_C(0x99af51a71e4649bf), + U64_C(0xa15be01cbc7729d5), U64_C(0x52db2760e485f7b0), + U64_C(0x8c78576eba306d54), U64_C(0xae560f6507d75a30), + U64_C(0x95f22f6182c687c9), U64_C(0x71c5fbf54489aba5), + U64_C(0xca44f259e728d57e), U64_C(0x88b87d2ccebbdc8d), + U64_C(0xbab18d32be4a15aa), U64_C(0x8be8ec93e99b611e), + U64_C(0x17b713e89ebdf209), U64_C(0xb31c5d284baa0174), + U64_C(0xeeca9531148f8521), U64_C(0xb8d198138481c348), + U64_C(0x8988f9b2d350b7fc), U64_C(0xb9e11c8d996aa839), + U64_C(0x5a4673e40c8e881f), U64_C(0x1687977683569978), + U64_C(0xbf4123eed72acf02), U64_C(0x4ea1f1b3b513c785), + U64_C(0xe767452be16f91ff), U64_C(0x7505d1b730021a7c), + U64_C(0xa59bca5ec8fc980c), U64_C(0xad069eda20f7e7a3), + U64_C(0x38f4b1bba231606a), U64_C(0x60d2d77e94743e97), + U64_C(0x9affc0183966f42c), U64_C(0x248e6768f3a7505f), + U64_C(0xcdd449a4b483d934), U64_C(0x87b59255751baf68), + U64_C(0x1bea6d2e023d3c7f), U64_C(0x6b1f12455b5ffcab), + U64_C(0x743555292de9710d), U64_C(0xd8034f6d10f5fddf), + U64_C(0xc6198c9f7ba81b08), U64_C(0xbb8109aca3a17edb), + U64_C(0xfa2d1766ad12cabb), U64_C(0xc729080166437079), + U64_C(0x9c5fff7b77269317), U64_C(0x0000000000000000), + U64_C(0x15d706c9a47624eb), U64_C(0x6fdf38072fd44d72), + U64_C(0x5fb6dd3865ee52b7), U64_C(0xa33bf53d86bcff37), + U64_C(0xe657c1b5fc84fa8e), U64_C(0xaa962527735cebe9), + U64_C(0x39c43525bfda0b1b), U64_C(0x204e4d2a872ce186), + U64_C(0x7a083ece8ba26999), U64_C(0x554b9c9db72efbfa), + U64_C(0xb22cd9b656416a05), U64_C(0x96a2bedea5e63a5a), + U64_C(0x802529a826b0a322), U64_C(0x8115ad363b5bc853), + U64_C(0x8375b81701901eb1), U64_C(0x3069e53f4a3a1fc5), + U64_C(0xbd2136cfede119e0), U64_C(0x18bafc91251d81ec), + U64_C(0x1d4a524d4c7d5b44), U64_C(0x05f0aedc6960daa8), + U64_C(0x29e39d3072ccf558), U64_C(0x70f57f6b5962c0d4), + U64_C(0x989fd53903ad22ce), U64_C(0xf84d024797d91c59), + U64_C(0x547b1803aac5908b), U64_C(0xf0d056c37fd263f6), + U64_C(0xd56eb535919e58d8), U64_C(0x1c7ad6d351963035), + U64_C(0x2e7326cd2167f912), U64_C(0xac361a443d1c8cd2), + U64_C(0x697f076461942a49), U64_C(0x4b515f6fdc731d2d), + U64_C(0x8ad8680df4700a6f), U64_C(0x41ac1eca0eb3b460), + U64_C(0x7d988533d80965d3), U64_C(0xa8f6300649973d0b), + U64_C(0x7765c4960ac9cc9e), U64_C(0x7ca801adc5e20ea2), + U64_C(0xdea3700e5eb59ae4), U64_C(0xa06b6482a19c42a4), + U64_C(0x6a2f96db46b497da), U64_C(0x27def6d7d487edcc), + U64_C(0x463ca5375d18b82a), U64_C(0xa6cb5be1efdc259f), + U64_C(0x53eba3fef96e9cc1), U64_C(0xce84d81b93a364a7), + U64_C(0xf4107c810b59d22f), U64_C(0x333974806d1aa256), + U64_C(0x0f0def79bba073e5), U64_C(0x231edc95a00c5c15), + U64_C(0xe437d494c64f2c6c), U64_C(0x91320523f64d3610), + U64_C(0x67426c83c7df32dd), U64_C(0x6eefbc99323f2603), + U64_C(0x9d6f7be56acdf866), U64_C(0x5916e25b2bae358c), + U64_C(0x7ff89012e2c2b331), U64_C(0x035091bf2720bd93), + U64_C(0x561b0d22900e4669), U64_C(0x28d319ae6f279e29), + U64_C(0x2f43a2533c8c9263), U64_C(0xd09e1be9f8fe8270), + U64_C(0xf740ed3e2c796fbc), U64_C(0xdb53ded237d5404c), + U64_C(0x62b2c25faebfe875), U64_C(0x0afd41a5d2c0a94d), + U64_C(0x6412fd3ce0ff8f4e), U64_C(0xe3a76f6995e42026), + U64_C(0x6c8fa9b808f4f0e1), U64_C(0xc2d9a6dd0f23aad1), + U64_C(0x8f28c6d19d10d0c7), U64_C(0x85d587744fd0798a), + U64_C(0xa20b71a39b579446), U64_C(0x684f83fa7c7f4138), + U64_C(0xe507500adba4471d), U64_C(0x3f640a46f19a6c20), + U64_C(0x1247bd34f7dd28a1), U64_C(0x2d23b77206474481), + U64_C(0x93521002cc86e0f2), 
U64_C(0x572b89bc8de52d18), + U64_C(0xfb1d93f8b0f9a1ca), U64_C(0xe95a2ecc4724896b), + U64_C(0x3ba420048511ddf9), U64_C(0xd63e248ab6bee54b), + U64_C(0x5dd6c8195f258455), U64_C(0x06a03f634e40673b), + U64_C(0x1f2a476c76b68da6), U64_C(0x217ec9b49ac78af7), + U64_C(0xecaa80102e4453c3), U64_C(0x14e78257b99d4f9a) }, + /* 3 */ + { U64_C(0x20329b2cc87bba05), U64_C(0x4f5eb6f86546a531), + U64_C(0xd4f44775f751b6b1), U64_C(0x8266a47b850dfa8b), + U64_C(0xbb986aa15a6ca985), U64_C(0xc979eb08f9ae0f99), + U64_C(0x2da6f447a2375ea1), U64_C(0x1e74275dcd7d8576), + U64_C(0xbc20180a800bc5f8), U64_C(0xb4a2f701b2dc65be), + U64_C(0xe726946f981b6d66), U64_C(0x48e6c453bf21c94c), + U64_C(0x42cad9930f0a4195), U64_C(0xefa47b64aacccd20), + U64_C(0x71180a8960409a42), U64_C(0x8bb3329bf6a44e0c), + U64_C(0xd34c35de2d36dacc), U64_C(0xa92f5b7cbc23dc96), + U64_C(0xb31a85aa68bb09c3), U64_C(0x13e04836a73161d2), + U64_C(0xb24dfc4129c51d02), U64_C(0x8ae44b70b7da5acd), + U64_C(0xe671ed84d96579a7), U64_C(0xa4bb3417d66f3832), + U64_C(0x4572ab38d56d2de8), U64_C(0xb1b47761ea47215c), + U64_C(0xe81c09cf70aba15d), U64_C(0xffbdb872ce7f90ac), + U64_C(0xa8782297fd5dc857), U64_C(0x0d946f6b6a4ce4a4), + U64_C(0xe4df1f4f5b995138), U64_C(0x9ebc71edca8c5762), + U64_C(0x0a2c1dc0b02b88d9), U64_C(0x3b503c115d9d7b91), + U64_C(0xc64376a8111ec3a2), U64_C(0xcec199a323c963e4), + U64_C(0xdc76a87ec58616f7), U64_C(0x09d596e073a9b487), + U64_C(0x14583a9d7d560daf), U64_C(0xf4c6dc593f2a0cb4), + U64_C(0xdd21d19584f80236), U64_C(0x4a4836983ddde1d3), + U64_C(0xe58866a41ae745f9), U64_C(0xf591a5b27e541875), + U64_C(0x891dc05074586693), U64_C(0x5b068c651810a89e), + U64_C(0xa30346bc0c08544f), U64_C(0x3dbf3751c684032d), + U64_C(0x2a1e86ec785032dc), U64_C(0xf73f5779fca830ea), + U64_C(0xb60c05ca30204d21), U64_C(0x0cc316802b32f065), + U64_C(0x8770241bdd96be69), U64_C(0xb861e18199ee95db), + U64_C(0xf805cad91418fcd1), U64_C(0x29e70dccbbd20e82), + U64_C(0xc7140f435060d763), U64_C(0x0f3a9da0e8b0cc3b), + U64_C(0xa2543f574d76408e), U64_C(0xbd7761e1c175d139), + U64_C(0x4b1f4f737ca3f512), U64_C(0x6dc2df1f2fc137ab), + U64_C(0xf1d05c3967b14856), U64_C(0xa742bf3715ed046c), + U64_C(0x654030141d1697ed), U64_C(0x07b872abda676c7d), + U64_C(0x3ce84eba87fa17ec), U64_C(0xc1fb0403cb79afdf), + U64_C(0x3e46bc7105063f73), U64_C(0x278ae987121cd678), + U64_C(0xa1adb4778ef47cd0), U64_C(0x26dd906c5362c2b9), + U64_C(0x05168060589b44e2), U64_C(0xfbfc41f9d79ac08f), + U64_C(0x0e6de44ba9ced8fa), U64_C(0x9feb08068bf243a3), + U64_C(0x7b341749d06b129b), U64_C(0x229c69e74a87929a), + U64_C(0xe09ee6c4427c011b), U64_C(0x5692e30e725c4c3a), + U64_C(0xda99a33e5e9f6e4b), U64_C(0x353dd85af453a36b), + U64_C(0x25241b4c90e0fee7), U64_C(0x5de987258309d022), + U64_C(0xe230140fc0802984), U64_C(0x93281e86a0c0b3c6), + U64_C(0xf229d719a4337408), U64_C(0x6f6c2dd4ad3d1f34), + U64_C(0x8ea5b2fbae3f0aee), U64_C(0x8331dd90c473ee4a), + U64_C(0x346aa1b1b52db7aa), U64_C(0xdf8f235e06042aa9), + U64_C(0xcc6f6b68a1354b7b), U64_C(0x6c95a6f46ebf236a), + U64_C(0x52d31a856bb91c19), U64_C(0x1a35ded6d498d555), + U64_C(0xf37eaef2e54d60c9), U64_C(0x72e181a9a3c2a61c), + U64_C(0x98537aad51952fde), U64_C(0x16f6c856ffaa2530), + U64_C(0xd960281e9d1d5215), U64_C(0x3a0745fa1ce36f50), + U64_C(0x0b7b642bf1559c18), U64_C(0x59a87eae9aec8001), + U64_C(0x5e100c05408bec7c), U64_C(0x0441f98b19e55023), + U64_C(0xd70dcc5534d38aef), U64_C(0x927f676de1bea707), + U64_C(0x9769e70db925e3e5), U64_C(0x7a636ea29115065a), + U64_C(0x468b201816ef11b6), U64_C(0xab81a9b73edff409), + U64_C(0xc0ac7de88a07bb1e), U64_C(0x1f235eb68c0391b7), + U64_C(0x6056b074458dd30f), 
U64_C(0xbe8eeac102f7ed67), + U64_C(0xcd381283e04b5fba), U64_C(0x5cbefecec277c4e3), + U64_C(0xd21b4c356c48ce0d), U64_C(0x1019c31664b35d8c), + U64_C(0x247362a7d19eea26), U64_C(0xebe582efb3299d03), + U64_C(0x02aef2cb82fc289f), U64_C(0x86275df09ce8aaa8), + U64_C(0x28b07427faac1a43), U64_C(0x38a9b7319e1f47cf), + U64_C(0xc82e92e3b8d01b58), U64_C(0x06ef0b409b1978bc), + U64_C(0x62f842bfc771fb90), U64_C(0x9904034610eb3b1f), + U64_C(0xded85ab5477a3e68), U64_C(0x90d195a663428f98), + U64_C(0x5384636e2ac708d8), U64_C(0xcbd719c37b522706), + U64_C(0xae9729d76644b0eb), U64_C(0x7c8c65e20a0c7ee6), + U64_C(0x80c856b007f1d214), U64_C(0x8c0b40302cc32271), + U64_C(0xdbcedad51fe17a8a), U64_C(0x740e8ae938dbdea0), + U64_C(0xa615c6dc549310ad), U64_C(0x19cc55f6171ae90b), + U64_C(0x49b1bdb8fe5fdd8d), U64_C(0xed0a89af2830e5bf), + U64_C(0x6a7aadb4f5a65bd6), U64_C(0x7e22972988f05679), + U64_C(0xf952b3325566e810), U64_C(0x39fecedadf61530e), + U64_C(0x6101c99f04f3c7ce), U64_C(0x2e5f7f6761b562ff), + U64_C(0xf08725d226cf5c97), U64_C(0x63af3b54860fef51), + U64_C(0x8ff2cb10ef411e2f), U64_C(0x884ab9bb35267252), + U64_C(0x4df04433e7ba8dae), U64_C(0x9afd8866d3690741), + U64_C(0x66b9bb34de94abb3), U64_C(0x9baaf18d92171380), + U64_C(0x543c11c5f0a064a5), U64_C(0x17a1b1bdbed431f1), + U64_C(0xb5f58eeaf3a2717f), U64_C(0xc355f6c849858740), + U64_C(0xec5df044694ef17e), U64_C(0xd83751f5dc6346d4), + U64_C(0xfc4433520dfdacf2), U64_C(0x0000000000000000), + U64_C(0x5a51f58e596ebc5f), U64_C(0x3285aaf12e34cf16), + U64_C(0x8d5c39db6dbd36b0), U64_C(0x12b731dde64f7513), + U64_C(0x94906c2d7aa7dfbb), U64_C(0x302b583aacc8e789), + U64_C(0x9d45facd090e6b3c), U64_C(0x2165e2c78905aec4), + U64_C(0x68d45f7f775a7349), U64_C(0x189b2c1d5664fdca), + U64_C(0xe1c99f2f030215da), U64_C(0x6983269436246788), + U64_C(0x8489af3b1e148237), U64_C(0xe94b702431d5b59c), + U64_C(0x33d2d31a6f4adbd7), U64_C(0xbfd9932a4389f9a6), + U64_C(0xb0e30e8aab39359d), U64_C(0xd1e2c715afcaf253), + U64_C(0x150f43763c28196e), U64_C(0xc4ed846393e2eb3d), + U64_C(0x03f98b20c3823c5e), U64_C(0xfd134ab94c83b833), + U64_C(0x556b682eb1de7064), U64_C(0x36c4537a37d19f35), + U64_C(0x7559f30279a5ca61), U64_C(0x799ae58252973a04), + U64_C(0x9c12832648707ffd), U64_C(0x78cd9c6913e92ec5), + U64_C(0x1d8dac7d0effb928), U64_C(0x439da0784e745554), + U64_C(0x413352b3cc887dcb), U64_C(0xbacf134a1b12bd44), + U64_C(0x114ebafd25cd494d), U64_C(0x2f08068c20cb763e), + U64_C(0x76a07822ba27f63f), U64_C(0xeab2fb04f25789c2), + U64_C(0xe3676de481fe3d45), U64_C(0x1b62a73d95e6c194), + U64_C(0x641749ff5c68832c), U64_C(0xa5ec4dfc97112cf3), + U64_C(0xf6682e92bdd6242b), U64_C(0x3f11c59a44782bb2), + U64_C(0x317c21d1edb6f348), U64_C(0xd65ab5be75ad9e2e), + U64_C(0x6b2dd45fb4d84f17), U64_C(0xfaab381296e4d44e), + U64_C(0xd0b5befeeeb4e692), U64_C(0x0882ef0b32d7a046), + U64_C(0x512a91a5a83b2047), U64_C(0x963e9ee6f85bf724), + U64_C(0x4e09cf132438b1f0), U64_C(0x77f701c9fb59e2fe), + U64_C(0x7ddb1c094b726a27), U64_C(0x5f4775ee01f5f8bd), + U64_C(0x9186ec4d223c9b59), U64_C(0xfeeac1998f01846d), + U64_C(0xac39db1ce4b89874), U64_C(0xb75b7c21715e59e0), + U64_C(0xafc0503c273aa42a), U64_C(0x6e3b543fec430bf5), + U64_C(0x704f7362213e8e83), U64_C(0x58ff0745db9294c0), + U64_C(0x67eec2df9feabf72), U64_C(0xa0facd9ccf8a6811), + U64_C(0xb936986ad890811a), U64_C(0x95c715c63bd9cb7a), + U64_C(0xca8060283a2c33c7), U64_C(0x507de84ee9453486), + U64_C(0x85ded6d05f6a96f6), U64_C(0x1cdad5964f81ade9), + U64_C(0xd5a33e9eb62fa270), U64_C(0x40642b588df6690a), + U64_C(0x7f75eec2c98e42b8), U64_C(0x2cf18dace3494a60), + U64_C(0x23cb100c0bf9865b), U64_C(0xeef3028febb2d9e1), 
+ U64_C(0x4425d2d394133929), U64_C(0xaad6d05c7fa1e0c8), + U64_C(0xad6ea2f7a5c68cb5), U64_C(0xc2028f2308fb9381), + U64_C(0x819f2f5b468fc6d5), U64_C(0xc5bafd88d29cfffc), + U64_C(0x47dc59f357910577), U64_C(0x2b49ff07392e261d), + U64_C(0x57c59ae5332258fb), U64_C(0x73b6f842e2bcb2dd), + U64_C(0xcf96e04862b77725), U64_C(0x4ca73dd8a6c4996f), + U64_C(0x015779eb417e14c1), U64_C(0x37932a9176af8bf4) }, + /* 4 */ + { U64_C(0x190a2c9b249df23e), U64_C(0x2f62f8b62263e1e9), + U64_C(0x7a7f754740993655), U64_C(0x330b7ba4d5564d9f), + U64_C(0x4c17a16a46672582), U64_C(0xb22f08eb7d05f5b8), + U64_C(0x535f47f40bc148cc), U64_C(0x3aec5d27d4883037), + U64_C(0x10ed0a1825438f96), U64_C(0x516101f72c233d17), + U64_C(0x13cc6f949fd04eae), U64_C(0x739853c441474bfd), + U64_C(0x653793d90d3f5b1b), U64_C(0x5240647b96b0fc2f), + U64_C(0x0c84890ad27623e0), U64_C(0xd7189b32703aaea3), + U64_C(0x2685de3523bd9c41), U64_C(0x99317c5b11bffefa), + U64_C(0x0d9baa854f079703), U64_C(0x70b93648fbd48ac5), + U64_C(0xa80441fce30bc6be), U64_C(0x7287704bdc36ff1e), + U64_C(0xb65384ed33dc1f13), U64_C(0xd36417343ee34408), + U64_C(0x39cd38ab6e1bf10f), U64_C(0x5ab861770a1f3564), + U64_C(0x0ebacf09f594563b), U64_C(0xd04572b884708530), + U64_C(0x3cae9722bdb3af47), U64_C(0x4a556b6f2f5cbaf2), + U64_C(0xe1704f1f76c4bd74), U64_C(0x5ec4ed7144c6dfcf), + U64_C(0x16afc01d4c7810e6), U64_C(0x283f113cd629ca7a), + U64_C(0xaf59a8761741ed2d), U64_C(0xeed5a3991e215fac), + U64_C(0x3bf37ea849f984d4), U64_C(0xe413e096a56ce33c), + U64_C(0x2c439d3a98f020d1), U64_C(0x637559dc6404c46b), + U64_C(0x9e6c95d1e5f5d569), U64_C(0x24bb9836045fe99a), + U64_C(0x44efa466dac8ecc9), U64_C(0xc6eab2a5c80895d6), + U64_C(0x803b50c035220cc4), U64_C(0x0321658cba93c138), + U64_C(0x8f9ebc465dc7ee1c), U64_C(0xd15a5137190131d3), + U64_C(0x0fa5ec8668e5e2d8), U64_C(0x91c979578d1037b1), + U64_C(0x0642ca05693b9f70), U64_C(0xefca80168350eb4f), + U64_C(0x38d21b24f36a45ec), U64_C(0xbeab81e1af73d658), + U64_C(0x8cbfd9cae7542f24), U64_C(0xfd19cc0d81f11102), + U64_C(0x0ac6430fbb4dbc90), U64_C(0x1d76a09d6a441895), + U64_C(0x2a01573ff1cbbfa1), U64_C(0xb572e161894fde2b), + U64_C(0x8124734fa853b827), U64_C(0x614b1fdf43e6b1b0), + U64_C(0x68ac395c4238cc18), U64_C(0x21d837bfd7f7b7d2), + U64_C(0x20c714304a860331), U64_C(0x5cfaab726324aa14), + U64_C(0x74c5ba4eb50d606e), U64_C(0xf3a3030474654739), + U64_C(0x23e671bcf015c209), U64_C(0x45f087e947b9582a), + U64_C(0xd8bd77b418df4c7b), U64_C(0xe06f6c90ebb50997), + U64_C(0x0bd96080263c0873), U64_C(0x7e03f9410e40dcfe), + U64_C(0xb8e94be4c6484928), U64_C(0xfb5b0608e8ca8e72), + U64_C(0x1a2b49179e0e3306), U64_C(0x4e29e76961855059), + U64_C(0x4f36c4e6fcf4e4ba), U64_C(0x49740ee395cf7bca), + U64_C(0xc2963ea386d17f7d), U64_C(0x90d65ad810618352), + U64_C(0x12d34c1b02a1fa4d), U64_C(0xfa44258775bb3a91), + U64_C(0x18150f14b9ec46dd), U64_C(0x1491861e6b9a653d), + U64_C(0x9a1019d7ab2c3fc2), U64_C(0x3668d42d06fe13d7), + U64_C(0xdcc1fbb25606a6d0), U64_C(0x969490dd795a1c22), + U64_C(0x3549b1a1bc6dd2ef), U64_C(0xc94f5e23a0ed770e), + U64_C(0xb9f6686b5b39fdcb), U64_C(0xc4d4f4a6efeae00d), + U64_C(0xe732851a1fff2204), U64_C(0x94aad6de5eb869f9), + U64_C(0x3f8ff2ae07206e7f), U64_C(0xfe38a9813b62d03a), + U64_C(0xa7a1ad7a8bee2466), U64_C(0x7b6056c8dde882b6), + U64_C(0x302a1e286fc58ca7), U64_C(0x8da0fa457a259bc7), + U64_C(0xb3302b64e074415b), U64_C(0x5402ae7eff8b635f), + U64_C(0x08f8050c9cafc94b), U64_C(0xae468bf98a3059ce), + U64_C(0x88c355cca98dc58f), U64_C(0xb10e6d67c7963480), + U64_C(0xbad70de7e1aa3cf3), U64_C(0xbfb4a26e320262bb), + U64_C(0xcb711820870f02d5), U64_C(0xce12b7a954a75c9d), + 
U64_C(0x563ce87dd8691684), U64_C(0x9f73b65e7884618a), + U64_C(0x2b1e74b06cba0b42), U64_C(0x47cec1ea605b2df1), + U64_C(0x1c698312f735ac76), U64_C(0x5fdbcefed9b76b2c), + U64_C(0x831a354c8fb1cdfc), U64_C(0x820516c312c0791f), + U64_C(0xb74ca762aeadabf0), U64_C(0xfc06ef821c80a5e1), + U64_C(0x5723cbf24518a267), U64_C(0x9d4df05d5f661451), + U64_C(0x588627742dfd40bf), U64_C(0xda8331b73f3d39a0), + U64_C(0x17b0e392d109a405), U64_C(0xf965400bcf28fba9), + U64_C(0x7c3dbf4229a2a925), U64_C(0x023e460327e275db), + U64_C(0x6cd0b55a0ce126b3), U64_C(0xe62da695828e96e7), + U64_C(0x42ad6e63b3f373b9), U64_C(0xe50cc319381d57df), + U64_C(0xc5cbd729729b54ee), U64_C(0x46d1e265fd2a9912), + U64_C(0x6428b056904eeff8), U64_C(0x8be23040131e04b7), + U64_C(0x6709d5da2add2ec0), U64_C(0x075de98af44a2b93), + U64_C(0x8447dcc67bfbe66f), U64_C(0x6616f655b7ac9a23), + U64_C(0xd607b8bded4b1a40), U64_C(0x0563af89d3a85e48), + U64_C(0x3db1b4ad20c21ba4), U64_C(0x11f22997b8323b75), + U64_C(0x292032b34b587e99), U64_C(0x7f1cdace9331681d), + U64_C(0x8e819fc9c0b65aff), U64_C(0xa1e3677fe2d5bb16), + U64_C(0xcd33d225ee349da5), U64_C(0xd9a2543b85aef898), + U64_C(0x795e10cbfa0af76d), U64_C(0x25a4bbb9992e5d79), + U64_C(0x78413344677b438e), U64_C(0xf0826688cef68601), + U64_C(0xd27b34bba392f0eb), U64_C(0x551d8df162fad7bc), + U64_C(0x1e57c511d0d7d9ad), U64_C(0xdeffbdb171e4d30b), + U64_C(0xf4feea8e802f6caa), U64_C(0xa480c8f6317de55e), + U64_C(0xa0fc44f07fa40ff5), U64_C(0x95b5f551c3c9dd1a), + U64_C(0x22f952336d6476ea), U64_C(0x0000000000000000), + U64_C(0xa6be8ef5169f9085), U64_C(0xcc2cf1aa73452946), + U64_C(0x2e7ddb39bf12550a), U64_C(0xd526dd3157d8db78), + U64_C(0x486b2d6c08becf29), U64_C(0x9b0f3a58365d8b21), + U64_C(0xac78cdfaadd22c15), U64_C(0xbc95c7e28891a383), + U64_C(0x6a927f5f65dab9c3), U64_C(0xc3891d2c1ba0cb9e), + U64_C(0xeaa92f9f50f8b507), U64_C(0xcf0d9426c9d6e87e), + U64_C(0xca6e3baf1a7eb636), U64_C(0xab25247059980786), + U64_C(0x69b31ad3df4978fb), U64_C(0xe2512a93cc577c4c), + U64_C(0xff278a0ea61364d9), U64_C(0x71a615c766a53e26), + U64_C(0x89dc764334fc716c), U64_C(0xf87a638452594f4a), + U64_C(0xf2bc208be914f3da), U64_C(0x8766b94ac1682757), + U64_C(0xbbc82e687cdb8810), U64_C(0x626a7a53f9757088), + U64_C(0xa2c202f358467a2e), U64_C(0x4d0882e5db169161), + U64_C(0x09e7268301de7da8), U64_C(0xe897699c771ac0dc), + U64_C(0xc8507dac3d9cc3ed), U64_C(0xc0a878a0a1330aa6), + U64_C(0x978bb352e42ba8c1), U64_C(0xe9884a13ea6b743f), + U64_C(0x279afdbabecc28a2), U64_C(0x047c8c064ed9eaab), + U64_C(0x507e2278b15289f4), U64_C(0x599904fbb08cf45c), + U64_C(0xbd8ae46d15e01760), U64_C(0x31353da7f2b43844), + U64_C(0x8558ff49e68a528c), U64_C(0x76fbfc4d92ef15b5), + U64_C(0x3456922e211c660c), U64_C(0x86799ac55c1993b4), + U64_C(0x3e90d1219a51da9c), U64_C(0x2d5cbeb505819432), + U64_C(0x982e5fd48cce4a19), U64_C(0xdb9c1238a24c8d43), + U64_C(0xd439febecaa96f9b), U64_C(0x418c0bef0960b281), + U64_C(0x158ea591f6ebd1de), U64_C(0x1f48e69e4da66d4e), + U64_C(0x8afd13cf8e6fb054), U64_C(0xf5e1c9011d5ed849), + U64_C(0xe34e091c5126c8af), U64_C(0xad67ee7530a398f6), + U64_C(0x43b24dec2e82c75a), U64_C(0x75da99c1287cd48d), + U64_C(0x92e81cdb3783f689), U64_C(0xa3dd217cc537cecd), + U64_C(0x60543c50de970553), U64_C(0x93f73f54aaf2426a), + U64_C(0xa91b62737e7a725d), U64_C(0xf19d4507538732e2), + U64_C(0x77e4dfc20f9ea156), U64_C(0x7d229ccdb4d31dc6), + U64_C(0x1b346a98037f87e5), U64_C(0xedf4c615a4b29e94), + U64_C(0x4093286094110662), U64_C(0xb0114ee85ae78063), + U64_C(0x6ff1d0d6b672e78b), U64_C(0x6dcf96d591909250), + U64_C(0xdfe09e3eec9567e8), U64_C(0x3214582b4827f97c), + U64_C(0xb46dc2ee143e6ac8), 
U64_C(0xf6c0ac8da7cd1971), + U64_C(0xebb60c10cd8901e4), U64_C(0xf7df8f023abcad92), + U64_C(0x9c52d3d2c217a0b2), U64_C(0x6b8d5cd0f8ab0d20), + U64_C(0x3777f7a29b8fa734), U64_C(0x011f238f9d71b4e3), + U64_C(0xc1b75b2f3c42be45), U64_C(0x5de588fdfe551ef7), + U64_C(0x6eeef3592b035368), U64_C(0xaa3a07ffc4e9b365), + U64_C(0xecebe59a39c32a77), U64_C(0x5ba742f8976e8187), + U64_C(0x4b4a48e0b22d0e11), U64_C(0xddded83dcb771233), + U64_C(0xa59feb79ac0c51bd), U64_C(0xc7f5912a55792135) }, + /* 5 */ + { U64_C(0x6d6ae04668a9b08a), U64_C(0x3ab3f04b0be8c743), + U64_C(0xe51e166b54b3c908), U64_C(0xbe90a9eb35c2f139), + U64_C(0xb2c7066637f2bec1), U64_C(0xaa6945613392202c), + U64_C(0x9a28c36f3b5201eb), U64_C(0xddce5a93ab536994), + U64_C(0x0e34133ef6382827), U64_C(0x52a02ba1ec55048b), + U64_C(0xa2f88f97c4b2a177), U64_C(0x8640e513ca2251a5), + U64_C(0xcdf1d36258137622), U64_C(0xfe6cb708dedf8ddb), + U64_C(0x8a174a9ec8121e5d), U64_C(0x679896036b81560e), + U64_C(0x59ed033395795fee), U64_C(0x1dd778ab8b74edaf), + U64_C(0xee533ef92d9f926d), U64_C(0x2a8c79baf8a8d8f5), + U64_C(0x6bcf398e69b119f6), U64_C(0xe20491742fafdd95), + U64_C(0x276488e0809c2aec), U64_C(0xea955b82d88f5cce), + U64_C(0x7102c63a99d9e0c4), U64_C(0xf9763017a5c39946), + U64_C(0x429fa2501f151b3d), U64_C(0x4659c72bea05d59e), + U64_C(0x984b7fdccf5a6634), U64_C(0xf742232953fbb161), + U64_C(0x3041860e08c021c7), U64_C(0x747bfd9616cd9386), + U64_C(0x4bb1367192312787), U64_C(0x1b72a1638a6c44d3), + U64_C(0x4a0e68a6e8359a66), U64_C(0x169a5039f258b6ca), + U64_C(0xb98a2ef44edee5a4), U64_C(0xd9083fe85e43a737), + U64_C(0x967f6ce239624e13), U64_C(0x8874f62d3c1a7982), + U64_C(0x3c1629830af06e3f), U64_C(0x9165ebfd427e5a8e), + U64_C(0xb5dd81794ceeaa5c), U64_C(0x0de8f15a7834f219), + U64_C(0x70bd98ede3dd5d25), U64_C(0xaccc9ca9328a8950), + U64_C(0x56664eda1945ca28), U64_C(0x221db34c0f8859ae), + U64_C(0x26dbd637fa98970d), U64_C(0x1acdffb4f068f932), + U64_C(0x4585254f64090fa0), U64_C(0x72de245e17d53afa), + U64_C(0x1546b25d7c546cf4), U64_C(0x207e0ffffb803e71), + U64_C(0xfaaad2732bcf4378), U64_C(0xb462dfae36ea17bd), + U64_C(0xcf926fd1ac1b11fd), U64_C(0xe0672dc7dba7ba4a), + U64_C(0xd3fa49ad5d6b41b3), U64_C(0x8ba81449b216a3bc), + U64_C(0x14f9ec8a0650d115), U64_C(0x40fc1ee3eb1d7ce2), + U64_C(0x23a2ed9b758ce44f), U64_C(0x782c521b14fddc7e), + U64_C(0x1c68267cf170504e), U64_C(0xbcf31558c1ca96e6), + U64_C(0xa781b43b4ba6d235), U64_C(0xf6fd7dfe29ff0c80), + U64_C(0xb0a4bad5c3fad91e), U64_C(0xd199f51ea963266c), + U64_C(0x414340349119c103), U64_C(0x5405f269ed4dadf7), + U64_C(0xabd61bb649969dcd), U64_C(0x6813dbeae7bdc3c8), + U64_C(0x65fb2ab09f8931d1), U64_C(0xf1e7fae152e3181d), + U64_C(0xc1a67cef5a2339da), U64_C(0x7a4feea8e0f5bba1), + U64_C(0x1e0b9acf05783791), U64_C(0x5b8ebf8061713831), + U64_C(0x80e53cdbcb3af8d9), U64_C(0x7e898bd315e57502), + U64_C(0xc6bcfbf0213f2d47), U64_C(0x95a38e86b76e942d), + U64_C(0x092e94218d243cba), U64_C(0x8339debf453622e7), + U64_C(0xb11be402b9fe64ff), U64_C(0x57d9100d634177c9), + U64_C(0xcc4e8db52217cbc3), U64_C(0x3b0cae9c71ec7aa2), + U64_C(0xfb158ca451cbfe99), U64_C(0x2b33276d82ac6514), + U64_C(0x01bf5ed77a04bde1), U64_C(0xc5601994af33f779), + U64_C(0x75c4a3416cc92e67), U64_C(0xf3844652a6eb7fc2), + U64_C(0x3487e375fdd0ef64), U64_C(0x18ae430704609eed), + U64_C(0x4d14efb993298efb), U64_C(0x815a620cb13e4538), + U64_C(0x125c354207487869), U64_C(0x9eeea614ce42cf48), + U64_C(0xce2d3106d61fac1c), U64_C(0xbbe99247bad6827b), + U64_C(0x071a871f7b1c149d), U64_C(0x2e4a1cc10db81656), + U64_C(0x77a71ff298c149b8), U64_C(0x06a5d9c80118a97c), + U64_C(0xad73c27e488e34b1), 
U64_C(0x443a7b981e0db241), + U64_C(0xe3bbcfa355ab6074), U64_C(0x0af276450328e684), + U64_C(0x73617a896dd1871b), U64_C(0x58525de4ef7de20f), + U64_C(0xb7be3dcab8e6cd83), U64_C(0x19111dd07e64230c), + U64_C(0x842359a03e2a367a), U64_C(0x103f89f1f3401fb6), + U64_C(0xdc710444d157d475), U64_C(0xb835702334da5845), + U64_C(0x4320fc876511a6dc), U64_C(0xd026abc9d3679b8d), + U64_C(0x17250eee885c0b2b), U64_C(0x90dab52a387ae76f), + U64_C(0x31fed8d972c49c26), U64_C(0x89cba8fa461ec463), + U64_C(0x2ff5421677bcabb7), U64_C(0x396f122f85e41d7d), + U64_C(0xa09b332430bac6a8), U64_C(0xc888e8ced7070560), + U64_C(0xaeaf201ac682ee8f), U64_C(0x1180d7268944a257), + U64_C(0xf058a43628e7a5fc), U64_C(0xbd4c4b8fbbce2b07), + U64_C(0xa1246df34abe7b49), U64_C(0x7d5569b79be9af3c), + U64_C(0xa9b5a705bd9efa12), U64_C(0xdb6b835baa4bc0e8), + U64_C(0x05793bac8f147342), U64_C(0x21c1512881848390), + U64_C(0xfdb0556c50d357e5), U64_C(0x613d4fcb6a99ff72), + U64_C(0x03dce2648e0cda3e), U64_C(0xe949b9e6568386f0), + U64_C(0xfc0f0bbb2ad7ea04), U64_C(0x6a70675913b5a417), + U64_C(0x7f36d5046fe1c8e3), U64_C(0x0c57af8d02304ff8), + U64_C(0x32223abdfcc84618), U64_C(0x0891caf6f720815b), + U64_C(0xa63eeaec31a26fd4), U64_C(0x2507345374944d33), + U64_C(0x49d28ac266394058), U64_C(0xf5219f9aa7f3d6be), + U64_C(0x2d96fea583b4cc68), U64_C(0x5a31e1571b7585d0), + U64_C(0x8ed12fe53d02d0fe), U64_C(0xdfade6205f5b0e4b), + U64_C(0x4cabb16ee92d331a), U64_C(0x04c6657bf510cea3), + U64_C(0xd73c2cd6a87b8f10), U64_C(0xe1d87310a1a307ab), + U64_C(0x6cd5be9112ad0d6b), U64_C(0x97c032354366f3f2), + U64_C(0xd4e0ceb22677552e), U64_C(0x0000000000000000), + U64_C(0x29509bde76a402cb), U64_C(0xc27a9e8bd42fe3e4), + U64_C(0x5ef7842cee654b73), U64_C(0xaf107ecdbc86536e), + U64_C(0x3fcacbe784fcb401), U64_C(0xd55f90655c73e8cf), + U64_C(0xe6c2f40fdabf1336), U64_C(0xe8f6e7312c873b11), + U64_C(0xeb2a0555a28be12f), U64_C(0xe4a148bc2eb774e9), + U64_C(0x9b979db84156bc0a), U64_C(0x6eb60222e6a56ab4), + U64_C(0x87ffbbc4b026ec44), U64_C(0xc703a5275b3b90a6), + U64_C(0x47e699fc9001687f), U64_C(0x9c8d1aa73a4aa897), + U64_C(0x7cea3760e1ed12dd), U64_C(0x4ec80ddd1d2554c5), + U64_C(0x13e36b957d4cc588), U64_C(0x5d2b66486069914d), + U64_C(0x92b90999cc7280b0), U64_C(0x517cc9c56259deb5), + U64_C(0xc937b619ad03b881), U64_C(0xec30824ad997f5b2), + U64_C(0xa45d565fc5aa080b), U64_C(0xd6837201d27f32f1), + U64_C(0x635ef3789e9198ad), U64_C(0x531f75769651b96a), + U64_C(0x4f77530a6721e924), U64_C(0x486dd4151c3dfdb9), + U64_C(0x5f48dafb9461f692), U64_C(0x375b011173dc355a), + U64_C(0x3da9775470f4d3de), U64_C(0x8d0dcd81b30e0ac0), + U64_C(0x36e45fc609d888bb), U64_C(0x55baacbe97491016), + U64_C(0x8cb29356c90ab721), U64_C(0x76184125e2c5f459), + U64_C(0x99f4210bb55edbd5), U64_C(0x6f095cf59ca1d755), + U64_C(0x9f51f8c3b44672a9), U64_C(0x3538bda287d45285), + U64_C(0x50c39712185d6354), U64_C(0xf23b1885dcefc223), + U64_C(0x79930ccc6ef9619f), U64_C(0xed8fdc9da3934853), + U64_C(0xcb540aaa590bdf5e), U64_C(0x5c94389f1a6d2cac), + U64_C(0xe77daad8a0bbaed7), U64_C(0x28efc5090ca0bf2a), + U64_C(0xbf2ff73c4fc64cd8), U64_C(0xb37858b14df60320), + U64_C(0xf8c96ec0dfc724a7), U64_C(0x828680683f329f06), + U64_C(0x941cd051cd6a29cc), U64_C(0xc3c5c05cae2b5e05), + U64_C(0xb601631dc2e27062), U64_C(0xc01922382027843b), + U64_C(0x24b86a840e90f0d2), U64_C(0xd245177a276ffc52), + U64_C(0x0f8b4de98c3c95c6), U64_C(0x3e759530fef809e0), + U64_C(0x0b4d2892792c5b65), U64_C(0xc4df4743d5374a98), + U64_C(0xa5e20888bfaeb5ea), U64_C(0xba56cc90c0d23f9a), + U64_C(0x38d04cf8ffe0a09c), U64_C(0x62e1adafe495254c), + U64_C(0x0263bcb3f40867df), U64_C(0xcaeb547d230f62bf), 
+ U64_C(0x6082111c109d4293), U64_C(0xdad4dd8cd04f7d09), + U64_C(0xefec602e579b2f8c), U64_C(0x1fb4c4187f7c8a70), + U64_C(0xffd3e9dfa4db303a), U64_C(0x7bf0b07f9af10640), + U64_C(0xf49ec14dddf76b5f), U64_C(0x8f6e713247066d1f), + U64_C(0x339d646a86ccfbf9), U64_C(0x64447467e58d8c30), + U64_C(0x2c29a072f9b07189), U64_C(0xd8b7613f24471ad6), + U64_C(0x6627c8d41185ebef), U64_C(0xa347d140beb61c96), + U64_C(0xde12b8f7255fb3aa), U64_C(0x9d324470404e1576), + U64_C(0x9306574eb6763d51), U64_C(0xa80af9d2c79a47f3), + U64_C(0x859c0777442e8b9b), U64_C(0x69ac853d9db97e29) }, + /* 6 */ + { U64_C(0xc3407dfc2de6377e), U64_C(0x5b9e93eea4256f77), + U64_C(0xadb58fdd50c845e0), U64_C(0x5219ff11a75bed86), + U64_C(0x356b61cfd90b1de9), U64_C(0xfb8f406e25abe037), + U64_C(0x7a5a0231c0f60796), U64_C(0x9d3cd216e1f5020b), + U64_C(0x0c6550fb6b48d8f3), U64_C(0xf57508c427ff1c62), + U64_C(0x4ad35ffa71cb407d), U64_C(0x6290a2da1666aa6d), + U64_C(0xe284ec2349355f9f), U64_C(0xb3c307c53d7c84ec), + U64_C(0x05e23c0468365a02), U64_C(0x190bac4d6c9ebfa8), + U64_C(0x94bbbee9e28b80fa), U64_C(0xa34fc777529cb9b5), + U64_C(0xcc7b39f095bcd978), U64_C(0x2426addb0ce532e3), + U64_C(0x7e79329312ce4fc7), U64_C(0xab09a72eebec2917), + U64_C(0xf8d15499f6b9d6c2), U64_C(0x1a55b8babf8c895d), + U64_C(0xdb8add17fb769a85), U64_C(0xb57f2f368658e81b), + U64_C(0x8acd36f18f3f41f6), U64_C(0x5ce3b7bba50f11d3), + U64_C(0x114dcc14d5ee2f0a), U64_C(0xb91a7fcded1030e8), + U64_C(0x81d5425fe55de7a1), U64_C(0xb6213bc1554adeee), + U64_C(0x80144ef95f53f5f2), U64_C(0x1e7688186db4c10c), + U64_C(0x3b912965db5fe1bc), U64_C(0xc281715a97e8252d), + U64_C(0x54a5d7e21c7f8171), U64_C(0x4b12535ccbc5522e), + U64_C(0x1d289cefbea6f7f9), U64_C(0x6ef5f2217d2e729e), + U64_C(0xe6a7dc819b0d17ce), U64_C(0x1b94b41c05829b0e), + U64_C(0x33d7493c622f711e), U64_C(0xdcf7f942fa5ce421), + U64_C(0x600fba8b7f7a8ecb), U64_C(0x46b60f011a83988e), + U64_C(0x235b898e0dcf4c47), U64_C(0x957ab24f588592a9), + U64_C(0x4354330572b5c28c), U64_C(0xa5f3ef84e9b8d542), + U64_C(0x8c711e02341b2d01), U64_C(0x0b1874ae6a62a657), + U64_C(0x1213d8e306fc19ff), U64_C(0xfe6d7c6a4d9dba35), + U64_C(0x65ed868f174cd4c9), U64_C(0x88522ea0e6236550), + U64_C(0x899322065c2d7703), U64_C(0xc01e690bfef4018b), + U64_C(0x915982ed8abddaf8), U64_C(0xbe675b98ec3a4e4c), + U64_C(0xa996bf7f82f00db1), U64_C(0xe1daf8d49a27696a), + U64_C(0x2effd5d3dc8986e7), U64_C(0xd153a51f2b1a2e81), + U64_C(0x18caa0ebd690adfb), U64_C(0x390e3134b243c51a), + U64_C(0x2778b92cdff70416), U64_C(0x029f1851691c24a6), + U64_C(0x5e7cafeacc133575), U64_C(0xfa4e4cc89fa5f264), + U64_C(0x5a5f9f481e2b7d24), U64_C(0x484c47ab18d764db), + U64_C(0x400a27f2a1a7f479), U64_C(0xaeeb9b2a83da7315), + U64_C(0x721c626879869734), U64_C(0x042330a2d2384851), + U64_C(0x85f672fd3765aff0), U64_C(0xba446b3a3e02061d), + U64_C(0x73dd6ecec3888567), U64_C(0xffac70ccf793a866), + U64_C(0xdfa9edb5294ed2d4), U64_C(0x6c6aea7014325638), + U64_C(0x834a5a0e8c41c307), U64_C(0xcdba35562fb2cb2b), + U64_C(0x0ad97808d06cb404), U64_C(0x0f3b440cb85aee06), + U64_C(0xe5f9c876481f213b), U64_C(0x98deee1289c35809), + U64_C(0x59018bbfcd394bd1), U64_C(0xe01bf47220297b39), + U64_C(0xde68e1139340c087), U64_C(0x9fa3ca4788e926ad), + U64_C(0xbb85679c840c144e), U64_C(0x53d8f3b71d55ffd5), + U64_C(0x0da45c5dd146caa0), U64_C(0x6f34fe87c72060cd), + U64_C(0x57fbc315cf6db784), U64_C(0xcee421a1fca0fdde), + U64_C(0x3d2d0196607b8d4b), U64_C(0x642c8a29ad42c69a), + U64_C(0x14aff010bdd87508), U64_C(0xac74837beac657b3), + U64_C(0x3216459ad821634d), U64_C(0x3fb219c70967a9ed), + U64_C(0x06bc28f3bb246cf7), U64_C(0xf2082c9126d562c6), + 
U64_C(0x66b39278c45ee23c), U64_C(0xbd394f6f3f2878b9), + U64_C(0xfd33689d9e8f8cc0), U64_C(0x37f4799eb017394f), + U64_C(0x108cc0b26fe03d59), U64_C(0xda4bd1b1417888d6), + U64_C(0xb09d1332ee6eb219), U64_C(0x2f3ed975668794b4), + U64_C(0x58c0871977375982), U64_C(0x7561463d78ace990), + U64_C(0x09876cff037e82f1), U64_C(0x7fb83e35a8c05d94), + U64_C(0x26b9b58a65f91645), U64_C(0xef20b07e9873953f), + U64_C(0x3148516d0b3355b8), U64_C(0x41cb2b541ba9e62a), + U64_C(0x790416c613e43163), U64_C(0xa011d380818e8f40), + U64_C(0x3a5025c36151f3ef), U64_C(0xd57095bdf92266d0), + U64_C(0x498d4b0da2d97688), U64_C(0x8b0c3a57353153a5), + U64_C(0x21c491df64d368e1), U64_C(0x8f2f0af5e7091bf4), + U64_C(0x2da1c1240f9bb012), U64_C(0xc43d59a92ccc49da), + U64_C(0xbfa6573e56345c1f), U64_C(0x828b56a8364fd154), + U64_C(0x9a41f643e0df7caf), U64_C(0xbcf843c985266aea), + U64_C(0x2b1de9d7b4bfdce5), U64_C(0x20059d79dedd7ab2), + U64_C(0x6dabe6d6ae3c446b), U64_C(0x45e81bf6c991ae7b), + U64_C(0x6351ae7cac68b83e), U64_C(0xa432e32253b6c711), + U64_C(0xd092a9b991143cd2), U64_C(0xcac711032e98b58f), + U64_C(0xd8d4c9e02864ac70), U64_C(0xc5fc550f96c25b89), + U64_C(0xd7ef8dec903e4276), U64_C(0x67729ede7e50f06f), + U64_C(0xeac28c7af045cf3d), U64_C(0xb15c1f945460a04a), + U64_C(0x9cfddeb05bfb1058), U64_C(0x93c69abce3a1fe5e), + U64_C(0xeb0380dc4a4bdd6e), U64_C(0xd20db1e8f8081874), + U64_C(0x229a8528b7c15e14), U64_C(0x44291750739fbc28), + U64_C(0xd3ccbd4e42060a27), U64_C(0xf62b1c33f4ed2a97), + U64_C(0x86a8660ae4779905), U64_C(0xd62e814a2a305025), + U64_C(0x477703a7a08d8add), U64_C(0x7b9b0e977af815c5), + U64_C(0x78c51a60a9ea2330), U64_C(0xa6adfb733aaae3b7), + U64_C(0x97e5aa1e3199b60f), U64_C(0x0000000000000000), + U64_C(0xf4b404629df10e31), U64_C(0x5564db44a6719322), + U64_C(0x9207961a59afec0d), U64_C(0x9624a6b88b97a45c), + U64_C(0x363575380a192b1c), U64_C(0x2c60cd82b595a241), + U64_C(0x7d272664c1dc7932), U64_C(0x7142769faa94a1c1), + U64_C(0xa1d0df263b809d13), U64_C(0x1630e841d4c451ae), + U64_C(0xc1df65ad44fa13d8), U64_C(0x13d2d445bcf20bac), + U64_C(0xd915c546926abe23), U64_C(0x38cf3d92084dd749), + U64_C(0xe766d0272103059d), U64_C(0xc7634d5effde7f2f), + U64_C(0x077d2455012a7ea4), U64_C(0xedbfa82ff16fb199), + U64_C(0xaf2a978c39d46146), U64_C(0x42953fa3c8bbd0df), + U64_C(0xcb061da59496a7dc), U64_C(0x25e7a17db6eb20b0), + U64_C(0x34aa6d6963050fba), U64_C(0xa76cf7d580a4f1e4), + U64_C(0xf7ea10954ee338c4), U64_C(0xfcf2643b24819e93), + U64_C(0xcf252d0746aeef8d), U64_C(0x4ef06f58a3f3082c), + U64_C(0x563acfb37563a5d7), U64_C(0x5086e740ce47c920), + U64_C(0x2982f186dda3f843), U64_C(0x87696aac5e798b56), + U64_C(0x5d22bb1d1f010380), U64_C(0x035e14f7d31236f5), + U64_C(0x3cec0d30da759f18), U64_C(0xf3c920379cdb7095), + U64_C(0xb8db736b571e22bb), U64_C(0xdd36f5e44052f672), + U64_C(0xaac8ab8851e23b44), U64_C(0xa857b3d938fe1fe2), + U64_C(0x17f1e4e76eca43fd), U64_C(0xec7ea4894b61a3ca), + U64_C(0x9e62c6e132e734fe), U64_C(0xd4b1991b432c7483), + U64_C(0x6ad6c283af163acf), U64_C(0x1ce9904904a8e5aa), + U64_C(0x5fbda34c761d2726), U64_C(0xf910583f4cb7c491), + U64_C(0xc6a241f845d06d7c), U64_C(0x4f3163fe19fd1a7f), + U64_C(0xe99c988d2357f9c8), U64_C(0x8eee06535d0709a7), + U64_C(0x0efa48aa0254fc55), U64_C(0xb4be23903c56fa48), + U64_C(0x763f52caabbedf65), U64_C(0xeee1bcd8227d876c), + U64_C(0xe345e085f33b4dcc), U64_C(0x3e731561b369bbbe), + U64_C(0x2843fd2067adea10), U64_C(0x2adce5710eb1ceb6), + U64_C(0xb7e03767ef44ccbd), U64_C(0x8db012a48e153f52), + U64_C(0x61ceb62dc5749c98), U64_C(0xe85d942b9959eb9b), + U64_C(0x4c6f7709caef2c8a), U64_C(0x84377e5b8d6bbda3), + U64_C(0x30895dcbb13d47eb), 
U64_C(0x74a04a9bc2a2fbc3), + U64_C(0x6b17ce251518289c), U64_C(0xe438c4d0f2113368), + U64_C(0x1fb784bed7bad35f), U64_C(0x9b80fae55ad16efc), + U64_C(0x77fe5e6c11b0cd36), U64_C(0xc858095247849129), + U64_C(0x08466059b97090a2), U64_C(0x01c10ca6ba0e1253), + U64_C(0x6988d6747c040c3a), U64_C(0x6849dad2c60a1e69), + U64_C(0x5147ebe67449db73), U64_C(0xc99905f4fd8a837a), + U64_C(0x991fe2b433cd4a5a), U64_C(0xf09734c04fc94660), + U64_C(0xa28ecbd1e892abe6), U64_C(0xf1563866f5c75433), + U64_C(0x4dae7baf70e13ed9), U64_C(0x7ce62ac27bd26b61), + U64_C(0x70837a39109ab392), U64_C(0x90988e4b30b3c8ab), + U64_C(0xb2020b63877296bf), U64_C(0x156efcb607d6675b) }, + /* 7 */ + { U64_C(0xe63f55ce97c331d0), U64_C(0x25b506b0015bba16), + U64_C(0xc8706e29e6ad9ba8), U64_C(0x5b43d3775d521f6a), + U64_C(0x0bfa3d577035106e), U64_C(0xab95fc172afb0e66), + U64_C(0xf64b63979e7a3276), U64_C(0xf58b4562649dad4b), + U64_C(0x48f7c3dbae0c83f1), U64_C(0xff31916642f5c8c5), + U64_C(0xcbb048dc1c4a0495), U64_C(0x66b8f83cdf622989), + U64_C(0x35c130e908e2b9b0), U64_C(0x7c761a61f0b34fa1), + U64_C(0x3601161cf205268d), U64_C(0x9e54ccfe2219b7d6), + U64_C(0x8b7d90a538940837), U64_C(0x9cd403588ea35d0b), + U64_C(0xbc3c6fea9ccc5b5a), U64_C(0xe5ff733b6d24aeed), + U64_C(0xceed22de0f7eb8d2), U64_C(0xec8581cab1ab545e), + U64_C(0xb96105e88ff8e71d), U64_C(0x8ca03501871a5ead), + U64_C(0x76ccce65d6db2a2f), U64_C(0x5883f582a7b58057), + U64_C(0x3f7be4ed2e8adc3e), U64_C(0x0fe7be06355cd9c9), + U64_C(0xee054e6c1d11be83), U64_C(0x1074365909b903a6), + U64_C(0x5dde9f80b4813c10), U64_C(0x4a770c7d02b6692c), + U64_C(0x5379c8d5d7809039), U64_C(0xb4067448161ed409), + U64_C(0x5f5e5026183bd6cd), U64_C(0xe898029bf4c29df9), + U64_C(0x7fb63c940a54d09c), U64_C(0xc5171f897f4ba8bc), + U64_C(0xa6f28db7b31d3d72), U64_C(0x2e4f3be7716eaa78), + U64_C(0x0d6771a099e63314), U64_C(0x82076254e41bf284), + U64_C(0x2f0fd2b42733df98), U64_C(0x5c9e76d3e2dc49f0), + U64_C(0x7aeb569619606cdb), U64_C(0x83478b07b2468764), + U64_C(0xcfadcb8d5923cd32), U64_C(0x85dac7f05b95a41e), + U64_C(0xb5469d1b4043a1e9), U64_C(0xb821ecbbd9a592fd), + U64_C(0x1b8e0b0e798c13c8), U64_C(0x62a57b6d9a0be02e), + U64_C(0xfcf1b793b81257f8), U64_C(0x9d94ea0bd8fe28eb), + U64_C(0x4cea408aeb654a56), U64_C(0x23284a47e888996c), + U64_C(0x2d8f1d128b893545), U64_C(0xf4cbac3132c0d8ab), + U64_C(0xbd7c86b9ca912eba), U64_C(0x3a268eef3dbe6079), + U64_C(0xf0d62f6077a9110c), U64_C(0x2735c916ade150cb), + U64_C(0x89fd5f03942ee2ea), U64_C(0x1acee25d2fd16628), + U64_C(0x90f39bab41181bff), U64_C(0x430dfe8cde39939f), + U64_C(0xf70b8ac4c8274796), U64_C(0x1c53aeaac6024552), + U64_C(0x13b410acf35e9c9b), U64_C(0xa532ab4249faa24f), + U64_C(0x2b1251e5625a163f), U64_C(0xd7e3e676da4841c7), + U64_C(0xa7b264e4e5404892), U64_C(0xda8497d643ae72d3), + U64_C(0x861ae105a1723b23), U64_C(0x38a6414991048aa4), + U64_C(0x6578dec92585b6b4), U64_C(0x0280cfa6acbaeadd), + U64_C(0x88bdb650c273970a), U64_C(0x9333bd5ebbff84c2), + U64_C(0x4e6a8f2c47dfa08b), U64_C(0x321c954db76cef2a), + U64_C(0x418d312a72837942), U64_C(0xb29b38bfffcdf773), + U64_C(0x6c022c38f90a4c07), U64_C(0x5a033a240b0f6a8a), + U64_C(0x1f93885f3ce5da6f), U64_C(0xc38a537e96988bc6), + U64_C(0x39e6a81ac759ff44), U64_C(0x29929e43cee0fce2), + U64_C(0x40cdd87924de0ca2), U64_C(0xe9d8ebc8a29fe819), + U64_C(0x0c2798f3cfbb46f4), U64_C(0x55e484223e53b343), + U64_C(0x4650948ecd0d2fd8), U64_C(0x20e86cb2126f0651), + U64_C(0x6d42c56baf5739e7), U64_C(0xa06fc1405ace1e08), + U64_C(0x7babbfc54f3d193b), U64_C(0x424d17df8864e67f), + U64_C(0xd8045870ef14980e), U64_C(0xc6d7397c85ac3781), + U64_C(0x21a885e1443273b1), 
U64_C(0x67f8116f893f5c69), + U64_C(0x24f5efe35706cff6), U64_C(0xd56329d076f2ab1a), + U64_C(0x5e1eb9754e66a32d), U64_C(0x28d2771098bd8902), + U64_C(0x8f6013f47dfdc190), U64_C(0x17a993fdb637553c), + U64_C(0xe0a219397e1012aa), U64_C(0x786b9930b5da8606), + U64_C(0x6e82e39e55b0a6da), U64_C(0x875a0856f72f4ec3), + U64_C(0x3741ff4fa458536d), U64_C(0xac4859b3957558fc), + U64_C(0x7ef6d5c75c09a57c), U64_C(0xc04a758b6c7f14fb), + U64_C(0xf9acdd91ab26ebbf), U64_C(0x7391a467c5ef9668), + U64_C(0x335c7c1ee1319aca), U64_C(0xa91533b18641e4bb), + U64_C(0xe4bf9a683b79db0d), U64_C(0x8e20faa72ba0b470), + U64_C(0x51f907737b3a7ae4), U64_C(0x2268a314bed5ec8c), + U64_C(0xd944b123b949edee), U64_C(0x31dcb3b84d8b7017), + U64_C(0xd3fe65279f218860), U64_C(0x097af2f1dc8ffab3), + U64_C(0x9b09a6fc312d0b91), U64_C(0xcc6ded78a3c4520f), + U64_C(0x3481d9ba5ebfcc50), U64_C(0x4f2a667f1182d56b), + U64_C(0xdfd9fdd4509ace94), U64_C(0x26752045fbbc252b), + U64_C(0xbffc491f662bc467), U64_C(0xdd593272fc202449), + U64_C(0x3cbbc218d46d4303), U64_C(0x91b372f817456e1f), + U64_C(0x681faf69bc6385a0), U64_C(0xb686bbeebaa43ed4), + U64_C(0x1469b5084cd0ca01), U64_C(0x98c98009cbca94ac), + U64_C(0x6438379a73d8c354), U64_C(0xc2caba2dc0c5fe26), + U64_C(0x3e3b0dbe78d7a9de), U64_C(0x50b9ee202d670f04), + U64_C(0x4590b27b37eab0e5), U64_C(0x6025b4cb36b10af3), + U64_C(0xfb2c1237079c0162), U64_C(0xa12f28130c936be8), + U64_C(0x4b37e52e54eb1ccc), U64_C(0x083a1ba28ad28f53), + U64_C(0xc10a9cd83a22611b), U64_C(0x9f1425ad7444c236), + U64_C(0x069d4cf7e9d3237a), U64_C(0xedc56899e7f621be), + U64_C(0x778c273680865fcf), U64_C(0x309c5aeb1bd605f7), + U64_C(0x8de0dc52d1472b4d), U64_C(0xf8ec34c2fd7b9e5f), + U64_C(0xea18cd3d58787724), U64_C(0xaad515447ca67b86), + U64_C(0x9989695a9d97e14c), U64_C(0x0000000000000000), + U64_C(0xf196c63321f464ec), U64_C(0x71116bc169557cb5), + U64_C(0xaf887f466f92c7c1), U64_C(0x972e3e0ffe964d65), + U64_C(0x190ec4a8d536f915), U64_C(0x95aef1a9522ca7b8), + U64_C(0xdc19db21aa7d51a9), U64_C(0x94ee18fa0471d258), + U64_C(0x8087adf248a11859), U64_C(0xc457f6da2916dd5c), + U64_C(0xfa6cfb6451c17482), U64_C(0xf256e0c6db13fbd1), + U64_C(0x6a9f60cf10d96f7d), U64_C(0x4daaa9d9bd383fb6), + U64_C(0x03c026f5fae79f3d), U64_C(0xde99148706c7bb74), + U64_C(0x2a52b8b6340763df), U64_C(0x6fc20acd03edd33a), + U64_C(0xd423c08320afdefa), U64_C(0xbbe1ca4e23420dc0), + U64_C(0x966ed75ca8cb3885), U64_C(0xeb58246e0e2502c4), + U64_C(0x055d6a021334bc47), U64_C(0xa47242111fa7d7af), + U64_C(0xe3623fcc84f78d97), U64_C(0x81c744a11efc6db9), + U64_C(0xaec8961539cfb221), U64_C(0xf31609958d4e8e31), + U64_C(0x63e5923ecc5695ce), U64_C(0x47107ddd9b505a38), + U64_C(0xa3afe7b5a0298135), U64_C(0x792b7063e387f3e6), + U64_C(0x0140e953565d75e0), U64_C(0x12f4f9ffa503e97b), + U64_C(0x750ce8902c3cb512), U64_C(0xdbc47e8515f30733), + U64_C(0x1ed3610c6ab8af8f), U64_C(0x5239218681dde5d9), + U64_C(0xe222d69fd2aaf877), U64_C(0xfe71783514a8bd25), + U64_C(0xcaf0a18f4a177175), U64_C(0x61655d9860ec7f13), + U64_C(0xe77fbc9dc19e4430), U64_C(0x2ccff441ddd440a5), + U64_C(0x16e97aaee06a20dc), U64_C(0xa855dae2d01c915b), + U64_C(0x1d1347f9905f30b2), U64_C(0xb7c652bdecf94b34), + U64_C(0xd03e43d265c6175d), U64_C(0xfdb15ec0ee4f2218), + U64_C(0x57644b8492e9599e), U64_C(0x07dda5a4bf8e569a), + U64_C(0x54a46d71680ec6a3), U64_C(0x5624a2d7c4b42c7e), + U64_C(0xbebca04c3076b187), U64_C(0x7d36f332a6ee3a41), + U64_C(0x3b6667bc6be31599), U64_C(0x695f463aea3ef040), + U64_C(0xad08b0e0c3282d1c), U64_C(0xb15b1e4a052a684e), + U64_C(0x44d05b2861b7c505), U64_C(0x15295c5b1a8dbfe1), + U64_C(0x744c01c37a61c0f2), U64_C(0x59c31cd1f1e8f5b7), 
+ U64_C(0xef45a73f4b4ccb63), U64_C(0x6bdf899c46841a9d), + U64_C(0x3dfb2b4b823036e3), U64_C(0xa2ef0ee6f674f4d5), + U64_C(0x184e2dfb836b8cf5), U64_C(0x1134df0a5fe47646), + U64_C(0xbaa1231d751f7820), U64_C(0xd17eaa81339b62bd), + U64_C(0xb01bf71953771dae), U64_C(0x849a2ea30dc8d1fe), + U64_C(0x705182923f080955), U64_C(0x0ea757556301ac29), + U64_C(0x041d83514569c9a7), U64_C(0x0abad4042668658e), + U64_C(0x49b72a88f851f611), U64_C(0x8a3d79f66ec97dd7), + U64_C(0xcd2d042bf59927ef), U64_C(0xc930877ab0f0ee48), + U64_C(0x9273540deda2f122), U64_C(0xc797d02fd3f14261), + U64_C(0xe1e2f06a284d674a), U64_C(0xd2be8c74c97cfd80), + U64_C(0x9a494faf67707e71), U64_C(0xb3dbd1eca9908293), + U64_C(0x72d14d3493b2e388), U64_C(0xd6a30f258c153427) }, +}; + +static const u64 C16[12][8] = +{ + { U64_C(0xdd806559f2a64507), U64_C(0x05767436cc744d23), + U64_C(0xa2422a08a460d315), U64_C(0x4b7ce09192676901), + U64_C(0x714eb88d7585c4fc), U64_C(0x2f6a76432e45d016), + U64_C(0xebcb2f81c0657c1f), U64_C(0xb1085bda1ecadae9) }, + { U64_C(0xe679047021b19bb7), U64_C(0x55dda21bd7cbcd56), + U64_C(0x5cb561c2db0aa7ca), U64_C(0x9ab5176b12d69958), + U64_C(0x61d55e0f16b50131), U64_C(0xf3feea720a232b98), + U64_C(0x4fe39d460f70b5d7), U64_C(0x6fa3b58aa99d2f1a) }, + { U64_C(0x991e96f50aba0ab2), U64_C(0xc2b6f443867adb31), + U64_C(0xc1c93a376062db09), U64_C(0xd3e20fe490359eb1), + U64_C(0xf2ea7514b1297b7b), U64_C(0x06f15e5f529c1f8b), + U64_C(0x0a39fc286a3d8435), U64_C(0xf574dcac2bce2fc7) }, + { U64_C(0x220cbebc84e3d12e), U64_C(0x3453eaa193e837f1), + U64_C(0xd8b71333935203be), U64_C(0xa9d72c82ed03d675), + U64_C(0x9d721cad685e353f), U64_C(0x488e857e335c3c7d), + U64_C(0xf948e1a05d71e4dd), U64_C(0xef1fdfb3e81566d2) }, + { U64_C(0x601758fd7c6cfe57), U64_C(0x7a56a27ea9ea63f5), + U64_C(0xdfff00b723271a16), U64_C(0xbfcd1747253af5a3), + U64_C(0x359e35d7800fffbd), U64_C(0x7f151c1f1686104a), + U64_C(0x9a3f410c6ca92363), U64_C(0x4bea6bacad474799) }, + { U64_C(0xfa68407a46647d6e), U64_C(0xbf71c57236904f35), + U64_C(0x0af21f66c2bec6b6), U64_C(0xcffaa6b71c9ab7b4), + U64_C(0x187f9ab49af08ec6), U64_C(0x2d66c4f95142a46c), + U64_C(0x6fa4c33b7a3039c0), U64_C(0xae4faeae1d3ad3d9) }, + { U64_C(0x8886564d3a14d493), U64_C(0x3517454ca23c4af3), + U64_C(0x06476983284a0504), U64_C(0x0992abc52d822c37), + U64_C(0xd3473e33197a93c9), U64_C(0x399ec6c7e6bf87c9), + U64_C(0x51ac86febf240954), U64_C(0xf4c70e16eeaac5ec) }, + { U64_C(0xa47f0dd4bf02e71e), U64_C(0x36acc2355951a8d9), + U64_C(0x69d18d2bd1a5c42f), U64_C(0xf4892bcb929b0690), + U64_C(0x89b4443b4ddbc49a), U64_C(0x4eb7f8719c36de1e), + U64_C(0x03e7aa020c6e4141), U64_C(0x9b1f5b424d93c9a7) }, + { U64_C(0x7261445183235adb), U64_C(0x0e38dc92cb1f2a60), + U64_C(0x7b2b8a9aa6079c54), U64_C(0x800a440bdbb2ceb1), + U64_C(0x3cd955b7e00d0984), U64_C(0x3a7d3a1b25894224), + U64_C(0x944c9ad8ec165fde), U64_C(0x378f5a541631229b) }, + { U64_C(0x74b4c7fb98459ced), U64_C(0x3698fad1153bb6c3), + U64_C(0x7a1e6c303b7652f4), U64_C(0x9fe76702af69334b), + U64_C(0x1fffe18a1b336103), U64_C(0x8941e71cff8a78db), + U64_C(0x382ae548b2e4f3f3), U64_C(0xabbedea680056f52) }, + { U64_C(0x6bcaa4cd81f32d1b), U64_C(0xdea2594ac06fd85d), + U64_C(0xefbacd1d7d476e98), U64_C(0x8a1d71efea48b9ca), + U64_C(0x2001802114846679), U64_C(0xd8fa6bbbebab0761), + U64_C(0x3002c6cd635afe94), U64_C(0x7bcd9ed0efc889fb) }, + { U64_C(0x48bc924af11bd720), U64_C(0xfaf417d5d9b21b99), + U64_C(0xe71da4aa88e12852), U64_C(0x5d80ef9d1891cc86), + U64_C(0xf82012d430219f9b), U64_C(0xcda43c32bcdf1d77), + U64_C(0xd21380b00449b17a), U64_C(0x378ee767f11631ba) }, +}; + + +#define strido(out, temp, i) do { \ + 
u64 t; \
+  t  = stribog_table[0][(temp[0] >> (i * 8)) & 0xff]; \
+  t ^= stribog_table[1][(temp[1] >> (i * 8)) & 0xff]; \
+  t ^= stribog_table[2][(temp[2] >> (i * 8)) & 0xff]; \
+  t ^= stribog_table[3][(temp[3] >> (i * 8)) & 0xff]; \
+  t ^= stribog_table[4][(temp[4] >> (i * 8)) & 0xff]; \
+  t ^= stribog_table[5][(temp[5] >> (i * 8)) & 0xff]; \
+  t ^= stribog_table[6][(temp[6] >> (i * 8)) & 0xff]; \
+  t ^= stribog_table[7][(temp[7] >> (i * 8)) & 0xff]; \
+  out[i] = t; } while(0)
+
+/* XOR the two 512-bit operands A and B and run the result through the
+   combined substitution/transposition/linear layer, which is folded
+   into the precomputed 64-bit tables; one output word per strido.  */
+static void LPSX (u64 *out, const u64 *a, const u64 *b)
+{
+  u64 temp[8];
+  temp[0] = a[0] ^ b[0];
+  temp[1] = a[1] ^ b[1];
+  temp[2] = a[2] ^ b[2];
+  temp[3] = a[3] ^ b[3];
+  temp[4] = a[4] ^ b[4];
+  temp[5] = a[5] ^ b[5];
+  temp[6] = a[6] ^ b[6];
+  temp[7] = a[7] ^ b[7];
+  strido (out, temp, 0);
+  strido (out, temp, 1);
+  strido (out, temp, 2);
+  strido (out, temp, 3);
+  strido (out, temp, 4);
+  strido (out, temp, 5);
+  strido (out, temp, 6);
+  strido (out, temp, 7);
+}
+
+/* The compression function g: update the state H in place from the
+   message block M and the counter N.  */
+static inline void g (u64 *h, u64 *m, u64 *N)
+{
+  u64 K[8];
+  u64 T[8];
+  int i;
+
+  LPSX (K, h, N);
+
+  LPSX (T, K, m);
+  LPSX (K, K, C16[0]);
+  for (i = 1; i < 12; i++)
+    {
+      LPSX (T, K, T);
+      LPSX (K, K, C16[i]);
+    }
+
+  h[0] ^= T[0] ^ K[0] ^ m[0];
+  h[1] ^= T[1] ^ K[1] ^ m[1];
+  h[2] ^= T[2] ^ K[2] ^ m[2];
+  h[3] ^= T[3] ^ K[3] ^ m[3];
+  h[4] ^= T[4] ^ K[4] ^ m[4];
+  h[5] ^= T[5] ^ K[5] ^ m[5];
+  h[6] ^= T[6] ^ K[6] ^ m[6];
+  h[7] ^= T[7] ^ K[7] ^ m[7];
+}
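+
+/* For reference -- an informal sketch of what g computes, in the
+ * notation of GOST R 34.11-2012 (Streebog):
+ *
+ *   g_N(h, m) = E(LPS(h ^ N), m) ^ h ^ m
+ *
+ * where LPS is the table-driven layer applied by LPSX above and E is
+ * a 12-round cipher with the key schedule K_1 = LPS(h ^ N),
+ * K_(i+1) = LPS(K_i ^ C_i).  The rows of C16[] hold the round
+ * constants C_1 .. C_12, and the final whitening key K_13 enters
+ * through the "^ K[i]" terms of the feed-forward at the end of g.  */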
+
+static unsigned int
+transform (void *c, const unsigned char *data, size_t nblks);
+
+
+static void
+stribog_init_512 (void *context, unsigned int flags)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  (void)flags;
+
+  memset (hd, 0, sizeof (*hd));
+
+  hd->bctx.blocksize = 64;
+  hd->bctx.bwrite = transform;
+}
+
+static void
+stribog_init_256 (void *context, unsigned int flags)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  stribog_init_512 (context, flags);
+  /* The 256-bit variant differs only in its IV: every byte of the
+     initial state is 0x01 instead of 0x00.  */
+  memset (hd->h, 1, 64);
+}
+
+static void
+transform_bits (STRIBOG_CONTEXT *hd, const unsigned char *data, unsigned count)
+{
+  u64 M[8];
+  u64 l;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    M[i] = buf_get_le64(data + i * 8);
+
+  g (hd->h, M, hd->N);
+
+  /* N += COUNT (in bits), as a 512-bit little-endian-limb addition.  */
+  l = hd->N[0];
+  hd->N[0] += count;
+  if (hd->N[0] < l)
+    { /* overflow */
+      for (i = 1; i < 8; i++)
+        {
+          hd->N[i]++;
+          if (hd->N[i] != 0)
+            break;
+        }
+    }
+
+  /* Sigma += M, again as a 512-bit addition with carry propagation.  */
+  hd->Sigma[0] += M[0];
+  for (i = 1; i < 8; i++)
+    if (hd->Sigma[i-1] < M[i-1])
+      hd->Sigma[i] += M[i] + 1;
+    else
+      hd->Sigma[i] += M[i];
+}
+
+static unsigned int
+transform_blk (void *context, const unsigned char *inbuf_arg)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  transform_bits (hd, inbuf_arg, 64 * 8);
+
+  return /* burn_stack */ 768;
+}
+
+static unsigned int
+transform (void *c, const unsigned char *data, size_t nblks)
+{
+  unsigned int burn;
+
+  do
+    {
+      burn = transform_blk (c, data);
+      data += 64;
+    }
+  while (--nblks);
+
+  return burn;
+}
+
+/*
+   This routine finally terminates the computation.  The handle is
+   prepared for a new cycle, but adding bytes to the handle will
+   destroy the returned buffer.  The message digest is then fetched
+   with the read functions below.  */
+static void
+stribog_final (void *context)
+{
+  STRIBOG_CONTEXT *hd = context;
+  u64 Z[8] = { 0 };
+  int i;
+
+  _gcry_md_block_write (context, NULL, 0); /* flush */
+  /* PAD.  The padding does not count towards the message length.  */
+  i = hd->bctx.count;
+  /* After the flush we have at least one byte free.  */
+  hd->bctx.buf[i++] = 1;
+  while (i < 64)
+    hd->bctx.buf[i++] = 0;
+  transform_bits (hd, hd->bctx.buf, hd->bctx.count * 8);
+
+  g (hd->h, hd->N, Z);
+  g (hd->h, hd->Sigma, Z);
+
+  for (i = 0; i < 8; i++)
+    hd->h[i] = le_bswap64(hd->h[i]);
+
+  _gcry_burn_stack (768);
+}
+
+static byte *
+stribog_read_512 (void *context)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  return hd->result;
+}
+
+static byte *
+stribog_read_256 (void *context)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  /* The 256-bit digest is the last 32 bytes of the final state.  */
+  return hd->result + 32;
+}
+
+static gcry_md_oid_spec_t oid_spec_stribog256[] =
+  {
+    /* id-tc26-signwithdigest-gost3410-12-256 */
+    { "1.2.643.7.1.1.3.2" },
+    /* id-tc26-gost3411-12-256 */
+    { "1.2.643.7.1.1.2.2" },
+    { NULL },
+  };
+
+static gcry_md_oid_spec_t oid_spec_stribog512[] =
+  {
+    /* id-tc26-signwithdigest-gost3410-12-512 */
+    { "1.2.643.7.1.1.3.3" },
+    /* id-tc26-gost3411-12-512 */
+    { "1.2.643.7.1.1.2.3" },
+    { NULL },
+  };
+
+gcry_md_spec_t _gcry_digest_spec_stribog_256 =
+  {
+    GCRY_MD_STRIBOG256, {0, 0},
+    "STRIBOG256", NULL, 0, oid_spec_stribog256, 32,
+    stribog_init_256, _gcry_md_block_write, stribog_final, stribog_read_256,
+    NULL,
+    sizeof (STRIBOG_CONTEXT)
+  };
+
+gcry_md_spec_t _gcry_digest_spec_stribog_512 =
+  {
+    GCRY_MD_STRIBOG512, {0, 0},
+    "STRIBOG512", NULL, 0, oid_spec_stribog512, 64,
+    stribog_init_512, _gcry_md_block_write, stribog_final, stribog_read_512,
+    NULL,
+    sizeof (STRIBOG_CONTEXT)
+  };
diff --git a/libotr/libgcrypt-1.8.7/cipher/tiger.c b/libotr/libgcrypt-1.8.7/cipher/tiger.c
new file mode 100644
index 0000000..b60ec16
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/tiger.c
@@ -0,0 +1,852 @@
+/* tiger.c - The TIGER hash function
+ * Copyright (C) 1998, 2001, 2002, 2003, 2010 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* See http://www.cs.technion.ac.il/~biham/Reports/Tiger/ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "hash-common.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+
+typedef struct
+{
+  gcry_md_block_ctx_t bctx;
+  u64 a, b, c;
+  int variant;  /* 0 = old code, 1 = fixed code, 2 = TIGER2.  */
+} TIGER_CONTEXT;
+
+
+/*********************************
+ * Okay, okay, this is not the fastest code - improvements are welcome.
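+ *
+ * For orientation, each Tiger round updates the 64-bit state words
+ * a, b, c from one message word x roughly as follows (an informal
+ * sketch, with byteN(c) standing for (c >> (8*N)) & 0xff):
+ *
+ *   c ^= x;
+ *   a -= sbox1[byte0(c)] ^ sbox2[byte2(c)] ^ sbox3[byte4(c)] ^ sbox4[byte6(c)];
+ *   b += sbox4[byte1(c)] ^ sbox3[byte3(c)] ^ sbox2[byte5(c)] ^ sbox1[byte7(c)];
+ *   b *= mul;   (mul is 5, 7 or 9, depending on the pass)
+ *
+ * and the roles of a, b, c rotate from round to round.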
+ * + */ + +/* Some test vectors: + * "" 24F0130C63AC9332 16166E76B1BB925F F373DE2D49584E7A + * "abc" F258C1E88414AB2A 527AB541FFC5B8BF 935F7B951C132951 + * "Tiger" 9F00F599072300DD 276ABB38C8EB6DEC 37790C116F9D2BDF + * "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-" + * 87FB2A9083851CF7 470D2CF810E6DF9E B586445034A5A386 + * "ABCDEFGHIJKLMNOPQRSTUVWXYZ=abcdefghijklmnopqrstuvwxyz+0123456789" + * 467DB80863EBCE48 8DF1CD1261655DE9 57896565975F9197 + * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham" + * 0C410A042968868A 1671DA5A3FD29A72 5EC1E457D3CDB303 + * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham, proc" + * "eedings of Fast Software Encryption 3, Cambridge." + * EBF591D5AFA655CE 7F22894FF87F54AC 89C811B6B0DA3193 + * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham, proc" + * "eedings of Fast Software Encryption 3, Cambridge, 1996." + * 3D9AEB03D1BD1A63 57B2774DFD6D5B24 DD68151D503974FC + * "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-ABCDEF" + * "GHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-" + * 00B83EB4E53440C5 76AC6AAEE0A74858 25FD15E70A59FFE4 + */ + +static u64 sbox1[256] = { + U64_C(0x02aab17cf7e90c5e) /* 0 */, U64_C(0xac424b03e243a8ec) /* 1 */, + U64_C(0x72cd5be30dd5fcd3) /* 2 */, U64_C(0x6d019b93f6f97f3a) /* 3 */, + U64_C(0xcd9978ffd21f9193) /* 4 */, U64_C(0x7573a1c9708029e2) /* 5 */, + U64_C(0xb164326b922a83c3) /* 6 */, U64_C(0x46883eee04915870) /* 7 */, + U64_C(0xeaace3057103ece6) /* 8 */, U64_C(0xc54169b808a3535c) /* 9 */, + U64_C(0x4ce754918ddec47c) /* 10 */, U64_C(0x0aa2f4dfdc0df40c) /* 11 */, + U64_C(0x10b76f18a74dbefa) /* 12 */, U64_C(0xc6ccb6235ad1ab6a) /* 13 */, + U64_C(0x13726121572fe2ff) /* 14 */, U64_C(0x1a488c6f199d921e) /* 15 */, + U64_C(0x4bc9f9f4da0007ca) /* 16 */, U64_C(0x26f5e6f6e85241c7) /* 17 */, + U64_C(0x859079dbea5947b6) /* 18 */, U64_C(0x4f1885c5c99e8c92) /* 19 */, + U64_C(0xd78e761ea96f864b) /* 20 */, U64_C(0x8e36428c52b5c17d) /* 21 */, + U64_C(0x69cf6827373063c1) /* 22 */, U64_C(0xb607c93d9bb4c56e) /* 23 */, + U64_C(0x7d820e760e76b5ea) /* 24 */, U64_C(0x645c9cc6f07fdc42) /* 25 */, + U64_C(0xbf38a078243342e0) /* 26 */, U64_C(0x5f6b343c9d2e7d04) /* 27 */, + U64_C(0xf2c28aeb600b0ec6) /* 28 */, U64_C(0x6c0ed85f7254bcac) /* 29 */, + U64_C(0x71592281a4db4fe5) /* 30 */, U64_C(0x1967fa69ce0fed9f) /* 31 */, + U64_C(0xfd5293f8b96545db) /* 32 */, U64_C(0xc879e9d7f2a7600b) /* 33 */, + U64_C(0x860248920193194e) /* 34 */, U64_C(0xa4f9533b2d9cc0b3) /* 35 */, + U64_C(0x9053836c15957613) /* 36 */, U64_C(0xdb6dcf8afc357bf1) /* 37 */, + U64_C(0x18beea7a7a370f57) /* 38 */, U64_C(0x037117ca50b99066) /* 39 */, + U64_C(0x6ab30a9774424a35) /* 40 */, U64_C(0xf4e92f02e325249b) /* 41 */, + U64_C(0x7739db07061ccae1) /* 42 */, U64_C(0xd8f3b49ceca42a05) /* 43 */, + U64_C(0xbd56be3f51382f73) /* 44 */, U64_C(0x45faed5843b0bb28) /* 45 */, + U64_C(0x1c813d5c11bf1f83) /* 46 */, U64_C(0x8af0e4b6d75fa169) /* 47 */, + U64_C(0x33ee18a487ad9999) /* 48 */, U64_C(0x3c26e8eab1c94410) /* 49 */, + U64_C(0xb510102bc0a822f9) /* 50 */, U64_C(0x141eef310ce6123b) /* 51 */, + U64_C(0xfc65b90059ddb154) /* 52 */, U64_C(0xe0158640c5e0e607) /* 53 */, + U64_C(0x884e079826c3a3cf) /* 54 */, U64_C(0x930d0d9523c535fd) /* 55 */, + U64_C(0x35638d754e9a2b00) /* 56 */, U64_C(0x4085fccf40469dd5) /* 57 */, + U64_C(0xc4b17ad28be23a4c) /* 58 */, U64_C(0xcab2f0fc6a3e6a2e) /* 59 */, + U64_C(0x2860971a6b943fcd) /* 60 */, U64_C(0x3dde6ee212e30446) /* 61 */, + U64_C(0x6222f32ae01765ae) /* 62 */, U64_C(0x5d550bb5478308fe) 
/* 63 */, + U64_C(0xa9efa98da0eda22a) /* 64 */, U64_C(0xc351a71686c40da7) /* 65 */, + U64_C(0x1105586d9c867c84) /* 66 */, U64_C(0xdcffee85fda22853) /* 67 */, + U64_C(0xccfbd0262c5eef76) /* 68 */, U64_C(0xbaf294cb8990d201) /* 69 */, + U64_C(0xe69464f52afad975) /* 70 */, U64_C(0x94b013afdf133e14) /* 71 */, + U64_C(0x06a7d1a32823c958) /* 72 */, U64_C(0x6f95fe5130f61119) /* 73 */, + U64_C(0xd92ab34e462c06c0) /* 74 */, U64_C(0xed7bde33887c71d2) /* 75 */, + U64_C(0x79746d6e6518393e) /* 76 */, U64_C(0x5ba419385d713329) /* 77 */, + U64_C(0x7c1ba6b948a97564) /* 78 */, U64_C(0x31987c197bfdac67) /* 79 */, + U64_C(0xde6c23c44b053d02) /* 80 */, U64_C(0x581c49fed002d64d) /* 81 */, + U64_C(0xdd474d6338261571) /* 82 */, U64_C(0xaa4546c3e473d062) /* 83 */, + U64_C(0x928fce349455f860) /* 84 */, U64_C(0x48161bbacaab94d9) /* 85 */, + U64_C(0x63912430770e6f68) /* 86 */, U64_C(0x6ec8a5e602c6641c) /* 87 */, + U64_C(0x87282515337ddd2b) /* 88 */, U64_C(0x2cda6b42034b701b) /* 89 */, + U64_C(0xb03d37c181cb096d) /* 90 */, U64_C(0xe108438266c71c6f) /* 91 */, + U64_C(0x2b3180c7eb51b255) /* 92 */, U64_C(0xdf92b82f96c08bbc) /* 93 */, + U64_C(0x5c68c8c0a632f3ba) /* 94 */, U64_C(0x5504cc861c3d0556) /* 95 */, + U64_C(0xabbfa4e55fb26b8f) /* 96 */, U64_C(0x41848b0ab3baceb4) /* 97 */, + U64_C(0xb334a273aa445d32) /* 98 */, U64_C(0xbca696f0a85ad881) /* 99 */, + U64_C(0x24f6ec65b528d56c) /* 100 */, U64_C(0x0ce1512e90f4524a) /* 101 */, + U64_C(0x4e9dd79d5506d35a) /* 102 */, U64_C(0x258905fac6ce9779) /* 103 */, + U64_C(0x2019295b3e109b33) /* 104 */, U64_C(0xf8a9478b73a054cc) /* 105 */, + U64_C(0x2924f2f934417eb0) /* 106 */, U64_C(0x3993357d536d1bc4) /* 107 */, + U64_C(0x38a81ac21db6ff8b) /* 108 */, U64_C(0x47c4fbf17d6016bf) /* 109 */, + U64_C(0x1e0faadd7667e3f5) /* 110 */, U64_C(0x7abcff62938beb96) /* 111 */, + U64_C(0xa78dad948fc179c9) /* 112 */, U64_C(0x8f1f98b72911e50d) /* 113 */, + U64_C(0x61e48eae27121a91) /* 114 */, U64_C(0x4d62f7ad31859808) /* 115 */, + U64_C(0xeceba345ef5ceaeb) /* 116 */, U64_C(0xf5ceb25ebc9684ce) /* 117 */, + U64_C(0xf633e20cb7f76221) /* 118 */, U64_C(0xa32cdf06ab8293e4) /* 119 */, + U64_C(0x985a202ca5ee2ca4) /* 120 */, U64_C(0xcf0b8447cc8a8fb1) /* 121 */, + U64_C(0x9f765244979859a3) /* 122 */, U64_C(0xa8d516b1a1240017) /* 123 */, + U64_C(0x0bd7ba3ebb5dc726) /* 124 */, U64_C(0xe54bca55b86adb39) /* 125 */, + U64_C(0x1d7a3afd6c478063) /* 126 */, U64_C(0x519ec608e7669edd) /* 127 */, + U64_C(0x0e5715a2d149aa23) /* 128 */, U64_C(0x177d4571848ff194) /* 129 */, + U64_C(0xeeb55f3241014c22) /* 130 */, U64_C(0x0f5e5ca13a6e2ec2) /* 131 */, + U64_C(0x8029927b75f5c361) /* 132 */, U64_C(0xad139fabc3d6e436) /* 133 */, + U64_C(0x0d5df1a94ccf402f) /* 134 */, U64_C(0x3e8bd948bea5dfc8) /* 135 */, + U64_C(0xa5a0d357bd3ff77e) /* 136 */, U64_C(0xa2d12e251f74f645) /* 137 */, + U64_C(0x66fd9e525e81a082) /* 138 */, U64_C(0x2e0c90ce7f687a49) /* 139 */, + U64_C(0xc2e8bcbeba973bc5) /* 140 */, U64_C(0x000001bce509745f) /* 141 */, + U64_C(0x423777bbe6dab3d6) /* 142 */, U64_C(0xd1661c7eaef06eb5) /* 143 */, + U64_C(0xa1781f354daacfd8) /* 144 */, U64_C(0x2d11284a2b16affc) /* 145 */, + U64_C(0xf1fc4f67fa891d1f) /* 146 */, U64_C(0x73ecc25dcb920ada) /* 147 */, + U64_C(0xae610c22c2a12651) /* 148 */, U64_C(0x96e0a810d356b78a) /* 149 */, + U64_C(0x5a9a381f2fe7870f) /* 150 */, U64_C(0xd5ad62ede94e5530) /* 151 */, + U64_C(0xd225e5e8368d1427) /* 152 */, U64_C(0x65977b70c7af4631) /* 153 */, + U64_C(0x99f889b2de39d74f) /* 154 */, U64_C(0x233f30bf54e1d143) /* 155 */, + U64_C(0x9a9675d3d9a63c97) /* 156 */, U64_C(0x5470554ff334f9a8) /* 157 */, + 
U64_C(0x166acb744a4f5688) /* 158 */, U64_C(0x70c74caab2e4aead) /* 159 */, + U64_C(0xf0d091646f294d12) /* 160 */, U64_C(0x57b82a89684031d1) /* 161 */, + U64_C(0xefd95a5a61be0b6b) /* 162 */, U64_C(0x2fbd12e969f2f29a) /* 163 */, + U64_C(0x9bd37013feff9fe8) /* 164 */, U64_C(0x3f9b0404d6085a06) /* 165 */, + U64_C(0x4940c1f3166cfe15) /* 166 */, U64_C(0x09542c4dcdf3defb) /* 167 */, + U64_C(0xb4c5218385cd5ce3) /* 168 */, U64_C(0xc935b7dc4462a641) /* 169 */, + U64_C(0x3417f8a68ed3b63f) /* 170 */, U64_C(0xb80959295b215b40) /* 171 */, + U64_C(0xf99cdaef3b8c8572) /* 172 */, U64_C(0x018c0614f8fcb95d) /* 173 */, + U64_C(0x1b14accd1a3acdf3) /* 174 */, U64_C(0x84d471f200bb732d) /* 175 */, + U64_C(0xc1a3110e95e8da16) /* 176 */, U64_C(0x430a7220bf1a82b8) /* 177 */, + U64_C(0xb77e090d39df210e) /* 178 */, U64_C(0x5ef4bd9f3cd05e9d) /* 179 */, + U64_C(0x9d4ff6da7e57a444) /* 180 */, U64_C(0xda1d60e183d4a5f8) /* 181 */, + U64_C(0xb287c38417998e47) /* 182 */, U64_C(0xfe3edc121bb31886) /* 183 */, + U64_C(0xc7fe3ccc980ccbef) /* 184 */, U64_C(0xe46fb590189bfd03) /* 185 */, + U64_C(0x3732fd469a4c57dc) /* 186 */, U64_C(0x7ef700a07cf1ad65) /* 187 */, + U64_C(0x59c64468a31d8859) /* 188 */, U64_C(0x762fb0b4d45b61f6) /* 189 */, + U64_C(0x155baed099047718) /* 190 */, U64_C(0x68755e4c3d50baa6) /* 191 */, + U64_C(0xe9214e7f22d8b4df) /* 192 */, U64_C(0x2addbf532eac95f4) /* 193 */, + U64_C(0x32ae3909b4bd0109) /* 194 */, U64_C(0x834df537b08e3450) /* 195 */, + U64_C(0xfa209da84220728d) /* 196 */, U64_C(0x9e691d9b9efe23f7) /* 197 */, + U64_C(0x0446d288c4ae8d7f) /* 198 */, U64_C(0x7b4cc524e169785b) /* 199 */, + U64_C(0x21d87f0135ca1385) /* 200 */, U64_C(0xcebb400f137b8aa5) /* 201 */, + U64_C(0x272e2b66580796be) /* 202 */, U64_C(0x3612264125c2b0de) /* 203 */, + U64_C(0x057702bdad1efbb2) /* 204 */, U64_C(0xd4babb8eacf84be9) /* 205 */, + U64_C(0x91583139641bc67b) /* 206 */, U64_C(0x8bdc2de08036e024) /* 207 */, + U64_C(0x603c8156f49f68ed) /* 208 */, U64_C(0xf7d236f7dbef5111) /* 209 */, + U64_C(0x9727c4598ad21e80) /* 210 */, U64_C(0xa08a0896670a5fd7) /* 211 */, + U64_C(0xcb4a8f4309eba9cb) /* 212 */, U64_C(0x81af564b0f7036a1) /* 213 */, + U64_C(0xc0b99aa778199abd) /* 214 */, U64_C(0x959f1ec83fc8e952) /* 215 */, + U64_C(0x8c505077794a81b9) /* 216 */, U64_C(0x3acaaf8f056338f0) /* 217 */, + U64_C(0x07b43f50627a6778) /* 218 */, U64_C(0x4a44ab49f5eccc77) /* 219 */, + U64_C(0x3bc3d6e4b679ee98) /* 220 */, U64_C(0x9cc0d4d1cf14108c) /* 221 */, + U64_C(0x4406c00b206bc8a0) /* 222 */, U64_C(0x82a18854c8d72d89) /* 223 */, + U64_C(0x67e366b35c3c432c) /* 224 */, U64_C(0xb923dd61102b37f2) /* 225 */, + U64_C(0x56ab2779d884271d) /* 226 */, U64_C(0xbe83e1b0ff1525af) /* 227 */, + U64_C(0xfb7c65d4217e49a9) /* 228 */, U64_C(0x6bdbe0e76d48e7d4) /* 229 */, + U64_C(0x08df828745d9179e) /* 230 */, U64_C(0x22ea6a9add53bd34) /* 231 */, + U64_C(0xe36e141c5622200a) /* 232 */, U64_C(0x7f805d1b8cb750ee) /* 233 */, + U64_C(0xafe5c7a59f58e837) /* 234 */, U64_C(0xe27f996a4fb1c23c) /* 235 */, + U64_C(0xd3867dfb0775f0d0) /* 236 */, U64_C(0xd0e673de6e88891a) /* 237 */, + U64_C(0x123aeb9eafb86c25) /* 238 */, U64_C(0x30f1d5d5c145b895) /* 239 */, + U64_C(0xbb434a2dee7269e7) /* 240 */, U64_C(0x78cb67ecf931fa38) /* 241 */, + U64_C(0xf33b0372323bbf9c) /* 242 */, U64_C(0x52d66336fb279c74) /* 243 */, + U64_C(0x505f33ac0afb4eaa) /* 244 */, U64_C(0xe8a5cd99a2cce187) /* 245 */, + U64_C(0x534974801e2d30bb) /* 246 */, U64_C(0x8d2d5711d5876d90) /* 247 */, + U64_C(0x1f1a412891bc038e) /* 248 */, U64_C(0xd6e2e71d82e56648) /* 249 */, + U64_C(0x74036c3a497732b7) /* 250 */, 
U64_C(0x89b67ed96361f5ab) /* 251 */, + U64_C(0xffed95d8f1ea02a2) /* 252 */, U64_C(0xe72b3bd61464d43d) /* 253 */, + U64_C(0xa6300f170bdc4820) /* 254 */, U64_C(0xebc18760ed78a77a) /* 255 */ +}; +static u64 sbox2[256] = { + U64_C(0xe6a6be5a05a12138) /* 256 */, U64_C(0xb5a122a5b4f87c98) /* 257 */, + U64_C(0x563c6089140b6990) /* 258 */, U64_C(0x4c46cb2e391f5dd5) /* 259 */, + U64_C(0xd932addbc9b79434) /* 260 */, U64_C(0x08ea70e42015aff5) /* 261 */, + U64_C(0xd765a6673e478cf1) /* 262 */, U64_C(0xc4fb757eab278d99) /* 263 */, + U64_C(0xdf11c6862d6e0692) /* 264 */, U64_C(0xddeb84f10d7f3b16) /* 265 */, + U64_C(0x6f2ef604a665ea04) /* 266 */, U64_C(0x4a8e0f0ff0e0dfb3) /* 267 */, + U64_C(0xa5edeef83dbcba51) /* 268 */, U64_C(0xfc4f0a2a0ea4371e) /* 269 */, + U64_C(0xe83e1da85cb38429) /* 270 */, U64_C(0xdc8ff882ba1b1ce2) /* 271 */, + U64_C(0xcd45505e8353e80d) /* 272 */, U64_C(0x18d19a00d4db0717) /* 273 */, + U64_C(0x34a0cfeda5f38101) /* 274 */, U64_C(0x0be77e518887caf2) /* 275 */, + U64_C(0x1e341438b3c45136) /* 276 */, U64_C(0xe05797f49089ccf9) /* 277 */, + U64_C(0xffd23f9df2591d14) /* 278 */, U64_C(0x543dda228595c5cd) /* 279 */, + U64_C(0x661f81fd99052a33) /* 280 */, U64_C(0x8736e641db0f7b76) /* 281 */, + U64_C(0x15227725418e5307) /* 282 */, U64_C(0xe25f7f46162eb2fa) /* 283 */, + U64_C(0x48a8b2126c13d9fe) /* 284 */, U64_C(0xafdc541792e76eea) /* 285 */, + U64_C(0x03d912bfc6d1898f) /* 286 */, U64_C(0x31b1aafa1b83f51b) /* 287 */, + U64_C(0xf1ac2796e42ab7d9) /* 288 */, U64_C(0x40a3a7d7fcd2ebac) /* 289 */, + U64_C(0x1056136d0afbbcc5) /* 290 */, U64_C(0x7889e1dd9a6d0c85) /* 291 */, + U64_C(0xd33525782a7974aa) /* 292 */, U64_C(0xa7e25d09078ac09b) /* 293 */, + U64_C(0xbd4138b3eac6edd0) /* 294 */, U64_C(0x920abfbe71eb9e70) /* 295 */, + U64_C(0xa2a5d0f54fc2625c) /* 296 */, U64_C(0xc054e36b0b1290a3) /* 297 */, + U64_C(0xf6dd59ff62fe932b) /* 298 */, U64_C(0x3537354511a8ac7d) /* 299 */, + U64_C(0xca845e9172fadcd4) /* 300 */, U64_C(0x84f82b60329d20dc) /* 301 */, + U64_C(0x79c62ce1cd672f18) /* 302 */, U64_C(0x8b09a2add124642c) /* 303 */, + U64_C(0xd0c1e96a19d9e726) /* 304 */, U64_C(0x5a786a9b4ba9500c) /* 305 */, + U64_C(0x0e020336634c43f3) /* 306 */, U64_C(0xc17b474aeb66d822) /* 307 */, + U64_C(0x6a731ae3ec9baac2) /* 308 */, U64_C(0x8226667ae0840258) /* 309 */, + U64_C(0x67d4567691caeca5) /* 310 */, U64_C(0x1d94155c4875adb5) /* 311 */, + U64_C(0x6d00fd985b813fdf) /* 312 */, U64_C(0x51286efcb774cd06) /* 313 */, + U64_C(0x5e8834471fa744af) /* 314 */, U64_C(0xf72ca0aee761ae2e) /* 315 */, + U64_C(0xbe40e4cdaee8e09a) /* 316 */, U64_C(0xe9970bbb5118f665) /* 317 */, + U64_C(0x726e4beb33df1964) /* 318 */, U64_C(0x703b000729199762) /* 319 */, + U64_C(0x4631d816f5ef30a7) /* 320 */, U64_C(0xb880b5b51504a6be) /* 321 */, + U64_C(0x641793c37ed84b6c) /* 322 */, U64_C(0x7b21ed77f6e97d96) /* 323 */, + U64_C(0x776306312ef96b73) /* 324 */, U64_C(0xae528948e86ff3f4) /* 325 */, + U64_C(0x53dbd7f286a3f8f8) /* 326 */, U64_C(0x16cadce74cfc1063) /* 327 */, + U64_C(0x005c19bdfa52c6dd) /* 328 */, U64_C(0x68868f5d64d46ad3) /* 329 */, + U64_C(0x3a9d512ccf1e186a) /* 330 */, U64_C(0x367e62c2385660ae) /* 331 */, + U64_C(0xe359e7ea77dcb1d7) /* 332 */, U64_C(0x526c0773749abe6e) /* 333 */, + U64_C(0x735ae5f9d09f734b) /* 334 */, U64_C(0x493fc7cc8a558ba8) /* 335 */, + U64_C(0xb0b9c1533041ab45) /* 336 */, U64_C(0x321958ba470a59bd) /* 337 */, + U64_C(0x852db00b5f46c393) /* 338 */, U64_C(0x91209b2bd336b0e5) /* 339 */, + U64_C(0x6e604f7d659ef19f) /* 340 */, U64_C(0xb99a8ae2782ccb24) /* 341 */, + U64_C(0xccf52ab6c814c4c7) /* 342 */, U64_C(0x4727d9afbe11727b) /* 
343 */, + U64_C(0x7e950d0c0121b34d) /* 344 */, U64_C(0x756f435670ad471f) /* 345 */, + U64_C(0xf5add442615a6849) /* 346 */, U64_C(0x4e87e09980b9957a) /* 347 */, + U64_C(0x2acfa1df50aee355) /* 348 */, U64_C(0xd898263afd2fd556) /* 349 */, + U64_C(0xc8f4924dd80c8fd6) /* 350 */, U64_C(0xcf99ca3d754a173a) /* 351 */, + U64_C(0xfe477bacaf91bf3c) /* 352 */, U64_C(0xed5371f6d690c12d) /* 353 */, + U64_C(0x831a5c285e687094) /* 354 */, U64_C(0xc5d3c90a3708a0a4) /* 355 */, + U64_C(0x0f7f903717d06580) /* 356 */, U64_C(0x19f9bb13b8fdf27f) /* 357 */, + U64_C(0xb1bd6f1b4d502843) /* 358 */, U64_C(0x1c761ba38fff4012) /* 359 */, + U64_C(0x0d1530c4e2e21f3b) /* 360 */, U64_C(0x8943ce69a7372c8a) /* 361 */, + U64_C(0xe5184e11feb5ce66) /* 362 */, U64_C(0x618bdb80bd736621) /* 363 */, + U64_C(0x7d29bad68b574d0b) /* 364 */, U64_C(0x81bb613e25e6fe5b) /* 365 */, + U64_C(0x071c9c10bc07913f) /* 366 */, U64_C(0xc7beeb7909ac2d97) /* 367 */, + U64_C(0xc3e58d353bc5d757) /* 368 */, U64_C(0xeb017892f38f61e8) /* 369 */, + U64_C(0xd4effb9c9b1cc21a) /* 370 */, U64_C(0x99727d26f494f7ab) /* 371 */, + U64_C(0xa3e063a2956b3e03) /* 372 */, U64_C(0x9d4a8b9a4aa09c30) /* 373 */, + U64_C(0x3f6ab7d500090fb4) /* 374 */, U64_C(0x9cc0f2a057268ac0) /* 375 */, + U64_C(0x3dee9d2dedbf42d1) /* 376 */, U64_C(0x330f49c87960a972) /* 377 */, + U64_C(0xc6b2720287421b41) /* 378 */, U64_C(0x0ac59ec07c00369c) /* 379 */, + U64_C(0xef4eac49cb353425) /* 380 */, U64_C(0xf450244eef0129d8) /* 381 */, + U64_C(0x8acc46e5caf4deb6) /* 382 */, U64_C(0x2ffeab63989263f7) /* 383 */, + U64_C(0x8f7cb9fe5d7a4578) /* 384 */, U64_C(0x5bd8f7644e634635) /* 385 */, + U64_C(0x427a7315bf2dc900) /* 386 */, U64_C(0x17d0c4aa2125261c) /* 387 */, + U64_C(0x3992486c93518e50) /* 388 */, U64_C(0xb4cbfee0a2d7d4c3) /* 389 */, + U64_C(0x7c75d6202c5ddd8d) /* 390 */, U64_C(0xdbc295d8e35b6c61) /* 391 */, + U64_C(0x60b369d302032b19) /* 392 */, U64_C(0xce42685fdce44132) /* 393 */, + U64_C(0x06f3ddb9ddf65610) /* 394 */, U64_C(0x8ea4d21db5e148f0) /* 395 */, + U64_C(0x20b0fce62fcd496f) /* 396 */, U64_C(0x2c1b912358b0ee31) /* 397 */, + U64_C(0xb28317b818f5a308) /* 398 */, U64_C(0xa89c1e189ca6d2cf) /* 399 */, + U64_C(0x0c6b18576aaadbc8) /* 400 */, U64_C(0xb65deaa91299fae3) /* 401 */, + U64_C(0xfb2b794b7f1027e7) /* 402 */, U64_C(0x04e4317f443b5beb) /* 403 */, + U64_C(0x4b852d325939d0a6) /* 404 */, U64_C(0xd5ae6beefb207ffc) /* 405 */, + U64_C(0x309682b281c7d374) /* 406 */, U64_C(0xbae309a194c3b475) /* 407 */, + U64_C(0x8cc3f97b13b49f05) /* 408 */, U64_C(0x98a9422ff8293967) /* 409 */, + U64_C(0x244b16b01076ff7c) /* 410 */, U64_C(0xf8bf571c663d67ee) /* 411 */, + U64_C(0x1f0d6758eee30da1) /* 412 */, U64_C(0xc9b611d97adeb9b7) /* 413 */, + U64_C(0xb7afd5887b6c57a2) /* 414 */, U64_C(0x6290ae846b984fe1) /* 415 */, + U64_C(0x94df4cdeacc1a5fd) /* 416 */, U64_C(0x058a5bd1c5483aff) /* 417 */, + U64_C(0x63166cc142ba3c37) /* 418 */, U64_C(0x8db8526eb2f76f40) /* 419 */, + U64_C(0xe10880036f0d6d4e) /* 420 */, U64_C(0x9e0523c9971d311d) /* 421 */, + U64_C(0x45ec2824cc7cd691) /* 422 */, U64_C(0x575b8359e62382c9) /* 423 */, + U64_C(0xfa9e400dc4889995) /* 424 */, U64_C(0xd1823ecb45721568) /* 425 */, + U64_C(0xdafd983b8206082f) /* 426 */, U64_C(0xaa7d29082386a8cb) /* 427 */, + U64_C(0x269fcd4403b87588) /* 428 */, U64_C(0x1b91f5f728bdd1e0) /* 429 */, + U64_C(0xe4669f39040201f6) /* 430 */, U64_C(0x7a1d7c218cf04ade) /* 431 */, + U64_C(0x65623c29d79ce5ce) /* 432 */, U64_C(0x2368449096c00bb1) /* 433 */, + U64_C(0xab9bf1879da503ba) /* 434 */, U64_C(0xbc23ecb1a458058e) /* 435 */, + U64_C(0x9a58df01bb401ecc) /* 436 */, 
U64_C(0xa070e868a85f143d) /* 437 */, + U64_C(0x4ff188307df2239e) /* 438 */, U64_C(0x14d565b41a641183) /* 439 */, + U64_C(0xee13337452701602) /* 440 */, U64_C(0x950e3dcf3f285e09) /* 441 */, + U64_C(0x59930254b9c80953) /* 442 */, U64_C(0x3bf299408930da6d) /* 443 */, + U64_C(0xa955943f53691387) /* 444 */, U64_C(0xa15edecaa9cb8784) /* 445 */, + U64_C(0x29142127352be9a0) /* 446 */, U64_C(0x76f0371fff4e7afb) /* 447 */, + U64_C(0x0239f450274f2228) /* 448 */, U64_C(0xbb073af01d5e868b) /* 449 */, + U64_C(0xbfc80571c10e96c1) /* 450 */, U64_C(0xd267088568222e23) /* 451 */, + U64_C(0x9671a3d48e80b5b0) /* 452 */, U64_C(0x55b5d38ae193bb81) /* 453 */, + U64_C(0x693ae2d0a18b04b8) /* 454 */, U64_C(0x5c48b4ecadd5335f) /* 455 */, + U64_C(0xfd743b194916a1ca) /* 456 */, U64_C(0x2577018134be98c4) /* 457 */, + U64_C(0xe77987e83c54a4ad) /* 458 */, U64_C(0x28e11014da33e1b9) /* 459 */, + U64_C(0x270cc59e226aa213) /* 460 */, U64_C(0x71495f756d1a5f60) /* 461 */, + U64_C(0x9be853fb60afef77) /* 462 */, U64_C(0xadc786a7f7443dbf) /* 463 */, + U64_C(0x0904456173b29a82) /* 464 */, U64_C(0x58bc7a66c232bd5e) /* 465 */, + U64_C(0xf306558c673ac8b2) /* 466 */, U64_C(0x41f639c6b6c9772a) /* 467 */, + U64_C(0x216defe99fda35da) /* 468 */, U64_C(0x11640cc71c7be615) /* 469 */, + U64_C(0x93c43694565c5527) /* 470 */, U64_C(0xea038e6246777839) /* 471 */, + U64_C(0xf9abf3ce5a3e2469) /* 472 */, U64_C(0x741e768d0fd312d2) /* 473 */, + U64_C(0x0144b883ced652c6) /* 474 */, U64_C(0xc20b5a5ba33f8552) /* 475 */, + U64_C(0x1ae69633c3435a9d) /* 476 */, U64_C(0x97a28ca4088cfdec) /* 477 */, + U64_C(0x8824a43c1e96f420) /* 478 */, U64_C(0x37612fa66eeea746) /* 479 */, + U64_C(0x6b4cb165f9cf0e5a) /* 480 */, U64_C(0x43aa1c06a0abfb4a) /* 481 */, + U64_C(0x7f4dc26ff162796b) /* 482 */, U64_C(0x6cbacc8e54ed9b0f) /* 483 */, + U64_C(0xa6b7ffefd2bb253e) /* 484 */, U64_C(0x2e25bc95b0a29d4f) /* 485 */, + U64_C(0x86d6a58bdef1388c) /* 486 */, U64_C(0xded74ac576b6f054) /* 487 */, + U64_C(0x8030bdbc2b45805d) /* 488 */, U64_C(0x3c81af70e94d9289) /* 489 */, + U64_C(0x3eff6dda9e3100db) /* 490 */, U64_C(0xb38dc39fdfcc8847) /* 491 */, + U64_C(0x123885528d17b87e) /* 492 */, U64_C(0xf2da0ed240b1b642) /* 493 */, + U64_C(0x44cefadcd54bf9a9) /* 494 */, U64_C(0x1312200e433c7ee6) /* 495 */, + U64_C(0x9ffcc84f3a78c748) /* 496 */, U64_C(0xf0cd1f72248576bb) /* 497 */, + U64_C(0xec6974053638cfe4) /* 498 */, U64_C(0x2ba7b67c0cec4e4c) /* 499 */, + U64_C(0xac2f4df3e5ce32ed) /* 500 */, U64_C(0xcb33d14326ea4c11) /* 501 */, + U64_C(0xa4e9044cc77e58bc) /* 502 */, U64_C(0x5f513293d934fcef) /* 503 */, + U64_C(0x5dc9645506e55444) /* 504 */, U64_C(0x50de418f317de40a) /* 505 */, + U64_C(0x388cb31a69dde259) /* 506 */, U64_C(0x2db4a83455820a86) /* 507 */, + U64_C(0x9010a91e84711ae9) /* 508 */, U64_C(0x4df7f0b7b1498371) /* 509 */, + U64_C(0xd62a2eabc0977179) /* 510 */, U64_C(0x22fac097aa8d5c0e) /* 511 */ +}; +static u64 sbox3[256] = { + U64_C(0xf49fcc2ff1daf39b) /* 512 */, U64_C(0x487fd5c66ff29281) /* 513 */, + U64_C(0xe8a30667fcdca83f) /* 514 */, U64_C(0x2c9b4be3d2fcce63) /* 515 */, + U64_C(0xda3ff74b93fbbbc2) /* 516 */, U64_C(0x2fa165d2fe70ba66) /* 517 */, + U64_C(0xa103e279970e93d4) /* 518 */, U64_C(0xbecdec77b0e45e71) /* 519 */, + U64_C(0xcfb41e723985e497) /* 520 */, U64_C(0xb70aaa025ef75017) /* 521 */, + U64_C(0xd42309f03840b8e0) /* 522 */, U64_C(0x8efc1ad035898579) /* 523 */, + U64_C(0x96c6920be2b2abc5) /* 524 */, U64_C(0x66af4163375a9172) /* 525 */, + U64_C(0x2174abdcca7127fb) /* 526 */, U64_C(0xb33ccea64a72ff41) /* 527 */, + U64_C(0xf04a4933083066a5) /* 528 */, U64_C(0x8d970acdd7289af5) /* 
529 */, + U64_C(0x8f96e8e031c8c25e) /* 530 */, U64_C(0xf3fec02276875d47) /* 531 */, + U64_C(0xec7bf310056190dd) /* 532 */, U64_C(0xf5adb0aebb0f1491) /* 533 */, + U64_C(0x9b50f8850fd58892) /* 534 */, U64_C(0x4975488358b74de8) /* 535 */, + U64_C(0xa3354ff691531c61) /* 536 */, U64_C(0x0702bbe481d2c6ee) /* 537 */, + U64_C(0x89fb24057deded98) /* 538 */, U64_C(0xac3075138596e902) /* 539 */, + U64_C(0x1d2d3580172772ed) /* 540 */, U64_C(0xeb738fc28e6bc30d) /* 541 */, + U64_C(0x5854ef8f63044326) /* 542 */, U64_C(0x9e5c52325add3bbe) /* 543 */, + U64_C(0x90aa53cf325c4623) /* 544 */, U64_C(0xc1d24d51349dd067) /* 545 */, + U64_C(0x2051cfeea69ea624) /* 546 */, U64_C(0x13220f0a862e7e4f) /* 547 */, + U64_C(0xce39399404e04864) /* 548 */, U64_C(0xd9c42ca47086fcb7) /* 549 */, + U64_C(0x685ad2238a03e7cc) /* 550 */, U64_C(0x066484b2ab2ff1db) /* 551 */, + U64_C(0xfe9d5d70efbf79ec) /* 552 */, U64_C(0x5b13b9dd9c481854) /* 553 */, + U64_C(0x15f0d475ed1509ad) /* 554 */, U64_C(0x0bebcd060ec79851) /* 555 */, + U64_C(0xd58c6791183ab7f8) /* 556 */, U64_C(0xd1187c5052f3eee4) /* 557 */, + U64_C(0xc95d1192e54e82ff) /* 558 */, U64_C(0x86eea14cb9ac6ca2) /* 559 */, + U64_C(0x3485beb153677d5d) /* 560 */, U64_C(0xdd191d781f8c492a) /* 561 */, + U64_C(0xf60866baa784ebf9) /* 562 */, U64_C(0x518f643ba2d08c74) /* 563 */, + U64_C(0x8852e956e1087c22) /* 564 */, U64_C(0xa768cb8dc410ae8d) /* 565 */, + U64_C(0x38047726bfec8e1a) /* 566 */, U64_C(0xa67738b4cd3b45aa) /* 567 */, + U64_C(0xad16691cec0dde19) /* 568 */, U64_C(0xc6d4319380462e07) /* 569 */, + U64_C(0xc5a5876d0ba61938) /* 570 */, U64_C(0x16b9fa1fa58fd840) /* 571 */, + U64_C(0x188ab1173ca74f18) /* 572 */, U64_C(0xabda2f98c99c021f) /* 573 */, + U64_C(0x3e0580ab134ae816) /* 574 */, U64_C(0x5f3b05b773645abb) /* 575 */, + U64_C(0x2501a2be5575f2f6) /* 576 */, U64_C(0x1b2f74004e7e8ba9) /* 577 */, + U64_C(0x1cd7580371e8d953) /* 578 */, U64_C(0x7f6ed89562764e30) /* 579 */, + U64_C(0xb15926ff596f003d) /* 580 */, U64_C(0x9f65293da8c5d6b9) /* 581 */, + U64_C(0x6ecef04dd690f84c) /* 582 */, U64_C(0x4782275fff33af88) /* 583 */, + U64_C(0xe41433083f820801) /* 584 */, U64_C(0xfd0dfe409a1af9b5) /* 585 */, + U64_C(0x4325a3342cdb396b) /* 586 */, U64_C(0x8ae77e62b301b252) /* 587 */, + U64_C(0xc36f9e9f6655615a) /* 588 */, U64_C(0x85455a2d92d32c09) /* 589 */, + U64_C(0xf2c7dea949477485) /* 590 */, U64_C(0x63cfb4c133a39eba) /* 591 */, + U64_C(0x83b040cc6ebc5462) /* 592 */, U64_C(0x3b9454c8fdb326b0) /* 593 */, + U64_C(0x56f56a9e87ffd78c) /* 594 */, U64_C(0x2dc2940d99f42bc6) /* 595 */, + U64_C(0x98f7df096b096e2d) /* 596 */, U64_C(0x19a6e01e3ad852bf) /* 597 */, + U64_C(0x42a99ccbdbd4b40b) /* 598 */, U64_C(0xa59998af45e9c559) /* 599 */, + U64_C(0x366295e807d93186) /* 600 */, U64_C(0x6b48181bfaa1f773) /* 601 */, + U64_C(0x1fec57e2157a0a1d) /* 602 */, U64_C(0x4667446af6201ad5) /* 603 */, + U64_C(0xe615ebcacfb0f075) /* 604 */, U64_C(0xb8f31f4f68290778) /* 605 */, + U64_C(0x22713ed6ce22d11e) /* 606 */, U64_C(0x3057c1a72ec3c93b) /* 607 */, + U64_C(0xcb46acc37c3f1f2f) /* 608 */, U64_C(0xdbb893fd02aaf50e) /* 609 */, + U64_C(0x331fd92e600b9fcf) /* 610 */, U64_C(0xa498f96148ea3ad6) /* 611 */, + U64_C(0xa8d8426e8b6a83ea) /* 612 */, U64_C(0xa089b274b7735cdc) /* 613 */, + U64_C(0x87f6b3731e524a11) /* 614 */, U64_C(0x118808e5cbc96749) /* 615 */, + U64_C(0x9906e4c7b19bd394) /* 616 */, U64_C(0xafed7f7e9b24a20c) /* 617 */, + U64_C(0x6509eadeeb3644a7) /* 618 */, U64_C(0x6c1ef1d3e8ef0ede) /* 619 */, + U64_C(0xb9c97d43e9798fb4) /* 620 */, U64_C(0xa2f2d784740c28a3) /* 621 */, + U64_C(0x7b8496476197566f) /* 622 */, 
U64_C(0x7a5be3e6b65f069d) /* 623 */, + U64_C(0xf96330ed78be6f10) /* 624 */, U64_C(0xeee60de77a076a15) /* 625 */, + U64_C(0x2b4bee4aa08b9bd0) /* 626 */, U64_C(0x6a56a63ec7b8894e) /* 627 */, + U64_C(0x02121359ba34fef4) /* 628 */, U64_C(0x4cbf99f8283703fc) /* 629 */, + U64_C(0x398071350caf30c8) /* 630 */, U64_C(0xd0a77a89f017687a) /* 631 */, + U64_C(0xf1c1a9eb9e423569) /* 632 */, U64_C(0x8c7976282dee8199) /* 633 */, + U64_C(0x5d1737a5dd1f7abd) /* 634 */, U64_C(0x4f53433c09a9fa80) /* 635 */, + U64_C(0xfa8b0c53df7ca1d9) /* 636 */, U64_C(0x3fd9dcbc886ccb77) /* 637 */, + U64_C(0xc040917ca91b4720) /* 638 */, U64_C(0x7dd00142f9d1dcdf) /* 639 */, + U64_C(0x8476fc1d4f387b58) /* 640 */, U64_C(0x23f8e7c5f3316503) /* 641 */, + U64_C(0x032a2244e7e37339) /* 642 */, U64_C(0x5c87a5d750f5a74b) /* 643 */, + U64_C(0x082b4cc43698992e) /* 644 */, U64_C(0xdf917becb858f63c) /* 645 */, + U64_C(0x3270b8fc5bf86dda) /* 646 */, U64_C(0x10ae72bb29b5dd76) /* 647 */, + U64_C(0x576ac94e7700362b) /* 648 */, U64_C(0x1ad112dac61efb8f) /* 649 */, + U64_C(0x691bc30ec5faa427) /* 650 */, U64_C(0xff246311cc327143) /* 651 */, + U64_C(0x3142368e30e53206) /* 652 */, U64_C(0x71380e31e02ca396) /* 653 */, + U64_C(0x958d5c960aad76f1) /* 654 */, U64_C(0xf8d6f430c16da536) /* 655 */, + U64_C(0xc8ffd13f1be7e1d2) /* 656 */, U64_C(0x7578ae66004ddbe1) /* 657 */, + U64_C(0x05833f01067be646) /* 658 */, U64_C(0xbb34b5ad3bfe586d) /* 659 */, + U64_C(0x095f34c9a12b97f0) /* 660 */, U64_C(0x247ab64525d60ca8) /* 661 */, + U64_C(0xdcdbc6f3017477d1) /* 662 */, U64_C(0x4a2e14d4decad24d) /* 663 */, + U64_C(0xbdb5e6d9be0a1eeb) /* 664 */, U64_C(0x2a7e70f7794301ab) /* 665 */, + U64_C(0xdef42d8a270540fd) /* 666 */, U64_C(0x01078ec0a34c22c1) /* 667 */, + U64_C(0xe5de511af4c16387) /* 668 */, U64_C(0x7ebb3a52bd9a330a) /* 669 */, + U64_C(0x77697857aa7d6435) /* 670 */, U64_C(0x004e831603ae4c32) /* 671 */, + U64_C(0xe7a21020ad78e312) /* 672 */, U64_C(0x9d41a70c6ab420f2) /* 673 */, + U64_C(0x28e06c18ea1141e6) /* 674 */, U64_C(0xd2b28cbd984f6b28) /* 675 */, + U64_C(0x26b75f6c446e9d83) /* 676 */, U64_C(0xba47568c4d418d7f) /* 677 */, + U64_C(0xd80badbfe6183d8e) /* 678 */, U64_C(0x0e206d7f5f166044) /* 679 */, + U64_C(0xe258a43911cbca3e) /* 680 */, U64_C(0x723a1746b21dc0bc) /* 681 */, + U64_C(0xc7caa854f5d7cdd3) /* 682 */, U64_C(0x7cac32883d261d9c) /* 683 */, + U64_C(0x7690c26423ba942c) /* 684 */, U64_C(0x17e55524478042b8) /* 685 */, + U64_C(0xe0be477656a2389f) /* 686 */, U64_C(0x4d289b5e67ab2da0) /* 687 */, + U64_C(0x44862b9c8fbbfd31) /* 688 */, U64_C(0xb47cc8049d141365) /* 689 */, + U64_C(0x822c1b362b91c793) /* 690 */, U64_C(0x4eb14655fb13dfd8) /* 691 */, + U64_C(0x1ecbba0714e2a97b) /* 692 */, U64_C(0x6143459d5cde5f14) /* 693 */, + U64_C(0x53a8fbf1d5f0ac89) /* 694 */, U64_C(0x97ea04d81c5e5b00) /* 695 */, + U64_C(0x622181a8d4fdb3f3) /* 696 */, U64_C(0xe9bcd341572a1208) /* 697 */, + U64_C(0x1411258643cce58a) /* 698 */, U64_C(0x9144c5fea4c6e0a4) /* 699 */, + U64_C(0x0d33d06565cf620f) /* 700 */, U64_C(0x54a48d489f219ca1) /* 701 */, + U64_C(0xc43e5eac6d63c821) /* 702 */, U64_C(0xa9728b3a72770daf) /* 703 */, + U64_C(0xd7934e7b20df87ef) /* 704 */, U64_C(0xe35503b61a3e86e5) /* 705 */, + U64_C(0xcae321fbc819d504) /* 706 */, U64_C(0x129a50b3ac60bfa6) /* 707 */, + U64_C(0xcd5e68ea7e9fb6c3) /* 708 */, U64_C(0xb01c90199483b1c7) /* 709 */, + U64_C(0x3de93cd5c295376c) /* 710 */, U64_C(0xaed52edf2ab9ad13) /* 711 */, + U64_C(0x2e60f512c0a07884) /* 712 */, U64_C(0xbc3d86a3e36210c9) /* 713 */, + U64_C(0x35269d9b163951ce) /* 714 */, U64_C(0x0c7d6e2ad0cdb5fa) /* 715 */, + 
U64_C(0x59e86297d87f5733) /* 716 */, U64_C(0x298ef221898db0e7) /* 717 */, + U64_C(0x55000029d1a5aa7e) /* 718 */, U64_C(0x8bc08ae1b5061b45) /* 719 */, + U64_C(0xc2c31c2b6c92703a) /* 720 */, U64_C(0x94cc596baf25ef42) /* 721 */, + U64_C(0x0a1d73db22540456) /* 722 */, U64_C(0x04b6a0f9d9c4179a) /* 723 */, + U64_C(0xeffdafa2ae3d3c60) /* 724 */, U64_C(0xf7c8075bb49496c4) /* 725 */, + U64_C(0x9cc5c7141d1cd4e3) /* 726 */, U64_C(0x78bd1638218e5534) /* 727 */, + U64_C(0xb2f11568f850246a) /* 728 */, U64_C(0xedfabcfa9502bc29) /* 729 */, + U64_C(0x796ce5f2da23051b) /* 730 */, U64_C(0xaae128b0dc93537c) /* 731 */, + U64_C(0x3a493da0ee4b29ae) /* 732 */, U64_C(0xb5df6b2c416895d7) /* 733 */, + U64_C(0xfcabbd25122d7f37) /* 734 */, U64_C(0x70810b58105dc4b1) /* 735 */, + U64_C(0xe10fdd37f7882a90) /* 736 */, U64_C(0x524dcab5518a3f5c) /* 737 */, + U64_C(0x3c9e85878451255b) /* 738 */, U64_C(0x4029828119bd34e2) /* 739 */, + U64_C(0x74a05b6f5d3ceccb) /* 740 */, U64_C(0xb610021542e13eca) /* 741 */, + U64_C(0x0ff979d12f59e2ac) /* 742 */, U64_C(0x6037da27e4f9cc50) /* 743 */, + U64_C(0x5e92975a0df1847d) /* 744 */, U64_C(0xd66de190d3e623fe) /* 745 */, + U64_C(0x5032d6b87b568048) /* 746 */, U64_C(0x9a36b7ce8235216e) /* 747 */, + U64_C(0x80272a7a24f64b4a) /* 748 */, U64_C(0x93efed8b8c6916f7) /* 749 */, + U64_C(0x37ddbff44cce1555) /* 750 */, U64_C(0x4b95db5d4b99bd25) /* 751 */, + U64_C(0x92d3fda169812fc0) /* 752 */, U64_C(0xfb1a4a9a90660bb6) /* 753 */, + U64_C(0x730c196946a4b9b2) /* 754 */, U64_C(0x81e289aa7f49da68) /* 755 */, + U64_C(0x64669a0f83b1a05f) /* 756 */, U64_C(0x27b3ff7d9644f48b) /* 757 */, + U64_C(0xcc6b615c8db675b3) /* 758 */, U64_C(0x674f20b9bcebbe95) /* 759 */, + U64_C(0x6f31238275655982) /* 760 */, U64_C(0x5ae488713e45cf05) /* 761 */, + U64_C(0xbf619f9954c21157) /* 762 */, U64_C(0xeabac46040a8eae9) /* 763 */, + U64_C(0x454c6fe9f2c0c1cd) /* 764 */, U64_C(0x419cf6496412691c) /* 765 */, + U64_C(0xd3dc3bef265b0f70) /* 766 */, U64_C(0x6d0e60f5c3578a9e) /* 767 */ +}; +static u64 sbox4[256] = { + U64_C(0x5b0e608526323c55) /* 768 */, U64_C(0x1a46c1a9fa1b59f5) /* 769 */, + U64_C(0xa9e245a17c4c8ffa) /* 770 */, U64_C(0x65ca5159db2955d7) /* 771 */, + U64_C(0x05db0a76ce35afc2) /* 772 */, U64_C(0x81eac77ea9113d45) /* 773 */, + U64_C(0x528ef88ab6ac0a0d) /* 774 */, U64_C(0xa09ea253597be3ff) /* 775 */, + U64_C(0x430ddfb3ac48cd56) /* 776 */, U64_C(0xc4b3a67af45ce46f) /* 777 */, + U64_C(0x4ececfd8fbe2d05e) /* 778 */, U64_C(0x3ef56f10b39935f0) /* 779 */, + U64_C(0x0b22d6829cd619c6) /* 780 */, U64_C(0x17fd460a74df2069) /* 781 */, + U64_C(0x6cf8cc8e8510ed40) /* 782 */, U64_C(0xd6c824bf3a6ecaa7) /* 783 */, + U64_C(0x61243d581a817049) /* 784 */, U64_C(0x048bacb6bbc163a2) /* 785 */, + U64_C(0xd9a38ac27d44cc32) /* 786 */, U64_C(0x7fddff5baaf410ab) /* 787 */, + U64_C(0xad6d495aa804824b) /* 788 */, U64_C(0xe1a6a74f2d8c9f94) /* 789 */, + U64_C(0xd4f7851235dee8e3) /* 790 */, U64_C(0xfd4b7f886540d893) /* 791 */, + U64_C(0x247c20042aa4bfda) /* 792 */, U64_C(0x096ea1c517d1327c) /* 793 */, + U64_C(0xd56966b4361a6685) /* 794 */, U64_C(0x277da5c31221057d) /* 795 */, + U64_C(0x94d59893a43acff7) /* 796 */, U64_C(0x64f0c51ccdc02281) /* 797 */, + U64_C(0x3d33bcc4ff6189db) /* 798 */, U64_C(0xe005cb184ce66af1) /* 799 */, + U64_C(0xff5ccd1d1db99bea) /* 800 */, U64_C(0xb0b854a7fe42980f) /* 801 */, + U64_C(0x7bd46a6a718d4b9f) /* 802 */, U64_C(0xd10fa8cc22a5fd8c) /* 803 */, + U64_C(0xd31484952be4bd31) /* 804 */, U64_C(0xc7fa975fcb243847) /* 805 */, + U64_C(0x4886ed1e5846c407) /* 806 */, U64_C(0x28cddb791eb70b04) /* 807 */, + U64_C(0xc2b00be2f573417f) /* 
808 */, U64_C(0x5c9590452180f877) /* 809 */, + U64_C(0x7a6bddfff370eb00) /* 810 */, U64_C(0xce509e38d6d9d6a4) /* 811 */, + U64_C(0xebeb0f00647fa702) /* 812 */, U64_C(0x1dcc06cf76606f06) /* 813 */, + U64_C(0xe4d9f28ba286ff0a) /* 814 */, U64_C(0xd85a305dc918c262) /* 815 */, + U64_C(0x475b1d8732225f54) /* 816 */, U64_C(0x2d4fb51668ccb5fe) /* 817 */, + U64_C(0xa679b9d9d72bba20) /* 818 */, U64_C(0x53841c0d912d43a5) /* 819 */, + U64_C(0x3b7eaa48bf12a4e8) /* 820 */, U64_C(0x781e0e47f22f1ddf) /* 821 */, + U64_C(0xeff20ce60ab50973) /* 822 */, U64_C(0x20d261d19dffb742) /* 823 */, + U64_C(0x16a12b03062a2e39) /* 824 */, U64_C(0x1960eb2239650495) /* 825 */, + U64_C(0x251c16fed50eb8b8) /* 826 */, U64_C(0x9ac0c330f826016e) /* 827 */, + U64_C(0xed152665953e7671) /* 828 */, U64_C(0x02d63194a6369570) /* 829 */, + U64_C(0x5074f08394b1c987) /* 830 */, U64_C(0x70ba598c90b25ce1) /* 831 */, + U64_C(0x794a15810b9742f6) /* 832 */, U64_C(0x0d5925e9fcaf8c6c) /* 833 */, + U64_C(0x3067716cd868744e) /* 834 */, U64_C(0x910ab077e8d7731b) /* 835 */, + U64_C(0x6a61bbdb5ac42f61) /* 836 */, U64_C(0x93513efbf0851567) /* 837 */, + U64_C(0xf494724b9e83e9d5) /* 838 */, U64_C(0xe887e1985c09648d) /* 839 */, + U64_C(0x34b1d3c675370cfd) /* 840 */, U64_C(0xdc35e433bc0d255d) /* 841 */, + U64_C(0xd0aab84234131be0) /* 842 */, U64_C(0x08042a50b48b7eaf) /* 843 */, + U64_C(0x9997c4ee44a3ab35) /* 844 */, U64_C(0x829a7b49201799d0) /* 845 */, + U64_C(0x263b8307b7c54441) /* 846 */, U64_C(0x752f95f4fd6a6ca6) /* 847 */, + U64_C(0x927217402c08c6e5) /* 848 */, U64_C(0x2a8ab754a795d9ee) /* 849 */, + U64_C(0xa442f7552f72943d) /* 850 */, U64_C(0x2c31334e19781208) /* 851 */, + U64_C(0x4fa98d7ceaee6291) /* 852 */, U64_C(0x55c3862f665db309) /* 853 */, + U64_C(0xbd0610175d53b1f3) /* 854 */, U64_C(0x46fe6cb840413f27) /* 855 */, + U64_C(0x3fe03792df0cfa59) /* 856 */, U64_C(0xcfe700372eb85e8f) /* 857 */, + U64_C(0xa7be29e7adbce118) /* 858 */, U64_C(0xe544ee5cde8431dd) /* 859 */, + U64_C(0x8a781b1b41f1873e) /* 860 */, U64_C(0xa5c94c78a0d2f0e7) /* 861 */, + U64_C(0x39412e2877b60728) /* 862 */, U64_C(0xa1265ef3afc9a62c) /* 863 */, + U64_C(0xbcc2770c6a2506c5) /* 864 */, U64_C(0x3ab66dd5dce1ce12) /* 865 */, + U64_C(0xe65499d04a675b37) /* 866 */, U64_C(0x7d8f523481bfd216) /* 867 */, + U64_C(0x0f6f64fcec15f389) /* 868 */, U64_C(0x74efbe618b5b13c8) /* 869 */, + U64_C(0xacdc82b714273e1d) /* 870 */, U64_C(0xdd40bfe003199d17) /* 871 */, + U64_C(0x37e99257e7e061f8) /* 872 */, U64_C(0xfa52626904775aaa) /* 873 */, + U64_C(0x8bbbf63a463d56f9) /* 874 */, U64_C(0xf0013f1543a26e64) /* 875 */, + U64_C(0xa8307e9f879ec898) /* 876 */, U64_C(0xcc4c27a4150177cc) /* 877 */, + U64_C(0x1b432f2cca1d3348) /* 878 */, U64_C(0xde1d1f8f9f6fa013) /* 879 */, + U64_C(0x606602a047a7ddd6) /* 880 */, U64_C(0xd237ab64cc1cb2c7) /* 881 */, + U64_C(0x9b938e7225fcd1d3) /* 882 */, U64_C(0xec4e03708e0ff476) /* 883 */, + U64_C(0xfeb2fbda3d03c12d) /* 884 */, U64_C(0xae0bced2ee43889a) /* 885 */, + U64_C(0x22cb8923ebfb4f43) /* 886 */, U64_C(0x69360d013cf7396d) /* 887 */, + U64_C(0x855e3602d2d4e022) /* 888 */, U64_C(0x073805bad01f784c) /* 889 */, + U64_C(0x33e17a133852f546) /* 890 */, U64_C(0xdf4874058ac7b638) /* 891 */, + U64_C(0xba92b29c678aa14a) /* 892 */, U64_C(0x0ce89fc76cfaadcd) /* 893 */, + U64_C(0x5f9d4e0908339e34) /* 894 */, U64_C(0xf1afe9291f5923b9) /* 895 */, + U64_C(0x6e3480f60f4a265f) /* 896 */, U64_C(0xeebf3a2ab29b841c) /* 897 */, + U64_C(0xe21938a88f91b4ad) /* 898 */, U64_C(0x57dfeff845c6d3c3) /* 899 */, + U64_C(0x2f006b0bf62caaf2) /* 900 */, U64_C(0x62f479ef6f75ee78) /* 901 */, + 
U64_C(0x11a55ad41c8916a9) /* 902 */, U64_C(0xf229d29084fed453) /* 903 */, + U64_C(0x42f1c27b16b000e6) /* 904 */, U64_C(0x2b1f76749823c074) /* 905 */, + U64_C(0x4b76eca3c2745360) /* 906 */, U64_C(0x8c98f463b91691bd) /* 907 */, + U64_C(0x14bcc93cf1ade66a) /* 908 */, U64_C(0x8885213e6d458397) /* 909 */, + U64_C(0x8e177df0274d4711) /* 910 */, U64_C(0xb49b73b5503f2951) /* 911 */, + U64_C(0x10168168c3f96b6b) /* 912 */, U64_C(0x0e3d963b63cab0ae) /* 913 */, + U64_C(0x8dfc4b5655a1db14) /* 914 */, U64_C(0xf789f1356e14de5c) /* 915 */, + U64_C(0x683e68af4e51dac1) /* 916 */, U64_C(0xc9a84f9d8d4b0fd9) /* 917 */, + U64_C(0x3691e03f52a0f9d1) /* 918 */, U64_C(0x5ed86e46e1878e80) /* 919 */, + U64_C(0x3c711a0e99d07150) /* 920 */, U64_C(0x5a0865b20c4e9310) /* 921 */, + U64_C(0x56fbfc1fe4f0682e) /* 922 */, U64_C(0xea8d5de3105edf9b) /* 923 */, + U64_C(0x71abfdb12379187a) /* 924 */, U64_C(0x2eb99de1bee77b9c) /* 925 */, + U64_C(0x21ecc0ea33cf4523) /* 926 */, U64_C(0x59a4d7521805c7a1) /* 927 */, + U64_C(0x3896f5eb56ae7c72) /* 928 */, U64_C(0xaa638f3db18f75dc) /* 929 */, + U64_C(0x9f39358dabe9808e) /* 930 */, U64_C(0xb7defa91c00b72ac) /* 931 */, + U64_C(0x6b5541fd62492d92) /* 932 */, U64_C(0x6dc6dee8f92e4d5b) /* 933 */, + U64_C(0x353f57abc4beea7e) /* 934 */, U64_C(0x735769d6da5690ce) /* 935 */, + U64_C(0x0a234aa642391484) /* 936 */, U64_C(0xf6f9508028f80d9d) /* 937 */, + U64_C(0xb8e319a27ab3f215) /* 938 */, U64_C(0x31ad9c1151341a4d) /* 939 */, + U64_C(0x773c22a57bef5805) /* 940 */, U64_C(0x45c7561a07968633) /* 941 */, + U64_C(0xf913da9e249dbe36) /* 942 */, U64_C(0xda652d9b78a64c68) /* 943 */, + U64_C(0x4c27a97f3bc334ef) /* 944 */, U64_C(0x76621220e66b17f4) /* 945 */, + U64_C(0x967743899acd7d0b) /* 946 */, U64_C(0xf3ee5bcae0ed6782) /* 947 */, + U64_C(0x409f753600c879fc) /* 948 */, U64_C(0x06d09a39b5926db6) /* 949 */, + U64_C(0x6f83aeb0317ac588) /* 950 */, U64_C(0x01e6ca4a86381f21) /* 951 */, + U64_C(0x66ff3462d19f3025) /* 952 */, U64_C(0x72207c24ddfd3bfb) /* 953 */, + U64_C(0x4af6b6d3e2ece2eb) /* 954 */, U64_C(0x9c994dbec7ea08de) /* 955 */, + U64_C(0x49ace597b09a8bc4) /* 956 */, U64_C(0xb38c4766cf0797ba) /* 957 */, + U64_C(0x131b9373c57c2a75) /* 958 */, U64_C(0xb1822cce61931e58) /* 959 */, + U64_C(0x9d7555b909ba1c0c) /* 960 */, U64_C(0x127fafdd937d11d2) /* 961 */, + U64_C(0x29da3badc66d92e4) /* 962 */, U64_C(0xa2c1d57154c2ecbc) /* 963 */, + U64_C(0x58c5134d82f6fe24) /* 964 */, U64_C(0x1c3ae3515b62274f) /* 965 */, + U64_C(0xe907c82e01cb8126) /* 966 */, U64_C(0xf8ed091913e37fcb) /* 967 */, + U64_C(0x3249d8f9c80046c9) /* 968 */, U64_C(0x80cf9bede388fb63) /* 969 */, + U64_C(0x1881539a116cf19e) /* 970 */, U64_C(0x5103f3f76bd52457) /* 971 */, + U64_C(0x15b7e6f5ae47f7a8) /* 972 */, U64_C(0xdbd7c6ded47e9ccf) /* 973 */, + U64_C(0x44e55c410228bb1a) /* 974 */, U64_C(0xb647d4255edb4e99) /* 975 */, + U64_C(0x5d11882bb8aafc30) /* 976 */, U64_C(0xf5098bbb29d3212a) /* 977 */, + U64_C(0x8fb5ea14e90296b3) /* 978 */, U64_C(0x677b942157dd025a) /* 979 */, + U64_C(0xfb58e7c0a390acb5) /* 980 */, U64_C(0x89d3674c83bd4a01) /* 981 */, + U64_C(0x9e2da4df4bf3b93b) /* 982 */, U64_C(0xfcc41e328cab4829) /* 983 */, + U64_C(0x03f38c96ba582c52) /* 984 */, U64_C(0xcad1bdbd7fd85db2) /* 985 */, + U64_C(0xbbb442c16082ae83) /* 986 */, U64_C(0xb95fe86ba5da9ab0) /* 987 */, + U64_C(0xb22e04673771a93f) /* 988 */, U64_C(0x845358c9493152d8) /* 989 */, + U64_C(0xbe2a488697b4541e) /* 990 */, U64_C(0x95a2dc2dd38e6966) /* 991 */, + U64_C(0xc02c11ac923c852b) /* 992 */, U64_C(0x2388b1990df2a87b) /* 993 */, + U64_C(0x7c8008fa1b4f37be) /* 994 */, 
U64_C(0x1f70d0c84d54e503) /* 995 */,
+  U64_C(0x5490adec7ece57d4) /*  996 */, U64_C(0x002b3c27d9063a3a) /*  997 */,
+  U64_C(0x7eaea3848030a2bf) /*  998 */, U64_C(0xc602326ded2003c0) /*  999 */,
+  U64_C(0x83a7287d69a94086) /* 1000 */, U64_C(0xc57a5fcb30f57a8a) /* 1001 */,
+  U64_C(0xb56844e479ebe779) /* 1002 */, U64_C(0xa373b40f05dcbce9) /* 1003 */,
+  U64_C(0xd71a786e88570ee2) /* 1004 */, U64_C(0x879cbacdbde8f6a0) /* 1005 */,
+  U64_C(0x976ad1bcc164a32f) /* 1006 */, U64_C(0xab21e25e9666d78b) /* 1007 */,
+  U64_C(0x901063aae5e5c33c) /* 1008 */, U64_C(0x9818b34448698d90) /* 1009 */,
+  U64_C(0xe36487ae3e1e8abb) /* 1010 */, U64_C(0xafbdf931893bdcb4) /* 1011 */,
+  U64_C(0x6345a0dc5fbbd519) /* 1012 */, U64_C(0x8628fe269b9465ca) /* 1013 */,
+  U64_C(0x1e5d01603f9c51ec) /* 1014 */, U64_C(0x4de44006a15049b7) /* 1015 */,
+  U64_C(0xbf6c70e5f776cbb1) /* 1016 */, U64_C(0x411218f2ef552bed) /* 1017 */,
+  U64_C(0xcb0c0708705a36a3) /* 1018 */, U64_C(0xe74d14754f986044) /* 1019 */,
+  U64_C(0xcd56d9430ea8280e) /* 1020 */, U64_C(0xc12591d7535f5065) /* 1021 */,
+  U64_C(0xc83223f1720aef96) /* 1022 */, U64_C(0xc3a0396f7363a51f) /* 1023 */
+};
+
+static unsigned int
+transform ( void *ctx, const unsigned char *data, size_t nblks );
+
+static void
+do_init (void *context, int variant)
+{
+  TIGER_CONTEXT *hd = context;
+
+  hd->a = 0x0123456789abcdefLL;
+  hd->b = 0xfedcba9876543210LL;
+  hd->c = 0xf096a5b4c3b2e187LL;
+
+  hd->bctx.nblocks = 0;
+  hd->bctx.nblocks_high = 0;
+  hd->bctx.count = 0;
+  hd->bctx.blocksize = 64;
+  hd->bctx.bwrite = transform;
+  hd->variant = variant;
+}
+
+static void
+tiger_init (void *context, unsigned int flags)
+{
+  (void)flags;
+
+  do_init (context, 0);
+}
+
+static void
+tiger1_init (void *context, unsigned int flags)
+{
+  (void)flags;
+
+  do_init (context, 1);
+}
+
+static void
+tiger2_init (void *context, unsigned int flags)
+{
+  (void)flags;
+
+  do_init (context, 2);
+}
+
+
+#define tiger_round(xa, xb, xc, xx, xmul) { \
+  xc ^= xx; \
+  xa -= ( sbox1[  (xc)        & 0xff ] ^ sbox2[ ((xc) >> 16) & 0xff ] \
+        ^ sbox3[ ((xc) >> 32) & 0xff ] ^ sbox4[ ((xc) >> 48) & 0xff ]); \
+  xb += ( sbox4[ ((xc) >>  8) & 0xff ] ^ sbox3[ ((xc) >> 24) & 0xff ] \
+        ^ sbox2[ ((xc) >> 40) & 0xff ] ^ sbox1[ ((xc) >> 56) & 0xff ]); \
+  xb *= xmul; }
+
+
+#define pass(ya, yb, yc, yx, ymul) { \
+  tiger_round( ya, yb, yc, yx[0], ymul ); \
+  tiger_round( yb, yc, ya, yx[1], ymul ); \
+  tiger_round( yc, ya, yb, yx[2], ymul ); \
+  tiger_round( ya, yb, yc, yx[3], ymul ); \
+  tiger_round( yb, yc, ya, yx[4], ymul ); \
+  tiger_round( yc, ya, yb, yx[5], ymul ); \
+  tiger_round( ya, yb, yc, yx[6], ymul ); \
+  tiger_round( yb, yc, ya, yx[7], ymul ); }
+
+
+#define key_schedule(x) { \
+  x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; \
+  x[1] ^= x[0]; \
+  x[2] += x[1]; \
+  x[3] -= x[2] ^ ((~x[1]) << 19 ); \
+  x[4] ^= x[3]; \
+  x[5] += x[4]; \
+  x[6] -= x[5] ^ ((~x[4]) >> 23 ); \
+  x[7] ^= x[6]; \
+  x[0] += x[7]; \
+  x[1] -= x[0] ^ ((~x[7]) << 19 ); \
+  x[2] ^= x[1]; \
+  x[3] += x[2]; \
+  x[4] -= x[3] ^ ((~x[2]) >> 23 ); \
+  x[5] ^= x[4]; \
+  x[6] += x[5]; \
+  x[7] -= x[6] ^ 0x0123456789abcdefLL; }
+
+
+/****************
+ * Transform the message DATA which consists of 64 bytes (512 bits,
+ * i.e. eight 64-bit words).
+ */
+static unsigned int
+transform_blk ( void *ctx, const unsigned char *data )
+{
+  TIGER_CONTEXT *hd = ctx;
+  u64 a,b,c,aa,bb,cc;
+  u64 x[8];
+  int i;
+
+  for ( i = 0; i < 8; i++ )
+    x[i] = buf_get_le64(data + i * 8);
+
+  /* save */
+  a = aa = hd->a;
+  b = bb = hd->b;
+  c = cc = hd->c;
+
+  pass( a, b, c, x, 5);
+  key_schedule( x );
+  pass( c, a, b, x, 7);
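+
+  /* Tiger makes three passes over the same eight message words, with
+     round multipliers 5, 7 and 9; key_schedule re-mixes x[] between
+     the passes.  */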
+  key_schedule( x );
+  pass( b, c, a, x, 9);
+
+  /* feedforward */
+  a ^= aa;
+  b -= bb;
+  c += cc;
+  /* store */
+  hd->a = a;
+  hd->b = b;
+  hd->c = c;
+
+  return /*burn_stack*/ 21*8+11*sizeof(void*);
+}
+
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+  unsigned int burn;
+
+  do
+    {
+      burn = transform_blk (c, data);
+      data += 64;
+    }
+  while (--nblks);
+
+  return burn;
+}
+
+
+
+/* The routine terminates the computation.  */
+static void
+tiger_final( void *context )
+{
+  TIGER_CONTEXT *hd = context;
+  u32 t, th, msb, lsb;
+  byte *p;
+  unsigned int burn;
+  byte pad = hd->variant == 2? 0x80 : 0x01;
+
+  _gcry_md_block_write(hd, NULL, 0); /* flush */
+
+  t = hd->bctx.nblocks;
+  if (sizeof t == sizeof hd->bctx.nblocks)
+    th = hd->bctx.nblocks_high;
+  else
+    th = hd->bctx.nblocks >> 32;
+
+  /* multiply by 64 to make a byte count */
+  lsb = t << 6;
+  msb = (th << 6) | (t >> 26);
+  /* add the count */
+  t = lsb;
+  if( (lsb += hd->bctx.count) < t )
+    msb++;
+  /* multiply by 8 to make a bit count */
+  t = lsb;
+  lsb <<= 3;
+  msb <<= 3;
+  msb |= t >> 29;
+
+  if( hd->bctx.count < 56 )  /* enough room */
+    {
+      hd->bctx.buf[hd->bctx.count++] = pad;
+      while( hd->bctx.count < 56 )
+        hd->bctx.buf[hd->bctx.count++] = 0;  /* pad */
+    }
+  else  /* need one extra block */
+    {
+      hd->bctx.buf[hd->bctx.count++] = pad; /* pad character */
+      while( hd->bctx.count < 64 )
+        hd->bctx.buf[hd->bctx.count++] = 0;
+      _gcry_md_block_write(hd, NULL, 0); /* flush */
+      memset(hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */
+    }
+  /* append the 64 bit count */
+  buf_put_le32(hd->bctx.buf + 56, lsb);
+  buf_put_le32(hd->bctx.buf + 60, msb);
+  burn = transform( hd, hd->bctx.buf, 1 );
+  _gcry_burn_stack (burn);
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_be64(p, hd->a); p += 8; } while(0)
+#define Y(a) do { buf_put_le64(p, hd->a); p += 8; } while(0)
+  if (hd->variant == 0)
+    {
+      X(a);
+      X(b);
+      X(c);
+    }
+  else
+    {
+      Y(a);
+      Y(b);
+      Y(c);
+    }
+#undef X
+#undef Y
+}
+
+static byte *
+tiger_read( void *context )
+{
+  TIGER_CONTEXT *hd = context;
+
+  return hd->bctx.buf;
+}
+
+
+
+/* This is the old TIGER variant based on the unfixed reference
+   implementation.  It was used in GnuPG up to 1.3.2.  We don't provide
+   an OID anymore because that would not be correct.  */
+gcry_md_spec_t _gcry_digest_spec_tiger =
+  {
+    GCRY_MD_TIGER, {0, 0},
+    "TIGER192", NULL, 0, NULL, 24,
+    tiger_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
+    sizeof (TIGER_CONTEXT)
+  };
+
+
+
+/* This is the fixed TIGER implementation.  */
+static byte asn1[19] = /* Object ID is 1.3.6.1.4.1.11591.12.2 */
+  { 0x30, 0x29, 0x30, 0x0d, 0x06, 0x09, 0x2b, 0x06,
+    0x01, 0x04, 0x01, 0xda, 0x47, 0x0c, 0x02,
+    0x05, 0x00, 0x04, 0x18 };
+
+static gcry_md_oid_spec_t oid_spec_tiger1[] =
+  {
+    /* GNU.digestAlgorithm TIGER */
+    { "1.3.6.1.4.1.11591.12.2" },
+    { NULL }
+  };
+
+gcry_md_spec_t _gcry_digest_spec_tiger1 =
+  {
+    GCRY_MD_TIGER1, {0, 0},
+    "TIGER", asn1, DIM (asn1), oid_spec_tiger1, 24,
+    tiger1_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
+    sizeof (TIGER_CONTEXT)
+  };
+
+
+
+/* This is TIGER2 which uses a changed padding algorithm.
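+   Instead of classic TIGER's 0x01 first padding byte it pads with 0x80,
+   the convention also used by the MD4/SHA family; cf. the PAD byte
+   selection at the top of tiger_final.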
*/ +gcry_md_spec_t _gcry_digest_spec_tiger2 = + { + GCRY_MD_TIGER2, {0, 0}, + "TIGER2", NULL, 0, NULL, 24, + tiger2_init, _gcry_md_block_write, tiger_final, tiger_read, NULL, + sizeof (TIGER_CONTEXT) + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/twofish-aarch64.S b/libotr/libgcrypt-1.8.7/cipher/twofish-aarch64.S new file mode 100644 index 0000000..99c4675 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/twofish-aarch64.S @@ -0,0 +1,317 @@ +/* twofish-aarch64.S - ARMv8/AArch64 assembly implementation of Twofish cipher + * + * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__AARCH64EL__) +#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS + +.text + +/* structure of TWOFISH_context: */ +#define s0 0 +#define s1 ((s0) + 4 * 256) +#define s2 ((s1) + 4 * 256) +#define s3 ((s2) + 4 * 256) +#define w ((s3) + 4 * 256) +#define k ((w) + 4 * 8) + +/* register macros */ +#define CTX x0 +#define RDST x1 +#define RSRC x2 +#define CTXs0 CTX +#define CTXs1 x3 +#define CTXs2 x4 +#define CTXs3 x5 +#define CTXw x17 + +#define RA w6 +#define RB w7 +#define RC w8 +#define RD w9 + +#define RX w10 +#define RY w11 + +#define xRX x10 +#define xRY x11 + +#define RMASK w12 + +#define RT0 w13 +#define RT1 w14 +#define RT2 w15 +#define RT3 w16 + +#define xRT0 x13 +#define xRT1 x14 +#define xRT2 x15 +#define xRT3 x16 + +/* helper macros */ +#ifndef __AARCH64EL__ + /* bswap on big-endian */ + #define host_to_le(reg) \ + rev reg, reg; + #define le_to_host(reg) \ + rev reg, reg; +#else + /* nop on little-endian */ + #define host_to_le(reg) /*_*/ + #define le_to_host(reg) /*_*/ +#endif + +#define ldr_input_aligned_le(rin, a, b, c, d) \ + ldr a, [rin, #0]; \ + ldr b, [rin, #4]; \ + le_to_host(a); \ + ldr c, [rin, #8]; \ + le_to_host(b); \ + ldr d, [rin, #12]; \ + le_to_host(c); \ + le_to_host(d); + +#define str_output_aligned_le(rout, a, b, c, d) \ + le_to_host(a); \ + le_to_host(b); \ + str a, [rout, #0]; \ + le_to_host(c); \ + str b, [rout, #4]; \ + le_to_host(d); \ + str c, [rout, #8]; \ + str d, [rout, #12]; + +/* unaligned word reads/writes allowed */ +#define ldr_input_le(rin, ra, rb, rc, rd, rtmp) \ + ldr_input_aligned_le(rin, ra, rb, rc, rd) + +#define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ + str_output_aligned_le(rout, ra, rb, rc, rd) + +/********************************************************************** + 1-way twofish + **********************************************************************/ +#define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \ + and RT0, RMASK, b, lsr#(8 - 2); \ + and RY, RMASK, b, lsr#(16 - 2); \ + and RT1, RMASK, b, lsr#(24 - 2); \ + ldr RY, [CTXs3, xRY]; \ + and RT2, RMASK, b, lsl#(2); \ + ldr RT0, [CTXs2, xRT0]; \ + and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); \ + ldr RT1, [CTXs0, xRT1]; \ + and RX, RMASK, a, lsr#(8 - 
2 + (adj_a)); \ + ldr RT2, [CTXs1, xRT2]; \ + ldr RX, [CTXs1, xRX]; \ + ror_a(a); \ + \ + eor RY, RY, RT0; \ + ldr RT3, [CTXs2, xRT3]; \ + and RT0, RMASK, a, lsl#(2); \ + eor RY, RY, RT1; \ + and RT1, RMASK, a, lsr#(24 - 2); \ + eor RY, RY, RT2; \ + ldr RT0, [CTXs0, xRT0]; \ + eor RX, RX, RT3; \ + ldr RT1, [CTXs3, xRT1]; \ + eor RX, RX, RT0; \ + \ + ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \ + eor RX, RX, RT1; \ + ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \ + \ + add RT0, RX, RY, lsl #1; \ + add RX, RX, RY; \ + add RT0, RT0, RT3; \ + add RX, RX, RT2; \ + eor rd, RT0, rd, ror #31; \ + eor rc, rc, RX; + +#define dummy(x) /*_*/ + +#define ror1(r) \ + ror r, r, #1; + +#define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \ + and RT3, RMASK, b, lsl#(2 - (adj_b)); \ + and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); \ + ror_b(b); \ + and RT2, RMASK, a, lsl#(2); \ + and RT0, RMASK, a, lsr#(8 - 2); \ + \ + ldr RY, [CTXs1, xRT3]; \ + ldr RX, [CTXs0, xRT2]; \ + and RT3, RMASK, b, lsr#(16 - 2); \ + ldr RT1, [CTXs2, xRT1]; \ + and RT2, RMASK, a, lsr#(16 - 2); \ + ldr RT0, [CTXs1, xRT0]; \ + \ + ldr RT3, [CTXs3, xRT3]; \ + eor RY, RY, RT1; \ + \ + and RT1, RMASK, b, lsr#(24 - 2); \ + eor RX, RX, RT0; \ + ldr RT2, [CTXs2, xRT2]; \ + and RT0, RMASK, a, lsr#(24 - 2); \ + \ + ldr RT1, [CTXs0, xRT1]; \ + \ + eor RY, RY, RT3; \ + ldr RT0, [CTXs3, xRT0]; \ + eor RX, RX, RT2; \ + eor RY, RY, RT1; \ + \ + ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \ + eor RX, RX, RT0; \ + ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \ + \ + add RT0, RX, RY, lsl #1; \ + add RX, RX, RY; \ + add RT0, RT0, RT1; \ + add RX, RX, RT2; \ + eor rd, rd, RT0; \ + eor rc, RX, rc, ror #31; + +#define first_encrypt_cycle(nc) \ + encrypt_round(RA, RB, RC, RD, (nc) * 2, dummy, 0); \ + encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); + +#define encrypt_cycle(nc) \ + encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ + encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); + +#define last_encrypt_cycle(nc) \ + encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ + encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ + ror1(RA); + +#define first_decrypt_cycle(nc) \ + decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, dummy, 0); \ + decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); + +#define decrypt_cycle(nc) \ + decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ + decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); + +#define last_decrypt_cycle(nc) \ + decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ + decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ + ror1(RD); + +.globl _gcry_twofish_arm_encrypt_block +.type _gcry_twofish_arm_encrypt_block,%function; + +_gcry_twofish_arm_encrypt_block: + /* input: + * x0: ctx + * x1: dst + * x2: src + */ + + add CTXw, CTX, #(w); + + ldr_input_le(RSRC, RA, RB, RC, RD, RT0); + + /* Input whitening */ + ldp RT0, RT1, [CTXw, #(0*8)]; + ldp RT2, RT3, [CTXw, #(1*8)]; + add CTXs3, CTX, #(s3); + add CTXs2, CTX, #(s2); + add CTXs1, CTX, #(s1); + mov RMASK, #(0xff << 2); + eor RA, RA, RT0; + eor RB, RB, RT1; + eor RC, RC, RT2; + eor RD, RD, RT3; + + first_encrypt_cycle(0); + encrypt_cycle(1); + encrypt_cycle(2); + encrypt_cycle(3); + encrypt_cycle(4); + encrypt_cycle(5); + encrypt_cycle(6); + last_encrypt_cycle(7); + + /* Output whitening */ + ldp RT0, RT1, [CTXw, #(2*8)]; + ldp RT2, RT3, [CTXw, #(3*8)]; + eor RC, RC, RT0; + eor RD, RD, RT1; + eor RA, RA, RT2; + eor RB, RB, RT3; + + str_output_le(RDST, RC, RD, RA, RB, RT0, RT1); + + ret; +.ltorg +.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; + 
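Both Twofish assembly files in this diff address the key schedule through the hard-coded byte offsets s0 … k defined near the top of the file instead of through the C structure, so any change to TWOFISH_context in twofish.c has to be mirrored here by hand. A minimal compile-time cross-check of the assumed layout, with illustrative field names (the real declaration lives in twofish.c, which is not shown here):

#include <stddef.h>
#include <stdint.h>

/* Sketch of the context layout the s0/s1/s2/s3/w/k offsets assume:
   four key-dependent 8x32-bit S-boxes, eight whitening subkeys and
   32 round subkeys, in that order.  */
typedef struct
{
  uint32_t s[4][256];   /* key-dependent S-boxes (s0..s3)  */
  uint32_t w[8];        /* input/output whitening subkeys  */
  uint32_t k[32];       /* round subkeys                   */
} twofish_ctx_sketch;

/* The offsets used by the assembly: s1 = s0 + 4*256, s2 = s1 + 4*256,
   s3 = s2 + 4*256, w = s3 + 4*256 and k = w + 4*8 bytes.  */
_Static_assert (sizeof (((twofish_ctx_sketch *)0)->s) == 4 * 4 * 256,
                "S-box block is 4 KiB, as the offsets assume");
_Static_assert (offsetof (twofish_ctx_sketch, w) == 4 * 4 * 256,
                "w == s3 + 4*256");
_Static_assert (offsetof (twofish_ctx_sketch, k) == 4 * 4 * 256 + 4 * 8,
                "k == w + 4*8");

Hard-coding the offsets keeps the assembly self-contained (it cannot include the C headers), at the cost that the layout contract is enforced only by convention.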
+.globl _gcry_twofish_arm_decrypt_block
+.type _gcry_twofish_arm_decrypt_block,%function;
+
+_gcry_twofish_arm_decrypt_block:
+        /* input:
+         *      x0: ctx
+         *      x1: dst
+         *      x2: src
+         */
+
+        add CTXw, CTX, #(w);
+
+        ldr_input_le(RSRC, RC, RD, RA, RB, RT0);
+
+        /* Input whitening */
+        ldp RT0, RT1, [CTXw, #(2*8)];
+        ldp RT2, RT3, [CTXw, #(3*8)];
+        add CTXs3, CTX, #(s3);
+        add CTXs2, CTX, #(s2);
+        add CTXs1, CTX, #(s1);
+        mov RMASK, #(0xff << 2);
+        eor RC, RC, RT0;
+        eor RD, RD, RT1;
+        eor RA, RA, RT2;
+        eor RB, RB, RT3;
+
+        first_decrypt_cycle(7);
+        decrypt_cycle(6);
+        decrypt_cycle(5);
+        decrypt_cycle(4);
+        decrypt_cycle(3);
+        decrypt_cycle(2);
+        decrypt_cycle(1);
+        last_decrypt_cycle(0);
+
+        /* Output whitening */
+        ldp RT0, RT1, [CTXw, #(0*8)];
+        ldp RT2, RT3, [CTXw, #(1*8)];
+        eor RA, RA, RT0;
+        eor RB, RB, RT1;
+        eor RC, RC, RT2;
+        eor RD, RD, RT3;
+
+        str_output_le(RDST, RA, RB, RC, RD, RT0, RT1);
+
+        ret;
+.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;
+
+#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/
+#endif /*__AARCH64EL__*/
diff --git a/libotr/libgcrypt-1.8.7/cipher/twofish-amd64.S b/libotr/libgcrypt-1.8.7/cipher/twofish-amd64.S
new file mode 100644
index 0000000..aa964e0
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/twofish-amd64.S
@@ -0,0 +1,1046 @@
+/* twofish-amd64.S  -  AMD64 assembly implementation of Twofish cipher
+ *
+ * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...)
/*_*/ +#endif + +#ifdef __PIC__ +# define RIP %rip +#else +# define RIP +#endif + +.text + +/* structure of TWOFISH_context: */ +#define s0 0 +#define s1 ((s0) + 4 * 256) +#define s2 ((s1) + 4 * 256) +#define s3 ((s2) + 4 * 256) +#define w ((s3) + 4 * 256) +#define k ((w) + 4 * 8) + +/* register macros */ +#define CTX %rdi + +#define RA %rax +#define RB %rbx +#define RC %rcx +#define RD %rdx + +#define RAd %eax +#define RBd %ebx +#define RCd %ecx +#define RDd %edx + +#define RAbl %al +#define RBbl %bl +#define RCbl %cl +#define RDbl %dl + +#define RAbh %ah +#define RBbh %bh +#define RCbh %ch +#define RDbh %dh + +#define RX %r8 +#define RY %r9 + +#define RXd %r8d +#define RYd %r9d + +#define RT0 %rsi +#define RT1 %rbp +#define RT2 %r10 +#define RT3 %r11 + +#define RT0d %esi +#define RT1d %ebp +#define RT2d %r10d +#define RT3d %r11d + +/*********************************************************************** + * AMD64 assembly implementation of the Twofish cipher + ***********************************************************************/ +#define enc_g1_2(a, b, x, y) \ + movzbl b ## bl, RT3d; \ + movzbl b ## bh, RT1d; \ + movzbl a ## bl, RT2d; \ + movzbl a ## bh, RT0d; \ + rorl $16, b ## d; \ + rorl $16, a ## d; \ + movl s1(CTX, RT3, 4), RYd; \ + movzbl b ## bl, RT3d; \ + movl s0(CTX, RT2, 4), RXd; \ + movzbl a ## bl, RT2d; \ + xorl s2(CTX, RT1, 4), RYd; \ + movzbl b ## bh, RT1d; \ + xorl s1(CTX, RT0, 4), RXd; \ + movzbl a ## bh, RT0d; \ + rorl $16, b ## d; \ + rorl $16, a ## d; \ + xorl s3(CTX, RT3, 4), RYd; \ + xorl s2(CTX, RT2, 4), RXd; \ + xorl s0(CTX, RT1, 4), RYd; \ + xorl s3(CTX, RT0, 4), RXd; + +#define dec_g1_2(a, b, x, y) \ + movzbl a ## bl, RT2d; \ + movzbl a ## bh, RT0d; \ + movzbl b ## bl, RT3d; \ + movzbl b ## bh, RT1d; \ + rorl $16, a ## d; \ + rorl $16, b ## d; \ + movl s0(CTX, RT2, 4), RXd; \ + movzbl a ## bl, RT2d; \ + movl s1(CTX, RT3, 4), RYd; \ + movzbl b ## bl, RT3d; \ + xorl s1(CTX, RT0, 4), RXd; \ + movzbl a ## bh, RT0d; \ + xorl s2(CTX, RT1, 4), RYd; \ + movzbl b ## bh, RT1d; \ + rorl $16, a ## d; \ + rorl $16, b ## d; \ + xorl s2(CTX, RT2, 4), RXd; \ + xorl s3(CTX, RT3, 4), RYd; \ + xorl s3(CTX, RT0, 4), RXd; \ + xorl s0(CTX, RT1, 4), RYd; + +#define encrypt_round(ra, rb, rc, rd, n) \ + enc_g1_2(##ra, ##rb, RX, RY); \ + \ + leal (RXd, RYd, 2), RT0d; \ + addl RYd, RXd; \ + addl (k + 8 * (n) + 4)(CTX), RT0d; \ + roll $1, rd ## d; \ + addl (k + 8 * (n))(CTX), RXd; \ + xorl RT0d, rd ## d; \ + xorl RXd, rc ## d; \ + rorl $1, rc ## d; + +#define decrypt_round(ra, rb, rc, rd, n) \ + dec_g1_2(##ra, ##rb, RX, RY); \ + \ + leal (RXd, RYd, 2), RT0d; \ + addl RYd, RXd; \ + addl (k + 8 * (n) + 4)(CTX), RT0d; \ + roll $1, rc ## d; \ + addl (k + 8 * (n))(CTX), RXd; \ + xorl RXd, rc ## d; \ + xorl RT0d, rd ## d; \ + rorl $1, rd ## d; + +#define encrypt_cycle(a, b, c, d, nc) \ + encrypt_round(##a, ##b, ##c, ##d, (nc) * 2); \ + encrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1); + +#define decrypt_cycle(a, b, c, d, nc) \ + decrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1); \ + decrypt_round(##a, ##b, ##c, ##d, (nc) * 2); + +#define inpack(in, n, x, m) \ + movl (4 * (n))(in), x; \ + xorl (w + 4 * (m))(CTX), x; + +#define outunpack(out, n, x, m) \ + xorl (w + 4 * (m))(CTX), x; \ + movl x, (4 * (n))(out); + +.align 8 +.globl _gcry_twofish_amd64_encrypt_block +ELF(.type _gcry_twofish_amd64_encrypt_block,@function;) + +_gcry_twofish_amd64_encrypt_block: + /* input: + * %rdi: context, CTX + * %rsi: dst + * %rdx: src + */ + subq $(3 * 8), %rsp; + movq %rsi, (0 * 8)(%rsp); + movq %rbp, (1 * 
8)(%rsp);
+ movq %rbx, (2 * 8)(%rsp);
+
+ movq %rdx, RX;
+ inpack(RX, 0, RAd, 0);
+ inpack(RX, 1, RBd, 1);
+ inpack(RX, 2, RCd, 2);
+ inpack(RX, 3, RDd, 3);
+
+ encrypt_cycle(RA, RB, RC, RD, 0);
+ encrypt_cycle(RA, RB, RC, RD, 1);
+ encrypt_cycle(RA, RB, RC, RD, 2);
+ encrypt_cycle(RA, RB, RC, RD, 3);
+ encrypt_cycle(RA, RB, RC, RD, 4);
+ encrypt_cycle(RA, RB, RC, RD, 5);
+ encrypt_cycle(RA, RB, RC, RD, 6);
+ encrypt_cycle(RA, RB, RC, RD, 7);
+
+ movq (0 * 8)(%rsp), RX; /*dst*/
+ outunpack(RX, 0, RCd, 4);
+ outunpack(RX, 1, RDd, 5);
+ outunpack(RX, 2, RAd, 6);
+ outunpack(RX, 3, RBd, 7);
+
+ movq (2 * 8)(%rsp), %rbx;
+ movq (1 * 8)(%rsp), %rbp;
+ addq $(3 * 8), %rsp;
+
+ ret;
+ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
+
+.align 8
+.globl _gcry_twofish_amd64_decrypt_block
+ELF(.type _gcry_twofish_amd64_decrypt_block,@function;)
+
+_gcry_twofish_amd64_decrypt_block:
+ /* input:
+ * %rdi: context, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ subq $(3 * 8), %rsp;
+ movq %rsi, (0 * 8)(%rsp);
+ movq %rbp, (1 * 8)(%rsp);
+ movq %rbx, (2 * 8)(%rsp);
+
+ movq %rdx, RX;
+ inpack(RX, 0, RCd, 4);
+ inpack(RX, 1, RDd, 5);
+ inpack(RX, 2, RAd, 6);
+ inpack(RX, 3, RBd, 7);
+
+ decrypt_cycle(RA, RB, RC, RD, 7);
+ decrypt_cycle(RA, RB, RC, RD, 6);
+ decrypt_cycle(RA, RB, RC, RD, 5);
+ decrypt_cycle(RA, RB, RC, RD, 4);
+ decrypt_cycle(RA, RB, RC, RD, 3);
+ decrypt_cycle(RA, RB, RC, RD, 2);
+ decrypt_cycle(RA, RB, RC, RD, 1);
+ decrypt_cycle(RA, RB, RC, RD, 0);
+
+ movq (0 * 8)(%rsp), RX; /*dst*/
+ outunpack(RX, 0, RAd, 0);
+ outunpack(RX, 1, RBd, 1);
+ outunpack(RX, 2, RCd, 2);
+ outunpack(RX, 3, RDd, 3);
+
+ movq (2 * 8)(%rsp), %rbx;
+ movq (1 * 8)(%rsp), %rbp;
+ addq $(3 * 8), %rsp;
+
+ ret;
+ELF(.size _gcry_twofish_amd64_decrypt_block,.-_gcry_twofish_amd64_decrypt_block;)
+
+#undef CTX
+
+#undef RA
+#undef RB
+#undef RC
+#undef RD
+
+#undef RAd
+#undef RBd
+#undef RCd
+#undef RDd
+
+#undef RAbl
+#undef RBbl
+#undef RCbl
+#undef RDbl
+
+#undef RAbh
+#undef RBbh
+#undef RCbh
+#undef RDbh
+
+#undef RX
+#undef RY
+
+#undef RXd
+#undef RYd
+
+#undef RT0
+#undef RT1
+#undef RT2
+#undef RT3
+
+#undef RT0d
+#undef RT1d
+#undef RT2d
+#undef RT3d
+
+/***********************************************************************
+ * AMD64 assembly implementation of the Twofish cipher, 3-way parallel
+ ***********************************************************************/
+#define CTX %rdi
+#define RIO %rdx
+
+#define RAB0 %rax
+#define RAB1 %rbx
+#define RAB2 %rcx
+
+#define RAB0d %eax
+#define RAB1d %ebx
+#define RAB2d %ecx
+
+#define RAB0bh %ah
+#define RAB1bh %bh
+#define RAB2bh %ch
+
+#define RAB0bl %al
+#define RAB1bl %bl
+#define RAB2bl %cl
+
+#define RCD0 %r8
+#define RCD1 %r9
+#define RCD2 %r10
+
+#define RCD0d %r8d
+#define RCD1d %r9d
+#define RCD2d %r10d
+
+#define RX0 %rbp
+#define RX1 %r11
+#define RX2 %r12
+
+#define RX0d %ebp
+#define RX1d %r11d
+#define RX2d %r12d
+
+#define RY0 %r13
+#define RY1 %r14
+#define RY2 %r15
+
+#define RY0d %r13d
+#define RY1d %r14d
+#define RY2d %r15d
+
+#define RT0 %rdx
+#define RT1 %rsi
+
+#define RT0d %edx
+#define RT1d %esi
+
+#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
+ movzbl ab ## bl, tmp2 ## d; \
+ movzbl ab ## bh, tmp1 ## d; \
+ rorq $(rot), ab; \
+ op1##l T0(CTX, tmp2, 4), dst ## d; \
+ op2##l T1(CTX, tmp1, 4), dst ## d;
+
+/*
+ * Combined G1 & G2 function. Reordered with help of rotates to have moves
+ * at beginning.
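+ *
+ * Each 64-bit register ab ## N carries two 32-bit words: a in the low
+ * half, b in the high half. Each do16bit_ror does two s-box lookups and
+ * rotates ab so the next byte pair lands in the low bits; after four
+ * invocations ab is back in place and, per block (aN/bN = byte N of a/b,
+ * least significant first):
+ *
+ *   x = Tx0[a0] ^ Tx1[a1] ^ Tx2[a2] ^ Tx3[a3];	(G1)
+ *   y = Ty1[b0] ^ Ty2[b1] ^ Ty3[b2] ^ Ty0[b3];	(G2)
+ *
+ * (C-like sketch only; the encrypt path passes Tx = Ty = (s0,s1,s2,s3),
+ * giving the usual Twofish g functions over the key-dependent s-boxes.)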
+ */ +#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \ + /* G1,1 && G2,1 */ \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \ + \ + /* G1,2 && G2,2 */ \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ + xchgq cd ## 0, ab ## 0; \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ + xchgq cd ## 1, ab ## 1; \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ + xchgq cd ## 2, ab ## 2; + +#define enc_round_end(ab, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + xorl ab ## d, x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + shrq $32, ab; \ + roll $1, ab ## d; \ + xorl y ## d, ab ## d; \ + shlq $32, ab; \ + rorl $1, x ## d; \ + orq x, ab; + +#define dec_round_end(ba, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + xorl ba ## d, y ## d; \ + shrq $32, ba; \ + roll $1, ba ## d; \ + xorl x ## d, ba ## d; \ + shlq $32, ba; \ + rorl $1, y ## d; \ + orq y, ba; + +#define encrypt_round3(ab, cd, n) \ + g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \ + \ + enc_round_end(ab ## 0, RX0, RY0, n); \ + enc_round_end(ab ## 1, RX1, RY1, n); \ + enc_round_end(ab ## 2, RX2, RY2, n); + +#define decrypt_round3(ba, dc, n) \ + g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \ + \ + dec_round_end(ba ## 0, RX0, RY0, n); \ + dec_round_end(ba ## 1, RX1, RY1, n); \ + dec_round_end(ba ## 2, RX2, RY2, n); + +#define encrypt_cycle3(ab, cd, n) \ + encrypt_round3(ab, cd, n*2); \ + encrypt_round3(ab, cd, (n*2)+1); + +#define decrypt_cycle3(ba, dc, n) \ + decrypt_round3(ba, dc, (n*2)+1); \ + decrypt_round3(ba, dc, (n*2)); + +#define inpack3(xy, m) \ + xorq w+4*m(CTX), xy ## 0; \ + xorq w+4*m(CTX), xy ## 1; \ + xorq w+4*m(CTX), xy ## 2; + +#define outunpack3(xy, m) \ + xorq w+4*m(CTX), xy ## 0; \ + xorq w+4*m(CTX), xy ## 1; \ + xorq w+4*m(CTX), xy ## 2; + +#define inpack_enc3() \ + inpack3(RAB, 0); \ + inpack3(RCD, 2); + +#define outunpack_enc3() \ + outunpack3(RAB, 6); \ + outunpack3(RCD, 4); + +#define inpack_dec3() \ + inpack3(RAB, 4); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + inpack3(RCD, 6); \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; + +#define outunpack_dec3() \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; \ + outunpack3(RCD, 0); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + outunpack3(RAB, 2); + +.align 8 +ELF(.type __twofish_enc_blk3,@function;) + +__twofish_enc_blk3: + /* input: + * %rdi: ctx, CTX + * RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three plaintext blocks + * output: + * RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three ciphertext blocks + */ + inpack_enc3(); + + encrypt_cycle3(RAB, RCD, 0); + encrypt_cycle3(RAB, RCD, 1); + encrypt_cycle3(RAB, RCD, 2); + encrypt_cycle3(RAB, RCD, 3); + encrypt_cycle3(RAB, RCD, 4); + encrypt_cycle3(RAB, RCD, 5); + 
encrypt_cycle3(RAB, RCD, 6); + encrypt_cycle3(RAB, RCD, 7); + + outunpack_enc3(); + + ret; +ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;) + +.align 8 +ELF(.type __twofish_dec_blk3,@function;) + +__twofish_dec_blk3: + /* input: + * %rdi: ctx, CTX + * RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three ciphertext blocks + * output: + * RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three plaintext blocks + */ + inpack_dec3(); + + decrypt_cycle3(RAB, RCD, 7); + decrypt_cycle3(RAB, RCD, 6); + decrypt_cycle3(RAB, RCD, 5); + decrypt_cycle3(RAB, RCD, 4); + decrypt_cycle3(RAB, RCD, 3); + decrypt_cycle3(RAB, RCD, 2); + decrypt_cycle3(RAB, RCD, 1); + decrypt_cycle3(RAB, RCD, 0); + + outunpack_dec3(); + + ret; +ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;) + +.align 8 +.globl _gcry_twofish_amd64_ctr_enc +ELF(.type _gcry_twofish_amd64_ctr_enc,@function;) +_gcry_twofish_amd64_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: iv (big endian, 128bit) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %rdx, (7 * 8)(%rsp); + movq %rcx, RX0; + + /* load IV and byteswap */ + movq 8(RX0), RT0; + movq 0(RX0), RT1; + movq RT0, RCD0; + movq RT1, RAB0; + bswapq RT0; + bswapq RT1; + + /* construct IVs */ + movq RT0, RCD1; + movq RT1, RAB1; + movq RT0, RCD2; + movq RT1, RAB2; + addq $1, RCD1; + adcq $0, RAB1; + bswapq RCD1; + bswapq RAB1; + addq $2, RCD2; + adcq $0, RAB2; + bswapq RCD2; + bswapq RAB2; + addq $3, RT0; + adcq $0, RT1; + bswapq RT0; + bswapq RT1; + + /* store new IV */ + movq RT0, 8(RX0); + movq RT1, 0(RX0); + + call __twofish_enc_blk3; + + movq (7 * 8)(%rsp), RX0; /*src*/ + movq (6 * 8)(%rsp), RX1; /*dst*/ + + /* XOR key-stream with plaintext */ + xorq (0 * 8)(RX0), RCD0; + xorq (1 * 8)(RX0), RAB0; + xorq (2 * 8)(RX0), RCD1; + xorq (3 * 8)(RX0), RAB1; + xorq (4 * 8)(RX0), RCD2; + xorq (5 * 8)(RX0), RAB2; + movq RCD0, (0 * 8)(RX1); + movq RAB0, (1 * 8)(RX1); + movq RCD1, (2 * 8)(RX1); + movq RAB1, (3 * 8)(RX1); + movq RCD2, (4 * 8)(RX1); + movq RAB2, (5 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;) + +.align 8 +.globl _gcry_twofish_amd64_cbc_dec +ELF(.type _gcry_twofish_amd64_cbc_dec,@function;) +_gcry_twofish_amd64_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: iv (128bit) + */ + subq $(9 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %rdx, (7 * 8)(%rsp); + movq %rcx, (8 * 8)(%rsp); + movq %rdx, RX0; + + /* load input */ + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + + call __twofish_dec_blk3; + + movq (8 * 8)(%rsp), RT0; /*iv*/ + movq (7 * 8)(%rsp), RX0; /*src*/ + movq (6 * 8)(%rsp), RX1; /*dst*/ + + movq (4 * 8)(RX0), RY0; + movq (5 * 8)(RX0), RY1; + xorq (0 * 8)(RT0), RCD0; + xorq (1 * 8)(RT0), RAB0; + xorq (0 * 8)(RX0), RCD1; + xorq (1 * 8)(RX0), RAB1; + xorq (2 * 8)(RX0), RCD2; + xorq (3 * 8)(RX0), RAB2; + movq RY0, (0 
* 8)(RT0); + movq RY1, (1 * 8)(RT0); + + movq RCD0, (0 * 8)(RX1); + movq RAB0, (1 * 8)(RX1); + movq RCD1, (2 * 8)(RX1); + movq RAB1, (3 * 8)(RX1); + movq RCD2, (4 * 8)(RX1); + movq RAB2, (5 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(9 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;) + +.align 8 +.globl _gcry_twofish_amd64_cfb_dec +ELF(.type _gcry_twofish_amd64_cfb_dec,@function;) +_gcry_twofish_amd64_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: iv (128bit) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %rdx, (7 * 8)(%rsp); + movq %rdx, RX0; + movq %rcx, RX1; + + /* load input */ + movq (0 * 8)(RX1), RAB0; + movq (1 * 8)(RX1), RCD0; + movq (0 * 8)(RX0), RAB1; + movq (1 * 8)(RX0), RCD1; + movq (2 * 8)(RX0), RAB2; + movq (3 * 8)(RX0), RCD2; + + /* Update IV */ + movq (4 * 8)(RX0), RY0; + movq (5 * 8)(RX0), RY1; + movq RY0, (0 * 8)(RX1); + movq RY1, (1 * 8)(RX1); + + call __twofish_enc_blk3; + + movq (7 * 8)(%rsp), RX0; /*src*/ + movq (6 * 8)(%rsp), RX1; /*dst*/ + + xorq (0 * 8)(RX0), RCD0; + xorq (1 * 8)(RX0), RAB0; + xorq (2 * 8)(RX0), RCD1; + xorq (3 * 8)(RX0), RAB1; + xorq (4 * 8)(RX0), RCD2; + xorq (5 * 8)(RX0), RAB2; + movq RCD0, (0 * 8)(RX1); + movq RAB0, (1 * 8)(RX1); + movq RCD1, (2 * 8)(RX1); + movq RAB1, (3 * 8)(RX1); + movq RCD2, (4 * 8)(RX1); + movq RAB2, (5 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;) + +.align 8 +.globl _gcry_twofish_amd64_ocb_enc +ELF(.type _gcry_twofish_amd64_ocb_enc,@function;) +_gcry_twofish_amd64_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %rdx, RX0; + movq %rcx, RX1; + movq %r8, RX2; + movq %r9, RY0; + movq %rsi, RY1; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* Store Offset_i */ + movq RT0, (0 * 8)(RY1); + movq RT1, (1 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB0, (0 * 8)(RX2); + xor RCD0, (1 * 8)(RX2); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* Store Offset_i */ + movq RT0, (2 * 8)(RY1); + movq RT1, (3 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB1, (0 * 8)(RX2); + xor RCD1, (1 * 8)(RX2); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 
16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* Store Offset_i */ + movq RT0, (4 * 8)(RY1); + movq RT1, (5 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB2, (0 * 8)(RX2); + xor RCD2, (1 * 8)(RX2); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* CX_i = ENCIPHER(K, PX_i) */ + call __twofish_enc_blk3; + + movq (6 * 8)(%rsp), RX1; /*dst*/ + + /* C_i = CX_i xor Offset_i */ + xorq RCD0, (0 * 8)(RX1); + xorq RAB0, (1 * 8)(RX1); + xorq RCD1, (2 * 8)(RX1); + xorq RAB1, (3 * 8)(RX1); + xorq RCD2, (4 * 8)(RX1); + xorq RAB2, (5 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;) + +.align 8 +.globl _gcry_twofish_amd64_ocb_dec +ELF(.type _gcry_twofish_amd64_ocb_dec,@function;) +_gcry_twofish_amd64_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %r8, (7 * 8)(%rsp); + movq %rdx, RX0; + movq %rcx, RX1; + movq %r9, RY0; + movq %rsi, RY1; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* Store Offset_i */ + movq RT0, (0 * 8)(RY1); + movq RT1, (1 * 8)(RY1); + /* CX_i = C_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* Store Offset_i */ + movq RT0, (2 * 8)(RY1); + movq RT1, (3 * 8)(RY1); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* Store Offset_i */ + movq RT0, (4 * 8)(RY1); + movq RT1, (5 * 8)(RY1); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* PX_i = DECIPHER(K, CX_i) */ + call __twofish_dec_blk3; + + movq (7 * 8)(%rsp), RX2; /*checksum*/ + movq (6 * 8)(%rsp), RX1; /*dst*/ + + /* Load checksum */ + movq (0 * 8)(RX2), RT0; + movq (1 * 8)(RX2), RT1; + + /* P_i = PX_i xor Offset_i */ + xorq RCD0, (0 * 8)(RX1); + xorq RAB0, (1 * 8)(RX1); + xorq RCD1, (2 * 8)(RX1); + xorq RAB1, (3 * 8)(RX1); + xorq RCD2, (4 * 8)(RX1); + xorq RAB2, (5 * 8)(RX1); + + /* Checksum_i = Checksum_{i-1} xor P_i */ + xorq (0 * 8)(RX1), RT0; + xorq (1 * 8)(RX1), RT1; + xorq (2 * 8)(RX1), RT0; + xorq (3 * 8)(RX1), RT1; + xorq (4 * 8)(RX1), RT0; + xorq (5 * 8)(RX1), RT1; + + /* Store checksum */ + movq RT0, (0 * 8)(RX2); + movq RT1, (1 * 8)(RX2); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 
8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;) + +.align 8 +.globl _gcry_twofish_amd64_ocb_auth +ELF(.type _gcry_twofish_amd64_ocb_auth,@function;) +_gcry_twofish_amd64_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (3 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rcx, (6 * 8)(%rsp); + movq %rsi, RX0; + movq %rdx, RX1; + movq %r8, RY0; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* C_i = ENCIPHER(K, PX_i) */ + call __twofish_enc_blk3; + + movq (6 * 8)(%rsp), RX1; /*checksum*/ + + /* Checksum_i = C_i xor Checksum_i */ + xorq RCD0, RCD1; + xorq RAB0, RAB1; + xorq RCD1, RCD2; + xorq RAB1, RAB2; + xorq RCD2, (0 * 8)(RX1); + xorq RAB2, (1 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;) + +#endif /*USE_TWOFISH*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/twofish-arm.S b/libotr/libgcrypt-1.8.7/cipher/twofish-arm.S new file mode 100644 index 0000000..2e1da6c --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/twofish-arm.S @@ -0,0 +1,363 @@ +/* twofish-arm.S - ARM assembly implementation of Twofish cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#if defined(__ARMEL__) +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +.text + +.syntax unified +.arm + +/* structure of TWOFISH_context: */ +#define s0 0 +#define s1 ((s0) + 4 * 256) +#define s2 ((s1) + 4 * 256) +#define s3 ((s2) + 4 * 256) +#define w ((s3) + 4 * 256) +#define k ((w) + 4 * 8) + +/* register macros */ +#define CTX %r0 +#define CTXs0 %r0 +#define CTXs1 %r1 +#define CTXs3 %r7 + +#define RA %r3 +#define RB %r4 +#define RC %r5 +#define RD %r6 + +#define RX %r2 +#define RY %ip + +#define RMASK %lr + +#define RT0 %r8 +#define RT1 %r9 +#define RT2 %r10 +#define RT3 %r11 + +/* helper macros */ +#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 0)]; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 3)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 0)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 1)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 2)]; \ + strb rtmp0, [rdst, #((offs) + 3)]; + +#ifndef __ARMEL__ + /* bswap on big-endian */ + #define host_to_le(reg) \ + rev reg, reg; + #define le_to_host(reg) \ + rev reg, reg; +#else + /* nop on little-endian */ + #define host_to_le(reg) /*_*/ + #define le_to_host(reg) /*_*/ +#endif + +#define ldr_input_aligned_le(rin, a, b, c, d) \ + ldr a, [rin, #0]; \ + ldr b, [rin, #4]; \ + le_to_host(a); \ + ldr c, [rin, #8]; \ + le_to_host(b); \ + ldr d, [rin, #12]; \ + le_to_host(c); \ + le_to_host(d); + +#define str_output_aligned_le(rout, a, b, c, d) \ + le_to_host(a); \ + le_to_host(b); \ + str a, [rout, #0]; \ + le_to_host(c); \ + str b, [rout, #4]; \ + le_to_host(d); \ + str c, [rout, #8]; \ + str d, [rout, #12]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads/writes allowed */ + #define ldr_input_le(rin, ra, rb, rc, rd, rtmp) \ + ldr_input_aligned_le(rin, ra, rb, rc, rd) + + #define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ + str_output_aligned_le(rout, ra, rb, rc, rd) +#else + /* need to handle unaligned reads/writes by byte reads */ + #define ldr_input_le(rin, ra, rb, rc, rd, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_le(ra, rin, 0, rtmp0); \ + ldr_unaligned_le(rb, rin, 4, rtmp0); \ + ldr_unaligned_le(rc, rin, 8, rtmp0); \ + ldr_unaligned_le(rd, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + ldr_input_aligned_le(rin, ra, rb, rc, rd); \ + 2:; + + #define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_le(ra, rout, 0, rtmp0, rtmp1); \ + str_unaligned_le(rb, rout, 4, rtmp0, rtmp1); \ + str_unaligned_le(rc, rout, 8, rtmp0, rtmp1); \ + str_unaligned_le(rd, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + str_output_aligned_le(rout, ra, rb, rc, rd); \ + 2:; +#endif + +/********************************************************************** + 1-way twofish + **********************************************************************/ +#define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \ + and RT0, RMASK, b, lsr#(8 - 2); \ + and RY, RMASK, b, lsr#(16 - 2); \ + add RT0, RT0, #(s2 - s1); \ + and RT1, RMASK, b, lsr#(24 - 2); \ + ldr RY, [CTXs3, RY]; \ + and RT2, RMASK, b, lsl#(2); \ + ldr RT0, [CTXs1, RT0]; \ + and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); \ + ldr RT1, [CTXs0, RT1]; \ + and RX, RMASK, a, lsr#(8 - 2 + (adj_a)); \ + ldr RT2, [CTXs1, RT2]; \ + 
add RT3, RT3, #(s2 - s1); \ + ldr RX, [CTXs1, RX]; \ + ror_a(a); \ + \ + eor RY, RY, RT0; \ + ldr RT3, [CTXs1, RT3]; \ + and RT0, RMASK, a, lsl#(2); \ + eor RY, RY, RT1; \ + and RT1, RMASK, a, lsr#(24 - 2); \ + eor RY, RY, RT2; \ + ldr RT0, [CTXs0, RT0]; \ + eor RX, RX, RT3; \ + ldr RT1, [CTXs3, RT1]; \ + eor RX, RX, RT0; \ + \ + ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \ + eor RX, RX, RT1; \ + ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \ + \ + add RT0, RX, RY, lsl #1; \ + add RX, RX, RY; \ + add RT0, RT0, RT3; \ + add RX, RX, RT2; \ + eor rd, RT0, rd, ror #31; \ + eor rc, rc, RX; + +#define dummy(x) /*_*/ + +#define ror1(r) \ + ror r, r, #1; + +#define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \ + and RT3, RMASK, b, lsl#(2 - (adj_b)); \ + and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); \ + ror_b(b); \ + and RT2, RMASK, a, lsl#(2); \ + and RT0, RMASK, a, lsr#(8 - 2); \ + \ + ldr RY, [CTXs1, RT3]; \ + add RT1, RT1, #(s2 - s1); \ + ldr RX, [CTXs0, RT2]; \ + and RT3, RMASK, b, lsr#(16 - 2); \ + ldr RT1, [CTXs1, RT1]; \ + and RT2, RMASK, a, lsr#(16 - 2); \ + ldr RT0, [CTXs1, RT0]; \ + \ + add RT2, RT2, #(s2 - s1); \ + ldr RT3, [CTXs3, RT3]; \ + eor RY, RY, RT1; \ + \ + and RT1, RMASK, b, lsr#(24 - 2); \ + eor RX, RX, RT0; \ + ldr RT2, [CTXs1, RT2]; \ + and RT0, RMASK, a, lsr#(24 - 2); \ + \ + ldr RT1, [CTXs0, RT1]; \ + \ + eor RY, RY, RT3; \ + ldr RT0, [CTXs3, RT0]; \ + eor RX, RX, RT2; \ + eor RY, RY, RT1; \ + \ + ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \ + eor RX, RX, RT0; \ + ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \ + \ + add RT0, RX, RY, lsl #1; \ + add RX, RX, RY; \ + add RT0, RT0, RT1; \ + add RX, RX, RT2; \ + eor rd, rd, RT0; \ + eor rc, RX, rc, ror #31; + +#define first_encrypt_cycle(nc) \ + encrypt_round(RA, RB, RC, RD, (nc) * 2, dummy, 0); \ + encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); + +#define encrypt_cycle(nc) \ + encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ + encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); + +#define last_encrypt_cycle(nc) \ + encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ + encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ + ror1(RA); + +#define first_decrypt_cycle(nc) \ + decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, dummy, 0); \ + decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); + +#define decrypt_cycle(nc) \ + decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ + decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); + +#define last_decrypt_cycle(nc) \ + decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ + decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ + ror1(RD); + +.align 3 +.globl _gcry_twofish_arm_encrypt_block +.type _gcry_twofish_arm_encrypt_block,%function; + +_gcry_twofish_arm_encrypt_block: + /* input: + * %r0: ctx + * %r1: dst + * %r2: src + */ + push {%r1, %r4-%r11, %ip, %lr}; + + add RY, CTXs0, #w; + + ldr_input_le(%r2, RA, RB, RC, RD, RT0); + + /* Input whitening */ + ldm RY, {RT0, RT1, RT2, RT3}; + add CTXs3, CTXs0, #(s3 - s0); + add CTXs1, CTXs0, #(s1 - s0); + mov RMASK, #(0xff << 2); + eor RA, RA, RT0; + eor RB, RB, RT1; + eor RC, RC, RT2; + eor RD, RD, RT3; + + first_encrypt_cycle(0); + encrypt_cycle(1); + encrypt_cycle(2); + encrypt_cycle(3); + encrypt_cycle(4); + encrypt_cycle(5); + encrypt_cycle(6); + last_encrypt_cycle(7); + + add RY, CTXs3, #(w + 4*4 - s3); + pop {%r1}; /* dst */ + + /* Output whitening */ + ldm RY, {RT0, RT1, RT2, RT3}; + eor RC, RC, RT0; + eor RD, RD, RT1; + eor RA, RA, RT2; + eor RB, RB, RT3; + + str_output_le(%r1, RC, RD, RA, RB, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; 
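+
+/* Reference model of one encrypt_round above, as a C-like sketch with
+ * the deferred rotate (the ror_a/adj_a scheduling) folded back in;
+ * aN/bN are the bytes of a and b, least significant first:
+ *
+ *   x  = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3];
+ *   y  = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3];
+ *   rc = ror32(rc ^ (x + y + k[2*n]), 1);
+ *   rd = rol32(rd, 1) ^ (x + 2*y + k[2*n + 1]);
+ */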
+.ltorg
+.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;
+
+.align 3
+.globl _gcry_twofish_arm_decrypt_block
+.type _gcry_twofish_arm_decrypt_block,%function;
+
+_gcry_twofish_arm_decrypt_block:
+ /* input:
+ * %r0: ctx
+ * %r1: dst
+ * %r2: src
+ */
+ push {%r1, %r4-%r11, %ip, %lr};
+
+ add CTXs3, CTXs0, #(s3 - s0);
+
+ ldr_input_le(%r2, RC, RD, RA, RB, RT0);
+
+ add RY, CTXs3, #(w + 4*4 - s3);
+ add CTXs3, CTXs0, #(s3 - s0);
+
+ /* Input whitening */
+ ldm RY, {RT0, RT1, RT2, RT3};
+ add CTXs1, CTXs0, #(s1 - s0);
+ mov RMASK, #(0xff << 2);
+ eor RC, RC, RT0;
+ eor RD, RD, RT1;
+ eor RA, RA, RT2;
+ eor RB, RB, RT3;
+
+ first_decrypt_cycle(7);
+ decrypt_cycle(6);
+ decrypt_cycle(5);
+ decrypt_cycle(4);
+ decrypt_cycle(3);
+ decrypt_cycle(2);
+ decrypt_cycle(1);
+ last_decrypt_cycle(0);
+
+ add RY, CTXs0, #w;
+ pop {%r1}; /* dst */
+
+ /* Output whitening */
+ ldm RY, {RT0, RT1, RT2, RT3};
+ eor RA, RA, RT0;
+ eor RB, RB, RT1;
+ eor RC, RC, RT2;
+ eor RD, RD, RT3;
+
+ str_output_le(%r1, RA, RB, RC, RD, RT0, RT1);
+
+ pop {%r4-%r11, %ip, %pc};
+.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARMEL__*/
diff --git a/libotr/libgcrypt-1.8.7/cipher/twofish-avx2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/twofish-avx2-amd64.S
new file mode 100644
index 0000000..db6e218
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/twofish-avx2-amd64.S
@@ -0,0 +1,1012 @@
+/* twofish-avx2-amd64.S - AMD64/AVX2 assembly implementation of Twofish cipher
+ *
+ * Copyright (C) 2013-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH) && \
+    defined(ENABLE_AVX2_SUPPORT)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) 
/*_*/ +#endif + +#ifdef __PIC__ +# define RIP (%rip) +#else +# define RIP +#endif + +.text + +/* structure of TWOFISH_context: */ +#define s0 0 +#define s1 ((s0) + 4 * 256) +#define s2 ((s1) + 4 * 256) +#define s3 ((s2) + 4 * 256) +#define w ((s3) + 4 * 256) +#define k ((w) + 4 * 8) + +/* register macros */ +#define CTX %rdi + +#define RROUND %rbp +#define RROUNDd %ebp +#define RS0 CTX +#define RS1 %r8 +#define RS2 %r9 +#define RS3 %r10 +#define RK %r11 +#define RW %rax + +#define RA0 %ymm8 +#define RB0 %ymm9 +#define RC0 %ymm10 +#define RD0 %ymm11 +#define RA1 %ymm12 +#define RB1 %ymm13 +#define RC1 %ymm14 +#define RD1 %ymm15 + +/* temp regs */ +#define RX0 %ymm0 +#define RY0 %ymm1 +#define RX1 %ymm2 +#define RY1 %ymm3 +#define RT0 %ymm4 +#define RIDX %ymm5 + +#define RX0x %xmm0 +#define RY0x %xmm1 +#define RX1x %xmm2 +#define RY1x %xmm3 +#define RT0x %xmm4 +#define RIDXx %xmm5 + +#define RTMP0 RX0 +#define RTMP0x RX0x +#define RTMP1 RX1 +#define RTMP1x RX1x +#define RTMP2 RY0 +#define RTMP2x RY0x +#define RTMP3 RY1 +#define RTMP3x RY1x +#define RTMP4 RIDX +#define RTMP4x RIDXx + +/* vpgatherdd mask and '-1' */ +#define RNOT %ymm6 +#define RNOTx %xmm6 + +/* byte mask, (-1 >> 24) */ +#define RBYTE %ymm7 + +/********************************************************************** + 16-way AVX2 twofish + **********************************************************************/ +#define init_round_constants() \ + vpcmpeqd RNOT, RNOT, RNOT; \ + leaq k(CTX), RK; \ + leaq w(CTX), RW; \ + vpsrld $24, RNOT, RBYTE; \ + leaq s1(CTX), RS1; \ + leaq s2(CTX), RS2; \ + leaq s3(CTX), RS3; \ + +#define g16(ab, rs0, rs1, rs2, rs3, xy) \ + vpand RBYTE, ab ## 0, RIDX; \ + vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \ + vpcmpeqd RNOT, RNOT, RNOT; \ + \ + vpand RBYTE, ab ## 1, RIDX; \ + vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \ + vpcmpeqd RNOT, RNOT, RNOT; \ + \ + vpsrld $8, ab ## 0, RIDX; \ + vpand RBYTE, RIDX, RIDX; \ + vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ + vpcmpeqd RNOT, RNOT, RNOT; \ + vpxor RT0, xy ## 0, xy ## 0; \ + \ + vpsrld $8, ab ## 1, RIDX; \ + vpand RBYTE, RIDX, RIDX; \ + vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ + vpcmpeqd RNOT, RNOT, RNOT; \ + vpxor RT0, xy ## 1, xy ## 1; \ + \ + vpsrld $16, ab ## 0, RIDX; \ + vpand RBYTE, RIDX, RIDX; \ + vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ + vpcmpeqd RNOT, RNOT, RNOT; \ + vpxor RT0, xy ## 0, xy ## 0; \ + \ + vpsrld $16, ab ## 1, RIDX; \ + vpand RBYTE, RIDX, RIDX; \ + vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ + vpcmpeqd RNOT, RNOT, RNOT; \ + vpxor RT0, xy ## 1, xy ## 1; \ + \ + vpsrld $24, ab ## 0, RIDX; \ + vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ + vpcmpeqd RNOT, RNOT, RNOT; \ + vpxor RT0, xy ## 0, xy ## 0; \ + \ + vpsrld $24, ab ## 1, RIDX; \ + vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ + vpcmpeqd RNOT, RNOT, RNOT; \ + vpxor RT0, xy ## 1, xy ## 1; + +#define g1_16(a, x) \ + g16(a, RS0, RS1, RS2, RS3, x); + +#define g2_16(b, y) \ + g16(b, RS1, RS2, RS3, RS0, y); + +#define encrypt_round_end16(a, b, c, d, nk, r) \ + vpaddd RY0, RX0, RX0; \ + vpaddd RX0, RY0, RY0; \ + vpbroadcastd ((nk)+((r)*8))(RK), RT0; \ + vpaddd RT0, RX0, RX0; \ + vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \ + vpaddd RT0, RY0, RY0; \ + \ + vpxor RY0, d ## 0, d ## 0; \ + \ + vpxor RX0, c ## 0, c ## 0; \ + vpsrld $1, c ## 0, RT0; \ + vpslld $31, c ## 0, c ## 0; \ + vpor RT0, c ## 0, c ## 0; \ + \ + vpaddd RY1, RX1, RX1; \ + vpaddd RX1, RY1, RY1; \ + vpbroadcastd ((nk)+((r)*8))(RK), RT0; \ + vpaddd RT0, RX1, RX1; \ + vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \ + vpaddd RT0, RY1, RY1; \ + \ + vpxor RY1, d 
## 1, d ## 1; \ + \ + vpxor RX1, c ## 1, c ## 1; \ + vpsrld $1, c ## 1, RT0; \ + vpslld $31, c ## 1, c ## 1; \ + vpor RT0, c ## 1, c ## 1; \ + +#define encrypt_round16(a, b, c, d, nk, r) \ + g2_16(b, RY); \ + \ + vpslld $1, b ## 0, RT0; \ + vpsrld $31, b ## 0, b ## 0; \ + vpor RT0, b ## 0, b ## 0; \ + \ + vpslld $1, b ## 1, RT0; \ + vpsrld $31, b ## 1, b ## 1; \ + vpor RT0, b ## 1, b ## 1; \ + \ + g1_16(a, RX); \ + \ + encrypt_round_end16(a, b, c, d, nk, r); + +#define encrypt_round_first16(a, b, c, d, nk, r) \ + vpslld $1, d ## 0, RT0; \ + vpsrld $31, d ## 0, d ## 0; \ + vpor RT0, d ## 0, d ## 0; \ + \ + vpslld $1, d ## 1, RT0; \ + vpsrld $31, d ## 1, d ## 1; \ + vpor RT0, d ## 1, d ## 1; \ + \ + encrypt_round16(a, b, c, d, nk, r); + +#define encrypt_round_last16(a, b, c, d, nk, r) \ + g2_16(b, RY); \ + \ + g1_16(a, RX); \ + \ + encrypt_round_end16(a, b, c, d, nk, r); + +#define decrypt_round_end16(a, b, c, d, nk, r) \ + vpaddd RY0, RX0, RX0; \ + vpaddd RX0, RY0, RY0; \ + vpbroadcastd ((nk)+((r)*8))(RK), RT0; \ + vpaddd RT0, RX0, RX0; \ + vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \ + vpaddd RT0, RY0, RY0; \ + \ + vpxor RX0, c ## 0, c ## 0; \ + \ + vpxor RY0, d ## 0, d ## 0; \ + vpsrld $1, d ## 0, RT0; \ + vpslld $31, d ## 0, d ## 0; \ + vpor RT0, d ## 0, d ## 0; \ + \ + vpaddd RY1, RX1, RX1; \ + vpaddd RX1, RY1, RY1; \ + vpbroadcastd ((nk)+((r)*8))(RK), RT0; \ + vpaddd RT0, RX1, RX1; \ + vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \ + vpaddd RT0, RY1, RY1; \ + \ + vpxor RX1, c ## 1, c ## 1; \ + \ + vpxor RY1, d ## 1, d ## 1; \ + vpsrld $1, d ## 1, RT0; \ + vpslld $31, d ## 1, d ## 1; \ + vpor RT0, d ## 1, d ## 1; + +#define decrypt_round16(a, b, c, d, nk, r) \ + g1_16(a, RX); \ + \ + vpslld $1, a ## 0, RT0; \ + vpsrld $31, a ## 0, a ## 0; \ + vpor RT0, a ## 0, a ## 0; \ + \ + vpslld $1, a ## 1, RT0; \ + vpsrld $31, a ## 1, a ## 1; \ + vpor RT0, a ## 1, a ## 1; \ + \ + g2_16(b, RY); \ + \ + decrypt_round_end16(a, b, c, d, nk, r); + +#define decrypt_round_first16(a, b, c, d, nk, r) \ + vpslld $1, c ## 0, RT0; \ + vpsrld $31, c ## 0, c ## 0; \ + vpor RT0, c ## 0, c ## 0; \ + \ + vpslld $1, c ## 1, RT0; \ + vpsrld $31, c ## 1, c ## 1; \ + vpor RT0, c ## 1, c ## 1; \ + \ + decrypt_round16(a, b, c, d, nk, r) + +#define decrypt_round_last16(a, b, c, d, nk, r) \ + g1_16(a, RX); \ + \ + g2_16(b, RY); \ + \ + decrypt_round_end16(a, b, c, d, nk, r); + +#define encrypt_cycle16(r) \ + encrypt_round16(RA, RB, RC, RD, 0, r); \ + encrypt_round16(RC, RD, RA, RB, 8, r); + +#define encrypt_cycle_first16(r) \ + encrypt_round_first16(RA, RB, RC, RD, 0, r); \ + encrypt_round16(RC, RD, RA, RB, 8, r); + +#define encrypt_cycle_last16(r) \ + encrypt_round16(RA, RB, RC, RD, 0, r); \ + encrypt_round_last16(RC, RD, RA, RB, 8, r); + +#define decrypt_cycle16(r) \ + decrypt_round16(RC, RD, RA, RB, 8, r); \ + decrypt_round16(RA, RB, RC, RD, 0, r); + +#define decrypt_cycle_first16(r) \ + decrypt_round_first16(RC, RD, RA, RB, 8, r); \ + decrypt_round16(RA, RB, RC, RD, 0, r); + +#define decrypt_cycle_last16(r) \ + decrypt_round16(RC, RD, RA, RB, 8, r); \ + decrypt_round_last16(RA, RB, RC, RD, 0, r); + +#define transpose_4x4(x0,x1,x2,x3,t1,t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define read_blocks8(offs,a,b,c,d) \ + vmovdqu 16*offs(RIO), a; \ + vmovdqu 16*offs+32(RIO), b; \ + vmovdqu 16*offs+64(RIO), c; \ + vmovdqu 16*offs+96(RIO), d; 
\ + \ + transpose_4x4(a, b, c, d, RX0, RY0); + +#define write_blocks8(offs,a,b,c,d) \ + transpose_4x4(a, b, c, d, RX0, RY0); \ + \ + vmovdqu a, 16*offs(RIO); \ + vmovdqu b, 16*offs+32(RIO); \ + vmovdqu c, 16*offs+64(RIO); \ + vmovdqu d, 16*offs+96(RIO); + +#define inpack_enc8(a,b,c,d) \ + vpbroadcastd 4*0(RW), RT0; \ + vpxor RT0, a, a; \ + \ + vpbroadcastd 4*1(RW), RT0; \ + vpxor RT0, b, b; \ + \ + vpbroadcastd 4*2(RW), RT0; \ + vpxor RT0, c, c; \ + \ + vpbroadcastd 4*3(RW), RT0; \ + vpxor RT0, d, d; + +#define outunpack_enc8(a,b,c,d) \ + vpbroadcastd 4*4(RW), RX0; \ + vpbroadcastd 4*5(RW), RY0; \ + vpxor RX0, c, RX0; \ + vpxor RY0, d, RY0; \ + \ + vpbroadcastd 4*6(RW), RT0; \ + vpxor RT0, a, c; \ + vpbroadcastd 4*7(RW), RT0; \ + vpxor RT0, b, d; \ + \ + vmovdqa RX0, a; \ + vmovdqa RY0, b; + +#define inpack_dec8(a,b,c,d) \ + vpbroadcastd 4*4(RW), RX0; \ + vpbroadcastd 4*5(RW), RY0; \ + vpxor RX0, a, RX0; \ + vpxor RY0, b, RY0; \ + \ + vpbroadcastd 4*6(RW), RT0; \ + vpxor RT0, c, a; \ + vpbroadcastd 4*7(RW), RT0; \ + vpxor RT0, d, b; \ + \ + vmovdqa RX0, c; \ + vmovdqa RY0, d; + +#define outunpack_dec8(a,b,c,d) \ + vpbroadcastd 4*0(RW), RT0; \ + vpxor RT0, a, a; \ + \ + vpbroadcastd 4*1(RW), RT0; \ + vpxor RT0, b, b; \ + \ + vpbroadcastd 4*2(RW), RT0; \ + vpxor RT0, c, c; \ + \ + vpbroadcastd 4*3(RW), RT0; \ + vpxor RT0, d, d; + +#define transpose4x4_16(a,b,c,d) \ + transpose_4x4(a ## 0, b ## 0, c ## 0, d ## 0, RX0, RY0); \ + transpose_4x4(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0); + +#define inpack_enc16(a,b,c,d) \ + inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ + inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); + +#define outunpack_enc16(a,b,c,d) \ + outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ + outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); + +#define inpack_dec16(a,b,c,d) \ + inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ + inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); + +#define outunpack_dec16(a,b,c,d) \ + outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ + outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); + +.align 8 +ELF(.type __twofish_enc_blk16,@function;) +__twofish_enc_blk16: + /* input: + * %rdi: ctx, CTX + * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel + * plaintext blocks + * output: + * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel + * ciphertext blocks + */ + init_round_constants(); + + transpose4x4_16(RA, RB, RC, RD); + inpack_enc16(RA, RB, RC, RD); + + encrypt_cycle_first16(0); + encrypt_cycle16(2); + encrypt_cycle16(4); + encrypt_cycle16(6); + encrypt_cycle16(8); + encrypt_cycle16(10); + encrypt_cycle16(12); + encrypt_cycle_last16(14); + + outunpack_enc16(RA, RB, RC, RD); + transpose4x4_16(RA, RB, RC, RD); + + ret; +ELF(.size __twofish_enc_blk16,.-__twofish_enc_blk16;) + +.align 8 +ELF(.type __twofish_dec_blk16,@function;) +__twofish_dec_blk16: + /* input: + * %rdi: ctx, CTX + * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel + * plaintext blocks + * output: + * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel + * ciphertext blocks + */ + init_round_constants(); + + transpose4x4_16(RA, RB, RC, RD); + inpack_dec16(RA, RB, RC, RD); + + decrypt_cycle_first16(14); + decrypt_cycle16(12); + decrypt_cycle16(10); + decrypt_cycle16(8); + decrypt_cycle16(6); + decrypt_cycle16(4); + decrypt_cycle16(2); + decrypt_cycle_last16(0); + + outunpack_dec16(RA, RB, RC, RD); + transpose4x4_16(RA, RB, RC, RD); + + ret; +ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + 
vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +.align 8 +.globl _gcry_twofish_avx2_ctr_enc +ELF(.type _gcry_twofish_avx2_ctr_enc,@function;) +_gcry_twofish_avx2_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + + movq 8(%rcx), %rax; + bswapq %rax; + + vzeroupper; + + vbroadcasti128 .Lbswap128_mask RIP, RTMP3; + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ + vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), RTMP4x; + vpshufb RTMP3x, RTMP4x, RTMP4x; + vmovdqa RTMP4x, RTMP0x; + inc_le128(RTMP4x, RNOTx, RTMP1x); + vinserti128 $1, RTMP4x, RTMP0, RTMP0; + vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */ + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 16), %rax; + ja .Lhandle_ctr_carry; + + /* construct IVs */ + vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */ + vpshufb RTMP3, RTMP0, RB0; + vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */ + vpshufb RTMP3, RTMP0, RC0; + vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */ + vpshufb RTMP3, RTMP0, RD0; + vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */ + vpshufb RTMP3, RTMP0, RA1; + vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */ + vpshufb RTMP3, RTMP0, RB1; + vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */ + vpshufb RTMP3, RTMP0, RC1; + vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */ + vpshufb RTMP3, RTMP0, RD1; + vpsubq RTMP2, RTMP0, RTMP0; /* +16 */ + vpshufb RTMP3x, RTMP0x, RTMP0x; + + jmp .Lctr_carry_done; + +.Lhandle_ctr_carry: + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB0; /* +3 ; +2 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RC0; /* +5 ; +4 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RD0; /* +7 ; +6 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA1; /* +9 ; +8 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RC1; /* +13 ; +12 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RD1; /* +15 ; +14 */ + inc_le128(RTMP0, RNOT, RTMP1); + vextracti128 $1, RTMP0, RTMP0x; + vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */ + +.align 4 +.Lctr_carry_done: + /* store new IV */ + vmovdqu RTMP0x, (%rcx); + + call __twofish_enc_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RB0, RB0; + vpxor (2 * 32)(%rdx), RC0, RC0; + vpxor (3 * 32)(%rdx), RD0, RD0; + vpxor (4 * 32)(%rdx), RA1, RA1; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RC1, RC1; + vpxor (7 * 32)(%rdx), RD1, RD1; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RB0, (1 * 32)(%rsi); + vmovdqu RC0, (2 * 32)(%rsi); + vmovdqu RD0, (3 * 32)(%rsi); + vmovdqu RA1, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RC1, (6 * 32)(%rsi); + vmovdqu RD1, (7 * 32)(%rsi); + + vzeroall; + + ret +ELF(.size _gcry_twofish_avx2_ctr_enc,.-_gcry_twofish_avx2_ctr_enc;) + +.align 8 +.globl _gcry_twofish_avx2_cbc_dec +ELF(.type _gcry_twofish_avx2_cbc_dec,@function;) +_gcry_twofish_avx2_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + + vzeroupper; + + vmovdqu (0 * 32)(%rdx), RA0; + vmovdqu (1 * 32)(%rdx), RB0; + vmovdqu (2 * 32)(%rdx), RC0; + 
vmovdqu (3 * 32)(%rdx), RD0; + vmovdqu (4 * 32)(%rdx), RA1; + vmovdqu (5 * 32)(%rdx), RB1; + vmovdqu (6 * 32)(%rdx), RC1; + vmovdqu (7 * 32)(%rdx), RD1; + + call __twofish_dec_blk16; + + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RNOT; + vpxor RNOT, RA0, RA0; + vpxor (0 * 32 + 16)(%rdx), RB0, RB0; + vpxor (1 * 32 + 16)(%rdx), RC0, RC0; + vpxor (2 * 32 + 16)(%rdx), RD0, RD0; + vpxor (3 * 32 + 16)(%rdx), RA1, RA1; + vpxor (4 * 32 + 16)(%rdx), RB1, RB1; + vpxor (5 * 32 + 16)(%rdx), RC1, RC1; + vpxor (6 * 32 + 16)(%rdx), RD1, RD1; + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RB0, (1 * 32)(%rsi); + vmovdqu RC0, (2 * 32)(%rsi); + vmovdqu RD0, (3 * 32)(%rsi); + vmovdqu RA1, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RC1, (6 * 32)(%rsi); + vmovdqu RD1, (7 * 32)(%rsi); + + vzeroall; + + ret +ELF(.size _gcry_twofish_avx2_cbc_dec,.-_gcry_twofish_avx2_cbc_dec;) + +.align 8 +.globl _gcry_twofish_avx2_cfb_dec +ELF(.type _gcry_twofish_avx2_cfb_dec,@function;) +_gcry_twofish_avx2_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + + vzeroupper; + + /* Load input */ + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RA0; + vmovdqu (0 * 32 + 16)(%rdx), RB0; + vmovdqu (1 * 32 + 16)(%rdx), RC0; + vmovdqu (2 * 32 + 16)(%rdx), RD0; + vmovdqu (3 * 32 + 16)(%rdx), RA1; + vmovdqu (4 * 32 + 16)(%rdx), RB1; + vmovdqu (5 * 32 + 16)(%rdx), RC1; + vmovdqu (6 * 32 + 16)(%rdx), RD1; + + /* Update IV */ + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); + + call __twofish_enc_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RB0, RB0; + vpxor (2 * 32)(%rdx), RC0, RC0; + vpxor (3 * 32)(%rdx), RD0, RD0; + vpxor (4 * 32)(%rdx), RA1, RA1; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RC1, RC1; + vpxor (7 * 32)(%rdx), RD1, RD1; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RB0, (1 * 32)(%rsi); + vmovdqu RC0, (2 * 32)(%rsi); + vmovdqu RD0, (3 * 32)(%rsi); + vmovdqu RA1, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RC1, (6 * 32)(%rsi); + vmovdqu RD1, (7 * 32)(%rsi); + + vzeroall; + + ret +ELF(.size _gcry_twofish_avx2_cfb_dec,.-_gcry_twofish_avx2_cfb_dec;) + +.align 8 +.globl _gcry_twofish_avx2_ocb_enc +ELF(.type _gcry_twofish_avx2_ocb_enc,@function;) + +_gcry_twofish_avx2_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rcx), RTMP0x; + vmovdqu (%r8), RTMP1x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RTMP1, RTMP1; \ + vpxor yreg, RNOT, yreg; \ + vmovdqu RNOT, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RB0); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, RC0); + OCB_INPUT(3, %r12, %r13, RD0); + movq 
(8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, RA1); + OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, RC1); + OCB_INPUT(7, %r12, %r13, RD1); +#undef OCB_INPUT + + vextracti128 $1, RTMP1, RNOTx; + vmovdqu RTMP0x, (%rcx); + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%r8); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __twofish_enc_blk16; + + addq $(4 * 8), %rsp; + + vpxor (0 * 32)(%rsi), RA0, RA0; + vpxor (1 * 32)(%rsi), RB0, RB0; + vpxor (2 * 32)(%rsi), RC0, RC0; + vpxor (3 * 32)(%rsi), RD0, RD0; + vpxor (4 * 32)(%rsi), RA1, RA1; + vpxor (5 * 32)(%rsi), RB1, RB1; + vpxor (6 * 32)(%rsi), RC1, RC1; + vpxor (7 * 32)(%rsi), RD1, RD1; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RB0, (1 * 32)(%rsi); + vmovdqu RC0, (2 * 32)(%rsi); + vmovdqu RD0, (3 * 32)(%rsi); + vmovdqu RA1, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RC1, (6 * 32)(%rsi); + vmovdqu RD1, (7 * 32)(%rsi); + + vzeroall; + + ret; +ELF(.size _gcry_twofish_avx2_ocb_enc,.-_gcry_twofish_avx2_ocb_enc;) + +.align 8 +.globl _gcry_twofish_avx2_ocb_dec +ELF(.type _gcry_twofish_avx2_ocb_dec,@function;) + +_gcry_twofish_avx2_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rcx), RTMP0x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RNOT, yreg; \ + vmovdqu RNOT, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RB0); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, RC0); + OCB_INPUT(3, %r12, %r13, RD0); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, RA1); + OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, RC1); + OCB_INPUT(7, %r12, %r13, RD1); +#undef OCB_INPUT + + vmovdqu RTMP0x, (%rcx); + mov %r8, %rcx + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __twofish_dec_blk16; + + vmovdqu (%rcx), RTMP1x; + + vpxor (0 * 32)(%rsi), RA0, RA0; + vpxor (1 * 32)(%rsi), RB0, RB0; + vpxor (2 * 32)(%rsi), RC0, RC0; + vpxor (3 * 32)(%rsi), RD0, RD0; + vpxor (4 * 32)(%rsi), RA1, RA1; + vpxor (5 * 32)(%rsi), RB1, RB1; + vpxor (6 * 32)(%rsi), RC1, RC1; + vpxor (7 * 32)(%rsi), RD1, RD1; + + addq $(4 * 8), %rsp; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vmovdqu RA0, (0 * 32)(%rsi); + vpxor RA0, RTMP1, RTMP1; + vmovdqu RB0, (1 * 32)(%rsi); + vpxor RB0, RTMP1, RTMP1; + vmovdqu RC0, (2 * 32)(%rsi); + vpxor RC0, RTMP1, RTMP1; + 
vmovdqu RD0, (3 * 32)(%rsi); + vpxor RD0, RTMP1, RTMP1; + vmovdqu RA1, (4 * 32)(%rsi); + vpxor RA1, RTMP1, RTMP1; + vmovdqu RB1, (5 * 32)(%rsi); + vpxor RB1, RTMP1, RTMP1; + vmovdqu RC1, (6 * 32)(%rsi); + vpxor RC1, RTMP1, RTMP1; + vmovdqu RD1, (7 * 32)(%rsi); + vpxor RD1, RTMP1, RTMP1; + + vextracti128 $1, RTMP1, RNOTx; + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%rcx); + + vzeroall; + + ret; +ELF(.size _gcry_twofish_avx2_ocb_dec,.-_gcry_twofish_avx2_ocb_dec;) + +.align 8 +.globl _gcry_twofish_avx2_ocb_auth +ELF(.type _gcry_twofish_avx2_ocb_auth,@function;) + +_gcry_twofish_avx2_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (16 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rdx), RTMP0x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rsi), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RNOT, yreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RB0); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(2, %r10, %r11, RC0); + OCB_INPUT(3, %r12, %r13, RD0); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %r11, RA1); + OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(6, %r10, %r11, RC1); + OCB_INPUT(7, %r12, %r13, RD1); +#undef OCB_INPUT + + vmovdqu RTMP0x, (%rdx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __twofish_enc_blk16; + + vpxor RA0, RB0, RA0; + vpxor RC0, RD0, RC0; + vpxor RA1, RB1, RA1; + vpxor RC1, RD1, RC1; + + vpxor RA0, RC0, RA0; + vpxor RA1, RC1, RA1; + + addq $(4 * 8), %rsp; + + vpxor RA1, RA0, RTMP1; + + vextracti128 $1, RTMP1, RNOTx; + vpxor (%rcx), RTMP1x, RTMP1x; + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%rcx); + + vzeroall; + + ret; +ELF(.size _gcry_twofish_avx2_ocb_auth,.-_gcry_twofish_avx2_ocb_auth;) + +.align 16 + +/* For CTR-mode IV byteswap */ + _gcry_twofish_bswap128_mask: +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +ELF(.size _gcry_twofish_bswap128_mask,.-_gcry_twofish_bswap128_mask;) + +#endif /*defined(USE_TWOFISH) && defined(ENABLE_AVX2_SUPPORT)*/ +#endif /*__x86_64*/ diff --git a/libotr/libgcrypt-1.8.7/cipher/twofish.c b/libotr/libgcrypt-1.8.7/cipher/twofish.c new file mode 100644 index 0000000..942e8d4 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/twofish.c @@ -0,0 +1,1860 @@ +/* Twofish for GPG + * Copyright (C) 1998, 2002, 2003 Free Software Foundation, Inc. + * Written by Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + ******************************************************************** + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + * + * Only the 128- and 256-bit key sizes are supported. This code is intended + * for GNU C on a 32-bit system, but it should work almost anywhere. Loops + * are unrolled, precomputation tables are used, etc., for maximum speed at + * some cost in memory consumption. */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> /* for memcmp() */ + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-internal.h" +#include "cipher-selftest.h" + + +#define TWOFISH_BLOCKSIZE 16 + + +/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ +#undef USE_AMD64_ASM +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AMD64_ASM 1 +#endif + +/* USE_ARM_ASM indicates whether to use ARM assembly code. */ +#undef USE_ARM_ASM +#if defined(__ARMEL__) +# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) +# define USE_ARM_ASM 1 +# endif +#endif +# if defined(__AARCH64EL__) +# ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS +# define USE_ARM_ASM 1 +# endif +# endif + +/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */ +#undef USE_AVX2 +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# if defined(ENABLE_AVX2_SUPPORT) +# define USE_AVX2 1 +# endif +#endif + + +/* Prototype for the self-test function. */ +static const char *selftest(void); + +/* Structure for an expanded Twofish key. s contains the key-dependent + * S-boxes composed with the MDS matrix; w contains the eight "whitening" + * subkeys, K[0] through K[7]. k holds the remaining, "round" subkeys. Note + * that k[i] corresponds to what the Twofish paper calls K[i+8]. */ +typedef struct { + u32 s[4][256], w[8], k[32]; + +#ifdef USE_AVX2 + int use_avx2; +#endif +} TWOFISH_context; + + +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. 
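+ * In practice ASM_FUNC_ABI expands to __attribute__((sysv_abi)) on Win64
+ * (see the define below), so the compiler performs the calling-convention
+ * switch at each call site and the assembly itself only has to implement
+ * the SystemV ABI.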
*/ +#undef ASM_FUNC_ABI +#if defined(USE_AVX2) +# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# else +# define ASM_FUNC_ABI +# endif +#endif + + +/* These two tables are the q0 and q1 permutations, exactly as described in + * the Twofish paper. */ + +static const byte q0[256] = { + 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, + 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, + 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30, + 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82, + 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, + 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, + 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45, + 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7, + 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, + 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, + 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED, + 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90, + 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, + 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, + 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F, + 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A, + 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, + 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, + 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68, + 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4, + 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, + 0x4A, 0x5E, 0xC1, 0xE0 +}; + +static const byte q1[256] = { + 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, + 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, + 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B, + 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5, + 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, + 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, + 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7, + 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8, + 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, + 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, + 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D, + 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E, + 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, + 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, + 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E, + 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64, + 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, + 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, + 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B, + 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9, + 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, + 0x55, 0x09, 0xBE, 0x91 +}; + +/* These MDS tables 
are actually tables of MDS composed with q0 and q1, + * because it is only ever used that way and we can save some time by + * precomputing. Of course the main saving comes from precomputing the + * GF(2^8) multiplication involved in the MDS matrix multiply; by looking + * things up in these tables we reduce the matrix multiply to four lookups + * and three XORs. Semi-formally, the definition of these tables is: + * mds[0][i] = MDS (q1[i] 0 0 0)^T mds[1][i] = MDS (0 q0[i] 0 0)^T + * mds[2][i] = MDS (0 0 q1[i] 0)^T mds[3][i] = MDS (0 0 0 q0[i])^T + * where ^T means "transpose", the matrix multiply is performed in GF(2^8) + * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described + * by Schneier et al, and I'm casually glossing over the byte/word + * conversion issues. */ + +static const u32 mds[4][256] = { + {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, + 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, + 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, + 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, + 0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA, + 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B, + 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1, + 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5, + 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490, + 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154, + 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0, + 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796, + 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228, + 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7, + 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3, + 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8, + 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477, + 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF, + 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C, + 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9, + 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA, + 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D, + 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72, + 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E, + 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76, + 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321, + 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39, + 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01, + 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D, + 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E, + 0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5, + 0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64, + 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7, + 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544, + 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E, + 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E, + 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 
0xCCCC6235, 0xF1F1C46A, + 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B, + 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2, + 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, + 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, + 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, + 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91}, + + {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, + 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, + 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, + 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, + 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444, + 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424, + 0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A, + 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757, + 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383, + 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A, + 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9, + 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656, + 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1, + 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898, + 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414, + 0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3, + 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1, + 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989, + 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5, + 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282, + 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E, + 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E, + 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202, + 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC, + 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565, + 0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A, + 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808, + 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272, + 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A, + 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969, + 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505, + 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5, + 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D, + 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343, + 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF, + 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3, + 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F, + 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646, + 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6, + 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, + 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, + 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 
0x42E6C7C7, + 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8}, + + {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, + 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, + 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, + 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, + 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70, + 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3, + 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB, + 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA, + 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4, + 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41, + 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C, + 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07, + 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622, + 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18, + 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035, + 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96, + 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84, + 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E, + 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F, + 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD, + 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558, + 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40, + 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA, + 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85, + 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF, + 0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773, + 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D, + 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B, + 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C, + 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19, + 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086, + 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D, + 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74, + 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755, + 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691, + 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D, + 0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4, + 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53, + 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E, + 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, + 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, + 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, + 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF}, + + {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, + 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, + 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, + 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, + 0xBD2843BD, 0x32BC7532, 
0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9, + 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C, + 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3, + 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216, + 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F, + 0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25, + 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF, + 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7, + 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4, + 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E, + 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA, + 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C, + 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12, + 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A, + 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D, + 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE, + 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A, + 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C, + 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B, + 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4, + 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B, + 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3, + 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE, + 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB, + 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85, + 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA, + 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E, + 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8, + 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33, + 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC, + 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718, + 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA, + 0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8, + 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872, + 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882, + 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, + 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, + 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, + 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8} +}; + +/* The exp_to_poly and poly_to_exp tables are used to perform efficient + * operations in GF(2^8) represented as GF(2)[x]/w(x) where + * w(x)=x^8+x^6+x^3+x^2+1. We care about doing that because it's part of the + * definition of the RS matrix in the key schedule. Elements of that field + * are polynomials of degree not greater than 7 and all coefficients 0 or 1, + * which can be represented naturally by bytes (just substitute x=2). In that + * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8) + * multiplication is inefficient without hardware support. 
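+ * (A direct product needs a shift-and-XOR loop over the bits of one
+ * operand, reducing by w(x) whenever the intermediate degree reaches 8.)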
To multiply + * faster, I make use of the fact x is a generator for the nonzero elements, + * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for + * some n in 0..254. Note that that caret is exponentiation in GF(2^8), + * *not* polynomial notation. So if I want to compute pq where p and q are + * in GF(2^8), I can just say: + * 1. if p=0 or q=0 then pq=0 + * 2. otherwise, find m and n such that p=x^m and q=x^n + * 3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq + * The translations in steps 2 and 3 are looked up in the tables + * poly_to_exp (for step 2) and exp_to_poly (for step 3). To see this + * in action, look at the CALC_S macro. As additional wrinkles, note that + * one of my operands is always a constant, so the poly_to_exp lookup on it + * is done in advance; I included the original values in the comments so + * readers can have some chance of recognizing that this *is* the RS matrix + * from the Twofish paper. I've only included the table entries I actually + * need; I never do a lookup on a variable input of zero and the biggest + * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll + * never sum to more than 491. I'm repeating part of the exp_to_poly table + * so that I don't have to do mod-255 reduction in the exponent arithmetic. + * Since I know my constant operands are never zero, I only have to worry + * about zero values in the variable operand, and I do it with a simple + * conditional branch. I know conditionals are expensive, but I couldn't + * see a non-horrible way of avoiding them, and I did manage to group the + * statements so that each if covers four group multiplications. */ + +static const u16 poly_to_exp[256] = { + 492, + 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19, + 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A, + 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C, + 0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B, + 0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47, + 0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D, + 0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8, + 0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C, + 0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83, + 0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48, + 0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26, + 0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E, + 0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3, + 0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9, + 0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A, + 0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D, + 0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75, + 0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84, + 0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64, + 0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49, + 0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF, + 0x85, 0xC8, 0xA1 +}; + +static const byte exp_to_poly[492 + 256] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2, + 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, + 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 
0xE3, 0x8B, 0x5B, 0xB6, + 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A, + 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63, + 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C, + 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07, + 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88, + 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12, + 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7, + 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C, + 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8, + 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25, + 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A, + 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE, + 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC, + 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, + 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, + 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89, + 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB, + 0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1, + 0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, + 0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, + 0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, + 0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, + 0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, + 0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, + 0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, + 0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, + 0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, + 0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, + 0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, + 0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, + 0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, + 0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, + 0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, + 0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, + 0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, + 0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, + 0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, + 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, +}; + + +/* The table constants are indices of + * S-box entries, preprocessed through q0 and q1. 
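+ * Entry 2i is q0[i] and entry 2i+1 is q1[i]; for example, the first
+ * pair 0xA9, 0x75 is exactly q0[0], q1[0] from the tables above.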
*/ +static byte calc_sb_tbl[512] = { + 0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4, + 0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8, + 0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B, + 0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B, + 0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD, + 0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1, + 0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B, + 0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F, + 0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B, + 0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D, + 0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E, + 0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5, + 0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14, + 0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3, + 0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54, + 0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51, + 0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A, + 0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96, + 0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10, + 0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C, + 0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7, + 0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70, + 0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB, + 0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8, + 0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF, + 0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC, + 0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF, + 0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2, + 0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82, + 0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9, + 0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97, + 0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17, + 0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D, + 0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3, + 0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C, + 0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E, + 0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F, + 0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49, + 0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21, + 0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9, + 0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD, + 0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01, + 0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F, + 0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48, + 0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E, + 0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19, + 0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57, + 0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64, + 0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE, + 0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5, + 0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44, + 0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69, + 0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15, + 0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E, + 0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34, + 0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC, + 0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B, + 0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB, + 0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52, + 0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9, + 0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4, + 0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2, + 0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56, + 0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91 +}; + +/* Macro to perform one column of the RS matrix multiplication. The + * parameters a, b, c, and d are the four bytes of output; i is the index + * of the key bytes, and w, x, y, and z, are the column of constants from + * the RS matrix, preprocessed through the poly_to_exp table. 
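+ * Each line thus computes key[i] times one RS constant in GF(2^8): with
+ * tmp the logarithm of key[i], exp_to_poly[tmp + w] is
+ * x^(log key[i] + log c) = key[i]*c.  A zero key byte maps to
+ * poly_to_exp[0] = 492, which indexes the zero-filled tail of
+ * exp_to_poly[492 + 256], so its products correctly come out as zero
+ * without any branch in this version.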
*/ + +#define CALC_S(a, b, c, d, i, w, x, y, z) \ + { \ + tmp = poly_to_exp[key[i]]; \ + (a) ^= exp_to_poly[tmp + (w)]; \ + (b) ^= exp_to_poly[tmp + (x)]; \ + (c) ^= exp_to_poly[tmp + (y)]; \ + (d) ^= exp_to_poly[tmp + (z)]; \ + } + +/* Macros to calculate the key-dependent S-boxes for a 128-bit key using + * the S vector from CALC_S. CALC_SB_2 computes a single entry in all + * four S-boxes, where i is the index of the entry to compute, and a and b + * are the index numbers preprocessed through the q0 and q1 tables + * respectively. CALC_SB is simply a convenience to make the code shorter; + * it calls CALC_SB_2 four times with consecutive indices from i to i+3, + * using the remaining parameters two by two. */ + +#define CALC_SB_2(i, a, b) \ + ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \ + ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \ + ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \ + ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh] + +#define CALC_SB(i, a, b, c, d, e, f, g, h) \ + CALC_SB_2 (i, a, b); CALC_SB_2 ((i)+1, c, d); \ + CALC_SB_2 ((i)+2, e, f); CALC_SB_2 ((i)+3, g, h) + +/* Macros exactly like CALC_SB and CALC_SB_2, but for 256-bit keys. */ + +#define CALC_SB256_2(i, a, b) \ + ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \ + ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \ + ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \ + ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp]; + +#define CALC_SB256(i, a, b, c, d, e, f, g, h) \ + CALC_SB256_2 (i, a, b); CALC_SB256_2 ((i)+1, c, d); \ + CALC_SB256_2 ((i)+2, e, f); CALC_SB256_2 ((i)+3, g, h) + +/* Macros to calculate the whitening and round subkeys. CALC_K_2 computes the + * last two stages of the h() function for a given index (either 2i or 2i+1). + * a, b, c, and d are the four bytes going into the last two stages. For + * 128-bit keys, this is the entire h() function and a and c are the index + * preprocessed through q0 and q1 respectively; for longer keys they are the + * output of previous stages. j is the index of the first key byte to use. + * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2 + * twice, doing the Pseudo-Hadamard Transform, and doing the necessary + * rotations. Its parameters are: a, the array to write the results into, + * j, the index of the first output entry, k and l, the preprocessed indices + * for index 2i, and m and n, the preprocessed indices for index 2i+1. + * CALC_K256_2 expands CALC_K_2 to handle 256-bit keys, by doing two + * additional lookup-and-XOR stages. The parameters a and b are the index + * preprocessed through q0 and q1 respectively; j is the index of the first + * key byte to use. CALC_K256 is identical to CALC_K but for using the + * CALC_K256_2 macro instead of CALC_K_2. 
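+ * In CALC_K, the (y << 8) + (y >> 24) step is the byte rotation
+ * ROL(B, 8) from the paper, "x += y; y += x" is the PHT giving
+ * (A + B, A + 2B), and (y << 9) + (y >> 23) is the final ROL(..., 9),
+ * so each invocation yields the subkey pair (K[2i], K[2i+1]).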
*/ + +#define CALC_K_2(a, b, c, d, j) \ + mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \ + ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \ + ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \ + ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]] + +#define CALC_K(a, j, k, l, m, n) \ + x = CALC_K_2 (k, l, k, l, 0); \ + y = CALC_K_2 (m, n, m, n, 4); \ + y = (y << 8) + (y >> 24); \ + x += y; y += x; ctx->a[j] = x; \ + ctx->a[(j) + 1] = (y << 9) + (y >> 23) + +#define CALC_K256_2(a, b, j) \ + CALC_K_2 (q0[q1[b ^ key[(j) + 24]] ^ key[(j) + 16]], \ + q1[q1[a ^ key[(j) + 25]] ^ key[(j) + 17]], \ + q0[q0[a ^ key[(j) + 26]] ^ key[(j) + 18]], \ + q1[q0[b ^ key[(j) + 27]] ^ key[(j) + 19]], j) + +#define CALC_K256(a, j, k, l, m, n) \ + x = CALC_K256_2 (k, l, 0); \ + y = CALC_K256_2 (m, n, 4); \ + y = (y << 8) + (y >> 24); \ + x += y; y += x; ctx->a[j] = x; \ + ctx->a[(j) + 1] = (y << 9) + (y >> 23) + + + +/* Perform the key setup. Note that this works only with 128- and 256-bit + * keys, despite the API that looks like it might support other sizes. */ + +static gcry_err_code_t +do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen) +{ + int i, j, k; + + /* Temporaries for CALC_K. */ + u32 x, y; + + /* The S vector used to key the S-boxes, split up into individual bytes. + * 128-bit keys use only sa through sh; 256-bit use all of them. */ + byte sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0; + byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0; + + /* Temporary for CALC_S. */ + unsigned int tmp; + + /* Flags for self-test. */ + static int initialized = 0; + static const char *selftest_failed=0; + + /* Check key length. */ + if( ( ( keylen - 16 ) | 16 ) != 16 ) + return GPG_ERR_INV_KEYLEN; + + /* Do self-test if necessary. */ + if (!initialized) + { + initialized = 1; + selftest_failed = selftest (); + if( selftest_failed ) + log_error("%s\n", selftest_failed ); + } + if( selftest_failed ) + return GPG_ERR_SELFTEST_FAILED; + + /* Compute the first two words of the S vector. The magic numbers are + * the entries of the RS matrix, preprocessed through poly_to_exp. The + * numbers in the comments are the original (polynomial form) matrix + * entries. 
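+   * For instance, the first column 01 A4 02 A4 becomes 0x00 0x2D 0x01
+   * 0x2D because poly_to_exp[0x01] = 0x00 and poly_to_exp[0x02] = 0x01
+   * (x^0 = 1 and x^1 = x).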
*/ + CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ + CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ + CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ + CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ + CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ + CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ + CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ + CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ + CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ + CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ + CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ + CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ + CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ + CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ + CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ + CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ + + if (keylen == 32) /* 256-bit key */ + { + /* Calculate the remaining two words of the S vector */ + CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ + CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ + CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ + CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ + CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ + CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ + CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ + CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ + CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ + CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ + CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ + CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ + CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ + CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ + CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ + CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ + + /* Compute the S-boxes. */ + for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 ) + { + CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); + } + + /* Calculate whitening and round subkeys. */ + for (i = 0; i < 8; i += 2) + { + CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] ); + } + for (j = 0; j < 32; j += 2, i += 2) + { + CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] ); + } + } + else + { + /* Compute the S-boxes. */ + for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 ) + { + CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); + } + + /* Calculate whitening and round subkeys. 
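+       * The first loop fills w[0..7], the whitening subkeys K[0]..K[7];
+       * the second continues with i = 8..39 to fill k[0..31], the round
+       * subkeys the paper calls K[8]..K[39].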
*/
+      for (i = 0; i < 8; i += 2)
+        {
+          CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+        }
+      for (j = 0; j < 32; j += 2, i += 2)
+        {
+          CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+        }
+    }
+
+  return 0;
+}
+
+static gcry_err_code_t
+twofish_setkey (void *context, const byte *key, unsigned int keylen)
+{
+  TWOFISH_context *ctx = context;
+  unsigned int hwfeatures = _gcry_get_hw_features ();
+  int rc;
+
+  rc = do_twofish_setkey (ctx, key, keylen);
+
+#ifdef USE_AVX2
+  ctx->use_avx2 = 0;
+  if ((hwfeatures & HWF_INTEL_AVX2) && (hwfeatures & HWF_INTEL_FAST_VPGATHER))
+    {
+      ctx->use_avx2 = 1;
+    }
+#endif
+
+  (void)hwfeatures;
+
+  _gcry_burn_stack (23+6*sizeof(void*));
+  return rc;
+}
+
+
+#ifdef USE_AVX2
+/* Assembly implementations of Twofish using AVX2.  These process 16 blocks
+ * in parallel.
+ */
+extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_cbc_dec(const TWOFISH_context *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_cfb_dec(const TWOFISH_context *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_enc(const TWOFISH_context *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_dec(const TWOFISH_context *ctx,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_auth(const TWOFISH_context *ctx,
+                                        const unsigned char *abuf,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        const u64 Ls[16]) ASM_FUNC_ABI;
+#endif
+
+
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementations of Twofish. */
+extern void _gcry_twofish_amd64_encrypt_block(const TWOFISH_context *c,
+                                              byte *out, const byte *in);
+
+extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c,
+                                              byte *out, const byte *in);
+
+/* These assembly implementations process three blocks in parallel. */
+extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out,
+                                        const byte *in, byte *ctr);
+
+extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
+                                        const byte *in, byte *iv);
+
+extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
+                                        const byte *in, byte *iv);
+
+extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out,
+                                        const byte *in, byte *offset,
+                                        byte *checksum, const u64 Ls[3]);
+
+extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out,
+                                        const byte *in, byte *offset,
+                                        byte *checksum, const u64 Ls[3]);
+
+extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx,
+                                         const byte *abuf, byte *offset,
+                                         byte *checksum, const u64 Ls[3]);
+
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+              const void *arg3, const void *arg4)
+{
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. 
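+   * Only the SysV argument registers are loaded (RDI/RSI/RDX/RCX via the
+   * "+D", "+S", "+d", "+c" constraints) and the SysV caller-saved R8-R11
+   * are declared clobbered; skipping the XMM save/restore is safe here
+   * because the plain AMD64 Twofish assembly touches only general-purpose
+   * registers.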
*/ + asm volatile ("callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : + : "cc", "memory", "r8", "r9", "r10", "r11"); +} + +static inline void +call_sysv_fn5 (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4, const void *arg5) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. */ + asm volatile ("movq %[arg5], %%r8\n\t" + "callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : [arg5] "g" (arg5) + : "cc", "memory", "r8", "r9", "r10", "r11"); +} + +static inline void +call_sysv_fn6 (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4, const void *arg5, + const void *arg6) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. */ + asm volatile ("movq %[arg5], %%r8\n\t" + "movq %[arg6], %%r9\n\t" + "callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : [arg5] "g" (arg5), + [arg6] "g" (arg6) + : "cc", "memory", "r8", "r9", "r10", "r11"); +} +#endif + +static inline void +twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_encrypt_block, c, out, in, NULL); +#else + _gcry_twofish_amd64_encrypt_block(c, out, in); +#endif +} + +static inline void +twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_decrypt_block, c, out, in, NULL); +#else + _gcry_twofish_amd64_decrypt_block(c, out, in); +#endif +} + +static inline void +twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in, + byte *ctr) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_ctr_enc, c, out, in, ctr); +#else + _gcry_twofish_amd64_ctr_enc(c, out, in, ctr); +#endif +} + +static inline void +twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in, + byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_cbc_dec, c, out, in, iv); +#else + _gcry_twofish_amd64_cbc_dec(c, out, in, iv); +#endif +} + +static inline void +twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, + byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_cfb_dec, c, out, in, iv); +#else + _gcry_twofish_amd64_cfb_dec(c, out, in, iv); +#endif +} + +static inline void +twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in, + byte *offset, byte *checksum, const u64 Ls[3]) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn6(_gcry_twofish_amd64_ocb_enc, ctx, out, in, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls); +#endif +} + +static inline void +twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in, + byte *offset, byte *checksum, const u64 Ls[3]) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn6(_gcry_twofish_amd64_ocb_dec, ctx, out, in, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls); +#endif +} + +static inline void +twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf, + byte *offset, byte *checksum, const u64 Ls[3]) +{ +#ifdef 
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn5(_gcry_twofish_amd64_ocb_auth, ctx, abuf, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls); +#endif +} + +#elif defined(USE_ARM_ASM) + +/* Assembly implementations of Twofish. */ +extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c, + byte *out, const byte *in); + +extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c, + byte *out, const byte *in); + +#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ + +/* Macros to compute the g() function in the encryption and decryption + * rounds. G1 is the straight g() function; G2 includes the 8-bit + * rotation for the high 32-bit word. */ + +#define G1(a) \ + (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \ + ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24]) + +#define G2(b) \ + (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \ + ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24]) + +/* Encryption and decryption Feistel rounds. Each one calls the two g() + * macros, does the PHT, and performs the XOR and the appropriate bit + * rotations. The parameters are the round number (used to select subkeys), + * and the four 32-bit chunks of the text. */ + +#define ENCROUND(n, a, b, c, d) \ + x = G1 (a); y = G2 (b); \ + x += y; y += x + ctx->k[2 * (n) + 1]; \ + (c) ^= x + ctx->k[2 * (n)]; \ + (c) = ((c) >> 1) + ((c) << 31); \ + (d) = (((d) << 1)+((d) >> 31)) ^ y + +#define DECROUND(n, a, b, c, d) \ + x = G1 (a); y = G2 (b); \ + x += y; y += x; \ + (d) ^= y + ctx->k[2 * (n) + 1]; \ + (d) = ((d) >> 1) + ((d) << 31); \ + (c) = (((c) << 1)+((c) >> 31)); \ + (c) ^= (x + ctx->k[2 * (n)]) + +/* Encryption and decryption cycles; each one is simply two Feistel rounds + * with the 32-bit chunks re-ordered to simulate the "swap" */ + +#define ENCCYCLE(n) \ + ENCROUND (2 * (n), a, b, c, d); \ + ENCROUND (2 * (n) + 1, c, d, a, b) + +#define DECCYCLE(n) \ + DECROUND (2 * (n) + 1, c, d, a, b); \ + DECROUND (2 * (n), a, b, c, d) + +/* Macros to convert the input and output bytes into 32-bit words, + * and simultaneously perform the whitening step. INPACK packs word + * number n into the variable named by x, using whitening subkey number m. + * OUTUNPACK unpacks word number n from the variable named by x, using + * whitening subkey number m. */ + +#define INPACK(n, x, m) \ + x = buf_get_le32(in + (n) * 4); \ + x ^= ctx->w[m] + +#define OUTUNPACK(n, x, m) \ + x ^= ctx->w[m]; \ + buf_put_le32(out + (n) * 4, x) + +#endif /*!USE_AMD64_ASM*/ + + +/* Encrypt one block. in and out may be the same. */ + +#ifdef USE_AMD64_ASM + +static unsigned int +twofish_encrypt (void *context, byte *out, const byte *in) +{ + TWOFISH_context *ctx = context; + twofish_amd64_encrypt_block(ctx, out, in); + return /*burn_stack*/ (4*sizeof (void*)); +} + +#elif defined(USE_ARM_ASM) + +static unsigned int +twofish_encrypt (void *context, byte *out, const byte *in) +{ + TWOFISH_context *ctx = context; + _gcry_twofish_arm_encrypt_block(ctx, out, in); + return /*burn_stack*/ (4*sizeof (void*)); +} + +#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ + +static void +do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in) +{ + /* The four 32-bit chunks of the text. */ + u32 a, b, c, d; + + /* Temporaries used by the round function. */ + u32 x, y; + + /* Input whitening and packing. */ + INPACK (0, a, 0); + INPACK (1, b, 1); + INPACK (2, c, 2); + INPACK (3, d, 3); + + /* Encryption Feistel cycles. 
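+     Each ENCCYCLE is two Feistel rounds, so the eight cycles below
+     make up the cipher's full 16 rounds.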
*/
+  ENCCYCLE (0);
+  ENCCYCLE (1);
+  ENCCYCLE (2);
+  ENCCYCLE (3);
+  ENCCYCLE (4);
+  ENCCYCLE (5);
+  ENCCYCLE (6);
+  ENCCYCLE (7);
+
+  /* Output whitening and unpacking. */
+  OUTUNPACK (0, c, 4);
+  OUTUNPACK (1, d, 5);
+  OUTUNPACK (2, a, 6);
+  OUTUNPACK (3, b, 7);
+}
+
+static unsigned int
+twofish_encrypt (void *context, byte *out, const byte *in)
+{
+  TWOFISH_context *ctx = context;
+  do_twofish_encrypt (ctx, out, in);
+  return /*burn_stack*/ (24+3*sizeof (void*));
+}
+
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+
+/* Decrypt one block.  in and out may be the same. */
+
+#ifdef USE_AMD64_ASM
+
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
+{
+  TWOFISH_context *ctx = context;
+  twofish_amd64_decrypt_block(ctx, out, in);
+  return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#elif defined(USE_ARM_ASM)
+
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
+{
+  TWOFISH_context *ctx = context;
+  _gcry_twofish_arm_decrypt_block(ctx, out, in);
+  return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+static void
+do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
+{
+  /* The four 32-bit chunks of the text. */
+  u32 a, b, c, d;
+
+  /* Temporaries used by the round function. */
+  u32 x, y;
+
+  /* Input whitening and packing. */
+  INPACK (0, c, 4);
+  INPACK (1, d, 5);
+  INPACK (2, a, 6);
+  INPACK (3, b, 7);
+
+  /* Decryption Feistel cycles, undoing the encryption cycles in
+     reverse order. */
+  DECCYCLE (7);
+  DECCYCLE (6);
+  DECCYCLE (5);
+  DECCYCLE (4);
+  DECCYCLE (3);
+  DECCYCLE (2);
+  DECCYCLE (1);
+  DECCYCLE (0);
+
+  /* Output whitening and unpacking. */
+  OUTUNPACK (0, a, 0);
+  OUTUNPACK (1, b, 1);
+  OUTUNPACK (2, c, 2);
+  OUTUNPACK (3, d, 3);
+}
+
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
+{
+  TWOFISH_context *ctx = context;
+
+  do_twofish_decrypt (ctx, out, in);
+  return /*burn_stack*/ (24+3*sizeof (void*));
+}
+
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size TWOFISH_BLOCKSIZE. */
+void
+_gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+                      const void *inbuf_arg, size_t nblocks)
+{
+  TWOFISH_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[TWOFISH_BLOCKSIZE];
+  unsigned int burn, burn_stack_depth = 0;
+  int i;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_twofish_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+          nblocks -= 16;
+          outbuf += 16 * TWOFISH_BLOCKSIZE;
+          inbuf += 16 * TWOFISH_BLOCKSIZE;
+          did_use_avx2 = 1;
+        }
+
+      if (did_use_avx2)
+        {
+          /* twofish-avx2 assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+    }
+#endif
+
+#ifdef USE_AMD64_ASM
+  {
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+        nblocks -= 3;
+        outbuf += 3 * TWOFISH_BLOCKSIZE;
+        inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+        burn = 8 * sizeof(void*);
+        if (burn > burn_stack_depth)
+          burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+    /* TODO: use caching instead? */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. 
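+         The keystream block is E_K(ctr); CTR mode never needs the
+         decryption direction, which is why this one bulk path serves
+         both encryption and decryption.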
*/ + burn = twofish_encrypt(ctx, tmpbuf, ctr); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + + /* XOR the input with the encrypted counter and store in output. */ + buf_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE); + outbuf += TWOFISH_BLOCKSIZE; + inbuf += TWOFISH_BLOCKSIZE; + /* Increment the counter. */ + for (i = TWOFISH_BLOCKSIZE; i > 0; i--) + { + ctr[i-1]++; + if (ctr[i-1]) + break; + } + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CBC mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + TWOFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char savebuf[TWOFISH_BLOCKSIZE]; + unsigned int burn, burn_stack_depth = 0; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + _gcry_twofish_avx2_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 16; + outbuf += 16 * TWOFISH_BLOCKSIZE; + inbuf += 16 * TWOFISH_BLOCKSIZE; + did_use_avx2 = 1; + } + + if (did_use_avx2) + { + /* twofish-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + } +#endif + +#ifdef USE_AMD64_ASM + { + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 3; + outbuf += 3 * TWOFISH_BLOCKSIZE; + inbuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 9 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + burn = twofish_decrypt (ctx, savebuf, inbuf); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + + buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE); + inbuf += TWOFISH_BLOCKSIZE; + outbuf += TWOFISH_BLOCKSIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + TWOFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn, burn_stack_depth = 0; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + _gcry_twofish_avx2_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 16; + outbuf += 16 * TWOFISH_BLOCKSIZE; + inbuf += 16 * TWOFISH_BLOCKSIZE; + did_use_avx2 = 1; + } + + if (did_use_avx2) + { + /* twofish-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + } +#endif + +#ifdef USE_AMD64_ASM + { + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 3; + outbuf += 3 * TWOFISH_BLOCKSIZE; + inbuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 8 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + } + + /* Use generic code to handle smaller chunks... 
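+       (The fallback loop below computes P[i] = E_K(C[i-1]) ^ C[i]:
+       twofish_encrypt turns the previous ciphertext block held in IV
+       into keystream, and buf_xor_n_copy both emits the plaintext and
+       saves the current ciphertext as the next IV.)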
*/ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + burn = twofish_encrypt(ctx, iv, iv); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + + buf_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE); + outbuf += TWOFISH_BLOCKSIZE; + inbuf += TWOFISH_BLOCKSIZE; + } + + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +size_t +_gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ +#ifdef USE_AMD64_ASM + TWOFISH_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn, burn_stack_depth = 0; + u64 blkn = c->u_mode.ocb.data_nblocks; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + u64 Ls[16]; + unsigned int n = 16 - (blkn % 16); + u64 *l; + int i; + + if (nblocks >= 16) + { + for (i = 0; i < 16; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(15 + n) % 16]; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + blkn += 16; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); + + if (encrypt) + _gcry_twofish_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_twofish_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 16; + outbuf += 16 * TWOFISH_BLOCKSIZE; + inbuf += 16 * TWOFISH_BLOCKSIZE; + did_use_avx2 = 1; + } + } + + if (did_use_avx2) + { + /* twofish-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + } +#endif + + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + u64 Ls[3]; + + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1); + Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2); + Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3); + blkn += 3; + + if (encrypt) + twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, + Ls); + else + twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, + Ls); + + nblocks -= 3; + outbuf += 3 * TWOFISH_BLOCKSIZE; + inbuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 8 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + } + + /* Use generic code to handle smaller chunks... */ + } + + c->u_mode.ocb.data_nblocks = blkn; + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#else + (void)c; + (void)outbuf_arg; + (void)inbuf_arg; + (void)encrypt; +#endif + + return nblocks; +} + +/* Bulk authentication of complete blocks in OCB mode. 
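+   Per block, Offset_i = Offset_{i-1} xor L_{ntz(i)} and
+   Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i), as in the
+   assembly comments; aad_offset and aad_sum carry Offset and Sum
+   across calls.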
*/
+size_t
+_gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                        size_t nblocks)
+{
+#ifdef USE_AMD64_ASM
+  TWOFISH_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  unsigned int burn, burn_stack_depth = 0;
+  u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+        {
+          for (i = 0; i < 16; i += 8)
+            {
+              /* Use u64 to store pointers for x32 support (assembly function
+               * assumes 64-bit pointers). */
+              Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+              Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+              Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+              Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+              Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+              Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+              Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+            }
+
+          Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+          l = &Ls[(15 + n) % 16];
+
+          /* Process data in 16 block chunks. */
+          while (nblocks >= 16)
+            {
+              blkn += 16;
+              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+              _gcry_twofish_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                                          c->u_mode.ocb.aad_sum, Ls);
+
+              nblocks -= 16;
+              abuf += 16 * TWOFISH_BLOCKSIZE;
+              did_use_avx2 = 1;
+            }
+        }
+
+      if (did_use_avx2)
+        {
+          /* twofish-avx2 assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+  {
+    /* Use u64 to store pointers for x32 support (assembly function
+     * assumes 64-bit pointers). */
+    u64 Ls[3];
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+        Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+        Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
+        blkn += 3;
+
+        twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                               c->u_mode.ocb.aad_sum, Ls);
+
+        nblocks -= 3;
+        abuf += 3 * TWOFISH_BLOCKSIZE;
+
+        burn = 8 * sizeof(void*);
+        if (burn > burn_stack_depth)
+          burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+
+  c->u_mode.ocb.aad_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#else
+  (void)c;
+  (void)abuf_arg;
+#endif
+
+  return nblocks;
+}
+
+
+
+/* Run the self-tests for TWOFISH-CTR, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 16+1;
+  const int blocksize = TWOFISH_BLOCKSIZE;
+  const int context_size = sizeof(TWOFISH_context);
+
+  return _gcry_selftest_helper_ctr("TWOFISH", &twofish_setkey,
+           &twofish_encrypt, &_gcry_twofish_ctr_enc, nblocks, blocksize,
+           context_size);
+}
+
+/* Run the self-tests for TWOFISH-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 16+2;
+  const int blocksize = TWOFISH_BLOCKSIZE;
+  const int context_size = sizeof(TWOFISH_context);
+
+  return _gcry_selftest_helper_cbc("TWOFISH", &twofish_setkey,
+           &twofish_encrypt, &_gcry_twofish_cbc_dec, nblocks, blocksize,
+           context_size);
+}
+
+/* Run the self-tests for TWOFISH-CFB, tests bulk CFB decryption.
+   Returns NULL on success. 
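+   The 16+2 block count is chosen so that both the 16-block AVX2 path
+   and the smaller-chunk fallbacks get exercised by the helper.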
*/ +static const char * +selftest_cfb (void) +{ + const int nblocks = 16+2; + const int blocksize = TWOFISH_BLOCKSIZE; + const int context_size = sizeof(TWOFISH_context); + + return _gcry_selftest_helper_cfb("TWOFISH", &twofish_setkey, + &twofish_encrypt, &_gcry_twofish_cfb_dec, nblocks, blocksize, + context_size); +} + + +/* Test a single encryption and decryption with each key size. */ + +static const char* +selftest (void) +{ + TWOFISH_context ctx; /* Expanded key. */ + byte scratch[16]; /* Encryption/decryption result buffer. */ + const char *r; + + /* Test vectors for single encryption/decryption. Note that I am using + * the vectors from the Twofish paper's "known answer test", I=3 for + * 128-bit and I=4 for 256-bit, instead of the all-0 vectors from the + * "intermediate value test", because an all-0 key would trigger all the + * special cases in the RS matrix multiply, leaving the math untested. */ + static byte plaintext[16] = { + 0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E, + 0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19 + }; + static byte key[16] = { + 0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32, + 0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A + }; + static const byte ciphertext[16] = { + 0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85, + 0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3 + }; + static byte plaintext_256[16] = { + 0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F, + 0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6 + }; + static byte key_256[32] = { + 0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46, + 0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D, + 0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B, + 0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F + }; + static const byte ciphertext_256[16] = { + 0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97, + 0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA + }; + + twofish_setkey (&ctx, key, sizeof(key)); + twofish_encrypt (&ctx, scratch, plaintext); + if (memcmp (scratch, ciphertext, sizeof (ciphertext))) + return "Twofish-128 test encryption failed."; + twofish_decrypt (&ctx, scratch, scratch); + if (memcmp (scratch, plaintext, sizeof (plaintext))) + return "Twofish-128 test decryption failed."; + + twofish_setkey (&ctx, key_256, sizeof(key_256)); + twofish_encrypt (&ctx, scratch, plaintext_256); + if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) + return "Twofish-256 test encryption failed."; + twofish_decrypt (&ctx, scratch, scratch); + if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) + return "Twofish-256 test decryption failed."; + + if ((r = selftest_ctr()) != NULL) + return r; + if ((r = selftest_cbc()) != NULL) + return r; + if ((r = selftest_cfb()) != NULL) + return r; + + return NULL; +} + +/* More complete test program. This does 1000 encryptions and decryptions + * with each of 250 128-bit keys and 2000 encryptions and decryptions with + * each of 125 256-bit keys, using a feedback scheme similar to a Feistel + * cipher, so as to be sure of testing all the table entries pretty + * thoroughly. We keep changing the keys so as to get a more meaningful + * performance number, since the key setup is non-trivial for Twofish. */ + +#ifdef TEST + +#include <stdio.h> +#include <string.h> +#include <time.h> + +int +main() +{ + TWOFISH_context ctx; /* Expanded key. */ + int i, j; /* Loop counters. */ + + const char *encrypt_msg; /* Message to print regarding encryption test; + * the printf is done outside the loop to avoid + * stuffing up the timing. */ + clock_t timer; /* For computing elapsed time. 
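+				 * The feedback scheme described above the
+				 * function encrypts each buffer in place
+				 * while drawing later keys from buffers
+				 * just processed, e.g.
+				 *
+				 *   twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]));
+				 *   for (j = 0; j < 1000; j++)
+				 *     twofish_encrypt (&ctx, buffer[2], buffer[2]);
+				 *
+				 * so every table entry sees many distinct
+				 * keys; clock () brackets both loops to give
+				 * the elapsed-time figure printed at the end.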
*/ + + /* Test buffer. */ + byte buffer[4][16] = { + {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}, + {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78, + 0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0}, + {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, + 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10}, + {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10, + 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98} + }; + + /* Expected outputs for the million-operation test */ + static const byte test_encrypt[4][16] = { + {0xC8, 0x23, 0xB8, 0xB7, 0x6B, 0xFE, 0x91, 0x13, + 0x2F, 0xA7, 0x5E, 0xE6, 0x94, 0x77, 0x6F, 0x6B}, + {0x90, 0x36, 0xD8, 0x29, 0xD5, 0x96, 0xC2, 0x8E, + 0xE4, 0xFF, 0x76, 0xBC, 0xE5, 0x77, 0x88, 0x27}, + {0xB8, 0x78, 0x69, 0xAF, 0x42, 0x8B, 0x48, 0x64, + 0xF7, 0xE9, 0xF3, 0x9C, 0x42, 0x18, 0x7B, 0x73}, + {0x7A, 0x88, 0xFB, 0xEB, 0x90, 0xA4, 0xB4, 0xA8, + 0x43, 0xA3, 0x1D, 0xF1, 0x26, 0xC4, 0x53, 0x57} + }; + static const byte test_decrypt[4][16] = { + {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}, + {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78, + 0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0}, + {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, + 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10}, + {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10, + 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98} + }; + + /* Start the timer ticking. */ + timer = clock (); + + /* Encryption test. */ + for (i = 0; i < 125; i++) + { + twofish_setkey (&ctx, buffer[0], sizeof (buffer[0])); + for (j = 0; j < 1000; j++) + twofish_encrypt (&ctx, buffer[2], buffer[2]); + twofish_setkey (&ctx, buffer[1], sizeof (buffer[1])); + for (j = 0; j < 1000; j++) + twofish_encrypt (&ctx, buffer[3], buffer[3]); + twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2); + for (j = 0; j < 1000; j++) { + twofish_encrypt (&ctx, buffer[0], buffer[0]); + twofish_encrypt (&ctx, buffer[1], buffer[1]); + } + } + encrypt_msg = memcmp (buffer, test_encrypt, sizeof (test_encrypt)) ? + "encryption failure!\n" : "encryption OK!\n"; + + /* Decryption test. */ + for (i = 0; i < 125; i++) + { + twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2); + for (j = 0; j < 1000; j++) { + twofish_decrypt (&ctx, buffer[0], buffer[0]); + twofish_decrypt (&ctx, buffer[1], buffer[1]); + } + twofish_setkey (&ctx, buffer[1], sizeof (buffer[1])); + for (j = 0; j < 1000; j++) + twofish_decrypt (&ctx, buffer[3], buffer[3]); + twofish_setkey (&ctx, buffer[0], sizeof (buffer[0])); + for (j = 0; j < 1000; j++) + twofish_decrypt (&ctx, buffer[2], buffer[2]); + } + + /* Stop the timer, and print results. */ + timer = clock () - timer; + printf (encrypt_msg); + printf (memcmp (buffer, test_decrypt, sizeof (test_decrypt)) ? 
+ "decryption failure!\n" : "decryption OK!\n"); + printf ("elapsed time: %.1f s.\n", (float) timer / CLOCKS_PER_SEC); + + return 0; +} + +#endif /* TEST */ + + + +gcry_cipher_spec_t _gcry_cipher_spec_twofish = + { + GCRY_CIPHER_TWOFISH, {0, 0}, + "TWOFISH", NULL, NULL, 16, 256, sizeof (TWOFISH_context), + twofish_setkey, twofish_encrypt, twofish_decrypt + }; + +gcry_cipher_spec_t _gcry_cipher_spec_twofish128 = + { + GCRY_CIPHER_TWOFISH128, {0, 0}, + "TWOFISH128", NULL, NULL, 16, 128, sizeof (TWOFISH_context), + twofish_setkey, twofish_encrypt, twofish_decrypt + }; diff --git a/libotr/libgcrypt-1.8.7/cipher/whirlpool-sse2-amd64.S b/libotr/libgcrypt-1.8.7/cipher/whirlpool-sse2-amd64.S new file mode 100644 index 0000000..e98b831 --- /dev/null +++ b/libotr/libgcrypt-1.8.7/cipher/whirlpool-sse2-amd64.S @@ -0,0 +1,342 @@ +/* whirlpool-sse2-amd64.S - AMD64 assembly implementation of Whirlpool + * + * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef __x86_64 +#include <config.h> +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_WHIRLPOOL) + +#ifdef __PIC__ +# define RIP %rip +#else +# define RIP +#endif + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + +.text + +/* look-up table offsets on RTAB */ +#define RC (0) +#define C0 (RC + (8 * 10)) +#define C1 (C0 + (8 * 256)) +#define C2 (C1 + (8 * 256)) +#define C3 (C2 + (8 * 256)) +#define C4 (C3 + (8 * 256)) +#define C5 (C4 + (8 * 256)) +#define C6 (C5 + (8 * 256)) +#define C7 (C6 + (8 * 256)) + +/* stack variables */ +#define STACK_DATAP (0) +#define STACK_STATEP (STACK_DATAP + 8) +#define STACK_ROUNDS (STACK_STATEP + 8) +#define STACK_NBLKS (STACK_ROUNDS + 8) +#define STACK_RBP (STACK_NBLKS + 8) +#define STACK_RBX (STACK_RBP + 8) +#define STACK_R12 (STACK_RBX + 8) +#define STACK_R13 (STACK_R12 + 8) +#define STACK_R14 (STACK_R13 + 8) +#define STACK_R15 (STACK_R14 + 8) +#define STACK_MAX (STACK_R15 + 8) + +/* register macros */ +#define RTAB %rbp + +#define RI1 %rax +#define RI2 %rbx +#define RI3 %rcx +#define RI4 %rdx + +#define RI1d %eax +#define RI2d %ebx +#define RI3d %ecx +#define RI4d %edx + +#define RI1bl %al +#define RI2bl %bl +#define RI3bl %cl +#define RI4bl %dl + +#define RI1bh %ah +#define RI2bh %bh +#define RI3bh %ch +#define RI4bh %dh + +#define RB0 %r8 +#define RB1 %r9 +#define RB2 %r10 +#define RB3 %r11 +#define RB4 %r12 +#define RB5 %r13 +#define RB6 %r14 +#define RB7 %r15 + +#define RT0 %rsi +#define RT1 %rdi + +#define RT0d %esi +#define RT1d %edi + +#define XKEY0 %xmm0 +#define XKEY1 %xmm1 +#define XKEY2 %xmm2 +#define XKEY3 %xmm3 +#define XKEY4 %xmm4 +#define XKEY5 %xmm5 +#define XKEY6 %xmm6 +#define XKEY7 %xmm7 + +#define XSTATE0 %xmm8 +#define XSTATE1 %xmm9 +#define XSTATE2 %xmm10 +#define XSTATE3 %xmm11 +#define XSTATE4 %xmm12 +#define XSTATE5 %xmm13 +#define XSTATE6 %xmm14 +#define XSTATE7 %xmm15 + +/*********************************************************************** + * AMD64 assembly implementation of Whirlpool. + * - Using table-lookups + * - Store state in XMM registers + ***********************************************************************/ +#define __do_whirl(op, ri, \ + b0, b1, b2, b3, b4, b5, b6, b7, \ + load_ri, load_arg) \ + movzbl ri ## bl, RT0d; \ + movzbl ri ## bh, RT1d; \ + shrq $16, ri; \ + op ## q C7(RTAB,RT0,8), b7; \ + op ## q C6(RTAB,RT1,8), b6; \ + movzbl ri ## bl, RT0d; \ + movzbl ri ## bh, RT1d; \ + shrq $16, ri; \ + op ## q C5(RTAB,RT0,8), b5; \ + op ## q C4(RTAB,RT1,8), b4; \ + movzbl ri ## bl, RT0d; \ + movzbl ri ## bh, RT1d; \ + shrl $16, ri ## d; \ + op ## q C3(RTAB,RT0,8), b3; \ + op ## q C2(RTAB,RT1,8), b2; \ + movzbl ri ## bl, RT0d; \ + movzbl ri ## bh, RT1d; \ + load_ri( load_arg, ri); \ + op ## q C1(RTAB,RT0,8), b1; \ + op ## q C0(RTAB,RT1,8), b0; + +#define do_whirl(op, ri, rb_add, load_ri, load_arg) \ + __do_whirl(op, ##ri, rb_add, load_ri, load_arg) + +#define dummy(...) 
/*_*/ + +#define do_movq(src, dst) movq src, dst; + +#define RB_ADD0 RB0, RB1, RB2, RB3, RB4, RB5, RB6, RB7 +#define RB_ADD1 RB1, RB2, RB3, RB4, RB5, RB6, RB7, RB0 +#define RB_ADD2 RB2, RB3, RB4, RB5, RB6, RB7, RB0, RB1 +#define RB_ADD3 RB3, RB4, RB5, RB6, RB7, RB0, RB1, RB2 +#define RB_ADD4 RB4, RB5, RB6, RB7, RB0, RB1, RB2, RB3 +#define RB_ADD5 RB5, RB6, RB7, RB0, RB1, RB2, RB3, RB4 +#define RB_ADD6 RB6, RB7, RB0, RB1, RB2, RB3, RB4, RB5 +#define RB_ADD7 RB7, RB0, RB1, RB2, RB3, RB4, RB5, RB6 + +.align 8 +.globl _gcry_whirlpool_transform_amd64 +ELF(.type _gcry_whirlpool_transform_amd64,@function;) + +_gcry_whirlpool_transform_amd64: + /* input: + * %rdi: state + * %rsi: inblk + * %rdx: nblks + * %rcx: look-up tables + */ + cmp $0, %rdx; + je .Lskip; + + subq $STACK_MAX, %rsp; + movq %rbp, STACK_RBP(%rsp); + movq %rbx, STACK_RBX(%rsp); + movq %r12, STACK_R12(%rsp); + movq %r13, STACK_R13(%rsp); + movq %r14, STACK_R14(%rsp); + movq %r15, STACK_R15(%rsp); + + movq %rdx, STACK_NBLKS(%rsp); + movq %rdi, STACK_STATEP(%rsp); + movq %rsi, STACK_DATAP(%rsp); + + movq %rcx, RTAB; + + jmp .Lfirst_block; + +.align 8 +.Lblock_loop: + movq STACK_DATAP(%rsp), %rsi; + movq RI1, %rdi; + +.Lfirst_block: + /* load data_block */ + movq 0*8(%rsi), RB0; + movq 1*8(%rsi), RB1; + bswapq RB0; + movq 2*8(%rsi), RB2; + bswapq RB1; + movq 3*8(%rsi), RB3; + bswapq RB2; + movq 4*8(%rsi), RB4; + bswapq RB3; + movq 5*8(%rsi), RB5; + bswapq RB4; + movq RB0, XSTATE0; + movq 6*8(%rsi), RB6; + bswapq RB5; + movq RB1, XSTATE1; + movq 7*8(%rsi), RB7; + bswapq RB6; + movq RB2, XSTATE2; + bswapq RB7; + movq RB3, XSTATE3; + movq RB4, XSTATE4; + movq RB5, XSTATE5; + movq RB6, XSTATE6; + movq RB7, XSTATE7; + + /* load key */ + movq 0*8(%rdi), XKEY0; + movq 1*8(%rdi), XKEY1; + movq 2*8(%rdi), XKEY2; + movq 3*8(%rdi), XKEY3; + movq 4*8(%rdi), XKEY4; + movq 5*8(%rdi), XKEY5; + movq 6*8(%rdi), XKEY6; + movq 7*8(%rdi), XKEY7; + + movq XKEY0, RI1; + movq XKEY1, RI2; + movq XKEY2, RI3; + movq XKEY3, RI4; + + /* prepare and store state */ + pxor XKEY0, XSTATE0; + pxor XKEY1, XSTATE1; + pxor XKEY2, XSTATE2; + pxor XKEY3, XSTATE3; + pxor XKEY4, XSTATE4; + pxor XKEY5, XSTATE5; + pxor XKEY6, XSTATE6; + pxor XKEY7, XSTATE7; + + movq XSTATE0, 0*8(%rdi); + movq XSTATE1, 1*8(%rdi); + movq XSTATE2, 2*8(%rdi); + movq XSTATE3, 3*8(%rdi); + movq XSTATE4, 4*8(%rdi); + movq XSTATE5, 5*8(%rdi); + movq XSTATE6, 6*8(%rdi); + movq XSTATE7, 7*8(%rdi); + + addq $64, STACK_DATAP(%rsp); + movl $(0), STACK_ROUNDS(%rsp); +.align 8 +.Lround_loop: + do_whirl(mov, RI1 /*XKEY0*/, RB_ADD0, do_movq, XKEY4); + do_whirl(xor, RI2 /*XKEY1*/, RB_ADD1, do_movq, XKEY5); + do_whirl(xor, RI3 /*XKEY2*/, RB_ADD2, do_movq, XKEY6); + do_whirl(xor, RI4 /*XKEY3*/, RB_ADD3, do_movq, XKEY7); + do_whirl(xor, RI1 /*XKEY0*/, RB_ADD4, do_movq, XSTATE0); + do_whirl(xor, RI2 /*XKEY1*/, RB_ADD5, do_movq, XSTATE1); + do_whirl(xor, RI3 /*XKEY2*/, RB_ADD6, do_movq, XSTATE2); + do_whirl(xor, RI4 /*XKEY3*/, RB_ADD7, do_movq, XSTATE3); + + movl STACK_ROUNDS(%rsp), RT0d; + movq RB1, XKEY1; + addl $1, STACK_ROUNDS(%rsp); + movq RB2, XKEY2; + movq RB3, XKEY3; + xorq RC(RTAB,RT0,8), RB0; /* Add round constant */ + movq RB4, XKEY4; + movq RB5, XKEY5; + movq RB0, XKEY0; + movq RB6, XKEY6; + movq RB7, XKEY7; + + do_whirl(xor, RI1 /*XSTATE0*/, RB_ADD0, do_movq, XSTATE4); + do_whirl(xor, RI2 /*XSTATE1*/, RB_ADD1, do_movq, XSTATE5); + do_whirl(xor, RI3 /*XSTATE2*/, RB_ADD2, do_movq, XSTATE6); + do_whirl(xor, RI4 /*XSTATE3*/, RB_ADD3, do_movq, XSTATE7); + + cmpl $10, STACK_ROUNDS(%rsp); + je 
.Lis_last_round;
+
+	do_whirl(xor, RI1 /*XSTATE4*/, RB_ADD4, do_movq, XKEY0);
+	do_whirl(xor, RI2 /*XSTATE5*/, RB_ADD5, do_movq, XKEY1);
+	do_whirl(xor, RI3 /*XSTATE6*/, RB_ADD6, do_movq, XKEY2);
+	do_whirl(xor, RI4 /*XSTATE7*/, RB_ADD7, do_movq, XKEY3);
+	movq RB0, XSTATE0;
+	movq RB1, XSTATE1;
+	movq RB2, XSTATE2;
+	movq RB3, XSTATE3;
+	movq RB4, XSTATE4;
+	movq RB5, XSTATE5;
+	movq RB6, XSTATE6;
+	movq RB7, XSTATE7;
+
+	jmp .Lround_loop;
+.align 8
+.Lis_last_round:
+	do_whirl(xor, RI1 /*XSTATE4*/, RB_ADD4, dummy, _);
+	movq STACK_STATEP(%rsp), RI1;
+	do_whirl(xor, RI2 /*XSTATE5*/, RB_ADD5, dummy, _);
+	do_whirl(xor, RI3 /*XSTATE6*/, RB_ADD6, dummy, _);
+	do_whirl(xor, RI4 /*XSTATE7*/, RB_ADD7, dummy, _);
+
+	/* store state */
+	xorq RB0, 0*8(RI1);
+	xorq RB1, 1*8(RI1);
+	xorq RB2, 2*8(RI1);
+	xorq RB3, 3*8(RI1);
+	xorq RB4, 4*8(RI1);
+	xorq RB5, 5*8(RI1);
+	xorq RB6, 6*8(RI1);
+	xorq RB7, 7*8(RI1);
+
+	subq $1, STACK_NBLKS(%rsp);
+	jnz .Lblock_loop;
+
+	movq STACK_RBP(%rsp), %rbp;
+	movq STACK_RBX(%rsp), %rbx;
+	movq STACK_R12(%rsp), %r12;
+	movq STACK_R13(%rsp), %r13;
+	movq STACK_R14(%rsp), %r14;
+	movq STACK_R15(%rsp), %r15;
+	addq $STACK_MAX, %rsp;
+.Lskip:
+	movl $(STACK_MAX + 8), %eax;
+	ret;
+ELF(.size _gcry_whirlpool_transform_amd64,.-_gcry_whirlpool_transform_amd64;)
+
+#endif
+#endif
diff --git a/libotr/libgcrypt-1.8.7/cipher/whirlpool.c b/libotr/libgcrypt-1.8.7/cipher/whirlpool.c
new file mode 100644
index 0000000..8a06939
--- /dev/null
+++ b/libotr/libgcrypt-1.8.7/cipher/whirlpool.c
@@ -0,0 +1,1530 @@
+/* whirlpool.c - Whirlpool hashing algorithm
+ * Copyright (C) 2005 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This is an implementation of the Whirlpool hashing algorithm, which
+   has been developed by Vincent Rijmen and Paulo S. L. M. Barreto;
+   its homepage is located at:
+   http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html
+
+   The S-Boxes and the structure of the main transformation function,
+   which implements an optimized version of the algorithm, are taken
+   from the reference implementation available from
+   http://www.larc.usp.br/~pbarreto/whirlpool.zip
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+
+#include "bufhelp.h"
+#include "hash-common.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+
+
+/* Size of a whirlpool block (in bytes). */
+#define BLOCK_SIZE 64
+
+/* Number of rounds. */
+#define R 10
+
+
+
+/* Types.
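+
+   The 512-bit Whirlpool state is held as eight 64-bit words taken
+   from the byte stream in big-endian order; whirlpool_block_t below
+   is exactly that representation.  Converting a 64-byte buffer into
+   this form is a straight big-endian load of eight words, which is
+   all the buffer_to_block macro below expands to:
+
+     for (i = 0; i < 8; i++)
+       (block)[i] = buf_get_be64 ((buffer) + i * 8);
+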
*/
+typedef u64 whirlpool_block_t[BLOCK_SIZE / 8];
+
+typedef struct {
+  gcry_md_block_ctx_t bctx;
+  whirlpool_block_t hash_state;
+  int use_bugemu;
+  struct {
+    size_t count;
+    unsigned char length[32];
+  } bugemu;
+} whirlpool_context_t;
+
+
+
+/* Macros. */
+
+/* Convert the buffer BUFFER into a block BLOCK, using I as
+   counter. */
+#define buffer_to_block(buffer, block, i) \
+  for (i = 0; i < 8; i++) \
+    (block)[i] = buf_get_be64((buffer) + i * 8);
+
+/* Convert the block BLOCK into a buffer BUFFER, using I as
+   counter. */
+#define block_to_buffer(buffer, block, i) \
+  for (i = 0; i < 8; i++) \
+    buf_put_be64((buffer) + i * 8, (block)[i]);
+
+/* Copy the block BLOCK_SRC to BLOCK_DST, using I as counter. */
+#define block_copy(block_dst, block_src, i) \
+  for (i = 0; i < 8; i++) \
+    block_dst[i] = block_src[i];
+
+/* XOR the block BLOCK_SRC into BLOCK_DST, using I as counter. */
+#define block_xor(block_dst, block_src, i) \
+  for (i = 0; i < 8; i++) \
+    block_dst[i] ^= block_src[i];
+
+
+
+
+struct whirlpool_tables_s {
+  u64 RC[R];
+  u64 C[8][256];
+};
+
+static const struct whirlpool_tables_s tab =
+{
+/* Round constants. */
+  {
+    U64_C (0x1823c6e887b8014f),
+    U64_C (0x36a6d2f5796f9152),
+    U64_C (0x60bc9b8ea30c7b35),
+    U64_C (0x1de0d7c22e4bfe57),
+    U64_C (0x157737e59ff04ada),
+    U64_C (0x58c9290ab1a06b85),
+    U64_C (0xbd5d10f4cb3e0567),
+    U64_C (0xe427418ba77d95d8),
+    U64_C (0xfbee7c66dd17479e),
+    U64_C (0xca2dbf07ad5a8333),
+  },
+/* Main lookup boxes. */
+  { {
+    U64_C (0x18186018c07830d8), U64_C (0x23238c2305af4626),
+    U64_C (0xc6c63fc67ef991b8), U64_C (0xe8e887e8136fcdfb),
+    U64_C (0x878726874ca113cb), U64_C (0xb8b8dab8a9626d11),
+    U64_C (0x0101040108050209), U64_C (0x4f4f214f426e9e0d),
+    U64_C (0x3636d836adee6c9b), U64_C (0xa6a6a2a6590451ff),
+    U64_C (0xd2d26fd2debdb90c), U64_C (0xf5f5f3f5fb06f70e),
+    U64_C (0x7979f979ef80f296), U64_C (0x6f6fa16f5fcede30),
+    U64_C (0x91917e91fcef3f6d), U64_C (0x52525552aa07a4f8),
+    U64_C (0x60609d6027fdc047), U64_C (0xbcbccabc89766535),
+    U64_C (0x9b9b569baccd2b37), U64_C (0x8e8e028e048c018a),
+    U64_C (0xa3a3b6a371155bd2), U64_C (0x0c0c300c603c186c),
+    U64_C (0x7b7bf17bff8af684), U64_C (0x3535d435b5e16a80),
+    U64_C (0x1d1d741de8693af5), U64_C (0xe0e0a7e05347ddb3),
+    U64_C (0xd7d77bd7f6acb321), U64_C (0xc2c22fc25eed999c),
+    U64_C (0x2e2eb82e6d965c43), U64_C (0x4b4b314b627a9629),
+    U64_C (0xfefedffea321e15d), U64_C (0x575741578216aed5),
+    U64_C (0x15155415a8412abd), U64_C (0x7777c1779fb6eee8),
+    U64_C (0x3737dc37a5eb6e92), U64_C (0xe5e5b3e57b56d79e),
+    U64_C (0x9f9f469f8cd92313), U64_C (0xf0f0e7f0d317fd23),
+    U64_C (0x4a4a354a6a7f9420), U64_C (0xdada4fda9e95a944),
+    U64_C (0x58587d58fa25b0a2), U64_C (0xc9c903c906ca8fcf),
+    U64_C (0x2929a429558d527c), U64_C (0x0a0a280a5022145a),
+    U64_C (0xb1b1feb1e14f7f50), U64_C (0xa0a0baa0691a5dc9),
+    U64_C (0x6b6bb16b7fdad614), U64_C (0x85852e855cab17d9),
+    U64_C (0xbdbdcebd8173673c), U64_C (0x5d5d695dd234ba8f),
+    U64_C (0x1010401080502090), U64_C (0xf4f4f7f4f303f507),
+    U64_C (0xcbcb0bcb16c08bdd), U64_C (0x3e3ef83eedc67cd3),
+    U64_C (0x0505140528110a2d), U64_C (0x676781671fe6ce78),
+    U64_C (0xe4e4b7e47353d597), U64_C (0x27279c2725bb4e02),
+    U64_C (0x4141194132588273), U64_C (0x8b8b168b2c9d0ba7),
+    U64_C (0xa7a7a6a7510153f6), U64_C (0x7d7de97dcf94fab2),
+    U64_C (0x95956e95dcfb3749), U64_C (0xd8d847d88e9fad56),
+    U64_C (0xfbfbcbfb8b30eb70), U64_C (0xeeee9fee2371c1cd),
+    U64_C (0x7c7ced7cc791f8bb), U64_C (0x6666856617e3cc71),
+    U64_C (0xdddd53dda68ea77b), U64_C (0x17175c17b84b2eaf),
+    U64_C (0x4747014702468e45),
U64_C (0x9e9e429e84dc211a), + U64_C (0xcaca0fca1ec589d4), U64_C (0x2d2db42d75995a58), + U64_C (0xbfbfc6bf9179632e), U64_C (0x07071c07381b0e3f), + U64_C (0xadad8ead012347ac), U64_C (0x5a5a755aea2fb4b0), + U64_C (0x838336836cb51bef), U64_C (0x3333cc3385ff66b6), + U64_C (0x636391633ff2c65c), U64_C (0x02020802100a0412), + U64_C (0xaaaa92aa39384993), U64_C (0x7171d971afa8e2de), + U64_C (0xc8c807c80ecf8dc6), U64_C (0x19196419c87d32d1), + U64_C (0x494939497270923b), U64_C (0xd9d943d9869aaf5f), + U64_C (0xf2f2eff2c31df931), U64_C (0xe3e3abe34b48dba8), + U64_C (0x5b5b715be22ab6b9), U64_C (0x88881a8834920dbc), + U64_C (0x9a9a529aa4c8293e), U64_C (0x262698262dbe4c0b), + U64_C (0x3232c8328dfa64bf), U64_C (0xb0b0fab0e94a7d59), + U64_C (0xe9e983e91b6acff2), U64_C (0x0f0f3c0f78331e77), + U64_C (0xd5d573d5e6a6b733), U64_C (0x80803a8074ba1df4), + U64_C (0xbebec2be997c6127), U64_C (0xcdcd13cd26de87eb), + U64_C (0x3434d034bde46889), U64_C (0x48483d487a759032), + U64_C (0xffffdbffab24e354), U64_C (0x7a7af57af78ff48d), + U64_C (0x90907a90f4ea3d64), U64_C (0x5f5f615fc23ebe9d), + U64_C (0x202080201da0403d), U64_C (0x6868bd6867d5d00f), + U64_C (0x1a1a681ad07234ca), U64_C (0xaeae82ae192c41b7), + U64_C (0xb4b4eab4c95e757d), U64_C (0x54544d549a19a8ce), + U64_C (0x93937693ece53b7f), U64_C (0x222288220daa442f), + U64_C (0x64648d6407e9c863), U64_C (0xf1f1e3f1db12ff2a), + U64_C (0x7373d173bfa2e6cc), U64_C (0x12124812905a2482), + U64_C (0x40401d403a5d807a), U64_C (0x0808200840281048), + U64_C (0xc3c32bc356e89b95), U64_C (0xecec97ec337bc5df), + U64_C (0xdbdb4bdb9690ab4d), U64_C (0xa1a1bea1611f5fc0), + U64_C (0x8d8d0e8d1c830791), U64_C (0x3d3df43df5c97ac8), + U64_C (0x97976697ccf1335b), U64_C (0x0000000000000000), + U64_C (0xcfcf1bcf36d483f9), U64_C (0x2b2bac2b4587566e), + U64_C (0x7676c57697b3ece1), U64_C (0x8282328264b019e6), + U64_C (0xd6d67fd6fea9b128), U64_C (0x1b1b6c1bd87736c3), + U64_C (0xb5b5eeb5c15b7774), U64_C (0xafaf86af112943be), + U64_C (0x6a6ab56a77dfd41d), U64_C (0x50505d50ba0da0ea), + U64_C (0x45450945124c8a57), U64_C (0xf3f3ebf3cb18fb38), + U64_C (0x3030c0309df060ad), U64_C (0xefef9bef2b74c3c4), + U64_C (0x3f3ffc3fe5c37eda), U64_C (0x55554955921caac7), + U64_C (0xa2a2b2a2791059db), U64_C (0xeaea8fea0365c9e9), + U64_C (0x656589650fecca6a), U64_C (0xbabad2bab9686903), + U64_C (0x2f2fbc2f65935e4a), U64_C (0xc0c027c04ee79d8e), + U64_C (0xdede5fdebe81a160), U64_C (0x1c1c701ce06c38fc), + U64_C (0xfdfdd3fdbb2ee746), U64_C (0x4d4d294d52649a1f), + U64_C (0x92927292e4e03976), U64_C (0x7575c9758fbceafa), + U64_C (0x06061806301e0c36), U64_C (0x8a8a128a249809ae), + U64_C (0xb2b2f2b2f940794b), U64_C (0xe6e6bfe66359d185), + U64_C (0x0e0e380e70361c7e), U64_C (0x1f1f7c1ff8633ee7), + U64_C (0x6262956237f7c455), U64_C (0xd4d477d4eea3b53a), + U64_C (0xa8a89aa829324d81), U64_C (0x96966296c4f43152), + U64_C (0xf9f9c3f99b3aef62), U64_C (0xc5c533c566f697a3), + U64_C (0x2525942535b14a10), U64_C (0x59597959f220b2ab), + U64_C (0x84842a8454ae15d0), U64_C (0x7272d572b7a7e4c5), + U64_C (0x3939e439d5dd72ec), U64_C (0x4c4c2d4c5a619816), + U64_C (0x5e5e655eca3bbc94), U64_C (0x7878fd78e785f09f), + U64_C (0x3838e038ddd870e5), U64_C (0x8c8c0a8c14860598), + U64_C (0xd1d163d1c6b2bf17), U64_C (0xa5a5aea5410b57e4), + U64_C (0xe2e2afe2434dd9a1), U64_C (0x616199612ff8c24e), + U64_C (0xb3b3f6b3f1457b42), U64_C (0x2121842115a54234), + U64_C (0x9c9c4a9c94d62508), U64_C (0x1e1e781ef0663cee), + U64_C (0x4343114322528661), U64_C (0xc7c73bc776fc93b1), + U64_C (0xfcfcd7fcb32be54f), U64_C (0x0404100420140824), + U64_C (0x51515951b208a2e3), U64_C 
(0x99995e99bcc72f25), + U64_C (0x6d6da96d4fc4da22), U64_C (0x0d0d340d68391a65), + U64_C (0xfafacffa8335e979), U64_C (0xdfdf5bdfb684a369), + U64_C (0x7e7ee57ed79bfca9), U64_C (0x242490243db44819), + U64_C (0x3b3bec3bc5d776fe), U64_C (0xabab96ab313d4b9a), + U64_C (0xcece1fce3ed181f0), U64_C (0x1111441188552299), + U64_C (0x8f8f068f0c890383), U64_C (0x4e4e254e4a6b9c04), + U64_C (0xb7b7e6b7d1517366), U64_C (0xebeb8beb0b60cbe0), + U64_C (0x3c3cf03cfdcc78c1), U64_C (0x81813e817cbf1ffd), + U64_C (0x94946a94d4fe3540), U64_C (0xf7f7fbf7eb0cf31c), + U64_C (0xb9b9deb9a1676f18), U64_C (0x13134c13985f268b), + U64_C (0x2c2cb02c7d9c5851), U64_C (0xd3d36bd3d6b8bb05), + U64_C (0xe7e7bbe76b5cd38c), U64_C (0x6e6ea56e57cbdc39), + U64_C (0xc4c437c46ef395aa), U64_C (0x03030c03180f061b), + U64_C (0x565645568a13acdc), U64_C (0x44440d441a49885e), + U64_C (0x7f7fe17fdf9efea0), U64_C (0xa9a99ea921374f88), + U64_C (0x2a2aa82a4d825467), U64_C (0xbbbbd6bbb16d6b0a), + U64_C (0xc1c123c146e29f87), U64_C (0x53535153a202a6f1), + U64_C (0xdcdc57dcae8ba572), U64_C (0x0b0b2c0b58271653), + U64_C (0x9d9d4e9d9cd32701), U64_C (0x6c6cad6c47c1d82b), + U64_C (0x3131c43195f562a4), U64_C (0x7474cd7487b9e8f3), + U64_C (0xf6f6fff6e309f115), U64_C (0x464605460a438c4c), + U64_C (0xacac8aac092645a5), U64_C (0x89891e893c970fb5), + U64_C (0x14145014a04428b4), U64_C (0xe1e1a3e15b42dfba), + U64_C (0x16165816b04e2ca6), U64_C (0x3a3ae83acdd274f7), + U64_C (0x6969b9696fd0d206), U64_C (0x09092409482d1241), + U64_C (0x7070dd70a7ade0d7), U64_C (0xb6b6e2b6d954716f), + U64_C (0xd0d067d0ceb7bd1e), U64_C (0xeded93ed3b7ec7d6), + U64_C (0xcccc17cc2edb85e2), U64_C (0x424215422a578468), + U64_C (0x98985a98b4c22d2c), U64_C (0xa4a4aaa4490e55ed), + U64_C (0x2828a0285d885075), U64_C (0x5c5c6d5cda31b886), + U64_C (0xf8f8c7f8933fed6b), U64_C (0x8686228644a411c2), + }, { + U64_C (0xd818186018c07830), U64_C (0x2623238c2305af46), + U64_C (0xb8c6c63fc67ef991), U64_C (0xfbe8e887e8136fcd), + U64_C (0xcb878726874ca113), U64_C (0x11b8b8dab8a9626d), + U64_C (0x0901010401080502), U64_C (0x0d4f4f214f426e9e), + U64_C (0x9b3636d836adee6c), U64_C (0xffa6a6a2a6590451), + U64_C (0x0cd2d26fd2debdb9), U64_C (0x0ef5f5f3f5fb06f7), + U64_C (0x967979f979ef80f2), U64_C (0x306f6fa16f5fcede), + U64_C (0x6d91917e91fcef3f), U64_C (0xf852525552aa07a4), + U64_C (0x4760609d6027fdc0), U64_C (0x35bcbccabc897665), + U64_C (0x379b9b569baccd2b), U64_C (0x8a8e8e028e048c01), + U64_C (0xd2a3a3b6a371155b), U64_C (0x6c0c0c300c603c18), + U64_C (0x847b7bf17bff8af6), U64_C (0x803535d435b5e16a), + U64_C (0xf51d1d741de8693a), U64_C (0xb3e0e0a7e05347dd), + U64_C (0x21d7d77bd7f6acb3), U64_C (0x9cc2c22fc25eed99), + U64_C (0x432e2eb82e6d965c), U64_C (0x294b4b314b627a96), + U64_C (0x5dfefedffea321e1), U64_C (0xd5575741578216ae), + U64_C (0xbd15155415a8412a), U64_C (0xe87777c1779fb6ee), + U64_C (0x923737dc37a5eb6e), U64_C (0x9ee5e5b3e57b56d7), + U64_C (0x139f9f469f8cd923), U64_C (0x23f0f0e7f0d317fd), + U64_C (0x204a4a354a6a7f94), U64_C (0x44dada4fda9e95a9), + U64_C (0xa258587d58fa25b0), U64_C (0xcfc9c903c906ca8f), + U64_C (0x7c2929a429558d52), U64_C (0x5a0a0a280a502214), + U64_C (0x50b1b1feb1e14f7f), U64_C (0xc9a0a0baa0691a5d), + U64_C (0x146b6bb16b7fdad6), U64_C (0xd985852e855cab17), + U64_C (0x3cbdbdcebd817367), U64_C (0x8f5d5d695dd234ba), + U64_C (0x9010104010805020), U64_C (0x07f4f4f7f4f303f5), + U64_C (0xddcbcb0bcb16c08b), U64_C (0xd33e3ef83eedc67c), + U64_C (0x2d0505140528110a), U64_C (0x78676781671fe6ce), + U64_C (0x97e4e4b7e47353d5), U64_C (0x0227279c2725bb4e), + U64_C (0x7341411941325882), U64_C 
(0xa78b8b168b2c9d0b), + U64_C (0xf6a7a7a6a7510153), U64_C (0xb27d7de97dcf94fa), + U64_C (0x4995956e95dcfb37), U64_C (0x56d8d847d88e9fad), + U64_C (0x70fbfbcbfb8b30eb), U64_C (0xcdeeee9fee2371c1), + U64_C (0xbb7c7ced7cc791f8), U64_C (0x716666856617e3cc), + U64_C (0x7bdddd53dda68ea7), U64_C (0xaf17175c17b84b2e), + U64_C (0x454747014702468e), U64_C (0x1a9e9e429e84dc21), + U64_C (0xd4caca0fca1ec589), U64_C (0x582d2db42d75995a), + U64_C (0x2ebfbfc6bf917963), U64_C (0x3f07071c07381b0e), + U64_C (0xacadad8ead012347), U64_C (0xb05a5a755aea2fb4), + U64_C (0xef838336836cb51b), U64_C (0xb63333cc3385ff66), + U64_C (0x5c636391633ff2c6), U64_C (0x1202020802100a04), + U64_C (0x93aaaa92aa393849), U64_C (0xde7171d971afa8e2), + U64_C (0xc6c8c807c80ecf8d), U64_C (0xd119196419c87d32), + U64_C (0x3b49493949727092), U64_C (0x5fd9d943d9869aaf), + U64_C (0x31f2f2eff2c31df9), U64_C (0xa8e3e3abe34b48db), + U64_C (0xb95b5b715be22ab6), U64_C (0xbc88881a8834920d), + U64_C (0x3e9a9a529aa4c829), U64_C (0x0b262698262dbe4c), + U64_C (0xbf3232c8328dfa64), U64_C (0x59b0b0fab0e94a7d), + U64_C (0xf2e9e983e91b6acf), U64_C (0x770f0f3c0f78331e), + U64_C (0x33d5d573d5e6a6b7), U64_C (0xf480803a8074ba1d), + U64_C (0x27bebec2be997c61), U64_C (0xebcdcd13cd26de87), + U64_C (0x893434d034bde468), U64_C (0x3248483d487a7590), + U64_C (0x54ffffdbffab24e3), U64_C (0x8d7a7af57af78ff4), + U64_C (0x6490907a90f4ea3d), U64_C (0x9d5f5f615fc23ebe), + U64_C (0x3d202080201da040), U64_C (0x0f6868bd6867d5d0), + U64_C (0xca1a1a681ad07234), U64_C (0xb7aeae82ae192c41), + U64_C (0x7db4b4eab4c95e75), U64_C (0xce54544d549a19a8), + U64_C (0x7f93937693ece53b), U64_C (0x2f222288220daa44), + U64_C (0x6364648d6407e9c8), U64_C (0x2af1f1e3f1db12ff), + U64_C (0xcc7373d173bfa2e6), U64_C (0x8212124812905a24), + U64_C (0x7a40401d403a5d80), U64_C (0x4808082008402810), + U64_C (0x95c3c32bc356e89b), U64_C (0xdfecec97ec337bc5), + U64_C (0x4ddbdb4bdb9690ab), U64_C (0xc0a1a1bea1611f5f), + U64_C (0x918d8d0e8d1c8307), U64_C (0xc83d3df43df5c97a), + U64_C (0x5b97976697ccf133), U64_C (0x0000000000000000), + U64_C (0xf9cfcf1bcf36d483), U64_C (0x6e2b2bac2b458756), + U64_C (0xe17676c57697b3ec), U64_C (0xe68282328264b019), + U64_C (0x28d6d67fd6fea9b1), U64_C (0xc31b1b6c1bd87736), + U64_C (0x74b5b5eeb5c15b77), U64_C (0xbeafaf86af112943), + U64_C (0x1d6a6ab56a77dfd4), U64_C (0xea50505d50ba0da0), + U64_C (0x5745450945124c8a), U64_C (0x38f3f3ebf3cb18fb), + U64_C (0xad3030c0309df060), U64_C (0xc4efef9bef2b74c3), + U64_C (0xda3f3ffc3fe5c37e), U64_C (0xc755554955921caa), + U64_C (0xdba2a2b2a2791059), U64_C (0xe9eaea8fea0365c9), + U64_C (0x6a656589650fecca), U64_C (0x03babad2bab96869), + U64_C (0x4a2f2fbc2f65935e), U64_C (0x8ec0c027c04ee79d), + U64_C (0x60dede5fdebe81a1), U64_C (0xfc1c1c701ce06c38), + U64_C (0x46fdfdd3fdbb2ee7), U64_C (0x1f4d4d294d52649a), + U64_C (0x7692927292e4e039), U64_C (0xfa7575c9758fbcea), + U64_C (0x3606061806301e0c), U64_C (0xae8a8a128a249809), + U64_C (0x4bb2b2f2b2f94079), U64_C (0x85e6e6bfe66359d1), + U64_C (0x7e0e0e380e70361c), U64_C (0xe71f1f7c1ff8633e), + U64_C (0x556262956237f7c4), U64_C (0x3ad4d477d4eea3b5), + U64_C (0x81a8a89aa829324d), U64_C (0x5296966296c4f431), + U64_C (0x62f9f9c3f99b3aef), U64_C (0xa3c5c533c566f697), + U64_C (0x102525942535b14a), U64_C (0xab59597959f220b2), + U64_C (0xd084842a8454ae15), U64_C (0xc57272d572b7a7e4), + U64_C (0xec3939e439d5dd72), U64_C (0x164c4c2d4c5a6198), + U64_C (0x945e5e655eca3bbc), U64_C (0x9f7878fd78e785f0), + U64_C (0xe53838e038ddd870), U64_C (0x988c8c0a8c148605), + U64_C (0x17d1d163d1c6b2bf), U64_C 
(0xe4a5a5aea5410b57), + U64_C (0xa1e2e2afe2434dd9), U64_C (0x4e616199612ff8c2), + U64_C (0x42b3b3f6b3f1457b), U64_C (0x342121842115a542), + U64_C (0x089c9c4a9c94d625), U64_C (0xee1e1e781ef0663c), + U64_C (0x6143431143225286), U64_C (0xb1c7c73bc776fc93), + U64_C (0x4ffcfcd7fcb32be5), U64_C (0x2404041004201408), + U64_C (0xe351515951b208a2), U64_C (0x2599995e99bcc72f), + U64_C (0x226d6da96d4fc4da), U64_C (0x650d0d340d68391a), + U64_C (0x79fafacffa8335e9), U64_C (0x69dfdf5bdfb684a3), + U64_C (0xa97e7ee57ed79bfc), U64_C (0x19242490243db448), + U64_C (0xfe3b3bec3bc5d776), U64_C (0x9aabab96ab313d4b), + U64_C (0xf0cece1fce3ed181), U64_C (0x9911114411885522), + U64_C (0x838f8f068f0c8903), U64_C (0x044e4e254e4a6b9c), + U64_C (0x66b7b7e6b7d15173), U64_C (0xe0ebeb8beb0b60cb), + U64_C (0xc13c3cf03cfdcc78), U64_C (0xfd81813e817cbf1f), + U64_C (0x4094946a94d4fe35), U64_C (0x1cf7f7fbf7eb0cf3), + U64_C (0x18b9b9deb9a1676f), U64_C (0x8b13134c13985f26), + U64_C (0x512c2cb02c7d9c58), U64_C (0x05d3d36bd3d6b8bb), + U64_C (0x8ce7e7bbe76b5cd3), U64_C (0x396e6ea56e57cbdc), + U64_C (0xaac4c437c46ef395), U64_C (0x1b03030c03180f06), + U64_C (0xdc565645568a13ac), U64_C (0x5e44440d441a4988), + U64_C (0xa07f7fe17fdf9efe), U64_C (0x88a9a99ea921374f), + U64_C (0x672a2aa82a4d8254), U64_C (0x0abbbbd6bbb16d6b), + U64_C (0x87c1c123c146e29f), U64_C (0xf153535153a202a6), + U64_C (0x72dcdc57dcae8ba5), U64_C (0x530b0b2c0b582716), + U64_C (0x019d9d4e9d9cd327), U64_C (0x2b6c6cad6c47c1d8), + U64_C (0xa43131c43195f562), U64_C (0xf37474cd7487b9e8), + U64_C (0x15f6f6fff6e309f1), U64_C (0x4c464605460a438c), + U64_C (0xa5acac8aac092645), U64_C (0xb589891e893c970f), + U64_C (0xb414145014a04428), U64_C (0xbae1e1a3e15b42df), + U64_C (0xa616165816b04e2c), U64_C (0xf73a3ae83acdd274), + U64_C (0x066969b9696fd0d2), U64_C (0x4109092409482d12), + U64_C (0xd77070dd70a7ade0), U64_C (0x6fb6b6e2b6d95471), + U64_C (0x1ed0d067d0ceb7bd), U64_C (0xd6eded93ed3b7ec7), + U64_C (0xe2cccc17cc2edb85), U64_C (0x68424215422a5784), + U64_C (0x2c98985a98b4c22d), U64_C (0xeda4a4aaa4490e55), + U64_C (0x752828a0285d8850), U64_C (0x865c5c6d5cda31b8), + U64_C (0x6bf8f8c7f8933fed), U64_C (0xc28686228644a411), + }, { + U64_C (0x30d818186018c078), U64_C (0x462623238c2305af), + U64_C (0x91b8c6c63fc67ef9), U64_C (0xcdfbe8e887e8136f), + U64_C (0x13cb878726874ca1), U64_C (0x6d11b8b8dab8a962), + U64_C (0x0209010104010805), U64_C (0x9e0d4f4f214f426e), + U64_C (0x6c9b3636d836adee), U64_C (0x51ffa6a6a2a65904), + U64_C (0xb90cd2d26fd2debd), U64_C (0xf70ef5f5f3f5fb06), + U64_C (0xf2967979f979ef80), U64_C (0xde306f6fa16f5fce), + U64_C (0x3f6d91917e91fcef), U64_C (0xa4f852525552aa07), + U64_C (0xc04760609d6027fd), U64_C (0x6535bcbccabc8976), + U64_C (0x2b379b9b569baccd), U64_C (0x018a8e8e028e048c), + U64_C (0x5bd2a3a3b6a37115), U64_C (0x186c0c0c300c603c), + U64_C (0xf6847b7bf17bff8a), U64_C (0x6a803535d435b5e1), + U64_C (0x3af51d1d741de869), U64_C (0xddb3e0e0a7e05347), + U64_C (0xb321d7d77bd7f6ac), U64_C (0x999cc2c22fc25eed), + U64_C (0x5c432e2eb82e6d96), U64_C (0x96294b4b314b627a), + U64_C (0xe15dfefedffea321), U64_C (0xaed5575741578216), + U64_C (0x2abd15155415a841), U64_C (0xeee87777c1779fb6), + U64_C (0x6e923737dc37a5eb), U64_C (0xd79ee5e5b3e57b56), + U64_C (0x23139f9f469f8cd9), U64_C (0xfd23f0f0e7f0d317), + U64_C (0x94204a4a354a6a7f), U64_C (0xa944dada4fda9e95), + U64_C (0xb0a258587d58fa25), U64_C (0x8fcfc9c903c906ca), + U64_C (0x527c2929a429558d), U64_C (0x145a0a0a280a5022), + U64_C (0x7f50b1b1feb1e14f), U64_C (0x5dc9a0a0baa0691a), + U64_C (0xd6146b6bb16b7fda), U64_C 
(0x17d985852e855cab), + U64_C (0x673cbdbdcebd8173), U64_C (0xba8f5d5d695dd234), + U64_C (0x2090101040108050), U64_C (0xf507f4f4f7f4f303), + U64_C (0x8bddcbcb0bcb16c0), U64_C (0x7cd33e3ef83eedc6), + U64_C (0x0a2d050514052811), U64_C (0xce78676781671fe6), + U64_C (0xd597e4e4b7e47353), U64_C (0x4e0227279c2725bb), + U64_C (0x8273414119413258), U64_C (0x0ba78b8b168b2c9d), + U64_C (0x53f6a7a7a6a75101), U64_C (0xfab27d7de97dcf94), + U64_C (0x374995956e95dcfb), U64_C (0xad56d8d847d88e9f), + U64_C (0xeb70fbfbcbfb8b30), U64_C (0xc1cdeeee9fee2371), + U64_C (0xf8bb7c7ced7cc791), U64_C (0xcc716666856617e3), + U64_C (0xa77bdddd53dda68e), U64_C (0x2eaf17175c17b84b), + U64_C (0x8e45474701470246), U64_C (0x211a9e9e429e84dc), + U64_C (0x89d4caca0fca1ec5), U64_C (0x5a582d2db42d7599), + U64_C (0x632ebfbfc6bf9179), U64_C (0x0e3f07071c07381b), + U64_C (0x47acadad8ead0123), U64_C (0xb4b05a5a755aea2f), + U64_C (0x1bef838336836cb5), U64_C (0x66b63333cc3385ff), + U64_C (0xc65c636391633ff2), U64_C (0x041202020802100a), + U64_C (0x4993aaaa92aa3938), U64_C (0xe2de7171d971afa8), + U64_C (0x8dc6c8c807c80ecf), U64_C (0x32d119196419c87d), + U64_C (0x923b494939497270), U64_C (0xaf5fd9d943d9869a), + U64_C (0xf931f2f2eff2c31d), U64_C (0xdba8e3e3abe34b48), + U64_C (0xb6b95b5b715be22a), U64_C (0x0dbc88881a883492), + U64_C (0x293e9a9a529aa4c8), U64_C (0x4c0b262698262dbe), + U64_C (0x64bf3232c8328dfa), U64_C (0x7d59b0b0fab0e94a), + U64_C (0xcff2e9e983e91b6a), U64_C (0x1e770f0f3c0f7833), + U64_C (0xb733d5d573d5e6a6), U64_C (0x1df480803a8074ba), + U64_C (0x6127bebec2be997c), U64_C (0x87ebcdcd13cd26de), + U64_C (0x68893434d034bde4), U64_C (0x903248483d487a75), + U64_C (0xe354ffffdbffab24), U64_C (0xf48d7a7af57af78f), + U64_C (0x3d6490907a90f4ea), U64_C (0xbe9d5f5f615fc23e), + U64_C (0x403d202080201da0), U64_C (0xd00f6868bd6867d5), + U64_C (0x34ca1a1a681ad072), U64_C (0x41b7aeae82ae192c), + U64_C (0x757db4b4eab4c95e), U64_C (0xa8ce54544d549a19), + U64_C (0x3b7f93937693ece5), U64_C (0x442f222288220daa), + U64_C (0xc86364648d6407e9), U64_C (0xff2af1f1e3f1db12), + U64_C (0xe6cc7373d173bfa2), U64_C (0x248212124812905a), + U64_C (0x807a40401d403a5d), U64_C (0x1048080820084028), + U64_C (0x9b95c3c32bc356e8), U64_C (0xc5dfecec97ec337b), + U64_C (0xab4ddbdb4bdb9690), U64_C (0x5fc0a1a1bea1611f), + U64_C (0x07918d8d0e8d1c83), U64_C (0x7ac83d3df43df5c9), + U64_C (0x335b97976697ccf1), U64_C (0x0000000000000000), + U64_C (0x83f9cfcf1bcf36d4), U64_C (0x566e2b2bac2b4587), + U64_C (0xece17676c57697b3), U64_C (0x19e68282328264b0), + U64_C (0xb128d6d67fd6fea9), U64_C (0x36c31b1b6c1bd877), + U64_C (0x7774b5b5eeb5c15b), U64_C (0x43beafaf86af1129), + U64_C (0xd41d6a6ab56a77df), U64_C (0xa0ea50505d50ba0d), + U64_C (0x8a5745450945124c), U64_C (0xfb38f3f3ebf3cb18), + U64_C (0x60ad3030c0309df0), U64_C (0xc3c4efef9bef2b74), + U64_C (0x7eda3f3ffc3fe5c3), U64_C (0xaac755554955921c), + U64_C (0x59dba2a2b2a27910), U64_C (0xc9e9eaea8fea0365), + U64_C (0xca6a656589650fec), U64_C (0x6903babad2bab968), + U64_C (0x5e4a2f2fbc2f6593), U64_C (0x9d8ec0c027c04ee7), + U64_C (0xa160dede5fdebe81), U64_C (0x38fc1c1c701ce06c), + U64_C (0xe746fdfdd3fdbb2e), U64_C (0x9a1f4d4d294d5264), + U64_C (0x397692927292e4e0), U64_C (0xeafa7575c9758fbc), + U64_C (0x0c3606061806301e), U64_C (0x09ae8a8a128a2498), + U64_C (0x794bb2b2f2b2f940), U64_C (0xd185e6e6bfe66359), + U64_C (0x1c7e0e0e380e7036), U64_C (0x3ee71f1f7c1ff863), + U64_C (0xc4556262956237f7), U64_C (0xb53ad4d477d4eea3), + U64_C (0x4d81a8a89aa82932), U64_C (0x315296966296c4f4), + U64_C (0xef62f9f9c3f99b3a), U64_C 
(0x97a3c5c533c566f6), + U64_C (0x4a102525942535b1), U64_C (0xb2ab59597959f220), + U64_C (0x15d084842a8454ae), U64_C (0xe4c57272d572b7a7), + U64_C (0x72ec3939e439d5dd), U64_C (0x98164c4c2d4c5a61), + U64_C (0xbc945e5e655eca3b), U64_C (0xf09f7878fd78e785), + U64_C (0x70e53838e038ddd8), U64_C (0x05988c8c0a8c1486), + U64_C (0xbf17d1d163d1c6b2), U64_C (0x57e4a5a5aea5410b), + U64_C (0xd9a1e2e2afe2434d), U64_C (0xc24e616199612ff8), + U64_C (0x7b42b3b3f6b3f145), U64_C (0x42342121842115a5), + U64_C (0x25089c9c4a9c94d6), U64_C (0x3cee1e1e781ef066), + U64_C (0x8661434311432252), U64_C (0x93b1c7c73bc776fc), + U64_C (0xe54ffcfcd7fcb32b), U64_C (0x0824040410042014), + U64_C (0xa2e351515951b208), U64_C (0x2f2599995e99bcc7), + U64_C (0xda226d6da96d4fc4), U64_C (0x1a650d0d340d6839), + U64_C (0xe979fafacffa8335), U64_C (0xa369dfdf5bdfb684), + U64_C (0xfca97e7ee57ed79b), U64_C (0x4819242490243db4), + U64_C (0x76fe3b3bec3bc5d7), U64_C (0x4b9aabab96ab313d), + U64_C (0x81f0cece1fce3ed1), U64_C (0x2299111144118855), + U64_C (0x03838f8f068f0c89), U64_C (0x9c044e4e254e4a6b), + U64_C (0x7366b7b7e6b7d151), U64_C (0xcbe0ebeb8beb0b60), + U64_C (0x78c13c3cf03cfdcc), U64_C (0x1ffd81813e817cbf), + U64_C (0x354094946a94d4fe), U64_C (0xf31cf7f7fbf7eb0c), + U64_C (0x6f18b9b9deb9a167), U64_C (0x268b13134c13985f), + U64_C (0x58512c2cb02c7d9c), U64_C (0xbb05d3d36bd3d6b8), + U64_C (0xd38ce7e7bbe76b5c), U64_C (0xdc396e6ea56e57cb), + U64_C (0x95aac4c437c46ef3), U64_C (0x061b03030c03180f), + U64_C (0xacdc565645568a13), U64_C (0x885e44440d441a49), + U64_C (0xfea07f7fe17fdf9e), U64_C (0x4f88a9a99ea92137), + U64_C (0x54672a2aa82a4d82), U64_C (0x6b0abbbbd6bbb16d), + U64_C (0x9f87c1c123c146e2), U64_C (0xa6f153535153a202), + U64_C (0xa572dcdc57dcae8b), U64_C (0x16530b0b2c0b5827), + U64_C (0x27019d9d4e9d9cd3), U64_C (0xd82b6c6cad6c47c1), + U64_C (0x62a43131c43195f5), U64_C (0xe8f37474cd7487b9), + U64_C (0xf115f6f6fff6e309), U64_C (0x8c4c464605460a43), + U64_C (0x45a5acac8aac0926), U64_C (0x0fb589891e893c97), + U64_C (0x28b414145014a044), U64_C (0xdfbae1e1a3e15b42), + U64_C (0x2ca616165816b04e), U64_C (0x74f73a3ae83acdd2), + U64_C (0xd2066969b9696fd0), U64_C (0x124109092409482d), + U64_C (0xe0d77070dd70a7ad), U64_C (0x716fb6b6e2b6d954), + U64_C (0xbd1ed0d067d0ceb7), U64_C (0xc7d6eded93ed3b7e), + U64_C (0x85e2cccc17cc2edb), U64_C (0x8468424215422a57), + U64_C (0x2d2c98985a98b4c2), U64_C (0x55eda4a4aaa4490e), + U64_C (0x50752828a0285d88), U64_C (0xb8865c5c6d5cda31), + U64_C (0xed6bf8f8c7f8933f), U64_C (0x11c28686228644a4), + }, { + U64_C (0x7830d818186018c0), U64_C (0xaf462623238c2305), + U64_C (0xf991b8c6c63fc67e), U64_C (0x6fcdfbe8e887e813), + U64_C (0xa113cb878726874c), U64_C (0x626d11b8b8dab8a9), + U64_C (0x0502090101040108), U64_C (0x6e9e0d4f4f214f42), + U64_C (0xee6c9b3636d836ad), U64_C (0x0451ffa6a6a2a659), + U64_C (0xbdb90cd2d26fd2de), U64_C (0x06f70ef5f5f3f5fb), + U64_C (0x80f2967979f979ef), U64_C (0xcede306f6fa16f5f), + U64_C (0xef3f6d91917e91fc), U64_C (0x07a4f852525552aa), + U64_C (0xfdc04760609d6027), U64_C (0x766535bcbccabc89), + U64_C (0xcd2b379b9b569bac), U64_C (0x8c018a8e8e028e04), + U64_C (0x155bd2a3a3b6a371), U64_C (0x3c186c0c0c300c60), + U64_C (0x8af6847b7bf17bff), U64_C (0xe16a803535d435b5), + U64_C (0x693af51d1d741de8), U64_C (0x47ddb3e0e0a7e053), + U64_C (0xacb321d7d77bd7f6), U64_C (0xed999cc2c22fc25e), + U64_C (0x965c432e2eb82e6d), U64_C (0x7a96294b4b314b62), + U64_C (0x21e15dfefedffea3), U64_C (0x16aed55757415782), + U64_C (0x412abd15155415a8), U64_C (0xb6eee87777c1779f), + U64_C (0xeb6e923737dc37a5), U64_C 
(0x56d79ee5e5b3e57b), + U64_C (0xd923139f9f469f8c), U64_C (0x17fd23f0f0e7f0d3), + U64_C (0x7f94204a4a354a6a), U64_C (0x95a944dada4fda9e), + U64_C (0x25b0a258587d58fa), U64_C (0xca8fcfc9c903c906), + U64_C (0x8d527c2929a42955), U64_C (0x22145a0a0a280a50), + U64_C (0x4f7f50b1b1feb1e1), U64_C (0x1a5dc9a0a0baa069), + U64_C (0xdad6146b6bb16b7f), U64_C (0xab17d985852e855c), + U64_C (0x73673cbdbdcebd81), U64_C (0x34ba8f5d5d695dd2), + U64_C (0x5020901010401080), U64_C (0x03f507f4f4f7f4f3), + U64_C (0xc08bddcbcb0bcb16), U64_C (0xc67cd33e3ef83eed), + U64_C (0x110a2d0505140528), U64_C (0xe6ce78676781671f), + U64_C (0x53d597e4e4b7e473), U64_C (0xbb4e0227279c2725), + U64_C (0x5882734141194132), U64_C (0x9d0ba78b8b168b2c), + U64_C (0x0153f6a7a7a6a751), U64_C (0x94fab27d7de97dcf), + U64_C (0xfb374995956e95dc), U64_C (0x9fad56d8d847d88e), + U64_C (0x30eb70fbfbcbfb8b), U64_C (0x71c1cdeeee9fee23), + U64_C (0x91f8bb7c7ced7cc7), U64_C (0xe3cc716666856617), + U64_C (0x8ea77bdddd53dda6), U64_C (0x4b2eaf17175c17b8), + U64_C (0x468e454747014702), U64_C (0xdc211a9e9e429e84), + U64_C (0xc589d4caca0fca1e), U64_C (0x995a582d2db42d75), + U64_C (0x79632ebfbfc6bf91), U64_C (0x1b0e3f07071c0738), + U64_C (0x2347acadad8ead01), U64_C (0x2fb4b05a5a755aea), + U64_C (0xb51bef838336836c), U64_C (0xff66b63333cc3385), + U64_C (0xf2c65c636391633f), U64_C (0x0a04120202080210), + U64_C (0x384993aaaa92aa39), U64_C (0xa8e2de7171d971af), + U64_C (0xcf8dc6c8c807c80e), U64_C (0x7d32d119196419c8), + U64_C (0x70923b4949394972), U64_C (0x9aaf5fd9d943d986), + U64_C (0x1df931f2f2eff2c3), U64_C (0x48dba8e3e3abe34b), + U64_C (0x2ab6b95b5b715be2), U64_C (0x920dbc88881a8834), + U64_C (0xc8293e9a9a529aa4), U64_C (0xbe4c0b262698262d), + U64_C (0xfa64bf3232c8328d), U64_C (0x4a7d59b0b0fab0e9), + U64_C (0x6acff2e9e983e91b), U64_C (0x331e770f0f3c0f78), + U64_C (0xa6b733d5d573d5e6), U64_C (0xba1df480803a8074), + U64_C (0x7c6127bebec2be99), U64_C (0xde87ebcdcd13cd26), + U64_C (0xe468893434d034bd), U64_C (0x75903248483d487a), + U64_C (0x24e354ffffdbffab), U64_C (0x8ff48d7a7af57af7), + U64_C (0xea3d6490907a90f4), U64_C (0x3ebe9d5f5f615fc2), + U64_C (0xa0403d202080201d), U64_C (0xd5d00f6868bd6867), + U64_C (0x7234ca1a1a681ad0), U64_C (0x2c41b7aeae82ae19), + U64_C (0x5e757db4b4eab4c9), U64_C (0x19a8ce54544d549a), + U64_C (0xe53b7f93937693ec), U64_C (0xaa442f222288220d), + U64_C (0xe9c86364648d6407), U64_C (0x12ff2af1f1e3f1db), + U64_C (0xa2e6cc7373d173bf), U64_C (0x5a24821212481290), + U64_C (0x5d807a40401d403a), U64_C (0x2810480808200840), + U64_C (0xe89b95c3c32bc356), U64_C (0x7bc5dfecec97ec33), + U64_C (0x90ab4ddbdb4bdb96), U64_C (0x1f5fc0a1a1bea161), + U64_C (0x8307918d8d0e8d1c), U64_C (0xc97ac83d3df43df5), + U64_C (0xf1335b97976697cc), U64_C (0x0000000000000000), + U64_C (0xd483f9cfcf1bcf36), U64_C (0x87566e2b2bac2b45), + U64_C (0xb3ece17676c57697), U64_C (0xb019e68282328264), + U64_C (0xa9b128d6d67fd6fe), U64_C (0x7736c31b1b6c1bd8), + U64_C (0x5b7774b5b5eeb5c1), U64_C (0x2943beafaf86af11), + U64_C (0xdfd41d6a6ab56a77), U64_C (0x0da0ea50505d50ba), + U64_C (0x4c8a574545094512), U64_C (0x18fb38f3f3ebf3cb), + U64_C (0xf060ad3030c0309d), U64_C (0x74c3c4efef9bef2b), + U64_C (0xc37eda3f3ffc3fe5), U64_C (0x1caac75555495592), + U64_C (0x1059dba2a2b2a279), U64_C (0x65c9e9eaea8fea03), + U64_C (0xecca6a656589650f), U64_C (0x686903babad2bab9), + U64_C (0x935e4a2f2fbc2f65), U64_C (0xe79d8ec0c027c04e), + U64_C (0x81a160dede5fdebe), U64_C (0x6c38fc1c1c701ce0), + U64_C (0x2ee746fdfdd3fdbb), U64_C (0x649a1f4d4d294d52), + U64_C (0xe0397692927292e4), U64_C 
(0xbceafa7575c9758f), + U64_C (0x1e0c360606180630), U64_C (0x9809ae8a8a128a24), + U64_C (0x40794bb2b2f2b2f9), U64_C (0x59d185e6e6bfe663), + U64_C (0x361c7e0e0e380e70), U64_C (0x633ee71f1f7c1ff8), + U64_C (0xf7c4556262956237), U64_C (0xa3b53ad4d477d4ee), + U64_C (0x324d81a8a89aa829), U64_C (0xf4315296966296c4), + U64_C (0x3aef62f9f9c3f99b), U64_C (0xf697a3c5c533c566), + U64_C (0xb14a102525942535), U64_C (0x20b2ab59597959f2), + U64_C (0xae15d084842a8454), U64_C (0xa7e4c57272d572b7), + U64_C (0xdd72ec3939e439d5), U64_C (0x6198164c4c2d4c5a), + U64_C (0x3bbc945e5e655eca), U64_C (0x85f09f7878fd78e7), + U64_C (0xd870e53838e038dd), U64_C (0x8605988c8c0a8c14), + U64_C (0xb2bf17d1d163d1c6), U64_C (0x0b57e4a5a5aea541), + U64_C (0x4dd9a1e2e2afe243), U64_C (0xf8c24e616199612f), + U64_C (0x457b42b3b3f6b3f1), U64_C (0xa542342121842115), + U64_C (0xd625089c9c4a9c94), U64_C (0x663cee1e1e781ef0), + U64_C (0x5286614343114322), U64_C (0xfc93b1c7c73bc776), + U64_C (0x2be54ffcfcd7fcb3), U64_C (0x1408240404100420), + U64_C (0x08a2e351515951b2), U64_C (0xc72f2599995e99bc), + U64_C (0xc4da226d6da96d4f), U64_C (0x391a650d0d340d68), + U64_C (0x35e979fafacffa83), U64_C (0x84a369dfdf5bdfb6), + U64_C (0x9bfca97e7ee57ed7), U64_C (0xb44819242490243d), + U64_C (0xd776fe3b3bec3bc5), U64_C (0x3d4b9aabab96ab31), + U64_C (0xd181f0cece1fce3e), U64_C (0x5522991111441188), + U64_C (0x8903838f8f068f0c), U64_C (0x6b9c044e4e254e4a), + U64_C (0x517366b7b7e6b7d1), U64_C (0x60cbe0ebeb8beb0b), + U64_C (0xcc78c13c3cf03cfd), U64_C (0xbf1ffd81813e817c), + U64_C (0xfe354094946a94d4), U64_C (0x0cf31cf7f7fbf7eb), + U64_C (0x676f18b9b9deb9a1), U64_C (0x5f268b13134c1398), + U64_C (0x9c58512c2cb02c7d), U64_C (0xb8bb05d3d36bd3d6), + U64_C (0x5cd38ce7e7bbe76b), U64_C (0xcbdc396e6ea56e57), + U64_C (0xf395aac4c437c46e), U64_C (0x0f061b03030c0318), + U64_C (0x13acdc565645568a), U64_C (0x49885e44440d441a), + U64_C (0x9efea07f7fe17fdf), U64_C (0x374f88a9a99ea921), + U64_C (0x8254672a2aa82a4d), U64_C (0x6d6b0abbbbd6bbb1), + U64_C (0xe29f87c1c123c146), U64_C (0x02a6f153535153a2), + U64_C (0x8ba572dcdc57dcae), U64_C (0x2716530b0b2c0b58), + U64_C (0xd327019d9d4e9d9c), U64_C (0xc1d82b6c6cad6c47), + U64_C (0xf562a43131c43195), U64_C (0xb9e8f37474cd7487), + U64_C (0x09f115f6f6fff6e3), U64_C (0x438c4c464605460a), + U64_C (0x2645a5acac8aac09), U64_C (0x970fb589891e893c), + U64_C (0x4428b414145014a0), U64_C (0x42dfbae1e1a3e15b), + U64_C (0x4e2ca616165816b0), U64_C (0xd274f73a3ae83acd), + U64_C (0xd0d2066969b9696f), U64_C (0x2d12410909240948), + U64_C (0xade0d77070dd70a7), U64_C (0x54716fb6b6e2b6d9), + U64_C (0xb7bd1ed0d067d0ce), U64_C (0x7ec7d6eded93ed3b), + U64_C (0xdb85e2cccc17cc2e), U64_C (0x578468424215422a), + U64_C (0xc22d2c98985a98b4), U64_C (0x0e55eda4a4aaa449), + U64_C (0x8850752828a0285d), U64_C (0x31b8865c5c6d5cda), + U64_C (0x3fed6bf8f8c7f893), U64_C (0xa411c28686228644), + }, { + U64_C (0xc07830d818186018), U64_C (0x05af462623238c23), + U64_C (0x7ef991b8c6c63fc6), U64_C (0x136fcdfbe8e887e8), + U64_C (0x4ca113cb87872687), U64_C (0xa9626d11b8b8dab8), + U64_C (0x0805020901010401), U64_C (0x426e9e0d4f4f214f), + U64_C (0xadee6c9b3636d836), U64_C (0x590451ffa6a6a2a6), + U64_C (0xdebdb90cd2d26fd2), U64_C (0xfb06f70ef5f5f3f5), + U64_C (0xef80f2967979f979), U64_C (0x5fcede306f6fa16f), + U64_C (0xfcef3f6d91917e91), U64_C (0xaa07a4f852525552), + U64_C (0x27fdc04760609d60), U64_C (0x89766535bcbccabc), + U64_C (0xaccd2b379b9b569b), U64_C (0x048c018a8e8e028e), + U64_C (0x71155bd2a3a3b6a3), U64_C (0x603c186c0c0c300c), + U64_C (0xff8af6847b7bf17b), U64_C 
(0xb5e16a803535d435), + U64_C (0xe8693af51d1d741d), U64_C (0x5347ddb3e0e0a7e0), + U64_C (0xf6acb321d7d77bd7), U64_C (0x5eed999cc2c22fc2), + U64_C (0x6d965c432e2eb82e), U64_C (0x627a96294b4b314b), + U64_C (0xa321e15dfefedffe), U64_C (0x8216aed557574157), + U64_C (0xa8412abd15155415), U64_C (0x9fb6eee87777c177), + U64_C (0xa5eb6e923737dc37), U64_C (0x7b56d79ee5e5b3e5), + U64_C (0x8cd923139f9f469f), U64_C (0xd317fd23f0f0e7f0), + U64_C (0x6a7f94204a4a354a), U64_C (0x9e95a944dada4fda), + U64_C (0xfa25b0a258587d58), U64_C (0x06ca8fcfc9c903c9), + U64_C (0x558d527c2929a429), U64_C (0x5022145a0a0a280a), + U64_C (0xe14f7f50b1b1feb1), U64_C (0x691a5dc9a0a0baa0), + U64_C (0x7fdad6146b6bb16b), U64_C (0x5cab17d985852e85), + U64_C (0x8173673cbdbdcebd), U64_C (0xd234ba8f5d5d695d), + U64_C (0x8050209010104010), U64_C (0xf303f507f4f4f7f4), + U64_C (0x16c08bddcbcb0bcb), U64_C (0xedc67cd33e3ef83e), + U64_C (0x28110a2d05051405), U64_C (0x1fe6ce7867678167), + U64_C (0x7353d597e4e4b7e4), U64_C (0x25bb4e0227279c27), + U64_C (0x3258827341411941), U64_C (0x2c9d0ba78b8b168b), + U64_C (0x510153f6a7a7a6a7), U64_C (0xcf94fab27d7de97d), + U64_C (0xdcfb374995956e95), U64_C (0x8e9fad56d8d847d8), + U64_C (0x8b30eb70fbfbcbfb), U64_C (0x2371c1cdeeee9fee), + U64_C (0xc791f8bb7c7ced7c), U64_C (0x17e3cc7166668566), + U64_C (0xa68ea77bdddd53dd), U64_C (0xb84b2eaf17175c17), + U64_C (0x02468e4547470147), U64_C (0x84dc211a9e9e429e), + U64_C (0x1ec589d4caca0fca), U64_C (0x75995a582d2db42d), + U64_C (0x9179632ebfbfc6bf), U64_C (0x381b0e3f07071c07), + U64_C (0x012347acadad8ead), U64_C (0xea2fb4b05a5a755a), + U64_C (0x6cb51bef83833683), U64_C (0x85ff66b63333cc33), + U64_C (0x3ff2c65c63639163), U64_C (0x100a041202020802), + U64_C (0x39384993aaaa92aa), U64_C (0xafa8e2de7171d971), + U64_C (0x0ecf8dc6c8c807c8), U64_C (0xc87d32d119196419), + U64_C (0x7270923b49493949), U64_C (0x869aaf5fd9d943d9), + U64_C (0xc31df931f2f2eff2), U64_C (0x4b48dba8e3e3abe3), + U64_C (0xe22ab6b95b5b715b), U64_C (0x34920dbc88881a88), + U64_C (0xa4c8293e9a9a529a), U64_C (0x2dbe4c0b26269826), + U64_C (0x8dfa64bf3232c832), U64_C (0xe94a7d59b0b0fab0), + U64_C (0x1b6acff2e9e983e9), U64_C (0x78331e770f0f3c0f), + U64_C (0xe6a6b733d5d573d5), U64_C (0x74ba1df480803a80), + U64_C (0x997c6127bebec2be), U64_C (0x26de87ebcdcd13cd), + U64_C (0xbde468893434d034), U64_C (0x7a75903248483d48), + U64_C (0xab24e354ffffdbff), U64_C (0xf78ff48d7a7af57a), + U64_C (0xf4ea3d6490907a90), U64_C (0xc23ebe9d5f5f615f), + U64_C (0x1da0403d20208020), U64_C (0x67d5d00f6868bd68), + U64_C (0xd07234ca1a1a681a), U64_C (0x192c41b7aeae82ae), + U64_C (0xc95e757db4b4eab4), U64_C (0x9a19a8ce54544d54), + U64_C (0xece53b7f93937693), U64_C (0x0daa442f22228822), + U64_C (0x07e9c86364648d64), U64_C (0xdb12ff2af1f1e3f1), + U64_C (0xbfa2e6cc7373d173), U64_C (0x905a248212124812), + U64_C (0x3a5d807a40401d40), U64_C (0x4028104808082008), + U64_C (0x56e89b95c3c32bc3), U64_C (0x337bc5dfecec97ec), + U64_C (0x9690ab4ddbdb4bdb), U64_C (0x611f5fc0a1a1bea1), + U64_C (0x1c8307918d8d0e8d), U64_C (0xf5c97ac83d3df43d), + U64_C (0xccf1335b97976697), U64_C (0x0000000000000000), + U64_C (0x36d483f9cfcf1bcf), U64_C (0x4587566e2b2bac2b), + U64_C (0x97b3ece17676c576), U64_C (0x64b019e682823282), + U64_C (0xfea9b128d6d67fd6), U64_C (0xd87736c31b1b6c1b), + U64_C (0xc15b7774b5b5eeb5), U64_C (0x112943beafaf86af), + U64_C (0x77dfd41d6a6ab56a), U64_C (0xba0da0ea50505d50), + U64_C (0x124c8a5745450945), U64_C (0xcb18fb38f3f3ebf3), + U64_C (0x9df060ad3030c030), U64_C (0x2b74c3c4efef9bef), + U64_C (0xe5c37eda3f3ffc3f), U64_C 
(0x921caac755554955), + U64_C (0x791059dba2a2b2a2), U64_C (0x0365c9e9eaea8fea), + U64_C (0x0fecca6a65658965), U64_C (0xb9686903babad2ba), + U64_C (0x65935e4a2f2fbc2f), U64_C (0x4ee79d8ec0c027c0), + U64_C (0xbe81a160dede5fde), U64_C (0xe06c38fc1c1c701c), + U64_C (0xbb2ee746fdfdd3fd), U64_C (0x52649a1f4d4d294d), + U64_C (0xe4e0397692927292), U64_C (0x8fbceafa7575c975), + U64_C (0x301e0c3606061806), U64_C (0x249809ae8a8a128a), + U64_C (0xf940794bb2b2f2b2), U64_C (0x6359d185e6e6bfe6), + U64_C (0x70361c7e0e0e380e), U64_C (0xf8633ee71f1f7c1f), + U64_C (0x37f7c45562629562), U64_C (0xeea3b53ad4d477d4), + U64_C (0x29324d81a8a89aa8), U64_C (0xc4f4315296966296), + U64_C (0x9b3aef62f9f9c3f9), U64_C (0x66f697a3c5c533c5), + U64_C (0x35b14a1025259425), U64_C (0xf220b2ab59597959), + U64_C (0x54ae15d084842a84), U64_C (0xb7a7e4c57272d572), + U64_C (0xd5dd72ec3939e439), U64_C (0x5a6198164c4c2d4c), + U64_C (0xca3bbc945e5e655e), U64_C (0xe785f09f7878fd78), + U64_C (0xddd870e53838e038), U64_C (0x148605988c8c0a8c), + U64_C (0xc6b2bf17d1d163d1), U64_C (0x410b57e4a5a5aea5), + U64_C (0x434dd9a1e2e2afe2), U64_C (0x2ff8c24e61619961), + U64_C (0xf1457b42b3b3f6b3), U64_C (0x15a5423421218421), + U64_C (0x94d625089c9c4a9c), U64_C (0xf0663cee1e1e781e), + U64_C (0x2252866143431143), U64_C (0x76fc93b1c7c73bc7), + U64_C (0xb32be54ffcfcd7fc), U64_C (0x2014082404041004), + U64_C (0xb208a2e351515951), U64_C (0xbcc72f2599995e99), + U64_C (0x4fc4da226d6da96d), U64_C (0x68391a650d0d340d), + U64_C (0x8335e979fafacffa), U64_C (0xb684a369dfdf5bdf), + U64_C (0xd79bfca97e7ee57e), U64_C (0x3db4481924249024), + U64_C (0xc5d776fe3b3bec3b), U64_C (0x313d4b9aabab96ab), + U64_C (0x3ed181f0cece1fce), U64_C (0x8855229911114411), + U64_C (0x0c8903838f8f068f), U64_C (0x4a6b9c044e4e254e), + U64_C (0xd1517366b7b7e6b7), U64_C (0x0b60cbe0ebeb8beb), + U64_C (0xfdcc78c13c3cf03c), U64_C (0x7cbf1ffd81813e81), + U64_C (0xd4fe354094946a94), U64_C (0xeb0cf31cf7f7fbf7), + U64_C (0xa1676f18b9b9deb9), U64_C (0x985f268b13134c13), + U64_C (0x7d9c58512c2cb02c), U64_C (0xd6b8bb05d3d36bd3), + U64_C (0x6b5cd38ce7e7bbe7), U64_C (0x57cbdc396e6ea56e), + U64_C (0x6ef395aac4c437c4), U64_C (0x180f061b03030c03), + U64_C (0x8a13acdc56564556), U64_C (0x1a49885e44440d44), + U64_C (0xdf9efea07f7fe17f), U64_C (0x21374f88a9a99ea9), + U64_C (0x4d8254672a2aa82a), U64_C (0xb16d6b0abbbbd6bb), + U64_C (0x46e29f87c1c123c1), U64_C (0xa202a6f153535153), + U64_C (0xae8ba572dcdc57dc), U64_C (0x582716530b0b2c0b), + U64_C (0x9cd327019d9d4e9d), U64_C (0x47c1d82b6c6cad6c), + U64_C (0x95f562a43131c431), U64_C (0x87b9e8f37474cd74), + U64_C (0xe309f115f6f6fff6), U64_C (0x0a438c4c46460546), + U64_C (0x092645a5acac8aac), U64_C (0x3c970fb589891e89), + U64_C (0xa04428b414145014), U64_C (0x5b42dfbae1e1a3e1), + U64_C (0xb04e2ca616165816), U64_C (0xcdd274f73a3ae83a), + U64_C (0x6fd0d2066969b969), U64_C (0x482d124109092409), + U64_C (0xa7ade0d77070dd70), U64_C (0xd954716fb6b6e2b6), + U64_C (0xceb7bd1ed0d067d0), U64_C (0x3b7ec7d6eded93ed), + U64_C (0x2edb85e2cccc17cc), U64_C (0x2a57846842421542), + U64_C (0xb4c22d2c98985a98), U64_C (0x490e55eda4a4aaa4), + U64_C (0x5d8850752828a028), U64_C (0xda31b8865c5c6d5c), + U64_C (0x933fed6bf8f8c7f8), U64_C (0x44a411c286862286), + }, { + U64_C (0x18c07830d8181860), U64_C (0x2305af462623238c), + U64_C (0xc67ef991b8c6c63f), U64_C (0xe8136fcdfbe8e887), + U64_C (0x874ca113cb878726), U64_C (0xb8a9626d11b8b8da), + U64_C (0x0108050209010104), U64_C (0x4f426e9e0d4f4f21), + U64_C (0x36adee6c9b3636d8), U64_C (0xa6590451ffa6a6a2), + U64_C (0xd2debdb90cd2d26f), U64_C 
(0xf5fb06f70ef5f5f3), + U64_C (0x79ef80f2967979f9), U64_C (0x6f5fcede306f6fa1), + U64_C (0x91fcef3f6d91917e), U64_C (0x52aa07a4f8525255), + U64_C (0x6027fdc04760609d), U64_C (0xbc89766535bcbcca), + U64_C (0x9baccd2b379b9b56), U64_C (0x8e048c018a8e8e02), + U64_C (0xa371155bd2a3a3b6), U64_C (0x0c603c186c0c0c30), + U64_C (0x7bff8af6847b7bf1), U64_C (0x35b5e16a803535d4), + U64_C (0x1de8693af51d1d74), U64_C (0xe05347ddb3e0e0a7), + U64_C (0xd7f6acb321d7d77b), U64_C (0xc25eed999cc2c22f), + U64_C (0x2e6d965c432e2eb8), U64_C (0x4b627a96294b4b31), + U64_C (0xfea321e15dfefedf), U64_C (0x578216aed5575741), + U64_C (0x15a8412abd151554), U64_C (0x779fb6eee87777c1), + U64_C (0x37a5eb6e923737dc), U64_C (0xe57b56d79ee5e5b3), + U64_C (0x9f8cd923139f9f46), U64_C (0xf0d317fd23f0f0e7), + U64_C (0x4a6a7f94204a4a35), U64_C (0xda9e95a944dada4f), + U64_C (0x58fa25b0a258587d), U64_C (0xc906ca8fcfc9c903), + U64_C (0x29558d527c2929a4), U64_C (0x0a5022145a0a0a28), + U64_C (0xb1e14f7f50b1b1fe), U64_C (0xa0691a5dc9a0a0ba), + U64_C (0x6b7fdad6146b6bb1), U64_C (0x855cab17d985852e), + U64_C (0xbd8173673cbdbdce), U64_C (0x5dd234ba8f5d5d69), + U64_C (0x1080502090101040), U64_C (0xf4f303f507f4f4f7), + U64_C (0xcb16c08bddcbcb0b), U64_C (0x3eedc67cd33e3ef8), + U64_C (0x0528110a2d050514), U64_C (0x671fe6ce78676781), + U64_C (0xe47353d597e4e4b7), U64_C (0x2725bb4e0227279c), + U64_C (0x4132588273414119), U64_C (0x8b2c9d0ba78b8b16), + U64_C (0xa7510153f6a7a7a6), U64_C (0x7dcf94fab27d7de9), + U64_C (0x95dcfb374995956e), U64_C (0xd88e9fad56d8d847), + U64_C (0xfb8b30eb70fbfbcb), U64_C (0xee2371c1cdeeee9f), + U64_C (0x7cc791f8bb7c7ced), U64_C (0x6617e3cc71666685), + U64_C (0xdda68ea77bdddd53), U64_C (0x17b84b2eaf17175c), + U64_C (0x4702468e45474701), U64_C (0x9e84dc211a9e9e42), + U64_C (0xca1ec589d4caca0f), U64_C (0x2d75995a582d2db4), + U64_C (0xbf9179632ebfbfc6), U64_C (0x07381b0e3f07071c), + U64_C (0xad012347acadad8e), U64_C (0x5aea2fb4b05a5a75), + U64_C (0x836cb51bef838336), U64_C (0x3385ff66b63333cc), + U64_C (0x633ff2c65c636391), U64_C (0x02100a0412020208), + U64_C (0xaa39384993aaaa92), U64_C (0x71afa8e2de7171d9), + U64_C (0xc80ecf8dc6c8c807), U64_C (0x19c87d32d1191964), + U64_C (0x497270923b494939), U64_C (0xd9869aaf5fd9d943), + U64_C (0xf2c31df931f2f2ef), U64_C (0xe34b48dba8e3e3ab), + U64_C (0x5be22ab6b95b5b71), U64_C (0x8834920dbc88881a), + U64_C (0x9aa4c8293e9a9a52), U64_C (0x262dbe4c0b262698), + U64_C (0x328dfa64bf3232c8), U64_C (0xb0e94a7d59b0b0fa), + U64_C (0xe91b6acff2e9e983), U64_C (0x0f78331e770f0f3c), + U64_C (0xd5e6a6b733d5d573), U64_C (0x8074ba1df480803a), + U64_C (0xbe997c6127bebec2), U64_C (0xcd26de87ebcdcd13), + U64_C (0x34bde468893434d0), U64_C (0x487a75903248483d), + U64_C (0xffab24e354ffffdb), U64_C (0x7af78ff48d7a7af5), + U64_C (0x90f4ea3d6490907a), U64_C (0x5fc23ebe9d5f5f61), + U64_C (0x201da0403d202080), U64_C (0x6867d5d00f6868bd), + U64_C (0x1ad07234ca1a1a68), U64_C (0xae192c41b7aeae82), + U64_C (0xb4c95e757db4b4ea), U64_C (0x549a19a8ce54544d), + U64_C (0x93ece53b7f939376), U64_C (0x220daa442f222288), + U64_C (0x6407e9c86364648d), U64_C (0xf1db12ff2af1f1e3), + U64_C (0x73bfa2e6cc7373d1), U64_C (0x12905a2482121248), + U64_C (0x403a5d807a40401d), U64_C (0x0840281048080820), + U64_C (0xc356e89b95c3c32b), U64_C (0xec337bc5dfecec97), + U64_C (0xdb9690ab4ddbdb4b), U64_C (0xa1611f5fc0a1a1be), + U64_C (0x8d1c8307918d8d0e), U64_C (0x3df5c97ac83d3df4), + U64_C (0x97ccf1335b979766), U64_C (0x0000000000000000), + U64_C (0xcf36d483f9cfcf1b), U64_C (0x2b4587566e2b2bac), + U64_C (0x7697b3ece17676c5), U64_C 
(0x8264b019e6828232), + U64_C (0xd6fea9b128d6d67f), U64_C (0x1bd87736c31b1b6c), + U64_C (0xb5c15b7774b5b5ee), U64_C (0xaf112943beafaf86), + U64_C (0x6a77dfd41d6a6ab5), U64_C (0x50ba0da0ea50505d), + U64_C (0x45124c8a57454509), U64_C (0xf3cb18fb38f3f3eb), + U64_C (0x309df060ad3030c0), U64_C (0xef2b74c3c4efef9b), + U64_C (0x3fe5c37eda3f3ffc), U64_C (0x55921caac7555549), + U64_C (0xa2791059dba2a2b2), U64_C (0xea0365c9e9eaea8f), + U64_C (0x650fecca6a656589), U64_C (0xbab9686903babad2), + U64_C (0x2f65935e4a2f2fbc), U64_C (0xc04ee79d8ec0c027), + U64_C (0xdebe81a160dede5f), U64_C (0x1ce06c38fc1c1c70), + U64_C (0xfdbb2ee746fdfdd3), U64_C (0x4d52649a1f4d4d29), + U64_C (0x92e4e03976929272), U64_C (0x758fbceafa7575c9), + U64_C (0x06301e0c36060618), U64_C (0x8a249809ae8a8a12), + U64_C (0xb2f940794bb2b2f2), U64_C (0xe66359d185e6e6bf), + U64_C (0x0e70361c7e0e0e38), U64_C (0x1ff8633ee71f1f7c), + U64_C (0x6237f7c455626295), U64_C (0xd4eea3b53ad4d477), + U64_C (0xa829324d81a8a89a), U64_C (0x96c4f43152969662), + U64_C (0xf99b3aef62f9f9c3), U64_C (0xc566f697a3c5c533), + U64_C (0x2535b14a10252594), U64_C (0x59f220b2ab595979), + U64_C (0x8454ae15d084842a), U64_C (0x72b7a7e4c57272d5), + U64_C (0x39d5dd72ec3939e4), U64_C (0x4c5a6198164c4c2d), + U64_C (0x5eca3bbc945e5e65), U64_C (0x78e785f09f7878fd), + U64_C (0x38ddd870e53838e0), U64_C (0x8c148605988c8c0a), + U64_C (0xd1c6b2bf17d1d163), U64_C (0xa5410b57e4a5a5ae), + U64_C (0xe2434dd9a1e2e2af), U64_C (0x612ff8c24e616199), + U64_C (0xb3f1457b42b3b3f6), U64_C (0x2115a54234212184), + U64_C (0x9c94d625089c9c4a), U64_C (0x1ef0663cee1e1e78), + U64_C (0x4322528661434311), U64_C (0xc776fc93b1c7c73b), + U64_C (0xfcb32be54ffcfcd7), U64_C (0x0420140824040410), + U64_C (0x51b208a2e3515159), U64_C (0x99bcc72f2599995e), + U64_C (0x6d4fc4da226d6da9), U64_C (0x0d68391a650d0d34), + U64_C (0xfa8335e979fafacf), U64_C (0xdfb684a369dfdf5b), + U64_C (0x7ed79bfca97e7ee5), U64_C (0x243db44819242490), + U64_C (0x3bc5d776fe3b3bec), U64_C (0xab313d4b9aabab96), + U64_C (0xce3ed181f0cece1f), U64_C (0x1188552299111144), + U64_C (0x8f0c8903838f8f06), U64_C (0x4e4a6b9c044e4e25), + U64_C (0xb7d1517366b7b7e6), U64_C (0xeb0b60cbe0ebeb8b), + U64_C (0x3cfdcc78c13c3cf0), U64_C (0x817cbf1ffd81813e), + U64_C (0x94d4fe354094946a), U64_C (0xf7eb0cf31cf7f7fb), + U64_C (0xb9a1676f18b9b9de), U64_C (0x13985f268b13134c), + U64_C (0x2c7d9c58512c2cb0), U64_C (0xd3d6b8bb05d3d36b), + U64_C (0xe76b5cd38ce7e7bb), U64_C (0x6e57cbdc396e6ea5), + U64_C (0xc46ef395aac4c437), U64_C (0x03180f061b03030c), + U64_C (0x568a13acdc565645), U64_C (0x441a49885e44440d), + U64_C (0x7fdf9efea07f7fe1), U64_C (0xa921374f88a9a99e), + U64_C (0x2a4d8254672a2aa8), U64_C (0xbbb16d6b0abbbbd6), + U64_C (0xc146e29f87c1c123), U64_C (0x53a202a6f1535351), + U64_C (0xdcae8ba572dcdc57), U64_C (0x0b582716530b0b2c), + U64_C (0x9d9cd327019d9d4e), U64_C (0x6c47c1d82b6c6cad), + U64_C (0x3195f562a43131c4), U64_C (0x7487b9e8f37474cd), + U64_C (0xf6e309f115f6f6ff), U64_C (0x460a438c4c464605), + U64_C (0xac092645a5acac8a), U64_C (0x893c970fb589891e), + U64_C (0x14a04428b4141450), U64_C (0xe15b42dfbae1e1a3), + U64_C (0x16b04e2ca6161658), U64_C (0x3acdd274f73a3ae8), + U64_C (0x696fd0d2066969b9), U64_C (0x09482d1241090924), + U64_C (0x70a7ade0d77070dd), U64_C (0xb6d954716fb6b6e2), + U64_C (0xd0ceb7bd1ed0d067), U64_C (0xed3b7ec7d6eded93), + U64_C (0xcc2edb85e2cccc17), U64_C (0x422a578468424215), + U64_C (0x98b4c22d2c98985a), U64_C (0xa4490e55eda4a4aa), + U64_C (0x285d8850752828a0), U64_C (0x5cda31b8865c5c6d), + U64_C (0xf8933fed6bf8f8c7), U64_C 
(0x8644a411c2868622), + }, { + U64_C (0x6018c07830d81818), U64_C (0x8c2305af46262323), + U64_C (0x3fc67ef991b8c6c6), U64_C (0x87e8136fcdfbe8e8), + U64_C (0x26874ca113cb8787), U64_C (0xdab8a9626d11b8b8), + U64_C (0x0401080502090101), U64_C (0x214f426e9e0d4f4f), + U64_C (0xd836adee6c9b3636), U64_C (0xa2a6590451ffa6a6), + U64_C (0x6fd2debdb90cd2d2), U64_C (0xf3f5fb06f70ef5f5), + U64_C (0xf979ef80f2967979), U64_C (0xa16f5fcede306f6f), + U64_C (0x7e91fcef3f6d9191), U64_C (0x5552aa07a4f85252), + U64_C (0x9d6027fdc0476060), U64_C (0xcabc89766535bcbc), + U64_C (0x569baccd2b379b9b), U64_C (0x028e048c018a8e8e), + U64_C (0xb6a371155bd2a3a3), U64_C (0x300c603c186c0c0c), + U64_C (0xf17bff8af6847b7b), U64_C (0xd435b5e16a803535), + U64_C (0x741de8693af51d1d), U64_C (0xa7e05347ddb3e0e0), + U64_C (0x7bd7f6acb321d7d7), U64_C (0x2fc25eed999cc2c2), + U64_C (0xb82e6d965c432e2e), U64_C (0x314b627a96294b4b), + U64_C (0xdffea321e15dfefe), U64_C (0x41578216aed55757), + U64_C (0x5415a8412abd1515), U64_C (0xc1779fb6eee87777), + U64_C (0xdc37a5eb6e923737), U64_C (0xb3e57b56d79ee5e5), + U64_C (0x469f8cd923139f9f), U64_C (0xe7f0d317fd23f0f0), + U64_C (0x354a6a7f94204a4a), U64_C (0x4fda9e95a944dada), + U64_C (0x7d58fa25b0a25858), U64_C (0x03c906ca8fcfc9c9), + U64_C (0xa429558d527c2929), U64_C (0x280a5022145a0a0a), + U64_C (0xfeb1e14f7f50b1b1), U64_C (0xbaa0691a5dc9a0a0), + U64_C (0xb16b7fdad6146b6b), U64_C (0x2e855cab17d98585), + U64_C (0xcebd8173673cbdbd), U64_C (0x695dd234ba8f5d5d), + U64_C (0x4010805020901010), U64_C (0xf7f4f303f507f4f4), + U64_C (0x0bcb16c08bddcbcb), U64_C (0xf83eedc67cd33e3e), + U64_C (0x140528110a2d0505), U64_C (0x81671fe6ce786767), + U64_C (0xb7e47353d597e4e4), U64_C (0x9c2725bb4e022727), + U64_C (0x1941325882734141), U64_C (0x168b2c9d0ba78b8b), + U64_C (0xa6a7510153f6a7a7), U64_C (0xe97dcf94fab27d7d), + U64_C (0x6e95dcfb37499595), U64_C (0x47d88e9fad56d8d8), + U64_C (0xcbfb8b30eb70fbfb), U64_C (0x9fee2371c1cdeeee), + U64_C (0xed7cc791f8bb7c7c), U64_C (0x856617e3cc716666), + U64_C (0x53dda68ea77bdddd), U64_C (0x5c17b84b2eaf1717), + U64_C (0x014702468e454747), U64_C (0x429e84dc211a9e9e), + U64_C (0x0fca1ec589d4caca), U64_C (0xb42d75995a582d2d), + U64_C (0xc6bf9179632ebfbf), U64_C (0x1c07381b0e3f0707), + U64_C (0x8ead012347acadad), U64_C (0x755aea2fb4b05a5a), + U64_C (0x36836cb51bef8383), U64_C (0xcc3385ff66b63333), + U64_C (0x91633ff2c65c6363), U64_C (0x0802100a04120202), + U64_C (0x92aa39384993aaaa), U64_C (0xd971afa8e2de7171), + U64_C (0x07c80ecf8dc6c8c8), U64_C (0x6419c87d32d11919), + U64_C (0x39497270923b4949), U64_C (0x43d9869aaf5fd9d9), + U64_C (0xeff2c31df931f2f2), U64_C (0xabe34b48dba8e3e3), + U64_C (0x715be22ab6b95b5b), U64_C (0x1a8834920dbc8888), + U64_C (0x529aa4c8293e9a9a), U64_C (0x98262dbe4c0b2626), + U64_C (0xc8328dfa64bf3232), U64_C (0xfab0e94a7d59b0b0), + U64_C (0x83e91b6acff2e9e9), U64_C (0x3c0f78331e770f0f), + U64_C (0x73d5e6a6b733d5d5), U64_C (0x3a8074ba1df48080), + U64_C (0xc2be997c6127bebe), U64_C (0x13cd26de87ebcdcd), + U64_C (0xd034bde468893434), U64_C (0x3d487a7590324848), + U64_C (0xdbffab24e354ffff), U64_C (0xf57af78ff48d7a7a), + U64_C (0x7a90f4ea3d649090), U64_C (0x615fc23ebe9d5f5f), + U64_C (0x80201da0403d2020), U64_C (0xbd6867d5d00f6868), + U64_C (0x681ad07234ca1a1a), U64_C (0x82ae192c41b7aeae), + U64_C (0xeab4c95e757db4b4), U64_C (0x4d549a19a8ce5454), + U64_C (0x7693ece53b7f9393), U64_C (0x88220daa442f2222), + U64_C (0x8d6407e9c8636464), U64_C (0xe3f1db12ff2af1f1), + U64_C (0xd173bfa2e6cc7373), U64_C (0x4812905a24821212), + U64_C (0x1d403a5d807a4040), U64_C 
(0x2008402810480808), + U64_C (0x2bc356e89b95c3c3), U64_C (0x97ec337bc5dfecec), + U64_C (0x4bdb9690ab4ddbdb), U64_C (0xbea1611f5fc0a1a1), + U64_C (0x0e8d1c8307918d8d), U64_C (0xf43df5c97ac83d3d), + U64_C (0x6697ccf1335b9797), U64_C (0x0000000000000000), + U64_C (0x1bcf36d483f9cfcf), U64_C (0xac2b4587566e2b2b), + U64_C (0xc57697b3ece17676), U64_C (0x328264b019e68282), + U64_C (0x7fd6fea9b128d6d6), U64_C (0x6c1bd87736c31b1b), + U64_C (0xeeb5c15b7774b5b5), U64_C (0x86af112943beafaf), + U64_C (0xb56a77dfd41d6a6a), U64_C (0x5d50ba0da0ea5050), + U64_C (0x0945124c8a574545), U64_C (0xebf3cb18fb38f3f3), + U64_C (0xc0309df060ad3030), U64_C (0x9bef2b74c3c4efef), + U64_C (0xfc3fe5c37eda3f3f), U64_C (0x4955921caac75555), + U64_C (0xb2a2791059dba2a2), U64_C (0x8fea0365c9e9eaea), + U64_C (0x89650fecca6a6565), U64_C (0xd2bab9686903baba), + U64_C (0xbc2f65935e4a2f2f), U64_C (0x27c04ee79d8ec0c0), + U64_C (0x5fdebe81a160dede), U64_C (0x701ce06c38fc1c1c), + U64_C (0xd3fdbb2ee746fdfd), U64_C (0x294d52649a1f4d4d), + U64_C (0x7292e4e039769292), U64_C (0xc9758fbceafa7575), + U64_C (0x1806301e0c360606), U64_C (0x128a249809ae8a8a), + U64_C (0xf2b2f940794bb2b2), U64_C (0xbfe66359d185e6e6), + U64_C (0x380e70361c7e0e0e), U64_C (0x7c1ff8633ee71f1f), + U64_C (0x956237f7c4556262), U64_C (0x77d4eea3b53ad4d4), + U64_C (0x9aa829324d81a8a8), U64_C (0x6296c4f431529696), + U64_C (0xc3f99b3aef62f9f9), U64_C (0x33c566f697a3c5c5), + U64_C (0x942535b14a102525), U64_C (0x7959f220b2ab5959), + U64_C (0x2a8454ae15d08484), U64_C (0xd572b7a7e4c57272), + U64_C (0xe439d5dd72ec3939), U64_C (0x2d4c5a6198164c4c), + U64_C (0x655eca3bbc945e5e), U64_C (0xfd78e785f09f7878), + U64_C (0xe038ddd870e53838), U64_C (0x0a8c148605988c8c), + U64_C (0x63d1c6b2bf17d1d1), U64_C (0xaea5410b57e4a5a5), + U64_C (0xafe2434dd9a1e2e2), U64_C (0x99612ff8c24e6161), + U64_C (0xf6b3f1457b42b3b3), U64_C (0x842115a542342121), + U64_C (0x4a9c94d625089c9c), U64_C (0x781ef0663cee1e1e), + U64_C (0x1143225286614343), U64_C (0x3bc776fc93b1c7c7), + U64_C (0xd7fcb32be54ffcfc), U64_C (0x1004201408240404), + U64_C (0x5951b208a2e35151), U64_C (0x5e99bcc72f259999), + U64_C (0xa96d4fc4da226d6d), U64_C (0x340d68391a650d0d), + U64_C (0xcffa8335e979fafa), U64_C (0x5bdfb684a369dfdf), + U64_C (0xe57ed79bfca97e7e), U64_C (0x90243db448192424), + U64_C (0xec3bc5d776fe3b3b), U64_C (0x96ab313d4b9aabab), + U64_C (0x1fce3ed181f0cece), U64_C (0x4411885522991111), + U64_C (0x068f0c8903838f8f), U64_C (0x254e4a6b9c044e4e), + U64_C (0xe6b7d1517366b7b7), U64_C (0x8beb0b60cbe0ebeb), + U64_C (0xf03cfdcc78c13c3c), U64_C (0x3e817cbf1ffd8181), + U64_C (0x6a94d4fe35409494), U64_C (0xfbf7eb0cf31cf7f7), + U64_C (0xdeb9a1676f18b9b9), U64_C (0x4c13985f268b1313), + U64_C (0xb02c7d9c58512c2c), U64_C (0x6bd3d6b8bb05d3d3), + U64_C (0xbbe76b5cd38ce7e7), U64_C (0xa56e57cbdc396e6e), + U64_C (0x37c46ef395aac4c4), U64_C (0x0c03180f061b0303), + U64_C (0x45568a13acdc5656), U64_C (0x0d441a49885e4444), + U64_C (0xe17fdf9efea07f7f), U64_C (0x9ea921374f88a9a9), + U64_C (0xa82a4d8254672a2a), U64_C (0xd6bbb16d6b0abbbb), + U64_C (0x23c146e29f87c1c1), U64_C (0x5153a202a6f15353), + U64_C (0x57dcae8ba572dcdc), U64_C (0x2c0b582716530b0b), + U64_C (0x4e9d9cd327019d9d), U64_C (0xad6c47c1d82b6c6c), + U64_C (0xc43195f562a43131), U64_C (0xcd7487b9e8f37474), + U64_C (0xfff6e309f115f6f6), U64_C (0x05460a438c4c4646), + U64_C (0x8aac092645a5acac), U64_C (0x1e893c970fb58989), + U64_C (0x5014a04428b41414), U64_C (0xa3e15b42dfbae1e1), + U64_C (0x5816b04e2ca61616), U64_C (0xe83acdd274f73a3a), + U64_C (0xb9696fd0d2066969), U64_C 
(0x2409482d12410909), + U64_C (0xdd70a7ade0d77070), U64_C (0xe2b6d954716fb6b6), + U64_C (0x67d0ceb7bd1ed0d0), U64_C (0x93ed3b7ec7d6eded), + U64_C (0x17cc2edb85e2cccc), U64_C (0x15422a5784684242), + U64_C (0x5a98b4c22d2c9898), U64_C (0xaaa4490e55eda4a4), + U64_C (0xa0285d8850752828), U64_C (0x6d5cda31b8865c5c), + U64_C (0xc7f8933fed6bf8f8), U64_C (0x228644a411c28686), + }, { + U64_C (0x186018c07830d818), U64_C (0x238c2305af462623), + U64_C (0xc63fc67ef991b8c6), U64_C (0xe887e8136fcdfbe8), + U64_C (0x8726874ca113cb87), U64_C (0xb8dab8a9626d11b8), + U64_C (0x0104010805020901), U64_C (0x4f214f426e9e0d4f), + U64_C (0x36d836adee6c9b36), U64_C (0xa6a2a6590451ffa6), + U64_C (0xd26fd2debdb90cd2), U64_C (0xf5f3f5fb06f70ef5), + U64_C (0x79f979ef80f29679), U64_C (0x6fa16f5fcede306f), + U64_C (0x917e91fcef3f6d91), U64_C (0x525552aa07a4f852), + U64_C (0x609d6027fdc04760), U64_C (0xbccabc89766535bc), + U64_C (0x9b569baccd2b379b), U64_C (0x8e028e048c018a8e), + U64_C (0xa3b6a371155bd2a3), U64_C (0x0c300c603c186c0c), + U64_C (0x7bf17bff8af6847b), U64_C (0x35d435b5e16a8035), + U64_C (0x1d741de8693af51d), U64_C (0xe0a7e05347ddb3e0), + U64_C (0xd77bd7f6acb321d7), U64_C (0xc22fc25eed999cc2), + U64_C (0x2eb82e6d965c432e), U64_C (0x4b314b627a96294b), + U64_C (0xfedffea321e15dfe), U64_C (0x5741578216aed557), + U64_C (0x155415a8412abd15), U64_C (0x77c1779fb6eee877), + U64_C (0x37dc37a5eb6e9237), U64_C (0xe5b3e57b56d79ee5), + U64_C (0x9f469f8cd923139f), U64_C (0xf0e7f0d317fd23f0), + U64_C (0x4a354a6a7f94204a), U64_C (0xda4fda9e95a944da), + U64_C (0x587d58fa25b0a258), U64_C (0xc903c906ca8fcfc9), + U64_C (0x29a429558d527c29), U64_C (0x0a280a5022145a0a), + U64_C (0xb1feb1e14f7f50b1), U64_C (0xa0baa0691a5dc9a0), + U64_C (0x6bb16b7fdad6146b), U64_C (0x852e855cab17d985), + U64_C (0xbdcebd8173673cbd), U64_C (0x5d695dd234ba8f5d), + U64_C (0x1040108050209010), U64_C (0xf4f7f4f303f507f4), + U64_C (0xcb0bcb16c08bddcb), U64_C (0x3ef83eedc67cd33e), + U64_C (0x05140528110a2d05), U64_C (0x6781671fe6ce7867), + U64_C (0xe4b7e47353d597e4), U64_C (0x279c2725bb4e0227), + U64_C (0x4119413258827341), U64_C (0x8b168b2c9d0ba78b), + U64_C (0xa7a6a7510153f6a7), U64_C (0x7de97dcf94fab27d), + U64_C (0x956e95dcfb374995), U64_C (0xd847d88e9fad56d8), + U64_C (0xfbcbfb8b30eb70fb), U64_C (0xee9fee2371c1cdee), + U64_C (0x7ced7cc791f8bb7c), U64_C (0x66856617e3cc7166), + U64_C (0xdd53dda68ea77bdd), U64_C (0x175c17b84b2eaf17), + U64_C (0x47014702468e4547), U64_C (0x9e429e84dc211a9e), + U64_C (0xca0fca1ec589d4ca), U64_C (0x2db42d75995a582d), + U64_C (0xbfc6bf9179632ebf), U64_C (0x071c07381b0e3f07), + U64_C (0xad8ead012347acad), U64_C (0x5a755aea2fb4b05a), + U64_C (0x8336836cb51bef83), U64_C (0x33cc3385ff66b633), + U64_C (0x6391633ff2c65c63), U64_C (0x020802100a041202), + U64_C (0xaa92aa39384993aa), U64_C (0x71d971afa8e2de71), + U64_C (0xc807c80ecf8dc6c8), U64_C (0x196419c87d32d119), + U64_C (0x4939497270923b49), U64_C (0xd943d9869aaf5fd9), + U64_C (0xf2eff2c31df931f2), U64_C (0xe3abe34b48dba8e3), + U64_C (0x5b715be22ab6b95b), U64_C (0x881a8834920dbc88), + U64_C (0x9a529aa4c8293e9a), U64_C (0x2698262dbe4c0b26), + U64_C (0x32c8328dfa64bf32), U64_C (0xb0fab0e94a7d59b0), + U64_C (0xe983e91b6acff2e9), U64_C (0x0f3c0f78331e770f), + U64_C (0xd573d5e6a6b733d5), U64_C (0x803a8074ba1df480), + U64_C (0xbec2be997c6127be), U64_C (0xcd13cd26de87ebcd), + U64_C (0x34d034bde4688934), U64_C (0x483d487a75903248), + U64_C (0xffdbffab24e354ff), U64_C (0x7af57af78ff48d7a), + U64_C (0x907a90f4ea3d6490), U64_C (0x5f615fc23ebe9d5f), + U64_C (0x2080201da0403d20), U64_C 
(0x68bd6867d5d00f68), + U64_C (0x1a681ad07234ca1a), U64_C (0xae82ae192c41b7ae), + U64_C (0xb4eab4c95e757db4), U64_C (0x544d549a19a8ce54), + U64_C (0x937693ece53b7f93), U64_C (0x2288220daa442f22), + U64_C (0x648d6407e9c86364), U64_C (0xf1e3f1db12ff2af1), + U64_C (0x73d173bfa2e6cc73), U64_C (0x124812905a248212), + U64_C (0x401d403a5d807a40), U64_C (0x0820084028104808), + U64_C (0xc32bc356e89b95c3), U64_C (0xec97ec337bc5dfec), + U64_C (0xdb4bdb9690ab4ddb), U64_C (0xa1bea1611f5fc0a1), + U64_C (0x8d0e8d1c8307918d), U64_C (0x3df43df5c97ac83d), + U64_C (0x976697ccf1335b97), U64_C (0x0000000000000000), + U64_C (0xcf1bcf36d483f9cf), U64_C (0x2bac2b4587566e2b), + U64_C (0x76c57697b3ece176), U64_C (0x82328264b019e682), + U64_C (0xd67fd6fea9b128d6), U64_C (0x1b6c1bd87736c31b), + U64_C (0xb5eeb5c15b7774b5), U64_C (0xaf86af112943beaf), + U64_C (0x6ab56a77dfd41d6a), U64_C (0x505d50ba0da0ea50), + U64_C (0x450945124c8a5745), U64_C (0xf3ebf3cb18fb38f3), + U64_C (0x30c0309df060ad30), U64_C (0xef9bef2b74c3c4ef), + U64_C (0x3ffc3fe5c37eda3f), U64_C (0x554955921caac755), + U64_C (0xa2b2a2791059dba2), U64_C (0xea8fea0365c9e9ea), + U64_C (0x6589650fecca6a65), U64_C (0xbad2bab9686903ba), + U64_C (0x2fbc2f65935e4a2f), U64_C (0xc027c04ee79d8ec0), + U64_C (0xde5fdebe81a160de), U64_C (0x1c701ce06c38fc1c), + U64_C (0xfdd3fdbb2ee746fd), U64_C (0x4d294d52649a1f4d), + U64_C (0x927292e4e0397692), U64_C (0x75c9758fbceafa75), + U64_C (0x061806301e0c3606), U64_C (0x8a128a249809ae8a), + U64_C (0xb2f2b2f940794bb2), U64_C (0xe6bfe66359d185e6), + U64_C (0x0e380e70361c7e0e), U64_C (0x1f7c1ff8633ee71f), + U64_C (0x62956237f7c45562), U64_C (0xd477d4eea3b53ad4), + U64_C (0xa89aa829324d81a8), U64_C (0x966296c4f4315296), + U64_C (0xf9c3f99b3aef62f9), U64_C (0xc533c566f697a3c5), + U64_C (0x25942535b14a1025), U64_C (0x597959f220b2ab59), + U64_C (0x842a8454ae15d084), U64_C (0x72d572b7a7e4c572), + U64_C (0x39e439d5dd72ec39), U64_C (0x4c2d4c5a6198164c), + U64_C (0x5e655eca3bbc945e), U64_C (0x78fd78e785f09f78), + U64_C (0x38e038ddd870e538), U64_C (0x8c0a8c148605988c), + U64_C (0xd163d1c6b2bf17d1), U64_C (0xa5aea5410b57e4a5), + U64_C (0xe2afe2434dd9a1e2), U64_C (0x6199612ff8c24e61), + U64_C (0xb3f6b3f1457b42b3), U64_C (0x21842115a5423421), + U64_C (0x9c4a9c94d625089c), U64_C (0x1e781ef0663cee1e), + U64_C (0x4311432252866143), U64_C (0xc73bc776fc93b1c7), + U64_C (0xfcd7fcb32be54ffc), U64_C (0x0410042014082404), + U64_C (0x515951b208a2e351), U64_C (0x995e99bcc72f2599), + U64_C (0x6da96d4fc4da226d), U64_C (0x0d340d68391a650d), + U64_C (0xfacffa8335e979fa), U64_C (0xdf5bdfb684a369df), + U64_C (0x7ee57ed79bfca97e), U64_C (0x2490243db4481924), + U64_C (0x3bec3bc5d776fe3b), U64_C (0xab96ab313d4b9aab), + U64_C (0xce1fce3ed181f0ce), U64_C (0x1144118855229911), + U64_C (0x8f068f0c8903838f), U64_C (0x4e254e4a6b9c044e), + U64_C (0xb7e6b7d1517366b7), U64_C (0xeb8beb0b60cbe0eb), + U64_C (0x3cf03cfdcc78c13c), U64_C (0x813e817cbf1ffd81), + U64_C (0x946a94d4fe354094), U64_C (0xf7fbf7eb0cf31cf7), + U64_C (0xb9deb9a1676f18b9), U64_C (0x134c13985f268b13), + U64_C (0x2cb02c7d9c58512c), U64_C (0xd36bd3d6b8bb05d3), + U64_C (0xe7bbe76b5cd38ce7), U64_C (0x6ea56e57cbdc396e), + U64_C (0xc437c46ef395aac4), U64_C (0x030c03180f061b03), + U64_C (0x5645568a13acdc56), U64_C (0x440d441a49885e44), + U64_C (0x7fe17fdf9efea07f), U64_C (0xa99ea921374f88a9), + U64_C (0x2aa82a4d8254672a), U64_C (0xbbd6bbb16d6b0abb), + U64_C (0xc123c146e29f87c1), U64_C (0x535153a202a6f153), + U64_C (0xdc57dcae8ba572dc), U64_C (0x0b2c0b582716530b), + U64_C (0x9d4e9d9cd327019d), U64_C 
(0x6cad6c47c1d82b6c), + U64_C (0x31c43195f562a431), U64_C (0x74cd7487b9e8f374), + U64_C (0xf6fff6e309f115f6), U64_C (0x4605460a438c4c46), + U64_C (0xac8aac092645a5ac), U64_C (0x891e893c970fb589), + U64_C (0x145014a04428b414), U64_C (0xe1a3e15b42dfbae1), + U64_C (0x165816b04e2ca616), U64_C (0x3ae83acdd274f73a), + U64_C (0x69b9696fd0d20669), U64_C (0x092409482d124109), + U64_C (0x70dd70a7ade0d770), U64_C (0xb6e2b6d954716fb6), + U64_C (0xd067d0ceb7bd1ed0), U64_C (0xed93ed3b7ec7d6ed), + U64_C (0xcc17cc2edb85e2cc), U64_C (0x4215422a57846842), + U64_C (0x985a98b4c22d2c98), U64_C (0xa4aaa4490e55eda4), + U64_C (0x28a0285d88507528), U64_C (0x5c6d5cda31b8865c), + U64_C (0xf8c7f8933fed6bf8), U64_C (0x86228644a411c286), + } } +}; +#define C tab.C +#define C0 C[0] +#define C1 C[1] +#define C2 C[2] +#define C3 C[3] +#define C4 C[4] +#define C5 C[5] +#define C6 C[6] +#define C7 C[7] +#define rc tab.RC + + + +static unsigned int +whirlpool_transform (void *ctx, const unsigned char *data, size_t nblks); + + + +static void +whirlpool_init (void *ctx, unsigned int flags) +{ + whirlpool_context_t *context = ctx; + + memset (context, 0, sizeof (*context)); + + context->bctx.blocksize = BLOCK_SIZE; + context->bctx.bwrite = whirlpool_transform; + if ((flags & GCRY_MD_FLAG_BUGEMU1)) + { + memset (&context->bugemu, 0, sizeof context->bugemu); + context->use_bugemu = 1; + } + else + context->use_bugemu = 0; +} + + +#ifdef USE_AMD64_ASM + +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_EXTRA_STACK (10 * 16) +#else +# define ASM_FUNC_ABI +# define ASM_EXTRA_STACK 0 +#endif + +extern unsigned int +_gcry_whirlpool_transform_amd64(u64 *state, const unsigned char *data, + size_t nblks, const struct whirlpool_tables_s *tables) ASM_FUNC_ABI; + +static unsigned int +whirlpool_transform (void *ctx, const unsigned char *data, size_t nblks) +{ + whirlpool_context_t *context = ctx; + + return _gcry_whirlpool_transform_amd64( + context->hash_state, data, nblks, &tab) + ASM_EXTRA_STACK; +} + +#else /* USE_AMD64_ASM */ + +/* + * Transform block. + */ +static unsigned int +whirlpool_transform_blk (void *ctx, const unsigned char *data) +{ + whirlpool_context_t *context = ctx; + whirlpool_block_t data_block; + whirlpool_block_t key; + whirlpool_block_t state; + whirlpool_block_t block; + unsigned int r; + unsigned int i; + + buffer_to_block (data, data_block, i); + block_copy (key, context->hash_state, i); + block_copy (state, context->hash_state, i); + block_xor (state, data_block, i); + + for (r = 0; r < R; r++) + { + /* Compute round key K^r. 
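Each word of the new key is formed from eight table lookups, one byte lane of the previous round key per table C0..C7, XORed together; only word 0 additionally folds in the round constant rc[r].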
*/ + + block[0] = (C0[(key[0] >> 56) & 0xFF] ^ C1[(key[7] >> 48) & 0xFF] ^ + C2[(key[6] >> 40) & 0xFF] ^ C3[(key[5] >> 32) & 0xFF] ^ + C4[(key[4] >> 24) & 0xFF] ^ C5[(key[3] >> 16) & 0xFF] ^ + C6[(key[2] >> 8) & 0xFF] ^ C7[(key[1] >> 0) & 0xFF] ^ rc[r]); + block[1] = (C0[(key[1] >> 56) & 0xFF] ^ C1[(key[0] >> 48) & 0xFF] ^ + C2[(key[7] >> 40) & 0xFF] ^ C3[(key[6] >> 32) & 0xFF] ^ + C4[(key[5] >> 24) & 0xFF] ^ C5[(key[4] >> 16) & 0xFF] ^ + C6[(key[3] >> 8) & 0xFF] ^ C7[(key[2] >> 0) & 0xFF]); + block[2] = (C0[(key[2] >> 56) & 0xFF] ^ C1[(key[1] >> 48) & 0xFF] ^ + C2[(key[0] >> 40) & 0xFF] ^ C3[(key[7] >> 32) & 0xFF] ^ + C4[(key[6] >> 24) & 0xFF] ^ C5[(key[5] >> 16) & 0xFF] ^ + C6[(key[4] >> 8) & 0xFF] ^ C7[(key[3] >> 0) & 0xFF]); + block[3] = (C0[(key[3] >> 56) & 0xFF] ^ C1[(key[2] >> 48) & 0xFF] ^ + C2[(key[1] >> 40) & 0xFF] ^ C3[(key[0] >> 32) & 0xFF] ^ + C4[(key[7] >> 24) & 0xFF] ^ C5[(key[6] >> 16) & 0xFF] ^ + C6[(key[5] >> 8) & 0xFF] ^ C7[(key[4] >> 0) & 0xFF]); + block[4] = (C0[(key[4] >> 56) & 0xFF] ^ C1[(key[3] >> 48) & 0xFF] ^ + C2[(key[2] >> 40) & 0xFF] ^ C3[(key[1] >> 32) & 0xFF] ^ + C4[(key[0] >> 24) & 0xFF] ^ C5[(key[7] >> 16) & 0xFF] ^ + C6[(key[6] >> 8) & 0xFF] ^ C7[(key[5] >> 0) & 0xFF]); + block[5] = (C0[(key[5] >> 56) & 0xFF] ^ C1[(key[4] >> 48) & 0xFF] ^ + C2[(key[3] >> 40) & 0xFF] ^ C3[(key[2] >> 32) & 0xFF] ^ + C4[(key[1] >> 24) & 0xFF] ^ C5[(key[0] >> 16) & 0xFF] ^ + C6[(key[7] >> 8) & 0xFF] ^ C7[(key[6] >> 0) & 0xFF]); + block[6] = (C0[(key[6] >> 56) & 0xFF] ^ C1[(key[5] >> 48) & 0xFF] ^ + C2[(key[4] >> 40) & 0xFF] ^ C3[(key[3] >> 32) & 0xFF] ^ + C4[(key[2] >> 24) & 0xFF] ^ C5[(key[1] >> 16) & 0xFF] ^ + C6[(key[0] >> 8) & 0xFF] ^ C7[(key[7] >> 0) & 0xFF]); + block[7] = (C0[(key[7] >> 56) & 0xFF] ^ C1[(key[6] >> 48) & 0xFF] ^ + C2[(key[5] >> 40) & 0xFF] ^ C3[(key[4] >> 32) & 0xFF] ^ + C4[(key[3] >> 24) & 0xFF] ^ C5[(key[2] >> 16) & 0xFF] ^ + C6[(key[1] >> 8) & 0xFF] ^ C7[(key[0] >> 0) & 0xFF]); + block_copy (key, block, i); + + /* Apply r-th round transformation. 
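The state goes through the same table-driven substitution/diffusion step as the key schedule (the C tables precombine the S-box with the MDS mixing layer), except that each word is XORed with the matching word of the just-computed round key K^r instead of rc[r].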
*/ + + block[0] = (C0[(state[0] >> 56) & 0xFF] ^ C1[(state[7] >> 48) & 0xFF] ^ + C2[(state[6] >> 40) & 0xFF] ^ C3[(state[5] >> 32) & 0xFF] ^ + C4[(state[4] >> 24) & 0xFF] ^ C5[(state[3] >> 16) & 0xFF] ^ + C6[(state[2] >> 8) & 0xFF] ^ C7[(state[1] >> 0) & 0xFF] ^ key[0]); + block[1] = (C0[(state[1] >> 56) & 0xFF] ^ C1[(state[0] >> 48) & 0xFF] ^ + C2[(state[7] >> 40) & 0xFF] ^ C3[(state[6] >> 32) & 0xFF] ^ + C4[(state[5] >> 24) & 0xFF] ^ C5[(state[4] >> 16) & 0xFF] ^ + C6[(state[3] >> 8) & 0xFF] ^ C7[(state[2] >> 0) & 0xFF] ^ key[1]); + block[2] = (C0[(state[2] >> 56) & 0xFF] ^ C1[(state[1] >> 48) & 0xFF] ^ + C2[(state[0] >> 40) & 0xFF] ^ C3[(state[7] >> 32) & 0xFF] ^ + C4[(state[6] >> 24) & 0xFF] ^ C5[(state[5] >> 16) & 0xFF] ^ + C6[(state[4] >> 8) & 0xFF] ^ C7[(state[3] >> 0) & 0xFF] ^ key[2]); + block[3] = (C0[(state[3] >> 56) & 0xFF] ^ C1[(state[2] >> 48) & 0xFF] ^ + C2[(state[1] >> 40) & 0xFF] ^ C3[(state[0] >> 32) & 0xFF] ^ + C4[(state[7] >> 24) & 0xFF] ^ C5[(state[6] >> 16) & 0xFF] ^ + C6[(state[5] >> 8) & 0xFF] ^ C7[(state[4] >> 0) & 0xFF] ^ key[3]); + block[4] = (C0[(state[4] >> 56) & 0xFF] ^ C1[(state[3] >> 48) & 0xFF] ^ + C2[(state[2] >> 40) & 0xFF] ^ C3[(state[1] >> 32) & 0xFF] ^ + C4[(state[0] >> 24) & 0xFF] ^ C5[(state[7] >> 16) & 0xFF] ^ + C6[(state[6] >> 8) & 0xFF] ^ C7[(state[5] >> 0) & 0xFF] ^ key[4]); + block[5] = (C0[(state[5] >> 56) & 0xFF] ^ C1[(state[4] >> 48) & 0xFF] ^ + C2[(state[3] >> 40) & 0xFF] ^ C3[(state[2] >> 32) & 0xFF] ^ + C4[(state[1] >> 24) & 0xFF] ^ C5[(state[0] >> 16) & 0xFF] ^ + C6[(state[7] >> 8) & 0xFF] ^ C7[(state[6] >> 0) & 0xFF] ^ key[5]); + block[6] = (C0[(state[6] >> 56) & 0xFF] ^ C1[(state[5] >> 48) & 0xFF] ^ + C2[(state[4] >> 40) & 0xFF] ^ C3[(state[3] >> 32) & 0xFF] ^ + C4[(state[2] >> 24) & 0xFF] ^ C5[(state[1] >> 16) & 0xFF] ^ + C6[(state[0] >> 8) & 0xFF] ^ C7[(state[7] >> 0) & 0xFF] ^ key[6]); + block[7] = (C0[(state[7] >> 56) & 0xFF] ^ C1[(state[6] >> 48) & 0xFF] ^ + C2[(state[5] >> 40) & 0xFF] ^ C3[(state[4] >> 32) & 0xFF] ^ + C4[(state[3] >> 24) & 0xFF] ^ C5[(state[2] >> 16) & 0xFF] ^ + C6[(state[1] >> 8) & 0xFF] ^ C7[(state[0] >> 0) & 0xFF] ^ key[7]); + block_copy (state, block, i); + } + + /* Compression. */ + + block_xor (context->hash_state, data_block, i); + block_xor (context->hash_state, state, i); + + return /*burn_stack*/ 4 * sizeof(whirlpool_block_t) + 2 * sizeof(int) + + 4 * sizeof(void*); +} + +static unsigned int +whirlpool_transform ( void *c, const unsigned char *data, size_t nblks ) +{ + unsigned int burn; + + do + { + burn = whirlpool_transform_blk (c, data); + data += BLOCK_SIZE; + } + while (--nblks); + + return burn; +} + +#endif /* !USE_AMD64_ASM */ + + +/* Bug compatibility Whirlpool version. */ +static void +whirlpool_add_bugemu (whirlpool_context_t *context, + const void *buffer_arg, size_t buffer_n) +{ + const unsigned char *buffer = buffer_arg; + u64 buffer_size; + unsigned int carry; + unsigned int i; + + buffer_size = buffer_n; + + if (context->bugemu.count == BLOCK_SIZE) + { + /* Flush the buffer. */ + whirlpool_transform (context, context->bctx.buf, 1); + context->bugemu.count = 0; + } + if (! buffer) + return; /* Nothing to add. */ + + if (context->bugemu.count) + { + while (buffer_n && (context->bugemu.count < BLOCK_SIZE)) + { + context->bctx.buf[context->bugemu.count++] = *buffer++; + buffer_n--; + } + whirlpool_add_bugemu (context, NULL, 0); + if (!buffer_n) + return; /* Done. This is the bug we emulate. 
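Returning here skips the bit-counter update at the end of this function, so input that exactly tops up a partially filled buffer is never added to the hashed message length.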
*/ + } + + while (buffer_n >= BLOCK_SIZE) + { + whirlpool_transform (context, buffer, 1); + context->bugemu.count = 0; + buffer_n -= BLOCK_SIZE; + buffer += BLOCK_SIZE; + } + while (buffer_n && (context->bugemu.count < BLOCK_SIZE)) + { + context->bctx.buf[context->bugemu.count++] = *buffer++; + buffer_n--; + } + + /* Update bit counter. */ + carry = 0; + buffer_size <<= 3; + for (i = 1; i <= 32; i++) + { + if (! (buffer_size || carry)) + break; + + carry += context->bugemu.length[32 - i] + (buffer_size & 0xFF); + context->bugemu.length[32 - i] = carry; + buffer_size >>= 8; + carry >>= 8; + } + gcry_assert (! (buffer_size || carry)); +} + + +/* Bug compatibility Whirlpool version. */ +static void +whirlpool_final_bugemu (void *ctx) +{ + whirlpool_context_t *context = ctx; + unsigned int i; + + /* Flush. */ + whirlpool_add_bugemu (context, NULL, 0); + + /* Pad. */ + context->bctx.buf[context->bugemu.count++] = 0x80; + + if (context->bugemu.count > 32) + { + /* An extra block is necessary. */ + while (context->bugemu.count < 64) + context->bctx.buf[context->bugemu.count++] = 0; + whirlpool_add_bugemu (context, NULL, 0); + } + while (context->bugemu.count < 32) + context->bctx.buf[context->bugemu.count++] = 0; + + /* Add length of message. */ + memcpy (context->bctx.buf + context->bugemu.count, + context->bugemu.length, 32); + context->bugemu.count += 32; + whirlpool_add_bugemu (context, NULL, 0); + + block_to_buffer (context->bctx.buf, context->hash_state, i); +} + + +static void +whirlpool_write (void *ctx, const void *buffer, size_t buffer_n) +{ + whirlpool_context_t *context = ctx; + + if (context->use_bugemu) + { + whirlpool_add_bugemu (context, buffer, buffer_n); + } + else + { + u64 old_nblocks = context->bctx.nblocks; + + _gcry_md_block_write (context, buffer, buffer_n); + + gcry_assert (old_nblocks <= context->bctx.nblocks); + } +} + +static void +whirlpool_final (void *ctx) +{ + whirlpool_context_t *context = ctx; + unsigned int i; + u64 t, th, lsb, msb; + unsigned char *length; + + if (context->use_bugemu) + { + whirlpool_final_bugemu (ctx); + return; + } + + t = context->bctx.nblocks; + /* if (sizeof t == sizeof context->bctx.nblocks) */ + th = context->bctx.nblocks_high; + /* else */ + /* th = context->bctx.nblocks >> 64; In case we ever use u128 */ + + /* multiply by 64 to make a byte count */ + lsb = t << 6; + msb = (th << 6) | (t >> 58); + /* add the count */ + t = lsb; + if ((lsb += context->bctx.count) < t) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 61; + + /* Flush. */ + whirlpool_write (context, NULL, 0); + + /* Pad. */ + context->bctx.buf[context->bctx.count++] = 0x80; + + if (context->bctx.count > 32) + { + /* An extra block is necessary. */ + while (context->bctx.count < 64) + context->bctx.buf[context->bctx.count++] = 0; + whirlpool_write (context, NULL, 0); + } + while (context->bctx.count < 32) + context->bctx.buf[context->bctx.count++] = 0; + + /* Add length of message. 
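Whirlpool appends the message length as a 256-bit big-endian bit counter in the final 32 bytes; only the low 128 bits (msb:lsb) can be non-zero here, so the two high words are written as zero.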
*/ + length = context->bctx.buf + context->bctx.count; + buf_put_be64(&length[0 * 8], 0); + buf_put_be64(&length[1 * 8], 0); + buf_put_be64(&length[2 * 8], msb); + buf_put_be64(&length[3 * 8], lsb); + context->bctx.count += 32; + whirlpool_write (context, NULL, 0); + + block_to_buffer (context->bctx.buf, context->hash_state, i); +} + +static byte * +whirlpool_read (void *ctx) +{ + whirlpool_context_t *context = ctx; + + return context->bctx.buf; +} + +gcry_md_spec_t _gcry_digest_spec_whirlpool = + { + GCRY_MD_WHIRLPOOL, {0, 0}, + "WHIRLPOOL", NULL, 0, NULL, 64, + whirlpool_init, whirlpool_write, whirlpool_final, whirlpool_read, NULL, + sizeof (whirlpool_context_t) + };