/* keccak.c - SHA3 hash functions
* Copyright (C) 2015 g10 Code GmbH
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser general Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <string.h>
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"
/* USE_64BIT indicates whether to use 64-bit generic implementation.
* USE_32BIT indicates whether to use 32-bit generic implementation. */
#undef USE_64BIT
#if defined(__x86_64__) || SIZEOF_UNSIGNED_LONG == 8
# define USE_64BIT 1
#else
# define USE_32BIT 1
#endif
/* USE_64BIT_BMI2 indicates whether to compile with 64-bit Intel BMI2 code. */
#undef USE_64BIT_BMI2
#if defined(USE_64BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2)
# define USE_64BIT_BMI2 1
#endif
/* USE_64BIT_SHLD indicates whether to compile with 64-bit Intel SHLD code. */
#undef USE_64BIT_SHLD
#if defined(USE_64BIT) && defined (__GNUC__) && defined(__x86_64__)
# define USE_64BIT_SHLD 1
#endif
/* USE_32BIT_BMI2 indicates whether to compile with 32-bit Intel BMI2 code. */
#undef USE_32BIT_BMI2
#if defined(USE_32BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2)
# define USE_32BIT_BMI2 1
#endif
/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly
* code. */
#undef USE_64BIT_ARM_NEON
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_NEON)
# define USE_64BIT_ARM_NEON 1
# endif
#endif /*ENABLE_NEON_SUPPORT*/
#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON)
# define NEED_COMMON64 1
#endif
#ifdef USE_32BIT
# define NEED_COMMON32BI 1
#endif
#define SHA3_DELIMITED_SUFFIX 0x06
#define SHAKE_DELIMITED_SUFFIX 0x1F
typedef struct
{
union {
#ifdef NEED_COMMON64
u64 state64[25];
#endif
#ifdef NEED_COMMON32BI
u32 state32bi[50];
#endif
} u;
} KECCAK_STATE;
typedef struct
{
unsigned int (*permute)(KECCAK_STATE *hd);
unsigned int (*absorb)(KECCAK_STATE *hd, int pos, const byte *lanes,
unsigned int nlanes, int blocklanes);
unsigned int (*extract) (KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
unsigned int outlen);
} keccak_ops_t;
typedef struct KECCAK_CONTEXT_S
{
KECCAK_STATE state;
unsigned int outlen;
unsigned int blocksize;
unsigned int count;
unsigned int suffix;
const keccak_ops_t *ops;
} KECCAK_CONTEXT;
#ifdef NEED_COMMON64
const u64 _gcry_keccak_round_consts_64bit[24 + 1] =
{
U64_C(0x0000000000000001), U64_C(0x0000000000008082),
U64_C(0x800000000000808A), U64_C(0x8000000080008000),
U64_C(0x000000000000808B), U64_C(0x0000000080000001),
U64_C(0x8000000080008081), U64_C(0x8000000000008009),
U64_C(0x000000000000008A), U64_C(0x0000000000000088),
U64_C(0x0000000080008009), U64_C(0x000000008000000A),
U64_C(0x000000008000808B), U64_C(0x800000000000008B),
U64_C(0x8000000000008089), U64_C(0x8000000000008003),
U64_C(0x8000000000008002), U64_C(0x8000000000000080),
U64_C(0x000000000000800A), U64_C(0x800000008000000A),
U64_C(0x8000000080008081), U64_C(0x8000000000008080),
U64_C(0x0000000080000001), U64_C(0x8000000080008008),
U64_C(0xFFFFFFFFFFFFFFFF)
};
static unsigned int
keccak_extract64(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
unsigned int outlen)
{
unsigned int i;
/* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
{
u64 tmp = hd->u.state64[i];
buf_put_le64(outbuf, tmp);
outbuf += 8;
}
return 0;
}
#endif /* NEED_COMMON64 */
#ifdef NEED_COMMON32BI
static const u32 round_consts_32bit[2 * 24] =
{
0x00000001UL, 0x00000000UL, 0x00000000UL, 0x00000089UL,
0x00000000UL, 0x8000008bUL, 0x00000000UL, 0x80008080UL,
0x00000001UL, 0x0000008bUL, 0x00000001UL, 0x00008000UL,
0x00000001UL, 0x80008088UL, 0x00000001UL, 0x80000082UL,
0x00000000UL, 0x0000000bUL, 0x00000000UL, 0x0000000aUL,
0x00000001UL, 0x00008082UL, 0x00000000UL, 0x00008003UL,
0x00000001UL, 0x0000808bUL, 0x00000001UL, 0x8000000bUL,
0x00000001UL, 0x8000008aUL, 0x00000001UL, 0x80000081UL,
0x00000000UL, 0x80000081UL, 0x00000000UL, 0x80000008UL,
0x00000000UL, 0x00000083UL, 0x00000000UL, 0x80008003UL,
0x00000001UL, 0x80008088UL, 0x00000000UL, 0x80000088UL,
0x00000001UL, 0x00008000UL, 0x00000000UL, 0x80008082UL
};
static unsigned int
keccak_extract32bi(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
unsigned int outlen)
{
unsigned int i;
u32 x0;
u32 x1;
u32 t;
/* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
{
x0 = hd->u.state32bi[i * 2 + 0];
x1 = hd->u.state32bi[i * 2 + 1];
t = (x0 & 0x0000FFFFUL) + (x1 << 16);
x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL);
x0 = t;
t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8);
t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4);
t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2);
t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1);
t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8);
t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4);
t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2);
t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1);
buf_put_le32(&outbuf[0], x0);
buf_put_le32(&outbuf[4], x1);
outbuf += 8;
}
return 0;
}
static inline void
keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1)
{
u32 t;
t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1);
t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2);
t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4);
t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8);
t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1);
t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2);
t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4);
t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8);
lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16);
lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL);
}
#endif /* NEED_COMMON32BI */
/* Construct generic 64-bit implementation. */
#ifdef USE_64BIT
#if __GNUC__ >= 4 && defined(__x86_64__)
static inline void absorb_lanes64_8(u64 *dst, const byte *in)
{
asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
"movdqu 0*16(%[in]), %%xmm4\n\t"
"movdqu 1*16(%[dst]), %%xmm1\n\t"
"movdqu 1*16(%[in]), %%xmm5\n\t"
"movdqu 2*16(%[dst]), %%xmm2\n\t"
"movdqu 3*16(%[dst]), %%xmm3\n\t"
"pxor %%xmm4, %%xmm0\n\t"
"pxor %%xmm5, %%xmm1\n\t"
"movdqu 2*16(%[in]), %%xmm4\n\t"
"movdqu 3*16(%[in]), %%xmm5\n\t"
"movdqu %%xmm0, 0*16(%[dst])\n\t"
"pxor %%xmm4, %%xmm2\n\t"
"movdqu %%xmm1, 1*16(%[dst])\n\t"
"pxor %%xmm5, %%xmm3\n\t"
"movdqu %%xmm2, 2*16(%[dst])\n\t"
"movdqu %%xmm3, 3*16(%[dst])\n\t"
:
: [dst] "r" (dst), [in] "r" (in)
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");
}
static inline void absorb_lanes64_4(u64 *dst, const byte *in)
{
asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
"movdqu 0*16(%[in]), %%xmm4\n\t"
"movdqu 1*16(%[dst]), %%xmm1\n\t"
"movdqu 1*16(%[in]), %%xmm5\n\t"
"pxor %%xmm4, %%xmm0\n\t"
"pxor %%xmm5, %%xmm1\n\t"
"movdqu %%xmm0, 0*16(%[dst])\n\t"
"movdqu %%xmm1, 1*16(%[dst])\n\t"
:
: [dst] "r" (dst), [in] "r" (in)
: "xmm0", "xmm1", "xmm4", "xmm5", "memory");
}
static inline void absorb_lanes64_2(u64 *dst, const byte *in)
{
asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
"movdqu 0*16(%[in]), %%xmm4\n\t"
"pxor %%xmm4, %%xmm0\n\t"
"movdqu %%xmm0, 0*16(%[dst])\n\t"
:
: [dst] "r" (dst), [in] "r" (in)
: "xmm0", "xmm4", "memory");
}
#else /* __x86_64__ */
static inline void absorb_lanes64_8(u64 *dst, const byte *in)
{
dst[0] ^= buf_get_le64(in + 8 * 0);
dst[1] ^= buf_get_le64(in + 8 * 1);
dst[2] ^= buf_get_le64(in + 8 * 2);
dst[3] ^= buf_get_le64(in + 8 * 3);
dst[4] ^= buf_get_le64(in + 8 * 4);
dst[5] ^= buf_get_le64(in + 8 * 5);
dst[6] ^= buf_get_le64(in + 8 * 6);
dst[7] ^= buf_get_le64(in + 8 * 7);
}
static inline void absorb_lanes64_4(u64 *dst, const byte *in)
{
dst[0] ^= buf_get_le64(in + 8 * 0);
dst[1] ^= buf_get_le64(in + 8 * 1);
dst[2] ^= buf_get_le64(in + 8 * 2);
dst[3] ^= buf_get_le64(in + 8 * 3);
}
static inline void absorb_lanes64_2(u64 *dst, const byte *in)
{
dst[0] ^= buf_get_le64(in + 8 * 0);
dst[1] ^= buf_get_le64(in + 8 * 1);
}
#endif /* !__x86_64__ */
static inline void absorb_lanes64_1(u64 *dst, const byte *in)
{
dst[0] ^= buf_get_le64(in + 8 * 0);
}
# define ANDN64(x, y) (~(x) & (y))
# define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \
((x) >> ((64 - (unsigned int)(n)) & 63)))
# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64
# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64
# include "keccak_permute_64.h"
# undef ANDN64
# undef ROL64
# undef KECCAK_F1600_PERMUTE_FUNC_NAME
# undef KECCAK_F1600_ABSORB_FUNC_NAME
static const keccak_ops_t keccak_generic64_ops =
{
.permute = keccak_f1600_state_permute64,
.absorb = keccak_absorb_lanes64,
.extract = keccak_extract64,
};
#endif /* USE_64BIT */
/* Construct 64-bit Intel SHLD implementation. */
#ifdef USE_64BIT_SHLD
# define ANDN64(x, y) (~(x) & (y))
# define ROL64(x, n) ({ \
u64 tmp = (x); \
asm ("shldq %1, %0, %0" \
: "+r" (tmp) \
: "J" ((n) & 63) \
: "cc"); \
tmp; })
# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld
# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld
# include "keccak_permute_64.h"
# undef ANDN64
# undef ROL64
# undef KECCAK_F1600_PERMUTE_FUNC_NAME
# undef KECCAK_F1600_ABSORB_FUNC_NAME
static const keccak_ops_t keccak_shld_64_ops =
{
.permute = keccak_f1600_state_permute64_shld,
.absorb = keccak_absorb_lanes64_shld,
.extract = keccak_extract64,
};
#endif /* USE_64BIT_SHLD */
/* Construct 64-bit Intel BMI2 implementation. */
#ifdef USE_64BIT_BMI2
# define ANDN64(x, y) ({ \
u64 tmp; \
asm ("andnq %2, %1, %0" \
: "=r" (tmp) \
: "r0" (x), "rm" (y)); \
tmp; })
# define ROL64(x, n) ({ \
u64 tmp; \
asm ("rorxq %2, %1, %0" \
: "=r" (tmp) \
: "rm0" (x), "J" (64 - ((n) & 63))); \
tmp; })
# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2
# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2
# include "keccak_permute_64.h"
# undef ANDN64
# undef ROL64
# undef KECCAK_F1600_PERMUTE_FUNC_NAME
# undef KECCAK_F1600_ABSORB_FUNC_NAME
static const keccak_ops_t keccak_bmi2_64_ops =
{
.permute = keccak_f1600_state_permute64_bmi2,
.absorb = keccak_absorb_lanes64_bmi2,
.extract = keccak_extract64,
};
#endif /* USE_64BIT_BMI2 */
/* 64-bit ARMv7/NEON implementation. */
#ifdef USE_64BIT_ARM_NEON
unsigned int _gcry_keccak_permute_armv7_neon(u64 *state);
unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos,
const byte *lanes,
unsigned int nlanes,
int blocklanes);
static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd)
{
return _gcry_keccak_permute_armv7_neon(hd->u.state64);
}
static unsigned int
keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes,
unsigned int nlanes, int blocklanes)
{
if (blocklanes < 0)
{
/* blocklanes == -1, permutationless absorb from keccak_final. */
while (nlanes)
{
hd->u.state64[pos] ^= buf_get_le64(lanes);
lanes += 8;
nlanes--;
}
return 0;
}
else
{
return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes,
nlanes, blocklanes);
}
}
static const keccak_ops_t keccak_armv7_neon_64_ops =
{
.permute = keccak_permute64_armv7_neon,
.absorb = keccak_absorb_lanes64_armv7_neon,
.extract = keccak_extract64,
};
#endif /* USE_64BIT_ARM_NEON */
/* Construct generic 32-bit implementation. */
#ifdef USE_32BIT
# define ANDN32(x, y) (~(x) & (y))
# define ROL32(x, n) (((x) << ((unsigned int)n & 31)) | \
((x) >> ((32 - (unsigned int)(n)) & 31)))
# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi
# include "keccak_permute_32.h"
# undef ANDN32
# undef ROL32
# undef KECCAK_F1600_PERMUTE_FUNC_NAME
static unsigned int
keccak_absorb_lanes32bi(KECCAK_STATE *hd, int pos, const byte *lanes,
unsigned int nlanes, int blocklanes)
{
unsigned int burn = 0;
while (nlanes)
{
keccak_absorb_lane32bi(&hd->u.state32bi[pos * 2],
buf_get_le32(lanes + 0),
buf_get_le32(lanes + 4));
lanes += 8;
nlanes--;
if (++pos == blocklanes)
{
burn = keccak_f1600_state_permute32bi(hd);
pos = 0;
}
}
return burn;
}
static const keccak_ops_t keccak_generic32bi_ops =
{
.permute = keccak_f1600_state_permute32bi,
.absorb = keccak_absorb_lanes32bi,
.extract = keccak_extract32bi,
};
#endif /* USE_32BIT */
/* Construct 32-bit Intel BMI2 implementation. */
#ifdef USE_32BIT_BMI2
# define ANDN32(x, y) ({ \
u32 tmp; \
asm ("andnl %2, %1, %0" \
: "=r" (tmp) \
: "r0" (x), "rm" (y)); \
tmp; })
# define ROL32(x, n) ({ \
u32 tmp; \
asm ("rorxl %2, %1, %0" \
: "=r" (tmp) \
: "rm0" (x), "J" (32 - ((n) & 31))); \
tmp; })
# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi_bmi2
# include "keccak_permute_32.h"
# undef ANDN32
# undef ROL32
# undef KECCAK_F1600_PERMUTE_FUNC_NAME
static inline u32 pext(u32 x, u32 mask)
{
u32 tmp;
asm ("pextl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask));
return tmp;
}
static inline u32 pdep(u32 x, u32 mask)
{
u32 tmp;
asm ("pdepl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask));
return tmp;
}
static inline void
keccak_absorb_lane32bi_bmi2(u32 *lane, u32 x0, u32 x1)
{
x0 = pdep(pext(x0, 0x55555555), 0x0000ffff) | (pext(x0, 0xaaaaaaaa) << 16);
x1 = pdep(pext(x1, 0x55555555), 0x0000ffff) | (pext(x1, 0xaaaaaaaa) << 16);
lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16);
lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL);
}
static unsigned int
keccak_absorb_lanes32bi_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes,
unsigned int nlanes, int blocklanes)
{
unsigned int burn = 0;
while (nlanes)
{
keccak_absorb_lane32bi_bmi2(&hd->u.state32bi[pos * 2],
buf_get_le32(lanes + 0),
buf_get_le32(lanes + 4));
lanes += 8;
nlanes--;
if (++pos == blocklanes)
{
burn = keccak_f1600_state_permute32bi_bmi2(hd);
pos = 0;
}
}
return burn;
}
static unsigned int
keccak_extract32bi_bmi2(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
unsigned int outlen)
{
unsigned int i;
u32 x0;
u32 x1;
u32 t;
/* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
{
x0 = hd->u.state32bi[i * 2 + 0];
x1 = hd->u.state32bi[i * 2 + 1];
t = (x0 & 0x0000FFFFUL) + (x1 << 16);
x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL);
x0 = t;
x0 = pdep(pext(x0, 0xffff0001), 0xaaaaaaab) | pdep(x0 >> 1, 0x55555554);
x1 = pdep(pext(x1, 0xffff0001), 0xaaaaaaab) | pdep(x1 >> 1, 0x55555554);
buf_put_le32(&outbuf[0], x0);
buf_put_le32(&outbuf[4], x1);
outbuf += 8;
}
return 0;
}
static const keccak_ops_t keccak_bmi2_32bi_ops =
{
.permute = keccak_f1600_state_permute32bi_bmi2,
.absorb = keccak_absorb_lanes32bi_bmi2,
.extract = keccak_extract32bi_bmi2,
};
#endif /* USE_32BIT */
static void
keccak_write (void *context, const void *inbuf_arg, size_t inlen)
{
KECCAK_CONTEXT *ctx = context;
const size_t bsize = ctx->blocksize;
const size_t blocklanes = bsize / 8;
const byte *inbuf = inbuf_arg;
unsigned int nburn, burn = 0;
unsigned int count, i;
unsigned int pos, nlanes;
count = ctx->count;
if (inlen && (count % 8))
{
byte lane[8] = { 0, };
/* Complete absorbing partial input lane. */
pos = count / 8;
for (i = count % 8; inlen && i < 8; i++)
{
lane[i] = *inbuf++;
inlen--;
count++;
}
if (count == bsize)
count = 0;
nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1,
(count % 8) ? -1 : blocklanes);
burn = nburn > burn ? nburn : burn;
}
/* Absorb full input lanes. */
pos = count / 8;
nlanes = inlen / 8;
if (nlanes > 0)
{
nburn = ctx->ops->absorb(&ctx->state, pos, inbuf, nlanes, blocklanes);
burn = nburn > burn ? nburn : burn;
inlen -= nlanes * 8;
inbuf += nlanes * 8;
count += nlanes * 8;
count = count % bsize;
}
if (inlen)
{
byte lane[8] = { 0, };
/* Absorb remaining partial input lane. */
pos = count / 8;
for (i = count % 8; inlen && i < 8; i++)
{
lane[i] = *inbuf++;
inlen--;
count++;
}
nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, -1);
burn = nburn > burn ? nburn : burn;
gcry_assert(count < bsize);
}
ctx->count = count;
if (burn)
_gcry_burn_stack (burn);
}
static void
keccak_init (int algo, void *context, unsigned int flags)
{
KECCAK_CONTEXT *ctx = context;
KECCAK_STATE *hd = &ctx->state;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
(void)features;
memset (hd, 0, sizeof *hd);
ctx->count = 0;
/* Select generic implementation. */
#ifdef USE_64BIT
ctx->ops = &keccak_generic64_ops;
#elif defined USE_32BIT
ctx->ops = &keccak_generic32bi_ops;
#endif
/* Select optimized implementation based in hw features. */
if (0) {}
#ifdef USE_64BIT_ARM_NEON
else if (features & HWF_ARM_NEON)
ctx->ops = &keccak_armv7_neon_64_ops;
#endif
#ifdef USE_64BIT_BMI2
else if (features & HWF_INTEL_BMI2)
ctx->ops = &keccak_bmi2_64_ops;
#endif
#ifdef USE_32BIT_BMI2
else if (features & HWF_INTEL_BMI2)
ctx->ops = &keccak_bmi2_32bi_ops;
#endif
#ifdef USE_64BIT_SHLD
else if (features & HWF_INTEL_FAST_SHLD)
ctx->ops = &keccak_shld_64_ops;
#endif
/* Set input block size, in Keccak terms this is called 'rate'. */
switch (algo)
{
case GCRY_MD_SHA3_224:
ctx->suffix = SHA3_DELIMITED_SUFFIX;
ctx->blocksize = 1152 / 8;
ctx->outlen = 224 / 8;
break;
case GCRY_MD_SHA3_256:
ctx->suffix = SHA3_DELIMITED_SUFFIX;
ctx->blocksize = 1088 / 8;
ctx->outlen = 256 / 8;
break;
case GCRY_MD_SHA3_384:
ctx->suffix = SHA3_DELIMITED_SUFFIX;
ctx->blocksize = 832 / 8;
ctx->outlen = 384 / 8;
break;
case GCRY_MD_SHA3_512:
ctx->suffix = SHA3_DELIMITED_SUFFIX;
ctx->blocksize = 576 / 8;
ctx->outlen = 512 / 8;
break;
case GCRY_MD_SHAKE128:
ctx->suffix = SHAKE_DELIMITED_SUFFIX;
ctx->blocksize = 1344 / 8;
ctx->outlen = 0;
break;
case GCRY_MD_SHAKE256:
ctx->suffix = SHAKE_DELIMITED_SUFFIX;
ctx->blocksize = 1088 / 8;
ctx->outlen = 0;
break;
default:
BUG();
}
}
static void
sha3_224_init (void *context, unsigned int flags)
{
keccak_init (GCRY_MD_SHA3_224, context, flags);
}
static void
sha3_256_init (void *context, unsigned int flags)
{
keccak_init (GCRY_MD_SHA3_256, context, flags);
}
static void
sha3_384_init (void *context, unsigned int flags)
{
keccak_init (GCRY_MD_SHA3_384, context, flags);
}
static void
sha3_512_init (void *context, unsigned int flags)
{
keccak_init (GCRY_MD_SHA3_512, context, flags);
}
static void
shake128_init (void *context, unsigned int flags)
{
keccak_init (GCRY_MD_SHAKE128, context, flags);
}
static void
shake256_init (void *context, unsigned int flags)
{
keccak_init (GCRY_MD_SHAKE256, context, flags);
}
/* The routine final terminates the computation and
* returns the digest.
* The handle is prepared for a new cycle, but adding bytes to the
* handle will the destroy the returned buffer.
* Returns: 64 bytes representing the digest. When used for sha384,
* we take the leftmost 48 of those bytes.
*/
static void
keccak_final (void *context)
{
KECCAK_CONTEXT *ctx = context;
KECCAK_STATE *hd = &ctx->state;
const size_t bsize = ctx->blocksize;
const byte suffix = ctx->suffix;
unsigned int nburn, burn = 0;
unsigned int lastbytes;
byte lane[8];
lastbytes = ctx->count;
/* Do the padding and switch to the squeezing phase */
/* Absorb the last few bits and add the first bit of padding (which
coincides with the delimiter in delimited suffix) */
buf_put_le64(lane, (u64)suffix << ((lastbytes % 8) * 8));
nburn = ctx->ops->absorb(&ctx->state, lastbytes / 8, lane, 1, -1);
burn = nburn > burn ? nburn : burn;
/* Add the second bit of padding. */
buf_put_le64(lane, (u64)0x80 << (((bsize - 1) % 8) * 8));
nburn = ctx->ops->absorb(&ctx->state, (bsize - 1) / 8, lane, 1, -1);
burn = nburn > burn ? nburn : burn;
if (suffix == SHA3_DELIMITED_SUFFIX)
{
/* Switch to the squeezing phase. */
nburn = ctx->ops->permute(hd);
burn = nburn > burn ? nburn : burn;
/* Squeeze out the SHA3 digest. */
nburn = ctx->ops->extract(hd, 0, (void *)hd, ctx->outlen);
burn = nburn > burn ? nburn : burn;
}
else
{
/* Output for SHAKE can now be read with md_extract(). */
ctx->count = 0;
}
wipememory(lane, sizeof(lane));
if (burn)
_gcry_burn_stack (burn);
}
static byte *
keccak_read (void *context)
{
KECCAK_CONTEXT *ctx = (KECCAK_CONTEXT *) context;
KECCAK_STATE *hd = &ctx->state;
return (byte *)&hd->u;
}
static void
keccak_extract (void *context, void *out, size_t outlen)
{
KECCAK_CONTEXT *ctx = context;
KECCAK_STATE *hd = &ctx->state;
const size_t bsize = ctx->blocksize;
unsigned int nburn, burn = 0;
byte *outbuf = out;
unsigned int nlanes;
unsigned int nleft;
unsigned int count;
unsigned int i;
byte lane[8];
count = ctx->count;
while (count && outlen && (outlen < 8 || count % 8))
{
/* Extract partial lane. */
nburn = ctx->ops->extract(hd, count / 8, lane, 8);
burn = nburn > burn ? nburn : burn;
for (i = count % 8; outlen && i < 8; i++)
{
*outbuf++ = lane[i];
outlen--;
count++;
}
gcry_assert(count <= bsize);
if (count == bsize)
count = 0;
}
if (outlen >= 8 && count)
{
/* Extract tail of partial block. */
nlanes = outlen / 8;
nleft = (bsize - count) / 8;
nlanes = nlanes < nleft ? nlanes : nleft;
nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8);
burn = nburn > burn ? nburn : burn;
outlen -= nlanes * 8;
outbuf += nlanes * 8;
count += nlanes * 8;
gcry_assert(count <= bsize);
if (count == bsize)
count = 0;
}
while (outlen >= bsize)
{
gcry_assert(count == 0);
/* Squeeze more. */
nburn = ctx->ops->permute(hd);
burn = nburn > burn ? nburn : burn;
/* Extract full block. */
nburn = ctx->ops->extract(hd, 0, outbuf, bsize);
burn = nburn > burn ? nburn : burn;
outlen -= bsize;
outbuf += bsize;
}
if (outlen)
{
gcry_assert(outlen < bsize);
if (count == 0)
{
/* Squeeze more. */
nburn = ctx->ops->permute(hd);
burn = nburn > burn ? nburn : burn;
}
if (outlen >= 8)
{
/* Extract head of partial block. */
nlanes = outlen / 8;
nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8);
burn = nburn > burn ? nburn : burn;
outlen -= nlanes * 8;
outbuf += nlanes * 8;
count += nlanes * 8;
gcry_assert(count < bsize);
}
if (outlen)
{
/* Extract head of partial lane. */
nburn = ctx->ops->extract(hd, count / 8, lane, 8);
burn = nburn > burn ? nburn : burn;
for (i = count % 8; outlen && i < 8; i++)
{
*outbuf++ = lane[i];
outlen--;
count++;
}
gcry_assert(count < bsize);
}
}
ctx->count = count;
if (burn)
_gcry_burn_stack (burn);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_keccak (int algo, int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
const char *short_hash;
const char *long_hash;
const char *one_million_a_hash;
int hash_len;
switch (algo)
{
default:
BUG();
case GCRY_MD_SHA3_224:
short_hash =
"\xe6\x42\x82\x4c\x3f\x8c\xf2\x4a\xd0\x92\x34\xee\x7d\x3c\x76\x6f"
"\xc9\xa3\xa5\x16\x8d\x0c\x94\xad\x73\xb4\x6f\xdf";
long_hash =
"\x54\x3e\x68\x68\xe1\x66\x6c\x1a\x64\x36\x30\xdf\x77\x36\x7a\xe5"
"\xa6\x2a\x85\x07\x0a\x51\xc1\x4c\xbf\x66\x5c\xbc";
one_million_a_hash =
"\xd6\x93\x35\xb9\x33\x25\x19\x2e\x51\x6a\x91\x2e\x6d\x19\xa1\x5c"
"\xb5\x1c\x6e\xd5\xc1\x52\x43\xe7\xa7\xfd\x65\x3c";
hash_len = 28;
break;
case GCRY_MD_SHA3_256:
short_hash =
"\x3a\x98\x5d\xa7\x4f\xe2\x25\xb2\x04\x5c\x17\x2d\x6b\xd3\x90\xbd"
"\x85\x5f\x08\x6e\x3e\x9d\x52\x5b\x46\xbf\xe2\x45\x11\x43\x15\x32";
long_hash =
"\x91\x6f\x60\x61\xfe\x87\x97\x41\xca\x64\x69\xb4\x39\x71\xdf\xdb"
"\x28\xb1\xa3\x2d\xc3\x6c\xb3\x25\x4e\x81\x2b\xe2\x7a\xad\x1d\x18";
one_million_a_hash =
"\x5c\x88\x75\xae\x47\x4a\x36\x34\xba\x4f\xd5\x5e\xc8\x5b\xff\xd6"
"\x61\xf3\x2a\xca\x75\xc6\xd6\x99\xd0\xcd\xcb\x6c\x11\x58\x91\xc1";
hash_len = 32;
break;
case GCRY_MD_SHA3_384:
short_hash =
"\xec\x01\x49\x82\x88\x51\x6f\xc9\x26\x45\x9f\x58\xe2\xc6\xad\x8d"
"\xf9\xb4\x73\xcb\x0f\xc0\x8c\x25\x96\xda\x7c\xf0\xe4\x9b\xe4\xb2"
"\x98\xd8\x8c\xea\x92\x7a\xc7\xf5\x39\xf1\xed\xf2\x28\x37\x6d\x25";
long_hash =
"\x79\x40\x7d\x3b\x59\x16\xb5\x9c\x3e\x30\xb0\x98\x22\x97\x47\x91"
"\xc3\x13\xfb\x9e\xcc\x84\x9e\x40\x6f\x23\x59\x2d\x04\xf6\x25\xdc"
"\x8c\x70\x9b\x98\xb4\x3b\x38\x52\xb3\x37\x21\x61\x79\xaa\x7f\xc7";
one_million_a_hash =
"\xee\xe9\xe2\x4d\x78\xc1\x85\x53\x37\x98\x34\x51\xdf\x97\xc8\xad"
"\x9e\xed\xf2\x56\xc6\x33\x4f\x8e\x94\x8d\x25\x2d\x5e\x0e\x76\x84"
"\x7a\xa0\x77\x4d\xdb\x90\xa8\x42\x19\x0d\x2c\x55\x8b\x4b\x83\x40";
hash_len = 48;
break;
case GCRY_MD_SHA3_512:
short_hash =
"\xb7\x51\x85\x0b\x1a\x57\x16\x8a\x56\x93\xcd\x92\x4b\x6b\x09\x6e"
"\x08\xf6\x21\x82\x74\x44\xf7\x0d\x88\x4f\x5d\x02\x40\xd2\x71\x2e"
"\x10\xe1\x16\xe9\x19\x2a\xf3\xc9\x1a\x7e\xc5\x76\x47\xe3\x93\x40"
"\x57\x34\x0b\x4c\xf4\x08\xd5\xa5\x65\x92\xf8\x27\x4e\xec\x53\xf0";
long_hash =
"\xaf\xeb\xb2\xef\x54\x2e\x65\x79\xc5\x0c\xad\x06\xd2\xe5\x78\xf9"
"\xf8\xdd\x68\x81\xd7\xdc\x82\x4d\x26\x36\x0f\xee\xbf\x18\xa4\xfa"
"\x73\xe3\x26\x11\x22\x94\x8e\xfc\xfd\x49\x2e\x74\xe8\x2e\x21\x89"
"\xed\x0f\xb4\x40\xd1\x87\xf3\x82\x27\x0c\xb4\x55\xf2\x1d\xd1\x85";
one_million_a_hash =
"\x3c\x3a\x87\x6d\xa1\x40\x34\xab\x60\x62\x7c\x07\x7b\xb9\x8f\x7e"
"\x12\x0a\x2a\x53\x70\x21\x2d\xff\xb3\x38\x5a\x18\xd4\xf3\x88\x59"
"\xed\x31\x1d\x0a\x9d\x51\x41\xce\x9c\xc5\xc6\x6e\xe6\x89\xb2\x66"
"\xa8\xaa\x18\xac\xe8\x28\x2a\x0e\x0d\xb5\x96\xc9\x0b\x0a\x7b\x87";
hash_len = 64;
break;
case GCRY_MD_SHAKE128:
short_hash =
"\x58\x81\x09\x2d\xd8\x18\xbf\x5c\xf8\xa3\xdd\xb7\x93\xfb\xcb\xa7"
"\x40\x97\xd5\xc5\x26\xa6\xd3\x5f\x97\xb8\x33\x51\x94\x0f\x2c\xc8";
long_hash =
"\x7b\x6d\xf6\xff\x18\x11\x73\xb6\xd7\x89\x8d\x7f\xf6\x3f\xb0\x7b"
"\x7c\x23\x7d\xaf\x47\x1a\x5a\xe5\x60\x2a\xdb\xcc\xef\x9c\xcf\x4b";
one_million_a_hash =
"\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11"
"\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58";
hash_len = 32;
break;
case GCRY_MD_SHAKE256:
short_hash =
"\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11\x4d"
"\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b\x57\x39";
long_hash =
"\x98\xbe\x04\x51\x6c\x04\xcc\x73\x59\x3f\xef\x3e\xd0\x35\x2e\xa9"
"\xf6\x44\x39\x42\xd6\x95\x0e\x29\xa3\x72\xa6\x81\xc3\xde\xaf\x45";
one_million_a_hash =
"\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb"
"\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a";
hash_len = 32;
break;
}
what = "short string";
errtxt = _gcry_hash_selftest_check_one (algo, 0, "abc", 3, short_hash,
hash_len);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(algo, 0,
"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
"hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
long_hash, hash_len);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one (algo, 1, NULL, 0,
one_million_a_hash, hash_len);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", algo, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA3_224:
case GCRY_MD_SHA3_256:
case GCRY_MD_SHA3_384:
case GCRY_MD_SHA3_512:
case GCRY_MD_SHAKE128:
case GCRY_MD_SHAKE256:
ec = selftests_keccak (algo, extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static byte sha3_224_asn[] = { 0x30 };
static gcry_md_oid_spec_t oid_spec_sha3_224[] =
{
{ "2.16.840.1.101.3.4.2.7" },
/* PKCS#1 sha3_224WithRSAEncryption */
{ "?" },
{ NULL }
};
static byte sha3_256_asn[] = { 0x30 };
static gcry_md_oid_spec_t oid_spec_sha3_256[] =
{
{ "2.16.840.1.101.3.4.2.8" },
/* PKCS#1 sha3_256WithRSAEncryption */
{ "?" },
{ NULL }
};
static byte sha3_384_asn[] = { 0x30 };
static gcry_md_oid_spec_t oid_spec_sha3_384[] =
{
{ "2.16.840.1.101.3.4.2.9" },
/* PKCS#1 sha3_384WithRSAEncryption */
{ "?" },
{ NULL }
};
static byte sha3_512_asn[] = { 0x30 };
static gcry_md_oid_spec_t oid_spec_sha3_512[] =
{
{ "2.16.840.1.101.3.4.2.10" },
/* PKCS#1 sha3_512WithRSAEncryption */
{ "?" },
{ NULL }
};
static byte shake128_asn[] = { 0x30 };
static gcry_md_oid_spec_t oid_spec_shake128[] =
{
{ "2.16.840.1.101.3.4.2.11" },
/* PKCS#1 shake128WithRSAEncryption */
{ "?" },
{ NULL }
};
static byte shake256_asn[] = { 0x30 };
static gcry_md_oid_spec_t oid_spec_shake256[] =
{
{ "2.16.840.1.101.3.4.2.12" },
/* PKCS#1 shake256WithRSAEncryption */
{ "?" },
{ NULL }
};
gcry_md_spec_t _gcry_digest_spec_sha3_224 =
{
GCRY_MD_SHA3_224, {0, 1},
"SHA3-224", sha3_224_asn, DIM (sha3_224_asn), oid_spec_sha3_224, 28,
sha3_224_init, keccak_write, keccak_final, keccak_read, NULL,
sizeof (KECCAK_CONTEXT),
run_selftests
};
gcry_md_spec_t _gcry_digest_spec_sha3_256 =
{
GCRY_MD_SHA3_256, {0, 1},
"SHA3-256", sha3_256_asn, DIM (sha3_256_asn), oid_spec_sha3_256, 32,
sha3_256_init, keccak_write, keccak_final, keccak_read, NULL,
sizeof (KECCAK_CONTEXT),
run_selftests
};
gcry_md_spec_t _gcry_digest_spec_sha3_384 =
{
GCRY_MD_SHA3_384, {0, 1},
"SHA3-384", sha3_384_asn, DIM (sha3_384_asn), oid_spec_sha3_384, 48,
sha3_384_init, keccak_write, keccak_final, keccak_read, NULL,
sizeof (KECCAK_CONTEXT),
run_selftests
};
gcry_md_spec_t _gcry_digest_spec_sha3_512 =
{
GCRY_MD_SHA3_512, {0, 1},
"SHA3-512", sha3_512_asn, DIM (sha3_512_asn), oid_spec_sha3_512, 64,
sha3_512_init, keccak_write, keccak_final, keccak_read, NULL,
sizeof (KECCAK_CONTEXT),
run_selftests
};
gcry_md_spec_t _gcry_digest_spec_shake128 =
{
GCRY_MD_SHAKE128, {0, 1},
"SHAKE128", shake128_asn, DIM (shake128_asn), oid_spec_shake128, 0,
shake128_init, keccak_write, keccak_final, NULL, keccak_extract,
sizeof (KECCAK_CONTEXT),
run_selftests
};
gcry_md_spec_t _gcry_digest_spec_shake256 =
{
GCRY_MD_SHAKE256, {0, 1},
"SHAKE256", shake256_asn, DIM (shake256_asn), oid_spec_shake256, 0,
shake256_init, keccak_write, keccak_final, NULL, keccak_extract,
sizeof (KECCAK_CONTEXT),
run_selftests
};