rsys

Basic data structures and low-level features
git clone git://git.meso-star.fr/rsys.git
Log | Files | Refs | README | LICENSE

commit 3a86cc9ebd615655f25540204b9abbdab097cb34
parent 36a529a9feb4f6e5b2b7135ee66475250f78fb86
Author: vaplv <vaplv@free.fr>
Date:   Tue,  6 Sep 2022 18:16:56 +0200

Optimize the sha256 implementation

Diffstat:
M src/hash.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
1 file changed, 114 insertions(+), 43 deletions(-)

diff --git a/src/hash.c b/src/hash.c @@ -43,32 +43,21 @@ static const uint32_t k[64] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; -/******************************************************************************* - * Helper functions - ******************************************************************************/ -/* Right rotation */ -static FINLINE uint32_t -rrot(const uint32_t ui, const unsigned int count) -{ - ASSERT(count <= 32); - return ui >> count | ui << (32 - count); -} - +/* Most of this code comes from GnuPG's cipher/sha1.c */ static void -sha256_process_chunk(struct sha256_ctx* ctx) +sha256_process_chunk(struct sha256_ctx* ctx, const char chunk[64]) { uint32_t w[64]; uint32_t a, b, c, d, e, f, g, h; uint32_t i; + + uint32_t tm; + uint32_t t0, t1; + ASSERT(ctx); FOR_EACH(i, 0, 16) { - w[i] = big_endian_32(((uint32_t*)ctx->chunk)[i]); - } - FOR_EACH(i, 16, 64) { - const uint32_t s0 = rrot(w[i-15],7) ^ rrot(w[i-15],18) ^ (w[i-15] >> 3); - const uint32_t s1 = rrot(w[i-2],17) ^ rrot(w[i-2], 19) ^ (w[i-2] >> 10); - w[i] = w[i-16] + s0 + w[i-7] + s1; + w[i] = big_endian_32(((uint32_t*)chunk)[i]); } a = ctx->state[0]; @@ -80,24 +69,86 @@ sha256_process_chunk(struct sha256_ctx* ctx) g = ctx->state[6]; h = ctx->state[7]; - /* Compress the chunk */ - FOR_EACH(i, 0, 64) { - const uint32_t s0 = rrot(a, 2) ^ rrot(a, 13) ^ rrot(a, 22); - const uint32_t s1 = rrot(e, 6) ^ rrot(e, 11) ^ rrot(e, 25); - const uint32_t ch = (e & f) ^ ((~e) & g); - const uint32_t maj = (a & b) ^ (a & c) ^ (b & c); - const uint32_t tmp1 = h + s1 + ch + k[i] + w[i]; - const uint32_t tmp2 = s0 + maj; - - h = g; - g = f; - f = e; - e = d + tmp1; - d = c; - c = b; - b = a; - a = tmp1 + tmp2; - } + #define ROL(X, N) (((X) << (N)) | ((X) >> (32 - (N)))) + #define S0(X) (ROL(X,25)^ROL(X,14)^(X>>3)) + #define S1(X) (ROL(X,15)^ROL(X,13)^(X>>10)) + #define SS0(X) (ROL(X,30)^ROL(X,19)^ROL(X,10)) + #define SS1(X) (ROL(X,26)^ROL(X,21)^ROL(X,7)) + #define M(I) (tm = S1(w[(I- 2)&0x0f]) + 
w[(I-7)&0x0f] \ + + S0(w[(I-15)&0x0f]) + w[I&0x0f], w[I&0x0f] = tm) + #define F2(A, B, C) (( A & B ) | (C & (A | B))) + #define F1(E, F, G) (G ^ (E & (F ^ G))) + #define R(A, B, C, D, E, F, G, H, K, M) { \ + t0 = SS0(A) + F2(A, B, C); \ + t1 = H + SS1(E) + F1(E, F, G) + K + M; \ + D += t1; \ + H = t0 + t1; \ + } (void)0 + + R( a, b, c, d, e, f, g, h, k[ 0], w[ 0] ); + R( h, a, b, c, d, e, f, g, k[ 1], w[ 1] ); + R( g, h, a, b, c, d, e, f, k[ 2], w[ 2] ); + R( f, g, h, a, b, c, d, e, k[ 3], w[ 3] ); + R( e, f, g, h, a, b, c, d, k[ 4], w[ 4] ); + R( d, e, f, g, h, a, b, c, k[ 5], w[ 5] ); + R( c, d, e, f, g, h, a, b, k[ 6], w[ 6] ); + R( b, c, d, e, f, g, h, a, k[ 7], w[ 7] ); + R( a, b, c, d, e, f, g, h, k[ 8], w[ 8] ); + R( h, a, b, c, d, e, f, g, k[ 9], w[ 9] ); + R( g, h, a, b, c, d, e, f, k[10], w[10] ); + R( f, g, h, a, b, c, d, e, k[11], w[11] ); + R( e, f, g, h, a, b, c, d, k[12], w[12] ); + R( d, e, f, g, h, a, b, c, k[13], w[13] ); + R( c, d, e, f, g, h, a, b, k[14], w[14] ); + R( b, c, d, e, f, g, h, a, k[15], w[15] ); + R( a, b, c, d, e, f, g, h, k[16], M(16) ); + R( h, a, b, c, d, e, f, g, k[17], M(17) ); + R( g, h, a, b, c, d, e, f, k[18], M(18) ); + R( f, g, h, a, b, c, d, e, k[19], M(19) ); + R( e, f, g, h, a, b, c, d, k[20], M(20) ); + R( d, e, f, g, h, a, b, c, k[21], M(21) ); + R( c, d, e, f, g, h, a, b, k[22], M(22) ); + R( b, c, d, e, f, g, h, a, k[23], M(23) ); + R( a, b, c, d, e, f, g, h, k[24], M(24) ); + R( h, a, b, c, d, e, f, g, k[25], M(25) ); + R( g, h, a, b, c, d, e, f, k[26], M(26) ); + R( f, g, h, a, b, c, d, e, k[27], M(27) ); + R( e, f, g, h, a, b, c, d, k[28], M(28) ); + R( d, e, f, g, h, a, b, c, k[29], M(29) ); + R( c, d, e, f, g, h, a, b, k[30], M(30) ); + R( b, c, d, e, f, g, h, a, k[31], M(31) ); + R( a, b, c, d, e, f, g, h, k[32], M(32) ); + R( h, a, b, c, d, e, f, g, k[33], M(33) ); + R( g, h, a, b, c, d, e, f, k[34], M(34) ); + R( f, g, h, a, b, c, d, e, k[35], M(35) ); + R( e, f, g, h, a, b, c, d, k[36], M(36) ); + R( d, e, 
f, g, h, a, b, c, k[37], M(37) ); + R( c, d, e, f, g, h, a, b, k[38], M(38) ); + R( b, c, d, e, f, g, h, a, k[39], M(39) ); + R( a, b, c, d, e, f, g, h, k[40], M(40) ); + R( h, a, b, c, d, e, f, g, k[41], M(41) ); + R( g, h, a, b, c, d, e, f, k[42], M(42) ); + R( f, g, h, a, b, c, d, e, k[43], M(43) ); + R( e, f, g, h, a, b, c, d, k[44], M(44) ); + R( d, e, f, g, h, a, b, c, k[45], M(45) ); + R( c, d, e, f, g, h, a, b, k[46], M(46) ); + R( b, c, d, e, f, g, h, a, k[47], M(47) ); + R( a, b, c, d, e, f, g, h, k[48], M(48) ); + R( h, a, b, c, d, e, f, g, k[49], M(49) ); + R( g, h, a, b, c, d, e, f, k[50], M(50) ); + R( f, g, h, a, b, c, d, e, k[51], M(51) ); + R( e, f, g, h, a, b, c, d, k[52], M(52) ); + R( d, e, f, g, h, a, b, c, k[53], M(53) ); + R( c, d, e, f, g, h, a, b, k[54], M(54) ); + R( b, c, d, e, f, g, h, a, k[55], M(55) ); + R( a, b, c, d, e, f, g, h, k[56], M(56) ); + R( h, a, b, c, d, e, f, g, k[57], M(57) ); + R( g, h, a, b, c, d, e, f, k[58], M(58) ); + R( f, g, h, a, b, c, d, e, k[59], M(59) ); + R( e, f, g, h, a, b, c, d, k[60], M(60) ); + R( d, e, f, g, h, a, b, c, k[61], M(61) ); + R( c, d, e, f, g, h, a, b, k[62], M(62) ); + R( b, c, d, e, f, g, h, a, k[63], M(63) ); ctx->state[0] += a; ctx->state[1] += b; @@ -133,21 +184,41 @@ void sha256_ctx_update (struct sha256_ctx* ctx, const char* bytes, - const size_t len) + size_t len) { + size_t n; uint32_t i; ASSERT(ctx); ASSERT(bytes || !len); - FOR_EACH(i, 0, len) { - ctx->chunk[ctx->len] = bytes[i]; - ctx->len += 1; + if(ctx->len) { + n = MMIN(64 - ctx->len, len); + memcpy(ctx->chunk + ctx->len, bytes, n); + ctx->len += (uint32_t)n; + bytes += n; + len -= n; + if(ctx->len == 64) { - sha256_process_chunk(ctx); + sha256_process_chunk(ctx, ctx->chunk); ctx->nbits += 512; ctx->len = 0; } } + + if(len >= 64) { + n = len / 64; + FOR_EACH(i, 0, n) { + sha256_process_chunk(ctx, bytes); + bytes += 64; + } + ctx->nbits += n * 512; + len -= n * 64; + } + + if(len) { + memcpy(ctx->chunk, bytes, len); + ctx->len = 
(uint32_t)len; + } } void @@ -168,13 +239,13 @@ sha256_ctx_finalize(struct sha256_ctx* ctx, hash256_T hash) memset(ctx->chunk+i, 0, 56-i); } else { memset(ctx->chunk+i, 0, 64-i); - sha256_process_chunk(ctx); + sha256_process_chunk(ctx, ctx->chunk); memset(ctx->chunk, 0, 56); } /* Store the message's length in bits */ *((uint64_t*)(ctx->chunk + 56)) = big_endian_64(ctx->nbits); - sha256_process_chunk(ctx); + sha256_process_chunk(ctx, ctx->chunk); /* Store result the result */ ((uint32_t*)hash)[0] = big_endian_32(ctx->state[0]);