Skip to content

Commit

Permalink
sha1: Roll iterations inside transform() into loop
Browse files Browse the repository at this point in the history
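Use loops unrolled five rounds at a time, so that the rotation of the
{a..e} variables through R() stays the same as before.  The key schedule
changes at round 16 (x[] is read directly up to round 15, M() is used
from round 16 onwards), which is awkward to fold into a loop, so leave
the block covering rounds 15-19 as it was previously.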

64:
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-2458 (-2458)
Function                                     old     new   delta
transform                                   4433    1975   -2458
Total: Before=55933, After=53475, chg -4.39%

32:
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-2140 (-2140)
Function                                     old     new   delta
transform                                   4048    1908   -2140
Total: Before=27030, After=24890, chg -7.92%

lto.64:
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-2458 (-2458)
Function                                     old     new   delta
transform                                   4433    1975   -2458
Total: Before=55466, After=53008, chg -4.43%

lto.32:
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-2140 (-2140)
Function                                     old     new   delta
transform                                   4048    1908   -2140
Total: Before=26298, After=24158, chg -8.14%

Signed-off-by: Andrew Cooper <[email protected]>
Reviewed-by: Ross Philipson <[email protected]>
Reviewed-by: Krystian Hebel <[email protected]>
Tested-by: Krystian Hebel <[email protected]>
  • Loading branch information
andyhhp committed Mar 10, 2020
1 parent 9c6f699 commit e122a30
Showing 1 changed file with 36 additions and 76 deletions.
112 changes: 36 additions & 76 deletions sha1sum.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ static void sha1_transform(SHA1_CONTEXT *hd, const unsigned char *data)
{
u32 a,b,c,d,e;
u32 x[16];
int i;

/* get values from the chaining vars */
a = hd->h0;
Expand All @@ -77,7 +78,7 @@ static void sha1_transform(SHA1_CONTEXT *hd, const unsigned char *data)
d = hd->h3;
e = hd->h4;

for (int i = 0; i < 16; ++i, data += 4)
for (i = 0; i < 16; ++i, data += 4)
x[i] = cpu_to_be32(*(u32 *)data);


Expand All @@ -90,93 +91,52 @@ static void sha1_transform(SHA1_CONTEXT *hd, const unsigned char *data)
#define F3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) )
#define F4(x,y,z) ( x ^ y ^ z )


#define M(i) sha1_blend(x, i)
#define R(a,b,c,d,e,f,k,m) do { e += rol( a, 5 ) \
+ f( b, c, d ) \
+ k \
+ m; \
b = rol( b, 30 ); \
} while(0)
R( a, b, c, d, e, F1, K1, x[ 0] );
R( e, a, b, c, d, F1, K1, x[ 1] );
R( d, e, a, b, c, F1, K1, x[ 2] );
R( c, d, e, a, b, F1, K1, x[ 3] );
R( b, c, d, e, a, F1, K1, x[ 4] );
R( a, b, c, d, e, F1, K1, x[ 5] );
R( e, a, b, c, d, F1, K1, x[ 6] );
R( d, e, a, b, c, F1, K1, x[ 7] );
R( c, d, e, a, b, F1, K1, x[ 8] );
R( b, c, d, e, a, F1, K1, x[ 9] );
R( a, b, c, d, e, F1, K1, x[10] );
R( e, a, b, c, d, F1, K1, x[11] );
R( d, e, a, b, c, F1, K1, x[12] );
R( c, d, e, a, b, F1, K1, x[13] );
R( b, c, d, e, a, F1, K1, x[14] );

for (i = 0; i < 15; i += 5) {
R(a, b, c, d, e, F1, K1, x[i + 0]);
R(e, a, b, c, d, F1, K1, x[i + 1]);
R(d, e, a, b, c, F1, K1, x[i + 2]);
R(c, d, e, a, b, F1, K1, x[i + 3]);
R(b, c, d, e, a, F1, K1, x[i + 4]);
}

R( a, b, c, d, e, F1, K1, x[15] );
R( e, a, b, c, d, F1, K1, M(16) );
R( d, e, a, b, c, F1, K1, M(17) );
R( c, d, e, a, b, F1, K1, M(18) );
R( b, c, d, e, a, F1, K1, M(19) );
R( a, b, c, d, e, F2, K2, M(20) );
R( e, a, b, c, d, F2, K2, M(21) );
R( d, e, a, b, c, F2, K2, M(22) );
R( c, d, e, a, b, F2, K2, M(23) );
R( b, c, d, e, a, F2, K2, M(24) );
R( a, b, c, d, e, F2, K2, M(25) );
R( e, a, b, c, d, F2, K2, M(26) );
R( d, e, a, b, c, F2, K2, M(27) );
R( c, d, e, a, b, F2, K2, M(28) );
R( b, c, d, e, a, F2, K2, M(29) );
R( a, b, c, d, e, F2, K2, M(30) );
R( e, a, b, c, d, F2, K2, M(31) );
R( d, e, a, b, c, F2, K2, M(32) );
R( c, d, e, a, b, F2, K2, M(33) );
R( b, c, d, e, a, F2, K2, M(34) );
R( a, b, c, d, e, F2, K2, M(35) );
R( e, a, b, c, d, F2, K2, M(36) );
R( d, e, a, b, c, F2, K2, M(37) );
R( c, d, e, a, b, F2, K2, M(38) );
R( b, c, d, e, a, F2, K2, M(39) );
R( a, b, c, d, e, F3, K3, M(40) );
R( e, a, b, c, d, F3, K3, M(41) );
R( d, e, a, b, c, F3, K3, M(42) );
R( c, d, e, a, b, F3, K3, M(43) );
R( b, c, d, e, a, F3, K3, M(44) );
R( a, b, c, d, e, F3, K3, M(45) );
R( e, a, b, c, d, F3, K3, M(46) );
R( d, e, a, b, c, F3, K3, M(47) );
R( c, d, e, a, b, F3, K3, M(48) );
R( b, c, d, e, a, F3, K3, M(49) );
R( a, b, c, d, e, F3, K3, M(50) );
R( e, a, b, c, d, F3, K3, M(51) );
R( d, e, a, b, c, F3, K3, M(52) );
R( c, d, e, a, b, F3, K3, M(53) );
R( b, c, d, e, a, F3, K3, M(54) );
R( a, b, c, d, e, F3, K3, M(55) );
R( e, a, b, c, d, F3, K3, M(56) );
R( d, e, a, b, c, F3, K3, M(57) );
R( c, d, e, a, b, F3, K3, M(58) );
R( b, c, d, e, a, F3, K3, M(59) );
R( a, b, c, d, e, F4, K4, M(60) );
R( e, a, b, c, d, F4, K4, M(61) );
R( d, e, a, b, c, F4, K4, M(62) );
R( c, d, e, a, b, F4, K4, M(63) );
R( b, c, d, e, a, F4, K4, M(64) );
R( a, b, c, d, e, F4, K4, M(65) );
R( e, a, b, c, d, F4, K4, M(66) );
R( d, e, a, b, c, F4, K4, M(67) );
R( c, d, e, a, b, F4, K4, M(68) );
R( b, c, d, e, a, F4, K4, M(69) );
R( a, b, c, d, e, F4, K4, M(70) );
R( e, a, b, c, d, F4, K4, M(71) );
R( d, e, a, b, c, F4, K4, M(72) );
R( c, d, e, a, b, F4, K4, M(73) );
R( b, c, d, e, a, F4, K4, M(74) );
R( a, b, c, d, e, F4, K4, M(75) );
R( e, a, b, c, d, F4, K4, M(76) );
R( d, e, a, b, c, F4, K4, M(77) );
R( c, d, e, a, b, F4, K4, M(78) );
R( b, c, d, e, a, F4, K4, M(79) );

for (i = 20; i < 40; i += 5) {
R(a, b, c, d, e, F2, K2, M(i + 0));
R(e, a, b, c, d, F2, K2, M(i + 1));
R(d, e, a, b, c, F2, K2, M(i + 2));
R(c, d, e, a, b, F2, K2, M(i + 3));
R(b, c, d, e, a, F2, K2, M(i + 4));
}

for (; i < 60; i += 5) {
R(a, b, c, d, e, F3, K3, M(i + 0));
R(e, a, b, c, d, F3, K3, M(i + 1));
R(d, e, a, b, c, F3, K3, M(i + 2));
R(c, d, e, a, b, F3, K3, M(i + 3));
R(b, c, d, e, a, F3, K3, M(i + 4));
}

for (; i < 80; i += 5) {
R(a, b, c, d, e, F4, K4, M(i + 0));
R(e, a, b, c, d, F4, K4, M(i + 1));
R(d, e, a, b, c, F4, K4, M(i + 2));
R(c, d, e, a, b, F4, K4, M(i + 3));
R(b, c, d, e, a, F4, K4, M(i + 4));
}

/* Update chaining vars */
hd->h0 += a;
Expand Down

0 comments on commit e122a30

Please sign in to comment.