From 8cb80e097d2378b33b5e6f8eeabfd6430b7b1926 Mon Sep 17 00:00:00 2001 From: Robin Salen <30937548+Nashtare@users.noreply.github.com> Date: Mon, 22 Jan 2024 09:42:11 -0500 Subject: [PATCH] Improve `blake2f` call (#1477) * Improve on blake2 operations * Comments * Remove swap_mstore calls by changing stack macros --- .../cpu/kernel/asm/hash/blake2/addresses.asm | 18 +++-- .../cpu/kernel/asm/hash/blake2/blake2_f.asm | 70 +++++++++---------- .../kernel/asm/hash/blake2/g_functions.asm | 20 +++--- evm/src/cpu/kernel/asm/hash/blake2/hash.asm | 6 +- 4 files changed, 60 insertions(+), 54 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2/addresses.asm b/evm/src/cpu/kernel/asm/hash/blake2/addresses.asm index 06b93f9ea9..5beb1dee64 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2/addresses.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2/addresses.asm @@ -1,12 +1,20 @@ // Address where the working version of the hash value is stored. +// It is ready to be used, i.e. already containing the current context +// and SEGMENT_KERNEL_GENERAL. %macro blake2_hash_value_addr - PUSH 0 - // stack: 0 - %mload_current_general - // stack: num_blocks + PUSH @SEGMENT_KERNEL_GENERAL + // stack: segment + GET_CONTEXT + // stack: context, segment + %build_address_no_offset + DUP1 + MLOAD_GENERAL + // stack: num_blocks, addr %block_size %add_const(2) - // stack: num_bytes+2 + // stack: num_bytes+2, addr + ADD + // stack: addr %endmacro // Address where the working version of the compression internal state is stored. diff --git a/evm/src/cpu/kernel/asm/hash/blake2/blake2_f.asm b/evm/src/cpu/kernel/asm/hash/blake2/blake2_f.asm index 95a4749e0f..d1a4a2ab64 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2/blake2_f.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2/blake2_f.asm @@ -6,9 +6,9 @@ global blake2_f: // stack: addr, rounds, h0...h7, m0...m15, t0, t1, flag, retdest %rep 8 // stack: addr, rounds, h_i, ... - %stack (addr, rounds, h_i) -> (addr, h_i, addr, rounds) - // stack: addr, h_i, addr, rounds, ... - %mstore_current_general + %stack (addr, rounds, h_i) -> (h_i, addr, addr, rounds) + // stack: h_i, addr, addr, rounds, ... + MSTORE_GENERAL %increment %endrep @@ -21,9 +21,9 @@ global blake2_f: // stack: message_addr, rounds, m0...m15, t0, t1, flag, retdest %rep 16 // stack: message_addr, rounds, m_i, ... - %stack (message_addr, rounds, m_i) -> (message_addr, m_i, message_addr, rounds) - // stack: message_addr, m_i, message_addr, rounds, ... - %mstore_current_general + %stack (message_addr, rounds, m_i) -> (m_i, message_addr, message_addr, rounds) + // stack: m_i, message_addr, message_addr, rounds, ... + MSTORE_GENERAL %increment %endrep @@ -37,7 +37,7 @@ global blake2_f: // stack: addr, ... DUP1 // stack: addr, addr, ... - %mload_current_general + MLOAD_GENERAL // stack: val, addr, ... SWAP1 // stack: addr, val, ... @@ -53,31 +53,30 @@ global blake2_f: // First eight words of the internal state: current hash value h_0, ..., h_7. %rep 8 - SWAP1 - DUP2 - %mstore_current_general + DUP1 + SWAP2 + MSTORE_GENERAL %increment %endrep // stack: start + 8, rounds, t0, t1, flag, retdest // Next four values of the internal state: first four IV values. PUSH 0 - // stack: 0, start + 8, rounds, t0, t1, flag, retdest + // stack: 0, addr, rounds, t0, t1, flag, retdest %rep 4 - // stack: i, loc, ... - DUP1 - // stack: i, i, loc, ... + // stack: i, addr, ... + DUP2 + DUP2 + // stack: i, addr, i, addr, ... %blake2_iv - // stack: IV_i, i, loc, ... - DUP3 - // stack: loc, IV_i, i, loc, ... - %mstore_current_general - // stack: i, loc, ... + // stack: IV_i, addr, i, addr, ... + MSTORE_GENERAL + // stack: i, addr, ... %increment SWAP1 %increment SWAP1 - // stack: i + 1, loc + 1,... + // stack: i + 1, addr + 1,... %endrep // stack: 4, start + 12, rounds, t0, t1, flag, retdest POP @@ -92,29 +91,28 @@ global blake2_f: // Last four values of the internal state: last four IV values, XOR'd with // the values (t0, t1, invert_if_flag, 0). %rep 4 - // stack: i, loc, val, next_val,... - DUP1 - // stack: i, i, loc, val, next_val,... + // stack: i, addr, val, next_val,... + DUP2 + DUP2 + // stack: i, addr, i, addr, val, next_val,... %blake2_iv - // stack: IV_i, i, loc, val, next_val,... - DUP4 - // stack: val, IV_i, i, loc, val, next_val,... + // stack: IV_i, addr, i, addr, val, next_val,... + DUP5 + // stack: val, IV_i, addr, i, addr, val, next_val,... XOR - // stack: val ^ IV_i, i, loc, val, next_val,... - DUP3 - // stack: loc, val ^ IV_i, i, loc, val, next_val,... - %mstore_current_general - // stack: i, loc, val, next_val,... + // stack: val ^ IV_i, addr, i, addr, val, next_val,... + MSTORE_GENERAL + // stack: i, addr, val, next_val,... %increment - // stack: i + 1, loc, val, next_val,... + // stack: i + 1, addr, val, next_val,... SWAP2 - // stack: val, loc, i + 1, next_val,... + // stack: val, addr, i + 1, next_val,... POP - // stack: loc, i + 1, next_val,... + // stack: addr, i + 1, next_val,... %increment - // stack: loc + 1, i + 1, next_val,... + // stack: addr + 1, i + 1, next_val,... SWAP1 - // stack: i + 1, loc + 1, next_val,... + // stack: i + 1, addr + 1, next_val,... %endrep // stack: 8, start + 16, rounds, retdest %pop2 diff --git a/evm/src/cpu/kernel/asm/hash/blake2/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2/g_functions.asm index 45e54ff43f..d521da6d80 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2/g_functions.asm @@ -11,28 +11,28 @@ DUP11 // stack: start, a, b, c, d, a, b, c, d, x, y, start ADD - %mload_current_general + MLOAD_GENERAL // stack: v[a], b, c, d, a, b, c, d, x, y, start SWAP1 // stack: b, v[a], c, d, a, b, c, d, x, y, start DUP11 // stack: start, b, v[a], c, d, a, b, c, d, x, y, start ADD - %mload_current_general + MLOAD_GENERAL // stack: v[b], v[a], c, d, a, b, c, d, x, y, start SWAP2 // stack: c, v[a], v[b], d, a, b, c, d, x, y, start DUP11 // stack: start, c, v[a], v[b], d, a, b, c, d, x, y, start ADD - %mload_current_general + MLOAD_GENERAL // stack: v[c], v[a], v[b], d, a, b, c, d, x, y, start SWAP3 // stack: d, v[a], v[b], v[c], a, b, c, d, x, y, start DUP11 // stack: start, d, v[a], v[b], v[c], a, b, c, d, x, y, start ADD - %mload_current_general + MLOAD_GENERAL // stack: v[d], v[a], v[b], v[c], a, b, c, d, x, y, start %stack (vd, vs: 3) -> (vs, vd) // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start @@ -95,13 +95,13 @@ %stack (vb, vc, vd, va, a, b, c, d, x, y, start) -> (start, a, va, start, b, vb, start, c, vc, start, d, vd) // stack: start, a, v[a]'', start, b, v[b]'', start, c, v[c]'', start, d, v[d]'' ADD - %mstore_current_general + %swap_mstore ADD - %mstore_current_general + %swap_mstore ADD - %mstore_current_general + %swap_mstore ADD - %mstore_current_general + %swap_mstore %endmacro %macro call_blake2_g_function(a, b, c, d, x_idx, y_idx) @@ -113,7 +113,7 @@ // stack: s[y_idx], round, start %blake2_message_addr ADD - %mload_current_general + MLOAD_GENERAL // stack: m[s[y_idx]], round, start PUSH $x_idx DUP3 @@ -122,7 +122,7 @@ // stack: s[x_idx], m[s[y_idx]], round, start %blake2_message_addr ADD - %mload_current_general + MLOAD_GENERAL // stack: m[s[x_idx]], m[s[y_idx]], round, start %stack (ss: 2, r, s) -> (ss, s, r, s) // stack: m[s[x_idx]], m[s[y_idx]], start, round, start diff --git a/evm/src/cpu/kernel/asm/hash/blake2/hash.asm b/evm/src/cpu/kernel/asm/hash/blake2/hash.asm index 24ec9caba8..ab0d247633 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2/hash.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2/hash.asm @@ -5,13 +5,13 @@ blake2_generate_new_hash_value: // stack: addr, i, retdest DUP2 ADD - %mload_current_general + MLOAD_GENERAL // stack: h_i, i, retdest %blake2_internal_state_addr // stack: addr, h_i, i, retdest DUP3 ADD - %mload_current_general + MLOAD_GENERAL // stack: v_i, h_i, i, retdest %blake2_internal_state_addr // stack: addr, v_i, h_i, i, retdest @@ -21,7 +21,7 @@ blake2_generate_new_hash_value: // stack: i, addr, h_i, v_i, retdest ADD %add_const(8) - %mload_current_general + MLOAD_GENERAL // stack: v_(i+8), h_i, v_i, retdest XOR XOR