From 5dc3ac7ec700d85886eda3d53a03abcf5c7efc9c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 15 Jul 2023 11:20:53 -0400 Subject: [PATCH] feat: improve batch copy performance (#3483) per cancun, eip-5656, this commit adds the use of mcopy for memory copies. it also - adds heuristics to use loops vs unrolled loops for batch copies. - adds helper functions `vyper.codegen.core._opt_[gas,codesize,none]()` to detect optimization mode during codegen - adds `--optimize none` to CLI options, with the intent of phasing out `--no-optimize` if the ergonomics are better. --- .github/workflows/era-tester.yml | 4 +- setup.cfg | 1 - tests/compiler/test_opcodes.py | 7 +- tests/parser/functions/test_slice.py | 89 ++++++++-------- tests/parser/types/test_dynamic_array.py | 12 +-- vyper/cli/vyper_compile.py | 2 +- vyper/codegen/core.py | 128 ++++++++++++++++++++--- vyper/codegen/ir_node.py | 16 +-- vyper/compiler/phases.py | 8 +- vyper/evm/opcodes.py | 5 +- vyper/ir/compile_ir.py | 1 + vyper/ir/optimizer.py | 44 +++++--- vyper/utils.py | 3 +- 13 files changed, 221 insertions(+), 99 deletions(-) diff --git a/.github/workflows/era-tester.yml b/.github/workflows/era-tester.yml index 8a2a3e50ce..187b5c03a2 100644 --- a/.github/workflows/era-tester.yml +++ b/.github/workflows/era-tester.yml @@ -101,11 +101,11 @@ jobs: if: ${{ github.ref != 'refs/heads/master' }} run: | cd era-compiler-tester - cargo run --release --bin compiler-tester -- -v --path=tests/vyper/ --mode="M0B0 ${{ env.VYPER_VERSION }}" + cargo run --release --bin compiler-tester -- --path=tests/vyper/ --mode="M0B0 ${{ env.VYPER_VERSION }}" - name: Run tester (slow) # Run era tester across the LLVM optimization matrix if: ${{ github.ref == 'refs/heads/master' }} run: | cd era-compiler-tester - cargo run --release --bin compiler-tester -- -v --path=tests/vyper/ --mode="M*B* ${{ env.VYPER_VERSION }}" + cargo run --release --bin compiler-tester -- --path=tests/vyper/ --mode="M*B* ${{ env.VYPER_VERSION }}" diff --git a/setup.cfg b/setup.cfg index d18ffe2ac7..dd4a32a3ac 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,7 +31,6 @@ addopts = -n auto --cov-report html --cov-report xml --cov=vyper - --hypothesis-show-statistics python_files = test_*.py testpaths = tests markers = diff --git a/tests/compiler/test_opcodes.py b/tests/compiler/test_opcodes.py index b9841b92f0..20f45ced6b 100644 --- a/tests/compiler/test_opcodes.py +++ b/tests/compiler/test_opcodes.py @@ -59,5 +59,8 @@ def test_get_opcodes(evm_version): assert "PUSH0" in ops if evm_version in ("cancun",): - assert "TLOAD" in ops - assert "TSTORE" in ops + for op in ("TLOAD", "TSTORE", "MCOPY"): + assert op in ops + else: + for op in ("TLOAD", "TSTORE", "MCOPY"): + assert op not in ops diff --git a/tests/parser/functions/test_slice.py b/tests/parser/functions/test_slice.py index 11d834bf42..f1b642b28d 100644 --- a/tests/parser/functions/test_slice.py +++ b/tests/parser/functions/test_slice.py @@ -1,4 +1,6 @@ +import hypothesis.strategies as st import pytest +from hypothesis import given, settings from vyper.exceptions import ArgumentException @@ -9,14 +11,6 @@ def _generate_bytes(length): return bytes(list(range(length))) -# good numbers to try -_fun_numbers = [0, 1, 5, 31, 32, 33, 64, 99, 100, 101] - - -# [b"", b"\x01", b"\x02"...] -_bytes_examples = [_generate_bytes(i) for i in _fun_numbers if i <= 100] - - def test_basic_slice(get_contract_with_gas_estimation): code = """ @external @@ -31,12 +25,16 @@ def slice_tower_test(inp1: Bytes[50]) -> Bytes[50]: assert x == b"klmnopqrst", x -@pytest.mark.parametrize("bytesdata", _bytes_examples) -@pytest.mark.parametrize("start", _fun_numbers) +# note: optimization boundaries at 32, 64 and 320 depending on mode +_draw_1024 = st.integers(min_value=0, max_value=1024) +_draw_1024_1 = st.integers(min_value=1, max_value=1024) +_bytes_1024 = st.binary(min_size=0, max_size=1024) + + @pytest.mark.parametrize("literal_start", (True, False)) -@pytest.mark.parametrize("length", _fun_numbers) @pytest.mark.parametrize("literal_length", (True, False)) -@pytest.mark.fuzzing +@given(start=_draw_1024, length=_draw_1024, length_bound=_draw_1024_1, bytesdata=_bytes_1024) +@settings(max_examples=25, deadline=None) def test_slice_immutable( get_contract, assert_compile_failed, @@ -46,47 +44,48 @@ def test_slice_immutable( literal_start, length, literal_length, + length_bound, ): _start = start if literal_start else "start" _length = length if literal_length else "length" code = f""" -IMMUTABLE_BYTES: immutable(Bytes[100]) -IMMUTABLE_SLICE: immutable(Bytes[100]) +IMMUTABLE_BYTES: immutable(Bytes[{length_bound}]) +IMMUTABLE_SLICE: immutable(Bytes[{length_bound}]) @external -def __init__(inp: Bytes[100], start: uint256, length: uint256): +def __init__(inp: Bytes[{length_bound}], start: uint256, length: uint256): IMMUTABLE_BYTES = inp IMMUTABLE_SLICE = slice(IMMUTABLE_BYTES, {_start}, {_length}) @external -def do_splice() -> Bytes[100]: +def do_splice() -> Bytes[{length_bound}]: return IMMUTABLE_SLICE """ + def _get_contract(): + return get_contract(code, bytesdata, start, length) + if ( - (start + length > 100 and literal_start and literal_length) - or (literal_length and length > 100) - or (literal_start and start > 100) + (start + length > length_bound and literal_start and literal_length) + or (literal_length and length > length_bound) + or (literal_start and start > length_bound) or (literal_length and length < 1) ): - assert_compile_failed( - lambda: get_contract(code, bytesdata, start, length), ArgumentException - ) - elif start + length > len(bytesdata): - assert_tx_failed(lambda: get_contract(code, bytesdata, start, length)) + assert_compile_failed(lambda: _get_contract(), ArgumentException) + elif start + length > len(bytesdata) or (len(bytesdata) > length_bound): + # deploy fail + assert_tx_failed(lambda: _get_contract()) else: - c = get_contract(code, bytesdata, start, length) + c = _get_contract() assert c.do_splice() == bytesdata[start : start + length] @pytest.mark.parametrize("location", ("storage", "calldata", "memory", "literal", "code")) -@pytest.mark.parametrize("bytesdata", _bytes_examples) -@pytest.mark.parametrize("start", _fun_numbers) @pytest.mark.parametrize("literal_start", (True, False)) -@pytest.mark.parametrize("length", _fun_numbers) @pytest.mark.parametrize("literal_length", (True, False)) -@pytest.mark.fuzzing +@given(start=_draw_1024, length=_draw_1024, length_bound=_draw_1024_1, bytesdata=_bytes_1024) +@settings(max_examples=25, deadline=None) def test_slice_bytes( get_contract, assert_compile_failed, @@ -97,9 +96,10 @@ def test_slice_bytes( literal_start, length, literal_length, + length_bound, ): if location == "memory": - spliced_code = "foo: Bytes[100] = inp" + spliced_code = f"foo: Bytes[{length_bound}] = inp" foo = "foo" elif location == "storage": spliced_code = "self.foo = inp" @@ -120,31 +120,38 @@ def test_slice_bytes( _length = length if literal_length else "length" code = f""" -foo: Bytes[100] -IMMUTABLE_BYTES: immutable(Bytes[100]) +foo: Bytes[{length_bound}] +IMMUTABLE_BYTES: immutable(Bytes[{length_bound}]) @external -def __init__(foo: Bytes[100]): +def __init__(foo: Bytes[{length_bound}]): IMMUTABLE_BYTES = foo @external -def do_slice(inp: Bytes[100], start: uint256, length: uint256) -> Bytes[100]: +def do_slice(inp: Bytes[{length_bound}], start: uint256, length: uint256) -> Bytes[{length_bound}]: {spliced_code} return slice({foo}, {_start}, {_length}) """ - length_bound = len(bytesdata) if location == "literal" else 100 + def _get_contract(): + return get_contract(code, bytesdata) + + data_length = len(bytesdata) if location == "literal" else length_bound if ( - (start + length > length_bound and literal_start and literal_length) - or (literal_length and length > length_bound) - or (literal_start and start > length_bound) + (start + length > data_length and literal_start and literal_length) + or (literal_length and length > data_length) + or (location == "literal" and len(bytesdata) > length_bound) + or (literal_start and start > data_length) or (literal_length and length < 1) ): - assert_compile_failed(lambda: get_contract(code, bytesdata), ArgumentException) + assert_compile_failed(lambda: _get_contract(), ArgumentException) + elif len(bytesdata) > data_length: + # deploy fail + assert_tx_failed(lambda: _get_contract()) elif start + length > len(bytesdata): - c = get_contract(code, bytesdata) + c = _get_contract() assert_tx_failed(lambda: c.do_slice(bytesdata, start, length)) else: - c = get_contract(code, bytesdata) + c = _get_contract() assert c.do_slice(bytesdata, start, length) == bytesdata[start : start + length], code diff --git a/tests/parser/types/test_dynamic_array.py b/tests/parser/types/test_dynamic_array.py index cbae183fe4..9231d1979f 100644 --- a/tests/parser/types/test_dynamic_array.py +++ b/tests/parser/types/test_dynamic_array.py @@ -2,7 +2,6 @@ import pytest -from vyper.compiler.settings import OptimizationLevel from vyper.exceptions import ( ArgumentException, ArrayIndexException, @@ -1585,14 +1584,9 @@ def bar2() -> uint256: newFoo.b1[1][0][0].a1[0][1][1] + \\ newFoo.b1[0][1][0].a1[0][0][0] """ - - if optimize == OptimizationLevel.NONE: - # fails at assembly stage with too many stack variables - assert_compile_failed(lambda: get_contract(code), Exception) - else: - c = get_contract(code) - assert c.bar() == [[[3, 7], [7, 3]], [[7, 3], [0, 0]]] - assert c.bar2() == 0 + c = get_contract(code) + assert c.bar() == [[[3, 7], [7, 3]], [[7, 3], [0, 0]]] + assert c.bar2() == 0 def test_tuple_of_lists(get_contract): diff --git a/vyper/cli/vyper_compile.py b/vyper/cli/vyper_compile.py index 71e78dd666..55e0fc82b2 100755 --- a/vyper/cli/vyper_compile.py +++ b/vyper/cli/vyper_compile.py @@ -105,7 +105,7 @@ def _parse_args(argv): dest="evm_version", ) parser.add_argument("--no-optimize", help="Do not optimize", action="store_true") - parser.add_argument("--optimize", help="Optimization flag", choices=["gas", "codesize"]) + parser.add_argument("--optimize", help="Optimization flag", choices=["gas", "codesize", "none"]) parser.add_argument( "--no-bytecode-metadata", help="Do not add metadata to bytecode", action="store_true" ) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 58d9db9889..5b16938e99 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -1,6 +1,11 @@ +import contextlib +from typing import Generator + from vyper import ast as vy_ast from vyper.codegen.ir_node import Encoding, IRnode +from vyper.compiler.settings import OptimizationLevel from vyper.evm.address_space import CALLDATA, DATA, IMMUTABLES, MEMORY, STORAGE, TRANSIENT +from vyper.evm.opcodes import version_check from vyper.exceptions import CompilerPanic, StructureException, TypeCheckFailure, TypeMismatch from vyper.semantics.types import ( AddressT, @@ -19,13 +24,7 @@ from vyper.semantics.types.shortcuts import BYTES32_T, INT256_T, UINT256_T from vyper.semantics.types.subscriptable import SArrayT from vyper.semantics.types.user import EnumT -from vyper.utils import ( - GAS_CALLDATACOPY_WORD, - GAS_CODECOPY_WORD, - GAS_IDENTITY, - GAS_IDENTITYWORD, - ceil32, -) +from vyper.utils import GAS_COPY_WORD, GAS_IDENTITY, GAS_IDENTITYWORD, ceil32 DYNAMIC_ARRAY_OVERHEAD = 1 @@ -90,12 +89,16 @@ def _identity_gas_bound(num_bytes): return GAS_IDENTITY + GAS_IDENTITYWORD * (ceil32(num_bytes) // 32) +def _mcopy_gas_bound(num_bytes): + return GAS_COPY_WORD * ceil32(num_bytes) // 32 + + def _calldatacopy_gas_bound(num_bytes): - return GAS_CALLDATACOPY_WORD * ceil32(num_bytes) // 32 + return GAS_COPY_WORD * ceil32(num_bytes) // 32 def _codecopy_gas_bound(num_bytes): - return GAS_CODECOPY_WORD * ceil32(num_bytes) // 32 + return GAS_COPY_WORD * ceil32(num_bytes) // 32 # Copy byte array word-for-word (including layout) @@ -258,7 +261,6 @@ def copy_bytes(dst, src, length, length_bound): assert src.is_pointer and dst.is_pointer # fast code for common case where num bytes is small - # TODO expand this for more cases where num words is less than ~8 if length_bound <= 32: copy_op = STORE(dst, LOAD(src)) ret = IRnode.from_list(copy_op, annotation=annotation) @@ -268,8 +270,12 @@ def copy_bytes(dst, src, length, length_bound): # special cases: batch copy to memory # TODO: iloadbytes if src.location == MEMORY: - copy_op = ["staticcall", "gas", 4, src, length, dst, length] - gas_bound = _identity_gas_bound(length_bound) + if version_check(begin="cancun"): + copy_op = ["mcopy", dst, src, length] + gas_bound = _mcopy_gas_bound(length_bound) + else: + copy_op = ["staticcall", "gas", 4, src, length, dst, length] + gas_bound = _identity_gas_bound(length_bound) elif src.location == CALLDATA: copy_op = ["calldatacopy", dst, src, length] gas_bound = _calldatacopy_gas_bound(length_bound) @@ -876,6 +882,38 @@ def make_setter(left, right): return _complex_make_setter(left, right) +_opt_level = OptimizationLevel.GAS + + +@contextlib.contextmanager +def anchor_opt_level(new_level: OptimizationLevel) -> Generator: + """ + Set the global optimization level variable for the duration of this + context manager. + """ + assert isinstance(new_level, OptimizationLevel) + + global _opt_level + try: + tmp = _opt_level + _opt_level = new_level + yield + finally: + _opt_level = tmp + + +def _opt_codesize(): + return _opt_level == OptimizationLevel.CODESIZE + + +def _opt_gas(): + return _opt_level == OptimizationLevel.GAS + + +def _opt_none(): + return _opt_level == OptimizationLevel.NONE + + def _complex_make_setter(left, right): if right.value == "~empty" and left.location == MEMORY: # optimized memzero @@ -891,11 +929,69 @@ def _complex_make_setter(left, right): assert is_tuple_like(left.typ) keys = left.typ.tuple_keys() - # if len(keyz) == 0: - # return IRnode.from_list(["pass"]) + if left.is_pointer and right.is_pointer and right.encoding == Encoding.VYPER: + # both left and right are pointers, see if we want to batch copy + # instead of unrolling the loop. + assert left.encoding == Encoding.VYPER + len_ = left.typ.memory_bytes_required + + has_storage = STORAGE in (left.location, right.location) + if has_storage: + if _opt_codesize(): + # assuming PUSH2, a single sstore(dst (sload src)) is 8 bytes, + # sstore(add (dst ofst), (sload (add (src ofst)))) is 16 bytes, + # whereas loop overhead is 16-17 bytes. + base_cost = 3 + if left._optimized.is_literal: + # code size is smaller since add is performed at compile-time + base_cost += 1 + if right._optimized.is_literal: + base_cost += 1 + # the formula is a heuristic, but it works. + # (CMC 2023-07-14 could get more detailed for PUSH1 vs + # PUSH2 etc but not worried about that too much now, + # it's probably better to add a proper unroll rule in the + # optimizer.) + should_batch_copy = len_ >= 32 * base_cost + elif _opt_gas(): + # kind of arbitrary, but cut off when code used > ~160 bytes + should_batch_copy = len_ >= 32 * 10 + else: + assert _opt_none() + # don't care, just generate the most readable version + should_batch_copy = True + else: + # find a cutoff for memory copy where identity is cheaper + # than unrolled mloads/mstores + # if MCOPY is available, mcopy is *always* better (except in + # the 1 word case, but that is already handled by copy_bytes). + if right.location == MEMORY and _opt_gas() and not version_check(begin="cancun"): + # cost for 0th word - (mstore dst (mload src)) + base_unroll_cost = 12 + nth_word_cost = base_unroll_cost + if not left._optimized.is_literal: + # (mstore (add N dst) (mload src)) + nth_word_cost += 6 + if not right._optimized.is_literal: + # (mstore dst (mload (add N src))) + nth_word_cost += 6 + + identity_base_cost = 115 # staticcall 4 gas dst len src len + + n_words = ceil32(len_) // 32 + should_batch_copy = ( + base_unroll_cost + (nth_word_cost * (n_words - 1)) >= identity_base_cost + ) + + # calldata to memory, code to memory, cancun, or codesize - + # batch copy is always better. + else: + should_batch_copy = True + + if should_batch_copy: + return copy_bytes(left, right, len_, len_) - # general case - # TODO use copy_bytes when the generated code is above a certain size + # general case, unroll with left.cache_when_complex("_L") as (b1, left), right.cache_when_complex("_R") as (b2, right): for k in keys: l_i = get_element_ptr(left, k, array_bounds_check=False) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index f7698fbabb..0895e5f02d 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -49,10 +49,7 @@ class Encoding(Enum): # this creates a magical block which maps to IR `with` class _WithBuilder: def __init__(self, ir_node, name, should_inline=False): - # TODO figure out how to fix this circular import - from vyper.ir.optimizer import optimize - - if should_inline and optimize(ir_node).is_complex_ir: + if should_inline and ir_node._optimized.is_complex_ir: # this can only mean trouble raise CompilerPanic("trying to inline a complex IR node") @@ -366,6 +363,13 @@ def is_pointer(self): # eventually return self.location is not None + @property # probably could be cached_property but be paranoid + def _optimized(self): + # TODO figure out how to fix this circular import + from vyper.ir.optimizer import optimize + + return optimize(self) + # This function is slightly confusing but abstracts a common pattern: # when an IR value needs to be computed once and then cached as an # IR value (if it is expensive, or more importantly if its computation @@ -382,13 +386,11 @@ def is_pointer(self): # return builder.resolve(ret) # ``` def cache_when_complex(self, name): - from vyper.ir.optimizer import optimize - # for caching purposes, see if the ir_node will be optimized # because a non-literal expr could turn into a literal, # (e.g. `(add 1 2)`) # TODO this could really be moved into optimizer.py - should_inline = not optimize(self).is_complex_ir + should_inline = not self._optimized.is_complex_ir return _WithBuilder(self, name, should_inline) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 99465809bd..4e1bd9e6c3 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -5,6 +5,7 @@ from vyper import ast as vy_ast from vyper.codegen import module +from vyper.codegen.core import anchor_opt_level from vyper.codegen.global_context import GlobalContext from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel, Settings @@ -268,7 +269,9 @@ def generate_folded_ast( return vyper_module_folded, symbol_tables -def generate_ir_nodes(global_ctx: GlobalContext, optimize: bool) -> tuple[IRnode, IRnode]: +def generate_ir_nodes( + global_ctx: GlobalContext, optimize: OptimizationLevel +) -> tuple[IRnode, IRnode]: """ Generate the intermediate representation (IR) from the contextualized AST. @@ -288,7 +291,8 @@ def generate_ir_nodes(global_ctx: GlobalContext, optimize: bool) -> tuple[IRnode IR to generate deployment bytecode IR to generate runtime bytecode """ - ir_nodes, ir_runtime = module.generate_ir_for_module(global_ctx) + with anchor_opt_level(optimize): + ir_nodes, ir_runtime = module.generate_ir_for_module(global_ctx) if optimize != OptimizationLevel.NONE: ir_nodes = optimizer.optimize(ir_nodes) ir_runtime = optimizer.optimize(ir_runtime) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 4fec13e897..767d634c89 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -89,6 +89,7 @@ "MSIZE": (0x59, 0, 1, 2), "GAS": (0x5A, 0, 1, 2), "JUMPDEST": (0x5B, 0, 0, 1), + "MCOPY": (0x5E, 3, 0, (None, None, None, None, None, 3)), "PUSH0": (0x5F, 0, 1, 2), "PUSH1": (0x60, 0, 1, 3), "PUSH2": (0x61, 0, 1, 3), @@ -171,8 +172,8 @@ "INVALID": (0xFE, 0, 0, 0), "DEBUG": (0xA5, 1, 0, 0), "BREAKPOINT": (0xA6, 0, 0, 0), - "TLOAD": (0x5C, 1, 1, 100), - "TSTORE": (0x5D, 2, 0, 100), + "TLOAD": (0x5C, 1, 1, (None, None, None, None, None, 100)), + "TSTORE": (0x5D, 2, 0, (None, None, None, None, None, 100)), } PSEUDO_OPCODES: OpcodeMap = { diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 15a68a5079..a9064a44fa 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -297,6 +297,7 @@ def _height_of(witharg): return o # batch copy from data section of the currently executing code to memory + # (probably should have named this dcopy but oh well) elif code.value == "dloadbytes": dst = code.args[0] src = code.args[1] diff --git a/vyper/ir/optimizer.py b/vyper/ir/optimizer.py index b13c6f79f8..40e02e79c7 100644 --- a/vyper/ir/optimizer.py +++ b/vyper/ir/optimizer.py @@ -2,6 +2,7 @@ from typing import List, Optional, Tuple, Union from vyper.codegen.ir_node import IRnode +from vyper.evm.opcodes import version_check from vyper.exceptions import CompilerPanic, StaticAssertionException from vyper.utils import ( ceil32, @@ -472,6 +473,7 @@ def finalize(val, args): if value == "seq": changed |= _merge_memzero(argz) changed |= _merge_calldataload(argz) + changed |= _merge_mload(argz) changed |= _remove_empty_seqs(argz) # (seq x) => (x) for cleanliness and @@ -636,12 +638,26 @@ def _remove_empty_seqs(argz): def _merge_calldataload(argz): - # look for sequential operations copying from calldata to memory - # and merge them into a single calldatacopy operation + return _merge_load(argz, "calldataload", "calldatacopy") + + +def _merge_dload(argz): + return _merge_load(argz, "dload", "dloadbytes") + + +def _merge_mload(argz): + if not version_check(begin="cancun"): + return False + return _merge_load(argz, "mload", "mcopy") + + +def _merge_load(argz, _LOAD, _COPY): + # look for sequential operations copying from X to Y + # and merge them into a single copy operation changed = False mstore_nodes: List = [] - initial_mem_offset = 0 - initial_calldata_offset = 0 + initial_dst_offset = 0 + initial_src_offset = 0 total_length = 0 idx = None for i, ir_node in enumerate(argz): @@ -649,19 +665,19 @@ def _merge_calldataload(argz): if ( ir_node.value == "mstore" and isinstance(ir_node.args[0].value, int) - and ir_node.args[1].value == "calldataload" + and ir_node.args[1].value == _LOAD and isinstance(ir_node.args[1].args[0].value, int) ): # mstore of a zero value - mem_offset = ir_node.args[0].value - calldata_offset = ir_node.args[1].args[0].value + dst_offset = ir_node.args[0].value + src_offset = ir_node.args[1].args[0].value if not mstore_nodes: - initial_mem_offset = mem_offset - initial_calldata_offset = calldata_offset + initial_dst_offset = dst_offset + initial_src_offset = src_offset idx = i if ( - initial_mem_offset + total_length == mem_offset - and initial_calldata_offset + total_length == calldata_offset + initial_dst_offset + total_length == dst_offset + and initial_src_offset + total_length == src_offset ): mstore_nodes.append(ir_node) total_length += 32 @@ -676,7 +692,7 @@ def _merge_calldataload(argz): if len(mstore_nodes) > 1: changed = True new_ir = IRnode.from_list( - ["calldatacopy", initial_mem_offset, initial_calldata_offset, total_length], + [_COPY, initial_dst_offset, initial_src_offset, total_length], source_pos=mstore_nodes[0].source_pos, ) # replace first copy operation with optimized node and remove the rest @@ -684,8 +700,8 @@ def _merge_calldataload(argz): # note: del xs[k:l] deletes l - k items del argz[idx + 1 : idx + len(mstore_nodes)] - initial_mem_offset = 0 - initial_calldata_offset = 0 + initial_dst_offset = 0 + initial_src_offset = 0 total_length = 0 mstore_nodes.clear() diff --git a/vyper/utils.py b/vyper/utils.py index 2440117d0c..3d9d9cb416 100644 --- a/vyper/utils.py +++ b/vyper/utils.py @@ -196,8 +196,7 @@ def calc_mem_gas(memsize): # Specific gas usage GAS_IDENTITY = 15 GAS_IDENTITYWORD = 3 -GAS_CODECOPY_WORD = 3 -GAS_CALLDATACOPY_WORD = 3 +GAS_COPY_WORD = 3 # i.e., W_copy from YP # A decimal value can store multiples of 1/DECIMAL_DIVISOR MAX_DECIMAL_PLACES = 10