From 5a7b4811be9356e0bd60963119a4ccaafe26d850 Mon Sep 17 00:00:00 2001 From: cyberthirst Date: Tue, 10 Sep 2024 23:03:22 +0200 Subject: [PATCH 01/15] feat[docs]: add bug bounty program to security policy (#4230) --------- Co-authored-by: sudo rm -rf --no-preserve-root / --- SECURITY.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 1a16f521d3..977a00f7b2 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -3,7 +3,7 @@ ## Supported Versions - it is recommended to follow the list of known [vulnerabilities](https://github.com/vyperlang/vyper/security/advisories) and stay up-to-date with the latest releases - - as of May 2024, the `0.4.0` release is the most secure and the most comprehensively reviewed one and is recommended for use in production environments + - as of May 2024, the [`0.4.0`](https://github.com/vyperlang/vyper/releases/tag/v0.4.0) release is the most comprehensively reviewed one and is recommended for use in production environments - if a compiler vulnerability is found, a new compiler version with a patch will be released. The vulnerable version itself is not updated (see the examples below). - `example1`: suppose `0.4.0` is the latest version and a hypothetical vulnerability is found in `0.4.0`, then a patch will be released in `0.4.1` - `example2`: suppose `0.4.0` is the latest version and a hypothetical vulnerability is found both in `0.3.10` and `0.4.0`, then a patch will be released only in `0.4.1` @@ -26,7 +26,22 @@ we will add an entry to the list of security advisories for posterity and refere ## Bug Bounty Program -- as of May 2024, Vyper does not have a bug bounty program. It is planned to instantiate one soon. +- Vyper runs a bug bounty program via the Ethereum Foundation. + - Bugs should be reported through the [Ethereum Foundation's bounty program](https://ethereum.org/bug-bounty). + +### Scope +- Rules from the Ethereum Foundation's bug bounty program apply; for any questions please reach out [here](mailto:bounty@ethereum.org). Here we further clarify the scope of the Vyper bounty program. +- If a compiler bug affects production code, it is in scope (excluding known issues). + - This includes bugs in older compiler versions still used in production. +- If a compiler bug does not currently affect production but is likely to in the future, it is in scope. + - This mainly applies to the latest compiler release (e.g., a new release is available but contracts are not yet deployed with it). + - Experimental features (e.g. `--experimental-codegen`) are out of scope, as they are not intended for production and are unlikely to affect production code. + - Bugs in older compiler versions are generally out of scope, as they are no longer used for new contracts. + - There might be exceptions, e.g., when an L2 doesn't support recent compiler releases. In such cases, it might be reasonable for an older version to be used. It is up to the discretion of the EF & Vyper team to decide if the bug is in scope. +- If a vulnerability affects multiple contracts, the whitehat is eligible for only one payout (though the severity of the bug may increase). +- Eligibility for project-specific bounties is independent of this bounty. +- [Security advisories](https://github.com/vyperlang/vyper/security/advisories) and [known issues](https://github.com/vyperlang/vyper/issues) are not eligible for the bounty program, as they are publicly disclosed and protocols should structure their contracts accordingly. 
+- Individuals or organizations contracted or engaged specifically for security development, auditing, or testing of this project are ineligible for the bounty program. ## Reporting a Vulnerability From 03095ce39216362dd4cbff67f8efb2bbb188bb40 Mon Sep 17 00:00:00 2001 From: cyberthirst Date: Mon, 16 Sep 2024 15:47:55 +0200 Subject: [PATCH 02/15] feat[ux]: move exception hint to the end of the message (#4154) moved the `(hint: ...)` to be reported at the end of the exception to improve the readability. the hint should be displayed after the actual problem has been described, i.e. as the last item in the exception message. --------- Co-authored-by: Charles Cooper --- vyper/exceptions.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/vyper/exceptions.py b/vyper/exceptions.py index 3c0444b1ca..c69163b561 100644 --- a/vyper/exceptions.py +++ b/vyper/exceptions.py @@ -97,10 +97,7 @@ def hint(self): @property def message(self): - msg = self._message - if self.hint: - msg += f"\n\n (hint: {self.hint})" - return msg + return self._message def format_annotation(self, value): from vyper import ast as vy_ast @@ -148,7 +145,16 @@ def format_annotation(self, value): node_msg = textwrap.indent(node_msg, " ") return node_msg + def _add_hint(self, msg): + hint = self.hint + if hint is None: + return msg + return msg + f"\n (hint: {self.hint})" + def __str__(self): + return self._add_hint(self._str_helper()) + + def _str_helper(self): if not self.annotations: if self.lineno is not None and self.col_offset is not None: return f"line {self.lineno}:{self.col_offset} {self.message}" From f7b8e93a32bac8d545cd121b041cb53aa74f8dd9 Mon Sep 17 00:00:00 2001 From: HodanPlodky <36966616+HodanPlodky@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:23:18 +0000 Subject: [PATCH 03/15] fix[venom]: invalid jump error (#4214) fix an issue where `ret` would generate a jump to an invalid location because the stack is not clean. the solution is to always pop instruction outputs which are not used. note this leads to a slight performance regression (roughly 0.07% bytecode size), since `POP`s are always generated, even in cases when the stack does not need to be cleaned (e.g. before `STOP`). 
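for intuition, a minimal sketch of the cleanup rule (illustrative only, not the actual codegen; `asm`, `stack` and `next_liveness` are placeholder names here):

```python
def emit_output_cleanup(asm: list, stack: list, output, next_liveness: set) -> None:
    # if an instruction's output is not consumed by any later instruction,
    # emit a POP so the leftover value cannot end up underneath the jump
    # target computation of a later `ret`/`jmp`.
    if output is not None and output not in next_liveness:
        asm.append("POP")
        stack.pop()
```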
--------- Co-authored-by: Charles Cooper Co-authored-by: Harry Kalogirou --- .../compiler/venom/test_duplicate_operands.py | 4 +-- .../unit/compiler/venom/test_stack_cleanup.py | 16 ++++++++++ vyper/venom/venom_to_assembly.py | 29 ++++--------------- 3 files changed, 24 insertions(+), 25 deletions(-) create mode 100644 tests/unit/compiler/venom/test_stack_cleanup.py diff --git a/tests/unit/compiler/venom/test_duplicate_operands.py b/tests/unit/compiler/venom/test_duplicate_operands.py index 44c4ed0404..fbff0835d2 100644 --- a/tests/unit/compiler/venom/test_duplicate_operands.py +++ b/tests/unit/compiler/venom/test_duplicate_operands.py @@ -13,7 +13,7 @@ def test_duplicate_operands(): %3 = mul %1, %2 stop - Should compile to: [PUSH1, 10, DUP1, DUP1, DUP1, ADD, MUL, STOP] + Should compile to: [PUSH1, 10, DUP1, DUP1, DUP1, ADD, MUL, POP, STOP] """ ctx = IRContext() fn = ctx.create_function("test") @@ -24,4 +24,4 @@ def test_duplicate_operands(): bb.append_instruction("stop") asm = generate_assembly_experimental(ctx, optimize=OptimizationLevel.GAS) - assert asm == ["PUSH1", 10, "DUP1", "DUP1", "ADD", "MUL", "STOP"] + assert asm == ["PUSH1", 10, "DUP1", "DUP1", "ADD", "MUL", "POP", "STOP"] diff --git a/tests/unit/compiler/venom/test_stack_cleanup.py b/tests/unit/compiler/venom/test_stack_cleanup.py new file mode 100644 index 0000000000..6015cf1c41 --- /dev/null +++ b/tests/unit/compiler/venom/test_stack_cleanup.py @@ -0,0 +1,16 @@ +from vyper.compiler.settings import OptimizationLevel +from vyper.venom import generate_assembly_experimental +from vyper.venom.context import IRContext + + +def test_cleanup_stack(): + ctx = IRContext() + fn = ctx.create_function("test") + bb = fn.get_basic_block() + ret_val = bb.append_instruction("param") + op = bb.append_instruction("store", 10) + bb.append_instruction("add", op, op) + bb.append_instruction("ret", ret_val) + + asm = generate_assembly_experimental(ctx, optimize=OptimizationLevel.GAS) + assert asm == ["PUSH1", 10, "DUP1", "ADD", "POP", "JUMP"] diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 07d63afc70..c087d29bff 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -285,34 +285,16 @@ def _generate_evm_for_basicblock_r( self.clean_stack_from_cfg_in(asm, basicblock, stack) - param_insts = [inst for inst in basicblock.instructions if inst.opcode == "param"] - main_insts = [inst for inst in basicblock.instructions if inst.opcode != "param"] + all_insts = sorted(basicblock.instructions, key=lambda x: x.opcode != "param") - for inst in param_insts: - asm.extend(self._generate_evm_for_instruction(inst, stack)) - - self._clean_unused_params(asm, basicblock, stack) - - for i, inst in enumerate(main_insts): - next_liveness = main_insts[i + 1].liveness if i + 1 < len(main_insts) else OrderedSet() + for i, inst in enumerate(all_insts): + next_liveness = all_insts[i + 1].liveness if i + 1 < len(all_insts) else OrderedSet() asm.extend(self._generate_evm_for_instruction(inst, stack, next_liveness)) for bb in basicblock.reachable: self._generate_evm_for_basicblock_r(asm, bb, stack.copy()) - def _clean_unused_params(self, asm: list, bb: IRBasicBlock, stack: StackModel) -> None: - for i, inst in enumerate(bb.instructions): - if inst.opcode != "param": - break - if inst.is_volatile and i + 1 < len(bb.instructions): - liveness = bb.instructions[i + 1].liveness - if inst.output is not None and inst.output not in liveness: - depth = stack.get_depth(inst.output) - if depth != 0: - self.swap(asm, 
stack, depth) - self.pop(asm, stack) - # pop values from stack at entry to bb # note this produces the same result(!) no matter which basic block # we enter from in the CFG. @@ -543,11 +525,12 @@ def _generate_evm_for_instruction( # Step 6: Emit instructions output operands (if any) if inst.output is not None: - if "call" in inst.opcode and inst.output not in next_liveness: + if inst.output not in next_liveness: self.pop(assembly, stack) - elif inst.output in next_liveness: + else: # peek at next_liveness to find the next scheduled item, # and optimistically swap with it + # TODO: implement OrderedSet.last() next_scheduled = list(next_liveness)[-1] self.swap_op(assembly, stack, next_scheduled) From 53f76758c87c41c0a2d9e31c76eb4bbe9a5f5e5f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 18 Sep 2024 08:46:04 -0400 Subject: [PATCH 04/15] perf[venom]: add `OrderedSet.last()` (#4236) smidge performance improvement (roughly 3%). peek directly at end of `OrderedSet` instead of converting to list every time. --- .github/workflows/pull-request.yaml | 1 + vyper/utils.py | 6 ++++++ vyper/venom/venom_to_assembly.py | 3 +-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull-request.yaml b/.github/workflows/pull-request.yaml index 4c4ff8a1df..2eb0113487 100644 --- a/.github/workflows/pull-request.yaml +++ b/.github/workflows/pull-request.yaml @@ -24,6 +24,7 @@ jobs: with: types: | feat + perf fix chore refactor diff --git a/vyper/utils.py b/vyper/utils.py index 2b95485f4e..f4f14a346e 100644 --- a/vyper/utils.py +++ b/vyper/utils.py @@ -36,6 +36,9 @@ def __repr__(self): def __iter__(self): return iter(self._data) + def __reversed__(self): + return reversed(self._data) + def __contains__(self, item): return self._data.__contains__(item) @@ -45,6 +48,9 @@ def __len__(self): def first(self): return next(iter(self)) + def last(self): + return next(reversed(self)) + def pop(self): return self._data.popitem()[0] diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index c087d29bff..eb8c4e69ec 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -530,8 +530,7 @@ def _generate_evm_for_instruction( else: # peek at next_liveness to find the next scheduled item, # and optimistically swap with it - # TODO: implement OrderedSet.last() - next_scheduled = list(next_liveness)[-1] + next_scheduled = next_liveness.last() self.swap_op(assembly, stack, next_scheduled) return apply_line_numbers(inst, assembly) From e1de93a6b267212a22772a81422c8c0ad76de04e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 18 Sep 2024 15:57:35 -0400 Subject: [PATCH 05/15] feat[venom]: improve liveness computation time (#4086) reduce time spent in liveness by only recomputing liveness for basic blocks which need it (any of its `cfg_out` has changed). reduces time in venom by roughly 25% there is also a slight codesize improvement. it seems to be sensitive to the order in which items are added to the worklist. 
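for intuition, a minimal worklist formulation of backward liveness, independent of the venom data structures (`succs`, `preds`, `use` and `defn` are hypothetical inputs here):

```python
from collections import deque

def liveness(blocks, succs, preds, use, defn):
    # blocks: iterable of block ids; succs/preds: dict[id, list[id]] (CFG edges)
    # use/defn: dict[id, set] -- variables read / written by each block
    live_in = {b: set() for b in blocks}
    live_out = {b: set() for b in blocks}
    worklist = deque(blocks)
    while worklist:
        b = worklist.popleft()
        live_out[b] = set().union(*(live_in[s] for s in succs[b]))
        new_in = use[b] | (live_out[b] - defn[b])
        if new_in != live_in[b]:
            live_in[b] = new_in
            # liveness flows backwards: a change here can only affect
            # the predecessors of this block
            worklist.extend(preds[b])
    return live_in, live_out
```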
--- vyper/venom/analysis/liveness.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/vyper/venom/analysis/liveness.py b/vyper/venom/analysis/liveness.py index 5d1ac488f1..2a471bc8be 100644 --- a/vyper/venom/analysis/liveness.py +++ b/vyper/venom/analysis/liveness.py @@ -1,3 +1,5 @@ +from collections import deque + from vyper.exceptions import CompilerPanic from vyper.utils import OrderedSet from vyper.venom.analysis.analysis import IRAnalysis @@ -13,14 +15,19 @@ class LivenessAnalysis(IRAnalysis): def analyze(self): self.analyses_cache.request_analysis(CFGAnalysis) self._reset_liveness() - while True: - changed = False - for bb in self.function.get_basic_blocks(): - changed |= self._calculate_out_vars(bb) - changed |= self._calculate_liveness(bb) - if not changed: - break + self._worklist = deque() + self._worklist.extend(self.function.get_basic_blocks()) + + while len(self._worklist) > 0: + changed = False + bb = self._worklist.popleft() + changed |= self._calculate_out_vars(bb) + changed |= self._calculate_liveness(bb) + # recompute liveness for basic blocks pointing into + # this basic block + if changed: + self._worklist.extend(bb.cfg_in) def _reset_liveness(self) -> None: for bb in self.function.get_basic_blocks(): From d76d7502cf5903a2434fd3976eb6ba84398bb433 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 19 Sep 2024 07:43:33 -0400 Subject: [PATCH 06/15] fix[ux]: improve error message for bad hex literals (#4244) improve the error message when a hex literal starts with `0X`. it is accepted by the parser, but prior to this commit, would result in a compiler panic. this commit validates the literal and provides a proper error message. --- tests/unit/ast/nodes/test_hex.py | 3 +++ vyper/ast/nodes.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/tests/unit/ast/nodes/test_hex.py b/tests/unit/ast/nodes/test_hex.py index 1b61764d57..7168defa99 100644 --- a/tests/unit/ast/nodes/test_hex.py +++ b/tests/unit/ast/nodes/test_hex.py @@ -33,6 +33,9 @@ def foo(): """ foo: constant(bytes4) = 0x12_34_56 """, + """ +foo: constant(bytes4) = 0X12345678 + """, ] diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index f4c99b7952..991edeca6e 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -854,10 +854,15 @@ class Hex(Constant): def validate(self): if "_" in self.value: + # TODO: revisit this, we should probably allow underscores raise InvalidLiteral("Underscores not allowed in hex literals", self) if len(self.value) % 2: raise InvalidLiteral("Hex notation requires an even number of digits", self) + if self.value.startswith("0X"): + hint = f"Did you mean `0x{self.value[2:]}`?" + raise InvalidLiteral("Hex literal begins with 0X!", self, hint=hint) + @property def n_nibbles(self): """ From 48a5da46b7c553d29181880e8ff6d51da62b2679 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 20 Sep 2024 10:20:09 -0400 Subject: [PATCH 07/15] feat[venom]: only stack_reorder before join points (#4247) fix a venom performance bug, where the stack would be reordered before all jump instructions. for "joining" jump instructions (where the target basic block can have multiple cfg inputs), the stack reorder is needed, since we need the invariant that the stack layout needs to be the same no matter which basic block we jump to the target basic block from. 
however, before jumping into a block with only a single `cfg_in` (which, after cfg normalization, is equivalent to "splitting" jump instructions `jnz` and `djmp`), the stack reorder is unneeded, since stack reordering happens anyways in the target basic block, and no invariant on the incoming stack layout is required. this commit changes the behavior so that stack reordering only occurs before these "joining" jump instructions. on branch-heavy code, this can improve codesize by as much as 25%, with corresponding gas improvement, especially in the presence of loops. --- .../codegen/features/test_constructor.py | 4 +++ vyper/venom/venom_to_assembly.py | 29 +++++++++++-------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/tests/functional/codegen/features/test_constructor.py b/tests/functional/codegen/features/test_constructor.py index 6cc7007bb2..3b86fe3460 100644 --- a/tests/functional/codegen/features/test_constructor.py +++ b/tests/functional/codegen/features/test_constructor.py @@ -1,4 +1,7 @@ +import pytest + from tests.evm_backends.base_env import _compile +from vyper.exceptions import StackTooDeep from vyper.utils import method_id @@ -166,6 +169,7 @@ def get_foo() -> uint256: assert c.get_foo() == 39 +@pytest.mark.venom_xfail(raises=StackTooDeep, reason="stack scheduler regression") def test_nested_dynamic_array_constructor_arg_2(env, get_contract): code = """ foo: int128 diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index eb8c4e69ec..41a76319d7 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -392,18 +392,23 @@ def _generate_evm_for_instruction( # Step 2: Emit instruction's input operands self._emit_input_operands(assembly, inst, operands, stack) - # Step 3: Reorder stack - if opcode in ["jnz", "djmp", "jmp"]: - # prepare stack for jump into another basic block - assert inst.parent and isinstance(inst.parent.cfg_out, OrderedSet) - b = next(iter(inst.parent.cfg_out)) - target_stack = self.liveness_analysis.input_vars_from(inst.parent, b) - # TODO optimize stack reordering at entry and exit from basic blocks - # NOTE: stack in general can contain multiple copies of the same variable, - # however we are safe in the case of jmp/djmp/jnz as it's not going to - # have multiples. - target_stack_list = list(target_stack) - self._stack_reorder(assembly, stack, target_stack_list) + # Step 3: Reorder stack before join points + if opcode == "jmp": + # prepare stack for jump into a join point + # we only need to reorder stack before join points, which after + # cfg normalization, join points can only be led into by + # jmp instructions. 
+ assert isinstance(inst.parent.cfg_out, OrderedSet) + assert len(inst.parent.cfg_out) == 1 + next_bb = inst.parent.cfg_out.first() + + # guaranteed by cfg normalization+simplification + assert len(next_bb.cfg_in) > 1 + + target_stack = self.liveness_analysis.input_vars_from(inst.parent, next_bb) + # NOTE: in general the stack can contain multiple copies of + # the same variable, however, before a jump that is not possible + self._stack_reorder(assembly, stack, list(target_stack)) if opcode in COMMUTATIVE_INSTRUCTIONS: cost_no_swap = self._stack_reorder([], stack, operands, dry_run=True) From 957c8edd834e4991d7470082b179e4a1a1ab7e4d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 25 Sep 2024 11:27:07 -0400 Subject: [PATCH 08/15] perf[venom]: improve OrderedSet operations (#4246) improve time spent in venom by 25% dict views support set operations, which allows us to loop, using `&=`. since our intersections are typically not between a large number of OrderedSets, this results in faster execution time. references: - https://docs.python.org/3/library/stdtypes.html#dict-views --- vyper/utils.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/vyper/utils.py b/vyper/utils.py index f4f14a346e..3f19a9d15c 100644 --- a/vyper/utils.py +++ b/vyper/utils.py @@ -25,9 +25,10 @@ class OrderedSet(Generic[_T]): """ def __init__(self, iterable=None): - self._data = dict() - if iterable is not None: - self.update(iterable) + if iterable is None: + self._data = dict() + else: + self._data = dict.fromkeys(iterable) def __repr__(self): keys = ", ".join(repr(k) for k in self) @@ -57,6 +58,7 @@ def pop(self): def add(self, item: _T) -> None: self._data[item] = None + # NOTE to refactor: duplicate of self.update() def addmany(self, iterable): for item in iterable: self._data[item] = None @@ -109,11 +111,11 @@ def intersection(cls, *sets): if len(sets) == 0: raise ValueError("undefined: intersection of no sets") - ret = sets[0].copy() - for e in sets[0]: - if any(e not in s for s in sets[1:]): - ret.remove(e) - return ret + tmp = sets[0]._data.keys() + for s in sets[1:]: + tmp &= s._data.keys() + + return cls(tmp) class StringEnum(enum.Enum): From d60d31f3375db5b1f867c209316d9c29aca5d8b3 Mon Sep 17 00:00:00 2001 From: trocher Date: Thu, 26 Sep 2024 02:52:57 +0200 Subject: [PATCH 09/15] fix[lang]: fix `==` and `!=` bytesM folding (#4254) `bytesM` literals are not case-sensitive (they represent the same value no matter if the literal is lower- or upper-case), but the folding operation was case sensitive. 
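a minimal illustration of the problem and the fix, assuming (as in the folding code below) that hex literals are still represented as strings at folding time:

```python
left, right = "0xA1AAB33F", "0xa1aab33f"  # same bytes4 value, different spelling
assert left != right                      # naive string comparison: wrong fold result
assert left.lower() == right.lower()      # normalized comparison: correct fold result
```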
--------- Co-authored-by: Charles Cooper --- tests/unit/ast/nodes/test_fold_compare.py | 17 +++++++++++++++++ vyper/semantics/analysis/constant_folding.py | 7 +++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/unit/ast/nodes/test_fold_compare.py b/tests/unit/ast/nodes/test_fold_compare.py index aab8ac0b2d..fd9f65a7d3 100644 --- a/tests/unit/ast/nodes/test_fold_compare.py +++ b/tests/unit/ast/nodes/test_fold_compare.py @@ -110,3 +110,20 @@ def test_compare_type_mismatch(op): old_node = vyper_ast.body[0].value with pytest.raises(UnfoldableNode): old_node.get_folded_value() + + +@pytest.mark.parametrize("op", ["==", "!="]) +def test_compare_eq_bytes(get_contract, op): + left, right = "0xA1AAB33F", "0xa1aab33f" + source = f""" +@external +def foo(a: bytes4, b: bytes4) -> bool: + return a {op} b + """ + contract = get_contract(source) + + vyper_ast = parse_and_fold(f"{left} {op} {right}") + old_node = vyper_ast.body[0].value + new_node = old_node.get_folded_value() + + assert contract.foo(left, right) == new_node.value diff --git a/vyper/semantics/analysis/constant_folding.py b/vyper/semantics/analysis/constant_folding.py index 6e4166dc52..98cab0f8cb 100644 --- a/vyper/semantics/analysis/constant_folding.py +++ b/vyper/semantics/analysis/constant_folding.py @@ -180,8 +180,11 @@ def visit_Compare(self, node): raise UnfoldableNode( f"Invalid literal types for {node.op.description} comparison", node ) - - value = node.op._op(left.value, right.value) + lvalue, rvalue = left.value, right.value + if isinstance(left, vy_ast.Hex): + # Hex values are str, convert to be case-unsensitive. + lvalue, rvalue = lvalue.lower(), rvalue.lower() + value = node.op._op(lvalue, rvalue) return vy_ast.NameConstant.from_node(node, value=value) def visit_List(self, node) -> vy_ast.ExprNode: From e2f60019029139f2a69523a97586c6e404e00a37 Mon Sep 17 00:00:00 2001 From: HodanPlodky <36966616+HodanPlodky@users.noreply.github.com> Date: Fri, 27 Sep 2024 01:21:57 +0000 Subject: [PATCH 10/15] fix[venom]: fix invalid `phi`s after SCCP (#4181) This commit reduces `phi` nodes which, after SCCP, refer to a basic block which is unreachable. This could happen when the SCCP would replace a `jnz` by a `jmp` instruction, resulting in a `phi` instruction which refers to a non-existent block, or a `phi` instruction in a basic block which only has one predecessor. This commit reduces such `phi` instructions into `store` instructions. 
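a rough sketch of the reduction (illustrative only; it assumes the alternating `[label0, op0, label1, op1, ...]` layout of `phi` operands):

```python
def reduce_phi(inst, reachable_labels) -> bool:
    # keep only the incoming values whose source block is still reachable
    pairs = zip(inst.operands[::2], inst.operands[1::2])
    live_ops = [op for label, op in pairs if label in reachable_labels]
    if len(live_ops) == 1:
        # a phi with a single surviving predecessor is just a copy
        inst.opcode = "store"
        inst.operands = live_ops
        return True
    return False
```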
--------- Co-authored-by: Harry Kalogirou Co-authored-by: Charles Cooper --- tests/unit/compiler/venom/test_sccp.py | 31 ++++++++++++ .../unit/compiler/venom/test_simplify_cfg.py | 49 +++++++++++++++++++ vyper/venom/passes/sccp/sccp.py | 42 +++++++++++++++- vyper/venom/passes/simplify_cfg.py | 16 +++--- 4 files changed, 127 insertions(+), 11 deletions(-) create mode 100644 tests/unit/compiler/venom/test_simplify_cfg.py diff --git a/tests/unit/compiler/venom/test_sccp.py b/tests/unit/compiler/venom/test_sccp.py index e65839136e..478acc1079 100644 --- a/tests/unit/compiler/venom/test_sccp.py +++ b/tests/unit/compiler/venom/test_sccp.py @@ -211,3 +211,34 @@ def test_cont_phi_const_case(): assert sccp.lattice[IRVariable("%5", version=1)].value == 106 assert sccp.lattice[IRVariable("%5", version=2)].value == 97 assert sccp.lattice[IRVariable("%5")].value == 2 + + +def test_phi_reduction_after_unreachable_block(): + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + join = IRBasicBlock(IRLabel("join"), fn) + fn.append_basic_block(join) + + op = bb.append_instruction("store", 1) + true = IRLiteral(1) + bb.append_instruction("jnz", true, br1.label, join.label) + + op1 = br1.append_instruction("store", 2) + + br1.append_instruction("jmp", join.label) + + join.append_instruction("phi", bb.label, op, br1.label, op1) + join.append_instruction("stop") + + ac = IRAnalysesCache(fn) + SCCP(ac, fn).run_pass() + + assert join.instructions[0].opcode == "store", join.instructions[0] + assert join.instructions[0].operands == [op1] + + assert join.instructions[1].opcode == "stop" diff --git a/tests/unit/compiler/venom/test_simplify_cfg.py b/tests/unit/compiler/venom/test_simplify_cfg.py new file mode 100644 index 0000000000..c4bdbb263b --- /dev/null +++ b/tests/unit/compiler/venom/test_simplify_cfg.py @@ -0,0 +1,49 @@ +from vyper.venom.analysis.analysis import IRAnalysesCache +from vyper.venom.basicblock import IRBasicBlock, IRLabel, IRLiteral +from vyper.venom.context import IRContext +from vyper.venom.passes.sccp import SCCP +from vyper.venom.passes.simplify_cfg import SimplifyCFGPass + + +def test_phi_reduction_after_block_pruning(): + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + br2 = IRBasicBlock(IRLabel("else"), fn) + fn.append_basic_block(br2) + + join = IRBasicBlock(IRLabel("join"), fn) + fn.append_basic_block(join) + + true = IRLiteral(1) + bb.append_instruction("jnz", true, br1.label, br2.label) + + op1 = br1.append_instruction("store", 1) + op2 = br2.append_instruction("store", 2) + + br1.append_instruction("jmp", join.label) + br2.append_instruction("jmp", join.label) + + join.append_instruction("phi", br1.label, op1, br2.label, op2) + join.append_instruction("stop") + + ac = IRAnalysesCache(fn) + SCCP(ac, fn).run_pass() + SimplifyCFGPass(ac, fn).run_pass() + + bbs = list(fn.get_basic_blocks()) + + assert len(bbs) == 1 + final_bb = bbs[0] + + inst0, inst1, inst2 = final_bb.instructions + + assert inst0.opcode == "store" + assert inst0.operands == [IRLiteral(1)] + assert inst1.opcode == "store" + assert inst1.operands == [inst0.output] + assert inst2.opcode == "stop" diff --git a/vyper/venom/passes/sccp/sccp.py b/vyper/venom/passes/sccp/sccp.py index 164d8e241d..cfac6794f8 100644 --- a/vyper/venom/passes/sccp/sccp.py +++ b/vyper/venom/passes/sccp/sccp.py @@ -56,14 +56,18 @@ 
class SCCP(IRPass): uses: dict[IRVariable, OrderedSet[IRInstruction]] lattice: Lattice work_list: list[WorkListItem] - cfg_dirty: bool cfg_in_exec: dict[IRBasicBlock, OrderedSet[IRBasicBlock]] + cfg_dirty: bool + # list of basic blocks whose cfg_in was modified + cfg_in_modified: OrderedSet[IRBasicBlock] + def __init__(self, analyses_cache: IRAnalysesCache, function: IRFunction): super().__init__(analyses_cache, function) self.lattice = {} self.work_list: list[WorkListItem] = [] self.cfg_dirty = False + self.cfg_in_modified = OrderedSet() def run_pass(self): self.fn = self.function @@ -74,7 +78,9 @@ def run_pass(self): # self._propagate_variables() - self.analyses_cache.invalidate_analysis(CFGAnalysis) + if self.cfg_dirty: + self.analyses_cache.force_analysis(CFGAnalysis) + self._fix_phi_nodes() def _calculate_sccp(self, entry: IRBasicBlock): """ @@ -304,7 +310,11 @@ def _replace_constants(self, inst: IRInstruction): target = inst.operands[1] inst.opcode = "jmp" inst.operands = [target] + self.cfg_dirty = True + for bb in inst.parent.cfg_out: + if bb.label == target: + self.cfg_in_modified.add(bb) elif inst.opcode in ("assert", "assert_unreachable"): lat = self._eval_from_lattice(inst.operands[0]) @@ -329,6 +339,34 @@ def _replace_constants(self, inst: IRInstruction): if isinstance(lat, IRLiteral): inst.operands[i] = lat + def _fix_phi_nodes(self): + # fix basic blocks whose cfg in was changed + # maybe this should really be done in _visit_phi + needs_sort = False + + for bb in self.fn.get_basic_blocks(): + cfg_in_labels = OrderedSet(in_bb.label for in_bb in bb.cfg_in) + + for inst in bb.instructions: + if inst.opcode != "phi": + break + needs_sort |= self._fix_phi_inst(inst, cfg_in_labels) + + # move phi instructions to the top of the block + if needs_sort: + bb.instructions.sort(key=lambda inst: inst.opcode != "phi") + + def _fix_phi_inst(self, inst: IRInstruction, cfg_in_labels: OrderedSet): + operands = [op for label, op in inst.phi_operands if label in cfg_in_labels] + + if len(operands) != 1: + return False + + assert inst.output is not None + inst.opcode = "store" + inst.operands = operands + return True + def _meet(x: LatticeItem, y: LatticeItem) -> LatticeItem: if x == LatticeEnum.TOP: diff --git a/vyper/venom/passes/simplify_cfg.py b/vyper/venom/passes/simplify_cfg.py index 08582fee96..1409f43947 100644 --- a/vyper/venom/passes/simplify_cfg.py +++ b/vyper/venom/passes/simplify_cfg.py @@ -9,23 +9,21 @@ class SimplifyCFGPass(IRPass): visited: OrderedSet def _merge_blocks(self, a: IRBasicBlock, b: IRBasicBlock): - a.instructions.pop() + a.instructions.pop() # pop terminating instruction for inst in b.instructions: - assert inst.opcode != "phi", "Not implemented yet" - if inst.opcode == "phi": - a.instructions.insert(0, inst) - else: - inst.parent = a - a.instructions.append(inst) + assert inst.opcode != "phi", f"Instruction should never be phi {b}" + inst.parent = a + a.instructions.append(inst) # Update CFG a.cfg_out = b.cfg_out - if len(b.cfg_out) > 0: - next_bb = b.cfg_out.first() + + for next_bb in a.cfg_out: next_bb.remove_cfg_in(b) next_bb.add_cfg_in(a) for inst in next_bb.instructions: + # assume phi instructions are at beginning of bb if inst.opcode != "phi": break inst.operands[inst.operands.index(b.label)] = a.label From b83bc98d16440c430a2d1ccd04b34054e3bfe9aa Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 28 Sep 2024 11:02:51 -0400 Subject: [PATCH 11/15] fix[venom]: clean up sccp pass (#4261) small cleanup tasks for sccp pass. 
remove dead variables and stray comments. --- vyper/venom/passes/sccp/sccp.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vyper/venom/passes/sccp/sccp.py b/vyper/venom/passes/sccp/sccp.py index cfac6794f8..013583ec63 100644 --- a/vyper/venom/passes/sccp/sccp.py +++ b/vyper/venom/passes/sccp/sccp.py @@ -59,15 +59,12 @@ class SCCP(IRPass): cfg_in_exec: dict[IRBasicBlock, OrderedSet[IRBasicBlock]] cfg_dirty: bool - # list of basic blocks whose cfg_in was modified - cfg_in_modified: OrderedSet[IRBasicBlock] def __init__(self, analyses_cache: IRAnalysesCache, function: IRFunction): super().__init__(analyses_cache, function) self.lattice = {} self.work_list: list[WorkListItem] = [] self.cfg_dirty = False - self.cfg_in_modified = OrderedSet() def run_pass(self): self.fn = self.function @@ -76,8 +73,6 @@ def run_pass(self): self._calculate_sccp(self.fn.entry) self._propagate_constants() - # self._propagate_variables() - if self.cfg_dirty: self.analyses_cache.force_analysis(CFGAnalysis) self._fix_phi_nodes() @@ -312,9 +307,6 @@ def _replace_constants(self, inst: IRInstruction): inst.operands = [target] self.cfg_dirty = True - for bb in inst.parent.cfg_out: - if bb.label == target: - self.cfg_in_modified.add(bb) elif inst.opcode in ("assert", "assert_unreachable"): lat = self._eval_from_lattice(inst.operands[0]) From e21f3e8b1f7d5a5b5b40fb73ab4c6a2946e878ef Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 28 Sep 2024 11:08:03 -0400 Subject: [PATCH 12/15] refactor[venom]: remove `dup_requirements` analysis (#4262) this commit updates `next_liveness` to reflect `out_vars` when the instruction is the last instruction in the basic block. after this change, membership in `dup_requirements` is the same as membership in `next_liveness`, so we can remove the `dup_requirements` analysis. 
--- vyper/venom/analysis/dup_requirements.py | 15 --------------- vyper/venom/basicblock.py | 2 -- vyper/venom/venom_to_assembly.py | 24 ++++++++++++++---------- 3 files changed, 14 insertions(+), 27 deletions(-) delete mode 100644 vyper/venom/analysis/dup_requirements.py diff --git a/vyper/venom/analysis/dup_requirements.py b/vyper/venom/analysis/dup_requirements.py deleted file mode 100644 index 7afb315035..0000000000 --- a/vyper/venom/analysis/dup_requirements.py +++ /dev/null @@ -1,15 +0,0 @@ -from vyper.utils import OrderedSet -from vyper.venom.analysis.analysis import IRAnalysis - - -class DupRequirementsAnalysis(IRAnalysis): - def analyze(self): - for bb in self.function.get_basic_blocks(): - last_liveness = bb.out_vars - for inst in reversed(bb.instructions): - inst.dup_requirements = OrderedSet() - ops = inst.get_input_variables() - for op in ops: - if op in last_liveness: - inst.dup_requirements.add(op) - last_liveness = inst.liveness diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index d6fb9560cd..1199579b3f 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -209,7 +209,6 @@ class IRInstruction: output: Optional[IROperand] # set of live variables at this instruction liveness: OrderedSet[IRVariable] - dup_requirements: OrderedSet[IRVariable] parent: "IRBasicBlock" fence_id: int annotation: Optional[str] @@ -228,7 +227,6 @@ def __init__( self.operands = list(operands) # in case we get an iterator self.output = output self.liveness = OrderedSet() - self.dup_requirements = OrderedSet() self.fence_id = -1 self.annotation = None self.ast_source = None diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 41a76319d7..390fab8e7c 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -12,7 +12,6 @@ ) from vyper.utils import MemoryPositions, OrderedSet from vyper.venom.analysis.analysis import IRAnalysesCache -from vyper.venom.analysis.dup_requirements import DupRequirementsAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis from vyper.venom.basicblock import ( IRBasicBlock, @@ -153,7 +152,6 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: NormalizationPass(ac, fn).run_pass() self.liveness_analysis = ac.request_analysis(LivenessAnalysis) - ac.request_analysis(DupRequirementsAnalysis) assert fn.normalized, "Non-normalized CFG!" @@ -231,7 +229,12 @@ def _stack_reorder( return cost def _emit_input_operands( - self, assembly: list, inst: IRInstruction, ops: list[IROperand], stack: StackModel + self, + assembly: list, + inst: IRInstruction, + ops: list[IROperand], + stack: StackModel, + next_liveness: OrderedSet[IRVariable], ) -> None: # PRE: we already have all the items on the stack that have # been scheduled to be killed. 
now it's just a matter of emitting @@ -241,7 +244,7 @@ def _emit_input_operands( # it with something that is wanted if ops and stack.height > 0 and stack.peek(0) not in ops: for op in ops: - if isinstance(op, IRVariable) and op not in inst.dup_requirements: + if isinstance(op, IRVariable) and op not in next_liveness: self.swap_op(assembly, stack, op) break @@ -264,7 +267,7 @@ def _emit_input_operands( stack.push(op) continue - if op in inst.dup_requirements and op not in emitted_ops: + if op in next_liveness and op not in emitted_ops: self.dup_op(assembly, stack, op) if op in emitted_ops: @@ -288,7 +291,9 @@ def _generate_evm_for_basicblock_r( all_insts = sorted(basicblock.instructions, key=lambda x: x.opcode != "param") for i, inst in enumerate(all_insts): - next_liveness = all_insts[i + 1].liveness if i + 1 < len(all_insts) else OrderedSet() + next_liveness = ( + all_insts[i + 1].liveness if i + 1 < len(all_insts) else basicblock.out_vars + ) asm.extend(self._generate_evm_for_instruction(inst, stack, next_liveness)) @@ -327,10 +332,9 @@ def clean_stack_from_cfg_in( self.pop(asm, stack) def _generate_evm_for_instruction( - self, inst: IRInstruction, stack: StackModel, next_liveness: OrderedSet = None + self, inst: IRInstruction, stack: StackModel, next_liveness: OrderedSet ) -> list[str]: assembly: list[str | int] = [] - next_liveness = next_liveness or OrderedSet() opcode = inst.opcode # @@ -375,7 +379,7 @@ def _generate_evm_for_instruction( # example, for `%56 = %label1 %13 %label2 %14`, we will # find an instance of %13 *or* %14 in the stack and replace it with %56. to_be_replaced = stack.peek(depth) - if to_be_replaced in inst.dup_requirements: + if to_be_replaced in next_liveness: # %13/%14 is still live(!), so we make a copy of it self.dup(assembly, stack, depth) stack.poke(0, ret) @@ -390,7 +394,7 @@ def _generate_evm_for_instruction( return apply_line_numbers(inst, assembly) # Step 2: Emit instruction's input operands - self._emit_input_operands(assembly, inst, operands, stack) + self._emit_input_operands(assembly, inst, operands, stack, next_liveness) # Step 3: Reorder stack before join points if opcode == "jmp": From 4f47497450044fdb9692881862477466348bad67 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 4 Oct 2024 20:35:20 +0300 Subject: [PATCH 13/15] fix[venom]: remove duplicate volatile instructions (#4263) remove duplicate `assert`, `assert_unreachable` from `VOLATILE_INSTRUCTIONS` dictionary --- vyper/venom/basicblock.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 1199579b3f..45db8b232f 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -21,8 +21,6 @@ "istore", "tload", "tstore", - "assert", - "assert_unreachable", "mstore", "mload", "calldatacopy", From 96551197701251ebde92f243fc900eed2005cee3 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 4 Oct 2024 15:43:52 -0400 Subject: [PATCH 14/15] feat[tool]: add integrity hash to initcode (#4234) this commit adds the integrity hash of the source code to the initcode. it extends the existing cbor metadata payload in the initcode, so that verifiers can compare the integrity hash to the artifact produced by a source bundle. the integrity hash is put in the initcode to preserve bytecode space of the runtime code. refactor: - change existing `insert_compiler_metadata=` flag to the more generic `compiler_metadata=None`, which is more extensible. 
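as a sketch of how a verifier might consume this, assuming the suffix layout exercised by the test helpers below (CBOR payload followed by a 2-byte big-endian length of the whole suffix); `read_integrity_hash` is a hypothetical helper, not part of the compiler:

```python
import cbor2

def read_integrity_hash(initcode: bytes) -> str:
    # the last two bytes give the total length of the metadata suffix
    suffix_len = int.from_bytes(initcode[-2:], "big")
    payload = cbor2.loads(initcode[-suffix_len:-2])
    integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = payload
    return integrity_hash.hex()
```

the result can then be compared against the compiler's `integrity` output format.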
--- .../builtins/codegen/test_raw_call.py | 18 ++++++++- tests/unit/compiler/test_bytecode_runtime.py | 37 ++++++++++++++----- vyper/compiler/output.py | 6 +-- vyper/compiler/phases.py | 17 +++++---- vyper/ir/compile_ir.py | 15 +++++--- 5 files changed, 64 insertions(+), 29 deletions(-) diff --git a/tests/functional/builtins/codegen/test_raw_call.py b/tests/functional/builtins/codegen/test_raw_call.py index 4107f9a4d0..bf953ff018 100644 --- a/tests/functional/builtins/codegen/test_raw_call.py +++ b/tests/functional/builtins/codegen/test_raw_call.py @@ -261,6 +261,12 @@ def __default__(): assert env.message_call(caller.address, data=sig) == b"" +def _strip_initcode_suffix(bytecode): + bs = bytes.fromhex(bytecode.removeprefix("0x")) + to_strip = int.from_bytes(bs[-2:], "big") + return bs[:-to_strip].hex() + + # check max_outsize=0 does same thing as not setting max_outsize. # compile to bytecode and compare bytecode directly. def test_max_outsize_0(): @@ -276,7 +282,11 @@ def test_raw_call(_target: address): """ output1 = compile_code(code1, output_formats=["bytecode", "bytecode_runtime"]) output2 = compile_code(code2, output_formats=["bytecode", "bytecode_runtime"]) - assert output1 == output2 + assert output1["bytecode_runtime"] == output2["bytecode_runtime"] + + bytecode1 = output1["bytecode"] + bytecode2 = output2["bytecode"] + assert _strip_initcode_suffix(bytecode1) == _strip_initcode_suffix(bytecode2) # check max_outsize=0 does same thing as not setting max_outsize, @@ -298,7 +308,11 @@ def test_raw_call(_target: address) -> bool: """ output1 = compile_code(code1, output_formats=["bytecode", "bytecode_runtime"]) output2 = compile_code(code2, output_formats=["bytecode", "bytecode_runtime"]) - assert output1 == output2 + assert output1["bytecode_runtime"] == output2["bytecode_runtime"] + + bytecode1 = output1["bytecode"] + bytecode2 = output2["bytecode"] + assert _strip_initcode_suffix(bytecode1) == _strip_initcode_suffix(bytecode2) # test functionality of max_outsize=0 diff --git a/tests/unit/compiler/test_bytecode_runtime.py b/tests/unit/compiler/test_bytecode_runtime.py index 213adce017..1d38130c49 100644 --- a/tests/unit/compiler/test_bytecode_runtime.py +++ b/tests/unit/compiler/test_bytecode_runtime.py @@ -55,13 +55,17 @@ def test_bytecode_runtime(): def test_bytecode_signature(): - out = vyper.compile_code(simple_contract_code, output_formats=["bytecode_runtime", "bytecode"]) + out = vyper.compile_code( + simple_contract_code, output_formats=["bytecode_runtime", "bytecode", "integrity"] + ) runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) metadata = _parse_cbor_metadata(initcode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert integrity_hash.hex() == out["integrity"] assert runtime_len == len(runtime_code) assert data_section_lengths == [] @@ -73,14 +77,18 @@ def test_bytecode_signature_dense_jumptable(): settings = Settings(optimize=OptimizationLevel.CODESIZE) out = vyper.compile_code( - many_functions, output_formats=["bytecode_runtime", "bytecode"], settings=settings + many_functions, + output_formats=["bytecode_runtime", "bytecode", "integrity"], + settings=settings, ) runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) metadata = _parse_cbor_metadata(initcode) - runtime_len, 
data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert integrity_hash.hex() == out["integrity"] assert runtime_len == len(runtime_code) assert data_section_lengths == [5, 35] @@ -92,14 +100,18 @@ def test_bytecode_signature_sparse_jumptable(): settings = Settings(optimize=OptimizationLevel.GAS) out = vyper.compile_code( - many_functions, output_formats=["bytecode_runtime", "bytecode"], settings=settings + many_functions, + output_formats=["bytecode_runtime", "bytecode", "integrity"], + settings=settings, ) runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) metadata = _parse_cbor_metadata(initcode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert integrity_hash.hex() == out["integrity"] assert runtime_len == len(runtime_code) assert data_section_lengths == [8] @@ -108,13 +120,17 @@ def test_bytecode_signature_sparse_jumptable(): def test_bytecode_signature_immutables(): - out = vyper.compile_code(has_immutables, output_formats=["bytecode_runtime", "bytecode"]) + out = vyper.compile_code( + has_immutables, output_formats=["bytecode_runtime", "bytecode", "integrity"] + ) runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) metadata = _parse_cbor_metadata(initcode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert integrity_hash.hex() == out["integrity"] assert runtime_len == len(runtime_code) assert data_section_lengths == [] @@ -129,7 +145,10 @@ def test_bytecode_signature_deployed(code, get_contract, env): deployed_code = env.get_code(c.address) metadata = _parse_cbor_metadata(c.bytecode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + out = vyper.compile_code(code, output_formats=["integrity"]) + assert integrity_hash.hex() == out["integrity"] assert compiler == {"vyper": list(vyper.version.version_tuple)} diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 577afd3822..09d299b90d 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -320,15 +320,13 @@ def _build_source_map_output(compiler_data, bytecode, pc_maps): def build_source_map_output(compiler_data: CompilerData) -> dict: - bytecode, pc_maps = compile_ir.assembly_to_evm( - compiler_data.assembly, insert_compiler_metadata=False - ) + bytecode, pc_maps = compile_ir.assembly_to_evm(compiler_data.assembly, compiler_metadata=None) return _build_source_map_output(compiler_data, bytecode, pc_maps) def build_source_map_runtime_output(compiler_data: CompilerData) -> dict: bytecode, pc_maps = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, insert_compiler_metadata=False + compiler_data.assembly_runtime, compiler_metadata=None ) return _build_source_map_output(compiler_data, bytecode, pc_maps) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 147af24d67..97df73cdae 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -2,7 +2,7 @@ import warnings from functools import cached_property from pathlib import Path, PurePath -from typing 
import Optional +from typing import Any, Optional from vyper import ast as vy_ast from vyper.ast import natspec @@ -249,12 +249,15 @@ def assembly_runtime(self) -> list: @cached_property def bytecode(self) -> bytes: - insert_compiler_metadata = not self.no_bytecode_metadata - return generate_bytecode(self.assembly, insert_compiler_metadata=insert_compiler_metadata) + metadata = None + if not self.no_bytecode_metadata: + module_t = self.compilation_target._metadata["type"] + metadata = bytes.fromhex(module_t.integrity_sum) + return generate_bytecode(self.assembly, compiler_metadata=metadata) @cached_property def bytecode_runtime(self) -> bytes: - return generate_bytecode(self.assembly_runtime, insert_compiler_metadata=False) + return generate_bytecode(self.assembly_runtime, compiler_metadata=None) @cached_property def blueprint_bytecode(self) -> bytes: @@ -351,7 +354,7 @@ def _find_nested_opcode(assembly, key): return any(_find_nested_opcode(x, key) for x in sublists) -def generate_bytecode(assembly: list, insert_compiler_metadata: bool) -> bytes: +def generate_bytecode(assembly: list, compiler_metadata: Optional[Any]) -> bytes: """ Generate bytecode from assembly instructions. @@ -365,6 +368,4 @@ def generate_bytecode(assembly: list, insert_compiler_metadata: bool) -> bytes: bytes Final compiled bytecode. """ - return compile_ir.assembly_to_evm(assembly, insert_compiler_metadata=insert_compiler_metadata)[ - 0 - ] + return compile_ir.assembly_to_evm(assembly, compiler_metadata=compiler_metadata)[0] diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 4c68aa2c8f..2cc951b188 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1155,22 +1155,24 @@ def _relocate_segments(assembly): # TODO: change API to split assembly_to_evm and assembly_to_source/symbol_maps -def assembly_to_evm(assembly, pc_ofst=0, insert_compiler_metadata=False): +def assembly_to_evm(assembly, pc_ofst=0, compiler_metadata=None): bytecode, source_maps, _ = assembly_to_evm_with_symbol_map( - assembly, pc_ofst=pc_ofst, insert_compiler_metadata=insert_compiler_metadata + assembly, pc_ofst=pc_ofst, compiler_metadata=compiler_metadata ) return bytecode, source_maps -def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadata=False): +def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None): """ Assembles assembly into EVM assembly: list of asm instructions pc_ofst: when constructing the source map, the amount to offset all pcs by (no effect until we add deploy code source map) - insert_compiler_metadata: whether to append vyper metadata to output - (should be true for runtime code) + compiler_metadata: any compiler metadata to add. pass `None` to indicate + no metadata to be added (should always be `None` for + runtime code). the value is opaque, and will be passed + directly to `cbor2.dumps()`. 
""" line_number_map = { "breakpoints": set(), @@ -1278,10 +1280,11 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadat pc += 1 bytecode_suffix = b"" - if insert_compiler_metadata: + if compiler_metadata is not None: # this will hold true when we are in initcode assert immutables_len is not None metadata = ( + compiler_metadata, len(runtime_code), data_section_lengths, immutables_len, From 5d8280feec16f86ae1a888e770f20a96113fdabd Mon Sep 17 00:00:00 2001 From: HodanPlodky <36966616+HodanPlodky@users.noreply.github.com> Date: Fri, 4 Oct 2024 19:50:02 +0000 Subject: [PATCH 15/15] fix[venom]: fix `_stack_reorder()` routine (#4220) fix an issue where `stack_reorder()` reorders operands incorrectly, resulting in the result stack not matching the target stack. this bug can manifest when there are multiple copies of an operand on the stack. in the `stack_reorder()` loop, an operand gets moved past one of its copies which has not been moved yet, resulting in the operand getting moved twice, instead of each copy of the operand getting moved once, since `get_depth()` returns the wrong copy of the operand after the first move. this commit fixes the issue by keeping track of the positions of each copy of each stack item, and ensuring that each copy only gets moved once. --------- Co-authored-by: Charles Cooper --- .../unit/compiler/venom/test_stack_reorder.py | 28 +++++++++++++ vyper/venom/venom_to_assembly.py | 39 +++++++++++++++++-- 2 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 tests/unit/compiler/venom/test_stack_reorder.py diff --git a/tests/unit/compiler/venom/test_stack_reorder.py b/tests/unit/compiler/venom/test_stack_reorder.py new file mode 100644 index 0000000000..a9f505984e --- /dev/null +++ b/tests/unit/compiler/venom/test_stack_reorder.py @@ -0,0 +1,28 @@ +from vyper.venom import generate_assembly_experimental +from vyper.venom.context import IRContext + + +def test_stack_reorder(): + """ + Test to was created from the example in the + issue https://github.com/vyperlang/vyper/issues/4215 + this example should fail with original stack reorder + algorithm but succeed with new one + """ + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + var0 = bb.append_instruction("store", 1) + var1 = bb.append_instruction("store", 2) + var2 = bb.append_instruction("store", 3) + var3 = bb.append_instruction("store", 4) + var4 = bb.append_instruction("store", 5) + + bb.append_instruction("staticcall", var0, var1, var2, var3, var4, var3) + + ret_val = bb.append_instruction("add", var4, var4) + + bb.append_instruction("ret", ret_val) + + generate_assembly_experimental(ctx) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 390fab8e7c..9de75dab38 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -1,4 +1,5 @@ -from collections import Counter +from bisect import insort +from collections import Counter, defaultdict from typing import Any from vyper.exceptions import CompilerPanic, StackTooDeep @@ -205,14 +206,29 @@ def _stack_reorder( stack = stack.copy() stack_ops_count = len(stack_ops) + if stack_ops_count == 0: + return 0 counts = Counter(stack_ops) + # positions stores the positions of relevant operands + # on stack for example operand %82 is on positions [0, 3] + # this operand could ocure even more deeper in the stack + # but only those that are needed/relevant in calculation + # are considered + positions: dict[IROperand, list[int]] = 
defaultdict(list) + for op in stack_ops: + positions[op] = [] + for i in range(counts[op]): + positions[op].append(stack.get_depth(op, i + 1)) + for i in range(stack_ops_count): op = stack_ops[i] final_stack_depth = -(stack_ops_count - i - 1) - depth = stack.get_depth(op, counts[op]) # type: ignore - counts[op] -= 1 + depth = positions[op].pop() # type: ignore + assert depth not in range( + -stack_ops_count + 1, final_stack_depth + ), f"{depth} : ({-stack_ops_count - 1}, {final_stack_depth})" if depth == StackModel.NOT_IN_STACK: raise CompilerPanic(f"Variable {op} not in stack") @@ -223,9 +239,26 @@ def _stack_reorder( if op == stack.peek(final_stack_depth): continue + # moves the top item to original position + top_item_positions = positions[stack.peek(0)] + if len(top_item_positions) != 0: + top_item_positions.remove(0) + insort(top_item_positions, depth) + cost += self.swap(assembly, stack, depth) + + # moves the item from final position to top + final_item_positions = positions[stack.peek(final_stack_depth)] + if final_stack_depth in final_item_positions: + final_item_positions.remove(final_stack_depth) + final_item_positions.insert(0, 0) + else: + final_item_positions.insert(0, 0) + cost += self.swap(assembly, stack, final_stack_depth) + assert stack._stack[-len(stack_ops) :] == stack_ops, (stack, stack_ops) + return cost def _emit_input_operands(