From 4f47497450044fdb9692881862477466348bad67 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 4 Oct 2024 20:35:20 +0300 Subject: [PATCH 1/6] fix[venom]: remove duplicate volatile instructions (#4263) remove duplicate `assert`, `assert_unreachable` from `VOLATILE_INSTRUCTIONS` dictionary --- vyper/venom/basicblock.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 1199579b3f..45db8b232f 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -21,8 +21,6 @@ "istore", "tload", "tstore", - "assert", - "assert_unreachable", "mstore", "mload", "calldatacopy", From 96551197701251ebde92f243fc900eed2005cee3 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 4 Oct 2024 15:43:52 -0400 Subject: [PATCH 2/6] feat[tool]: add integrity hash to initcode (#4234) this commit adds the integrity hash of the source code to the initcode. it extends the existing cbor metadata payload in the initcode, so that verifiers can compare the integrity hash to the artifact produced by a source bundle. the integrity hash is put in the initcode to preserve bytecode space of the runtime code. refactor: - change existing `insert_compiler_metadata=` flag to the more generic `compiler_metadata=None`, which is more extensible. --- .../builtins/codegen/test_raw_call.py | 18 ++++++++- tests/unit/compiler/test_bytecode_runtime.py | 37 ++++++++++++++----- vyper/compiler/output.py | 6 +-- vyper/compiler/phases.py | 17 +++++---- vyper/ir/compile_ir.py | 15 +++++--- 5 files changed, 64 insertions(+), 29 deletions(-) diff --git a/tests/functional/builtins/codegen/test_raw_call.py b/tests/functional/builtins/codegen/test_raw_call.py index 4107f9a4d0..bf953ff018 100644 --- a/tests/functional/builtins/codegen/test_raw_call.py +++ b/tests/functional/builtins/codegen/test_raw_call.py @@ -261,6 +261,12 @@ def __default__(): assert env.message_call(caller.address, data=sig) == b"" +def _strip_initcode_suffix(bytecode): + bs = bytes.fromhex(bytecode.removeprefix("0x")) + to_strip = int.from_bytes(bs[-2:], "big") + return bs[:-to_strip].hex() + + # check max_outsize=0 does same thing as not setting max_outsize. # compile to bytecode and compare bytecode directly. 
def test_max_outsize_0(): @@ -276,7 +282,11 @@ def test_raw_call(_target: address): """ output1 = compile_code(code1, output_formats=["bytecode", "bytecode_runtime"]) output2 = compile_code(code2, output_formats=["bytecode", "bytecode_runtime"]) - assert output1 == output2 + assert output1["bytecode_runtime"] == output2["bytecode_runtime"] + + bytecode1 = output1["bytecode"] + bytecode2 = output2["bytecode"] + assert _strip_initcode_suffix(bytecode1) == _strip_initcode_suffix(bytecode2) # check max_outsize=0 does same thing as not setting max_outsize, @@ -298,7 +308,11 @@ def test_raw_call(_target: address) -> bool: """ output1 = compile_code(code1, output_formats=["bytecode", "bytecode_runtime"]) output2 = compile_code(code2, output_formats=["bytecode", "bytecode_runtime"]) - assert output1 == output2 + assert output1["bytecode_runtime"] == output2["bytecode_runtime"] + + bytecode1 = output1["bytecode"] + bytecode2 = output2["bytecode"] + assert _strip_initcode_suffix(bytecode1) == _strip_initcode_suffix(bytecode2) # test functionality of max_outsize=0 diff --git a/tests/unit/compiler/test_bytecode_runtime.py b/tests/unit/compiler/test_bytecode_runtime.py index 213adce017..1d38130c49 100644 --- a/tests/unit/compiler/test_bytecode_runtime.py +++ b/tests/unit/compiler/test_bytecode_runtime.py @@ -55,13 +55,17 @@ def test_bytecode_runtime(): def test_bytecode_signature(): - out = vyper.compile_code(simple_contract_code, output_formats=["bytecode_runtime", "bytecode"]) + out = vyper.compile_code( + simple_contract_code, output_formats=["bytecode_runtime", "bytecode", "integrity"] + ) runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) metadata = _parse_cbor_metadata(initcode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert integrity_hash.hex() == out["integrity"] assert runtime_len == len(runtime_code) assert data_section_lengths == [] @@ -73,14 +77,18 @@ def test_bytecode_signature_dense_jumptable(): settings = Settings(optimize=OptimizationLevel.CODESIZE) out = vyper.compile_code( - many_functions, output_formats=["bytecode_runtime", "bytecode"], settings=settings + many_functions, + output_formats=["bytecode_runtime", "bytecode", "integrity"], + settings=settings, ) runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) metadata = _parse_cbor_metadata(initcode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert integrity_hash.hex() == out["integrity"] assert runtime_len == len(runtime_code) assert data_section_lengths == [5, 35] @@ -92,14 +100,18 @@ def test_bytecode_signature_sparse_jumptable(): settings = Settings(optimize=OptimizationLevel.GAS) out = vyper.compile_code( - many_functions, output_formats=["bytecode_runtime", "bytecode"], settings=settings + many_functions, + output_formats=["bytecode_runtime", "bytecode", "integrity"], + settings=settings, ) runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) metadata = _parse_cbor_metadata(initcode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = 
metadata + + assert integrity_hash.hex() == out["integrity"] assert runtime_len == len(runtime_code) assert data_section_lengths == [8] @@ -108,13 +120,17 @@ def test_bytecode_signature_sparse_jumptable(): def test_bytecode_signature_immutables(): - out = vyper.compile_code(has_immutables, output_formats=["bytecode_runtime", "bytecode"]) + out = vyper.compile_code( + has_immutables, output_formats=["bytecode_runtime", "bytecode", "integrity"] + ) runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) metadata = _parse_cbor_metadata(initcode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert integrity_hash.hex() == out["integrity"] assert runtime_len == len(runtime_code) assert data_section_lengths == [] @@ -129,7 +145,10 @@ def test_bytecode_signature_deployed(code, get_contract, env): deployed_code = env.get_code(c.address) metadata = _parse_cbor_metadata(c.bytecode) - runtime_len, data_section_lengths, immutables_len, compiler = metadata + integrity_hash, runtime_len, data_section_lengths, immutables_len, compiler = metadata + + out = vyper.compile_code(code, output_formats=["integrity"]) + assert integrity_hash.hex() == out["integrity"] assert compiler == {"vyper": list(vyper.version.version_tuple)} diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 577afd3822..09d299b90d 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -320,15 +320,13 @@ def _build_source_map_output(compiler_data, bytecode, pc_maps): def build_source_map_output(compiler_data: CompilerData) -> dict: - bytecode, pc_maps = compile_ir.assembly_to_evm( - compiler_data.assembly, insert_compiler_metadata=False - ) + bytecode, pc_maps = compile_ir.assembly_to_evm(compiler_data.assembly, compiler_metadata=None) return _build_source_map_output(compiler_data, bytecode, pc_maps) def build_source_map_runtime_output(compiler_data: CompilerData) -> dict: bytecode, pc_maps = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, insert_compiler_metadata=False + compiler_data.assembly_runtime, compiler_metadata=None ) return _build_source_map_output(compiler_data, bytecode, pc_maps) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 147af24d67..97df73cdae 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -2,7 +2,7 @@ import warnings from functools import cached_property from pathlib import Path, PurePath -from typing import Optional +from typing import Any, Optional from vyper import ast as vy_ast from vyper.ast import natspec @@ -249,12 +249,15 @@ def assembly_runtime(self) -> list: @cached_property def bytecode(self) -> bytes: - insert_compiler_metadata = not self.no_bytecode_metadata - return generate_bytecode(self.assembly, insert_compiler_metadata=insert_compiler_metadata) + metadata = None + if not self.no_bytecode_metadata: + module_t = self.compilation_target._metadata["type"] + metadata = bytes.fromhex(module_t.integrity_sum) + return generate_bytecode(self.assembly, compiler_metadata=metadata) @cached_property def bytecode_runtime(self) -> bytes: - return generate_bytecode(self.assembly_runtime, insert_compiler_metadata=False) + return generate_bytecode(self.assembly_runtime, compiler_metadata=None) @cached_property def blueprint_bytecode(self) -> bytes: @@ -351,7 +354,7 @@ def _find_nested_opcode(assembly, key): return 
any(_find_nested_opcode(x, key) for x in sublists) -def generate_bytecode(assembly: list, insert_compiler_metadata: bool) -> bytes: +def generate_bytecode(assembly: list, compiler_metadata: Optional[Any]) -> bytes: """ Generate bytecode from assembly instructions. @@ -365,6 +368,4 @@ def generate_bytecode(assembly: list, insert_compiler_metadata: bool) -> bytes: bytes Final compiled bytecode. """ - return compile_ir.assembly_to_evm(assembly, insert_compiler_metadata=insert_compiler_metadata)[ - 0 - ] + return compile_ir.assembly_to_evm(assembly, compiler_metadata=compiler_metadata)[0] diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 4c68aa2c8f..2cc951b188 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1155,22 +1155,24 @@ def _relocate_segments(assembly): # TODO: change API to split assembly_to_evm and assembly_to_source/symbol_maps -def assembly_to_evm(assembly, pc_ofst=0, insert_compiler_metadata=False): +def assembly_to_evm(assembly, pc_ofst=0, compiler_metadata=None): bytecode, source_maps, _ = assembly_to_evm_with_symbol_map( - assembly, pc_ofst=pc_ofst, insert_compiler_metadata=insert_compiler_metadata + assembly, pc_ofst=pc_ofst, compiler_metadata=compiler_metadata ) return bytecode, source_maps -def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadata=False): +def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None): """ Assembles assembly into EVM assembly: list of asm instructions pc_ofst: when constructing the source map, the amount to offset all pcs by (no effect until we add deploy code source map) - insert_compiler_metadata: whether to append vyper metadata to output - (should be true for runtime code) + compiler_metadata: any compiler metadata to add. pass `None` to indicate + no metadata to be added (should always be `None` for + runtime code). the value is opaque, and will be passed + directly to `cbor2.dumps()`. """ line_number_map = { "breakpoints": set(), @@ -1278,10 +1280,11 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadat pc += 1 bytecode_suffix = b"" - if insert_compiler_metadata: + if compiler_metadata is not None: # this will hold true when we are in initcode assert immutables_len is not None metadata = ( + compiler_metadata, len(runtime_code), data_section_lengths, immutables_len, From 5d8280feec16f86ae1a888e770f20a96113fdabd Mon Sep 17 00:00:00 2001 From: HodanPlodky <36966616+HodanPlodky@users.noreply.github.com> Date: Fri, 4 Oct 2024 19:50:02 +0000 Subject: [PATCH 3/6] fix[venom]: fix `_stack_reorder()` routine (#4220) fix an issue where `stack_reorder()` reorders operands incorrectly, resulting in the result stack not matching the target stack. this bug can manifest when there are multiple copies of an operand on the stack. in the `stack_reorder()` loop, an operand gets moved past one of its copies which has not been moved yet, resulting in the operand getting moved twice, instead of each copy of the operand getting moved once, since `get_depth()` returns the wrong copy of the operand after the first move. this commit fixes the issue by keeping track of the positions of each copy of each stack item, and ensuring that each copy only gets moved once. 
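as a simplified, self-contained illustration of why per-copy bookkeeping is needed (this is not the compiler code; the list-based stack and the `depth_of` / `per_copy_depths` helpers are made-up stand-ins): a lookup that always resolves to the top-most copy of a duplicated operand returns the same slot twice, whereas recording each copy's depth up front hands every copy out exactly once.

    # stack top is the end of the list; depth 0 is the top, -1 the item below it
    def depth_of(stack, op):
        # naive lookup: always resolves to the top-most copy of `op`
        for d, item in enumerate(reversed(stack)):
            if item == op:
                return -d
        return None

    def per_copy_depths(stack, ops):
        # record every copy's depth up front and hand each one out exactly once
        positions = {}
        for d, item in enumerate(reversed(stack)):
            positions.setdefault(item, []).append(-d)
        return [positions[op].pop() for op in ops]

    stack = ["%1", "%2", "%1"]  # two copies of %1 on the stack
    print([depth_of(stack, op) for op in ["%1", "%1"]])  # [0, 0]: the same copy twice
    print(per_copy_depths(stack, ["%1", "%1"]))          # [-2, 0]: each copy once
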
--------- Co-authored-by: Charles Cooper --- .../unit/compiler/venom/test_stack_reorder.py | 28 +++++++++++++ vyper/venom/venom_to_assembly.py | 39 +++++++++++++++++-- 2 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 tests/unit/compiler/venom/test_stack_reorder.py diff --git a/tests/unit/compiler/venom/test_stack_reorder.py b/tests/unit/compiler/venom/test_stack_reorder.py new file mode 100644 index 0000000000..a9f505984e --- /dev/null +++ b/tests/unit/compiler/venom/test_stack_reorder.py @@ -0,0 +1,28 @@ +from vyper.venom import generate_assembly_experimental +from vyper.venom.context import IRContext + + +def test_stack_reorder(): + """ + Test to was created from the example in the + issue https://github.com/vyperlang/vyper/issues/4215 + this example should fail with original stack reorder + algorithm but succeed with new one + """ + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + var0 = bb.append_instruction("store", 1) + var1 = bb.append_instruction("store", 2) + var2 = bb.append_instruction("store", 3) + var3 = bb.append_instruction("store", 4) + var4 = bb.append_instruction("store", 5) + + bb.append_instruction("staticcall", var0, var1, var2, var3, var4, var3) + + ret_val = bb.append_instruction("add", var4, var4) + + bb.append_instruction("ret", ret_val) + + generate_assembly_experimental(ctx) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 390fab8e7c..9de75dab38 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -1,4 +1,5 @@ -from collections import Counter +from bisect import insort +from collections import Counter, defaultdict from typing import Any from vyper.exceptions import CompilerPanic, StackTooDeep @@ -205,14 +206,29 @@ def _stack_reorder( stack = stack.copy() stack_ops_count = len(stack_ops) + if stack_ops_count == 0: + return 0 counts = Counter(stack_ops) + # positions stores the positions of relevant operands + # on stack for example operand %82 is on positions [0, 3] + # this operand could ocure even more deeper in the stack + # but only those that are needed/relevant in calculation + # are considered + positions: dict[IROperand, list[int]] = defaultdict(list) + for op in stack_ops: + positions[op] = [] + for i in range(counts[op]): + positions[op].append(stack.get_depth(op, i + 1)) + for i in range(stack_ops_count): op = stack_ops[i] final_stack_depth = -(stack_ops_count - i - 1) - depth = stack.get_depth(op, counts[op]) # type: ignore - counts[op] -= 1 + depth = positions[op].pop() # type: ignore + assert depth not in range( + -stack_ops_count + 1, final_stack_depth + ), f"{depth} : ({-stack_ops_count - 1}, {final_stack_depth})" if depth == StackModel.NOT_IN_STACK: raise CompilerPanic(f"Variable {op} not in stack") @@ -223,9 +239,26 @@ def _stack_reorder( if op == stack.peek(final_stack_depth): continue + # moves the top item to original position + top_item_positions = positions[stack.peek(0)] + if len(top_item_positions) != 0: + top_item_positions.remove(0) + insort(top_item_positions, depth) + cost += self.swap(assembly, stack, depth) + + # moves the item from final position to top + final_item_positions = positions[stack.peek(final_stack_depth)] + if final_stack_depth in final_item_positions: + final_item_positions.remove(final_stack_depth) + final_item_positions.insert(0, 0) + else: + final_item_positions.insert(0, 0) + cost += self.swap(assembly, stack, final_stack_depth) + assert stack._stack[-len(stack_ops) :] == stack_ops, 
(stack, stack_ops) + return cost def _emit_input_operands( From 0f809c6f61c36b02b5f9dbae67be01cdc4c6e0f1 Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Fri, 4 Oct 2024 22:14:03 +0200 Subject: [PATCH 4/6] chore[docs]: add binary installation methods (#4258) - Add instructions to install the binaries from github releases to the documentation - Show pip before docker, as it might be more common and easier to install - Update python requirement in the installation docs --------- Co-authored-by: Charles Cooper --- docs/installing-vyper.rst | 69 ++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/docs/installing-vyper.rst b/docs/installing-vyper.rst index 8eaa93590a..515d88f87c 100644 --- a/docs/installing-vyper.rst +++ b/docs/installing-vyper.rst @@ -7,37 +7,16 @@ any errors. .. note:: - The easiest way to experiment with the language is to use the `Remix online compiler `_. - (Activate the vyper-remix plugin in the Plugin manager.) + The easiest way to experiment with the language is to use either `Try Vyper! `_ (maintained by the Vyper team) or the `Remix online compiler `_ (maintained by the Ethereum Foundation). + - To use Try Vyper, go to https://try.vyperlang.org and log in (requires Github login). + - To use remix, go to https://remix.ethereum.org and activate the vyper-remix plugin in the Plugin manager. -Docker -****** - -Vyper can be downloaded as docker image from `dockerhub `_: -:: - - docker pull vyperlang/vyper - -To run the compiler use the ``docker run`` command: -:: - - docker run -v $(pwd):/code vyperlang/vyper /code/ - -Alternatively you can log into the docker image and execute vyper on the prompt. -:: - - docker run -v $(pwd):/code/ -it --entrypoint /bin/bash vyperlang/vyper - root@d35252d1fb1b:/code# vyper - -The normal parameters are also supported, for example: -:: +Binaries +******** - docker run -v $(pwd):/code vyperlang/vyper -f abi /code/ - [{'name': 'test1', 'outputs': [], 'inputs': [{'type': 'uint256', 'name': 'a'}, {'type': 'bytes', 'name': 'b'}], 'constant': False, 'payable': False, 'type': 'function', 'gas': 441}, {'name': 'test2', 'outputs': [], 'inputs': [{'type': 'uint256', 'name': 'a'}], 'constant': False, 'payable': False, 'type': 'function', 'gas': 316}] +Vyper binaries for Windows, Mac and Linux are available for download from the `GitHub releases page +`_. -.. note:: - - If you would like to know how to install Docker, please follow their `documentation `_. PIP *** @@ -45,7 +24,7 @@ PIP Installing Python ================= -Vyper can only be built using Python 3.6 and higher. If you need to know how to install the correct version of python, +Vyper can only be built using Python 3.10 and higher. If you need to know how to install the correct version of python, follow the instructions from the official `Python website `_. Creating a virtual environment @@ -76,13 +55,43 @@ Each tagged version of vyper is uploaded to `pypi `_: +:: + + docker pull vyperlang/vyper + +To run the compiler use the ``docker run`` command: +:: + + docker run -v $(pwd):/code vyperlang/vyper /code/ + +Alternatively you can log into the docker image and execute vyper on the prompt. 
+:: + + docker run -v $(pwd):/code/ -it --entrypoint /bin/bash vyperlang/vyper + root@d35252d1fb1b:/code# vyper + +The normal parameters are also supported, for example: +:: + + docker run -v $(pwd):/code vyperlang/vyper -f abi /code/ + [{'name': 'test1', 'outputs': [], 'inputs': [{'type': 'uint256', 'name': 'a'}, {'type': 'bytes', 'name': 'b'}], 'constant': False, 'payable': False, 'type': 'function', 'gas': 441}, {'name': 'test2', 'outputs': [], 'inputs': [{'type': 'uint256', 'name': 'a'}], 'constant': False, 'payable': False, 'type': 'function', 'gas': 316}] + +.. note:: + + If you would like to know how to install Docker, please follow their `documentation `_. + nix *** From c7669bd2ebe2c405aa5572b58311b51517568143 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 4 Oct 2024 16:16:33 -0400 Subject: [PATCH 5/6] fix[ux]: fix relpath compiler panic on windows (#4228) fix a bug where `os.path.relpath()` raises an exception on window - when the source path and the destination path are on different drives. this commit introduces the helper function `safe_relpath()`, which tries hard to construct a relpath (using `os.path.relpath()`), but falls back to the original path (which might be an absolute path) instead of raising an exception. references: - https://docs.python.org/3/library/os.path.html#os.path.relpath --- vyper/compiler/output_bundle.py | 9 ++++----- vyper/semantics/analysis/module.py | 5 ++--- vyper/utils.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/vyper/compiler/output_bundle.py b/vyper/compiler/output_bundle.py index 92494e3a70..06a84064a1 100644 --- a/vyper/compiler/output_bundle.py +++ b/vyper/compiler/output_bundle.py @@ -1,7 +1,6 @@ import importlib import io import json -import os import zipfile from dataclasses import dataclass from functools import cached_property @@ -13,7 +12,7 @@ from vyper.compiler.settings import Settings from vyper.exceptions import CompilerPanic from vyper.semantics.analysis.module import _is_builtin -from vyper.utils import get_long_version +from vyper.utils import get_long_version, safe_relpath # data structures and routines for constructing "output bundles", # basically reproducible builds of a vyper contract, with varying @@ -62,7 +61,7 @@ def compiler_inputs(self) -> dict[str, CompilerInput]: sources = {} for c in inputs: - path = os.path.relpath(c.resolved_path) + path = safe_relpath(c.resolved_path) # note: there should be a 1:1 correspondence between # resolved_path and source_id, but for clarity use resolved_path # since it corresponds more directly to search path semantics. 
@@ -73,7 +72,7 @@ def compiler_inputs(self) -> dict[str, CompilerInput]: @cached_property def compilation_target_path(self): p = PurePath(self.compiler_data.file_input.resolved_path) - p = os.path.relpath(p) + p = safe_relpath(p) return _anonymize(p) @cached_property @@ -121,7 +120,7 @@ def used_search_paths(self) -> list[str]: sps = [sp for sp, count in tmp.items() if count > 0] assert len(sps) > 0 - return [_anonymize(os.path.relpath(sp)) for sp in sps] + return [_anonymize(safe_relpath(sp)) for sp in sps] class OutputBundleWriter: diff --git a/vyper/semantics/analysis/module.py b/vyper/semantics/analysis/module.py index d05e494b80..90bb631e14 100644 --- a/vyper/semantics/analysis/module.py +++ b/vyper/semantics/analysis/module.py @@ -1,4 +1,3 @@ -import os from pathlib import Path, PurePath from typing import Any, Optional @@ -58,7 +57,7 @@ from vyper.semantics.types.function import ContractFunctionT from vyper.semantics.types.module import ModuleT from vyper.semantics.types.utils import type_from_annotation -from vyper.utils import OrderedSet +from vyper.utils import OrderedSet, safe_relpath def analyze_module( @@ -921,7 +920,7 @@ def _load_builtin_import(level: int, module_str: str) -> tuple[CompilerInput, In # hygiene: convert to relpath to avoid leaking user directory info # (note Path.relative_to cannot handle absolute to relative path # conversion, so we must use the `os` module). - builtins_path = os.path.relpath(builtins_path) + builtins_path = safe_relpath(builtins_path) search_path = Path(builtins_path).parent.parent.parent # generate an input bundle just because it knows how to build paths. diff --git a/vyper/utils.py b/vyper/utils.py index 3f19a9d15c..5307cd115e 100644 --- a/vyper/utils.py +++ b/vyper/utils.py @@ -4,6 +4,7 @@ import enum import functools import hashlib +import os import sys import time import traceback @@ -599,3 +600,12 @@ def annotate_source_code( cleanup_lines += [""] * (num_lines - len(cleanup_lines)) return "\n".join(cleanup_lines) + + +def safe_relpath(path): + try: + return os.path.relpath(path) + except ValueError: + # on Windows, if path and curdir are on different drives, an exception + # can be thrown + return path From 0e29db0d7f8bd0689c0d30c7de1796001382ec32 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 4 Oct 2024 17:58:37 -0400 Subject: [PATCH 6/6] feat[venom]: store expansion pass (#4068) expand `extract_literals` pass (introduced in bb9129aaa42b2f810f) to also extract variables and rename it to `store_expansion`, allowing for the dft pass to reorder variable uses more effectively. since this also gives us the guarantee that each variable is used exactly once (besides by store instructions), this allows us to make some simplifications in `venom_to_assembly.py`, since we no longer need to account for the same variable occurring on the stack more than one time (cf. for example 5d8280feec16f86ae). this results in a stack scheduler improvement. for example: - examples/tokens/ERC20.vy has a 20 byte codesize improvement - examples/auctions/blind_auction.vy has a 145 byte codesize improvement - examples/voting/ballot.vy has a 28 byte codesize improvement across a range of contracts, the improvement seems to be between 1-2%, but sometimes as high as 5% since stack operands are now guaranteed to be unique, the old rule to avoid swapping if two stack operands are the same no longer works. to address this, this commit adds an equivalence analysis. 
this creates equivalence sets of equivalent variables based on store chains, and then generalizes the rule from "don't swap if two stack operands are the same" to "don't swap if two stack operands are equivalent". --- .../compiler/venom/test_duplicate_operands.py | 12 +- .../unit/compiler/venom/test_stack_cleanup.py | 3 +- .../unit/compiler/venom/test_stack_reorder.py | 5 + vyper/venom/__init__.py | 5 +- vyper/venom/analysis/equivalent_vars.py | 41 ++++++ ...extract_literals.py => store_expansion.py} | 13 +- vyper/venom/stack_model.py | 7 +- vyper/venom/venom_to_assembly.py | 125 +++++++++--------- 8 files changed, 133 insertions(+), 78 deletions(-) create mode 100644 vyper/venom/analysis/equivalent_vars.py rename vyper/venom/passes/{extract_literals.py => store_expansion.py} (73%) diff --git a/tests/unit/compiler/venom/test_duplicate_operands.py b/tests/unit/compiler/venom/test_duplicate_operands.py index fbff0835d2..ab55649dae 100644 --- a/tests/unit/compiler/venom/test_duplicate_operands.py +++ b/tests/unit/compiler/venom/test_duplicate_operands.py @@ -1,6 +1,8 @@ from vyper.compiler.settings import OptimizationLevel from vyper.venom import generate_assembly_experimental +from vyper.venom.analysis.analysis import IRAnalysesCache from vyper.venom.context import IRContext +from vyper.venom.passes.store_expansion import StoreExpansionPass def test_duplicate_operands(): @@ -13,7 +15,7 @@ def test_duplicate_operands(): %3 = mul %1, %2 stop - Should compile to: [PUSH1, 10, DUP1, DUP1, DUP1, ADD, MUL, POP, STOP] + Should compile to: [PUSH1, 10, DUP1, DUP2, ADD, MUL, POP, STOP] """ ctx = IRContext() fn = ctx.create_function("test") @@ -23,5 +25,9 @@ def test_duplicate_operands(): bb.append_instruction("mul", sum_, op) bb.append_instruction("stop") - asm = generate_assembly_experimental(ctx, optimize=OptimizationLevel.GAS) - assert asm == ["PUSH1", 10, "DUP1", "DUP1", "ADD", "MUL", "POP", "STOP"] + ac = IRAnalysesCache(fn) + StoreExpansionPass(ac, fn).run_pass() + + optimize = OptimizationLevel.GAS + asm = generate_assembly_experimental(ctx, optimize=optimize) + assert asm == ["PUSH1", 10, "DUP1", "DUP2", "ADD", "MUL", "POP", "STOP"] diff --git a/tests/unit/compiler/venom/test_stack_cleanup.py b/tests/unit/compiler/venom/test_stack_cleanup.py index 6015cf1c41..7198861771 100644 --- a/tests/unit/compiler/venom/test_stack_cleanup.py +++ b/tests/unit/compiler/venom/test_stack_cleanup.py @@ -9,7 +9,8 @@ def test_cleanup_stack(): bb = fn.get_basic_block() ret_val = bb.append_instruction("param") op = bb.append_instruction("store", 10) - bb.append_instruction("add", op, op) + op2 = bb.append_instruction("store", op) + bb.append_instruction("add", op, op2) bb.append_instruction("ret", ret_val) asm = generate_assembly_experimental(ctx, optimize=OptimizationLevel.GAS) diff --git a/tests/unit/compiler/venom/test_stack_reorder.py b/tests/unit/compiler/venom/test_stack_reorder.py index a9f505984e..a15dd4d540 100644 --- a/tests/unit/compiler/venom/test_stack_reorder.py +++ b/tests/unit/compiler/venom/test_stack_reorder.py @@ -1,5 +1,7 @@ from vyper.venom import generate_assembly_experimental +from vyper.venom.analysis.analysis import IRAnalysesCache from vyper.venom.context import IRContext +from vyper.venom.passes.store_expansion import StoreExpansionPass def test_stack_reorder(): @@ -25,4 +27,7 @@ def test_stack_reorder(): bb.append_instruction("ret", ret_val) + ac = IRAnalysesCache(fn) + StoreExpansionPass(ac, fn).run_pass() + generate_assembly_experimental(ctx) diff --git a/vyper/venom/__init__.py 
b/vyper/venom/__init__.py index afd79fc44f..a5f51b787d 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -12,13 +12,13 @@ from vyper.venom.passes.algebraic_optimization import AlgebraicOptimizationPass from vyper.venom.passes.branch_optimization import BranchOptimizationPass from vyper.venom.passes.dft import DFTPass -from vyper.venom.passes.extract_literals import ExtractLiteralsPass from vyper.venom.passes.make_ssa import MakeSSA from vyper.venom.passes.mem2var import Mem2Var from vyper.venom.passes.remove_unused_variables import RemoveUnusedVariablesPass from vyper.venom.passes.sccp import SCCP from vyper.venom.passes.simplify_cfg import SimplifyCFGPass from vyper.venom.passes.store_elimination import StoreElimination +from vyper.venom.passes.store_expansion import StoreExpansionPass from vyper.venom.venom_to_assembly import VenomCompiler DEFAULT_OPT_LEVEL = OptimizationLevel.default() @@ -54,8 +54,9 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: SimplifyCFGPass(ac, fn).run_pass() AlgebraicOptimizationPass(ac, fn).run_pass() BranchOptimizationPass(ac, fn).run_pass() - ExtractLiteralsPass(ac, fn).run_pass() RemoveUnusedVariablesPass(ac, fn).run_pass() + + StoreExpansionPass(ac, fn).run_pass() DFTPass(ac, fn).run_pass() diff --git a/vyper/venom/analysis/equivalent_vars.py b/vyper/venom/analysis/equivalent_vars.py new file mode 100644 index 0000000000..9b0c03e3d1 --- /dev/null +++ b/vyper/venom/analysis/equivalent_vars.py @@ -0,0 +1,41 @@ +from vyper.venom.analysis.analysis import IRAnalysis +from vyper.venom.analysis.dfg import DFGAnalysis +from vyper.venom.basicblock import IRVariable + + +class VarEquivalenceAnalysis(IRAnalysis): + """ + Generate equivalence sets of variables. This is used to avoid swapping + variables which are the same during venom_to_assembly. Theoretically, + the DFTPass should order variable declarations optimally, but, it is + not aware of the "pickaxe" heuristic in venom_to_assembly, so they can + interfere. 
+ """ + + def analyze(self): + dfg = self.analyses_cache.request_analysis(DFGAnalysis) + + equivalence_set: dict[IRVariable, int] = {} + + for bag, (var, inst) in enumerate(dfg._dfg_outputs.items()): + if inst.opcode != "store": + continue + + source = inst.operands[0] + + assert var not in equivalence_set # invariant + if source in equivalence_set: + equivalence_set[var] = equivalence_set[source] + continue + else: + equivalence_set[var] = bag + equivalence_set[source] = bag + + self._equivalence_set = equivalence_set + + def equivalent(self, var1, var2): + if var1 not in self._equivalence_set: + return False + if var2 not in self._equivalence_set: + return False + return self._equivalence_set[var1] == self._equivalence_set[var2] diff --git a/vyper/venom/passes/extract_literals.py b/vyper/venom/passes/store_expansion.py similarity index 73% rename from vyper/venom/passes/extract_literals.py rename to vyper/venom/passes/store_expansion.py index 91c0813e67..7718e67d33 100644 --- a/vyper/venom/passes/extract_literals.py +++ b/vyper/venom/passes/store_expansion.py @@ -1,12 +1,13 @@ from vyper.venom.analysis.dfg import DFGAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis -from vyper.venom.basicblock import IRInstruction, IRLiteral +from vyper.venom.basicblock import IRInstruction, IRLiteral, IRVariable from vyper.venom.passes.base_pass import IRPass -class ExtractLiteralsPass(IRPass): +class StoreExpansionPass(IRPass): """ - This pass extracts literals so that they can be reordered by the DFT pass + This pass extracts literals and variables so that they can be + reordered by the DFT pass """ def run_pass(self): @@ -20,7 +21,7 @@ def _process_bb(self, bb): i = 0 while i < len(bb.instructions): inst = bb.instructions[i] - if inst.opcode in ("store", "offset"): + if inst.opcode in ("store", "offset", "phi", "param"): i += 1 continue @@ -29,9 +30,11 @@ def _process_bb(self, bb): if inst.opcode == "log" and j == 0: continue - if isinstance(op, IRLiteral): + if isinstance(op, (IRVariable, IRLiteral)): var = self.function.get_next_variable() to_insert = IRInstruction("store", [op], var) bb.insert_instruction(to_insert, index=i) inst.operands[j] = var + i += 1 + i += 1 diff --git a/vyper/venom/stack_model.py b/vyper/venom/stack_model.py index a98e5bb25b..e284b41fb2 100644 --- a/vyper/venom/stack_model.py +++ b/vyper/venom/stack_model.py @@ -30,7 +30,7 @@ def push(self, op: IROperand) -> None: def pop(self, num: int = 1) -> None: del self._stack[len(self._stack) - num :] - def get_depth(self, op: IROperand, n: int = 1) -> int: + def get_depth(self, op: IROperand) -> int: """ Returns the depth of the n-th matching operand in the stack map. If the operand is not in the stack map, returns NOT_IN_STACK. 
@@ -39,10 +39,7 @@ def get_depth(self, op: IROperand, n: int = 1) -> int: for i, stack_op in enumerate(reversed(self._stack)): if stack_op.value == op.value: - if n <= 1: - return -i - else: - n -= 1 + return -i return StackModel.NOT_IN_STACK # type: ignore diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 9de75dab38..56228d53d2 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -1,5 +1,3 @@ -from bisect import insort -from collections import Counter, defaultdict from typing import Any from vyper.exceptions import CompilerPanic, StackTooDeep @@ -13,6 +11,7 @@ ) from vyper.utils import MemoryPositions, OrderedSet from vyper.venom.analysis.analysis import IRAnalysesCache +from vyper.venom.analysis.equivalent_vars import VarEquivalenceAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis from vyper.venom.basicblock import ( IRBasicBlock, @@ -26,6 +25,10 @@ from vyper.venom.passes.normalization import NormalizationPass from vyper.venom.stack_model import StackModel +DEBUG_SHOW_COST = False +if DEBUG_SHOW_COST: + import sys + # instructions which map one-to-one from venom to EVM _ONE_TO_ONE_INSTRUCTIONS = frozenset( [ @@ -153,6 +156,7 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: NormalizationPass(ac, fn).run_pass() self.liveness_analysis = ac.request_analysis(LivenessAnalysis) + self.equivalence = ac.request_analysis(VarEquivalenceAnalysis) assert fn.normalized, "Non-normalized CFG!" @@ -199,36 +203,19 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: def _stack_reorder( self, assembly: list, stack: StackModel, stack_ops: list[IROperand], dry_run: bool = False ) -> int: - cost = 0 - if dry_run: assert len(assembly) == 0, "Dry run should not work on assembly" stack = stack.copy() - stack_ops_count = len(stack_ops) - if stack_ops_count == 0: + if len(stack_ops) == 0: return 0 - counts = Counter(stack_ops) - - # positions stores the positions of relevant operands - # on stack for example operand %82 is on positions [0, 3] - # this operand could ocure even more deeper in the stack - # but only those that are needed/relevant in calculation - # are considered - positions: dict[IROperand, list[int]] = defaultdict(list) - for op in stack_ops: - positions[op] = [] - for i in range(counts[op]): - positions[op].append(stack.get_depth(op, i + 1)) - - for i in range(stack_ops_count): - op = stack_ops[i] - final_stack_depth = -(stack_ops_count - i - 1) - depth = positions[op].pop() # type: ignore - assert depth not in range( - -stack_ops_count + 1, final_stack_depth - ), f"{depth} : ({-stack_ops_count - 1}, {final_stack_depth})" + assert len(stack_ops) == len(set(stack_ops)) # precondition + + cost = 0 + for i, op in enumerate(stack_ops): + final_stack_depth = -(len(stack_ops) - i - 1) + depth = stack.get_depth(op) if depth == StackModel.NOT_IN_STACK: raise CompilerPanic(f"Variable {op} not in stack") @@ -236,25 +223,14 @@ def _stack_reorder( if depth == final_stack_depth: continue - if op == stack.peek(final_stack_depth): + to_swap = stack.peek(final_stack_depth) + if self.equivalence.equivalent(op, to_swap): + # perform a "virtual" swap + stack.poke(final_stack_depth, op) + stack.poke(depth, to_swap) continue - # moves the top item to original position - top_item_positions = positions[stack.peek(0)] - if len(top_item_positions) != 0: - top_item_positions.remove(0) - insort(top_item_positions, depth) - cost += self.swap(assembly, stack, depth) - - # moves the item from final 
position to top - final_item_positions = positions[stack.peek(final_stack_depth)] - if final_stack_depth in final_item_positions: - final_item_positions.remove(final_stack_depth) - final_item_positions.insert(0, 0) - else: - final_item_positions.insert(0, 0) - cost += self.swap(assembly, stack, final_stack_depth) assert stack._stack[-len(stack_ops) :] == stack_ops, (stack, stack_ops) @@ -273,19 +249,14 @@ def _emit_input_operands( # been scheduled to be killed. now it's just a matter of emitting # SWAPs, DUPs and PUSHes until we match the `ops` argument - # dumb heuristic: if the top of stack is not wanted here, swap - # it with something that is wanted - if ops and stack.height > 0 and stack.peek(0) not in ops: - for op in ops: - if isinstance(op, IRVariable) and op not in next_liveness: - self.swap_op(assembly, stack, op) - break + # to validate store expansion invariant - + # each op is emitted at most once. + seen: set[IROperand] = set() - emitted_ops = OrderedSet[IROperand]() for op in ops: if isinstance(op, IRLabel): - # invoke emits the actual instruction itself so we don't need to emit it here - # but we need to add it to the stack map + # invoke emits the actual instruction itself so we don't need + # to emit it here but we need to add it to the stack map if inst.opcode != "invoke": assembly.append(f"_sym_{op.value}") stack.push(op) @@ -300,13 +271,12 @@ def _emit_input_operands( stack.push(op) continue - if op in next_liveness and op not in emitted_ops: + if op in next_liveness: self.dup_op(assembly, stack, op) - if op in emitted_ops: - self.dup_op(assembly, stack, op) - - emitted_ops.add(op) + # guaranteed by store expansion + assert op not in seen, (op, seen) + seen.add(op) def _generate_evm_for_basicblock_r( self, asm: list, basicblock: IRBasicBlock, stack: StackModel @@ -315,6 +285,12 @@ def _generate_evm_for_basicblock_r( return self.visited_basicblocks.add(basicblock) + if DEBUG_SHOW_COST: + print(basicblock, file=sys.stderr) + + ref = asm + asm = [] + # assembly entry point into the block asm.append(f"_sym_{basicblock.label}") asm.append("JUMPDEST") @@ -330,8 +306,14 @@ def _generate_evm_for_basicblock_r( asm.extend(self._generate_evm_for_instruction(inst, stack, next_liveness)) + if DEBUG_SHOW_COST: + print(" ".join(map(str, asm)), file=sys.stderr) + print("\n", file=sys.stderr) + + ref.extend(asm) + for bb in basicblock.reachable: - self._generate_evm_for_basicblock_r(asm, bb, stack.copy()) + self._generate_evm_for_basicblock_r(ref, bb, stack.copy()) # pop values from stack at entry to bb # note this produces the same result(!) no matter which basic block @@ -413,6 +395,7 @@ def _generate_evm_for_instruction( # find an instance of %13 *or* %14 in the stack and replace it with %56. 
to_be_replaced = stack.peek(depth) if to_be_replaced in next_liveness: + # this branch seems unreachable (maybe due to make_ssa) # %13/%14 is still live(!), so we make a copy of it self.dup(assembly, stack, depth) stack.poke(0, ret) @@ -454,6 +437,13 @@ def _generate_evm_for_instruction( if cost_with_swap > cost_no_swap: operands[-1], operands[-2] = operands[-2], operands[-1] + cost = self._stack_reorder([], stack, operands, dry_run=True) + if DEBUG_SHOW_COST and cost: + print("ENTER", inst, file=sys.stderr) + print(" HAVE", stack, file=sys.stderr) + print(" WANT", operands, file=sys.stderr) + print(" COST", cost, file=sys.stderr) + # final step to get the inputs to this instruction ordered # correctly on the stack self._stack_reorder(assembly, stack, operands) @@ -570,10 +560,21 @@ def _generate_evm_for_instruction( if inst.output not in next_liveness: self.pop(assembly, stack) else: - # peek at next_liveness to find the next scheduled item, - # and optimistically swap with it + # heuristic: peek at next_liveness to find the next scheduled + # item, and optimistically swap with it + if DEBUG_SHOW_COST: + stack0 = stack.copy() + next_scheduled = next_liveness.last() - self.swap_op(assembly, stack, next_scheduled) + cost = 0 + if not self.equivalence.equivalent(inst.output, next_scheduled): + cost = self.swap_op(assembly, stack, next_scheduled) + + if DEBUG_SHOW_COST and cost != 0: + print("ENTER", inst, file=sys.stderr) + print(" HAVE", stack0, file=sys.stderr) + print(" NEXT LIVENESS", next_liveness, file=sys.stderr) + print(" NEW_STACK", stack, file=sys.stderr) return apply_line_numbers(inst, assembly) @@ -595,7 +596,7 @@ def dup(self, assembly, stack, depth): assembly.append(_evm_dup_for(depth)) def swap_op(self, assembly, stack, op): - self.swap(assembly, stack, stack.get_depth(op)) + return self.swap(assembly, stack, stack.get_depth(op)) def dup_op(self, assembly, stack, op): self.dup(assembly, stack, stack.get_depth(op))
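
a minimal standalone sketch of the store-chain equivalence rule described in PATCH 6/6 (not the compiler code; `defs` and `equivalence_bags` are hypothetical stand-ins for the DFG): variables linked by a chain of `store` instructions end up in the same bag, so the stack scheduler may treat them as interchangeable and skip the swap.

    def equivalence_bags(defs):
        # defs maps each variable to the (opcode, operands) that produced it
        bags = {}
        next_bag = 0
        for var, (opcode, operands) in defs.items():
            if opcode != "store":
                continue
            source = operands[0]
            if source in bags:
                bags[var] = bags[source]
            else:
                bags[var] = bags[source] = next_bag
                next_bag += 1
        return bags

    # %2 = store %1 ; %3 = store %2 ; %5 = add %3, %4
    defs = {"%2": ("store", ["%1"]), "%3": ("store", ["%2"]), "%5": ("add", ["%3", "%4"])}
    bags = equivalence_bags(defs)
    print(bags["%1"] == bags["%3"])          # True: same store chain, the swap can be skipped
    print(bags.get("%5") == bags.get("%3"))  # False: %5 is not a copy of %3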