From 4a997d398eff31bbaeb39be7e07070537edffa22 Mon Sep 17 00:00:00 2001 From: Jonathan Becker <64037729+Jon-Becker@users.noreply.github.com> Date: Sat, 8 Apr 2023 13:04:54 -0500 Subject: [PATCH 1/8] :zap: perf: clippy perf fixes --- .../src/decompile/out/postprocessers/yul.rs | 18 ++- heimdall/src/decompile/out/yul.rs | 2 +- scripts/clippy | 1 + scripts/execute.py | 108 ------------------ 4 files changed, 9 insertions(+), 120 deletions(-) create mode 100644 scripts/clippy delete mode 100644 scripts/execute.py diff --git a/heimdall/src/decompile/out/postprocessers/yul.rs b/heimdall/src/decompile/out/postprocessers/yul.rs index 136448fe..35c17b05 100644 --- a/heimdall/src/decompile/out/postprocessers/yul.rs +++ b/heimdall/src/decompile/out/postprocessers/yul.rs @@ -39,7 +39,7 @@ fn remove_double_negation(line: String) -> String { let subject = second_subject[second_subject_indices.0+1..second_subject_indices.1-1].to_string(); // replace the double negation with the subject - cleaned = cleaned.replace(&format!("iszero(iszero({}))", subject), &subject); + cleaned = cleaned.replace(&format!("iszero(iszero({subject}))"), &subject); } } } @@ -78,13 +78,13 @@ fn convert_bitmask_to_casting(line: String) -> String { }; // construct new string with casting - let new_str = format!("bytes{}({})", size_bytes, arg2); + let new_str = format!("bytes{size_bytes}({arg2})"); // replace old string with new string cleaned.replace_range(index..end_index + index, &new_str); // set index for next iteration of loop - index += format!("bytes{}(", size_bytes).len(); + index += format!("bytes{size_bytes}(").len(); } cleaned @@ -130,7 +130,7 @@ fn remove_replace_casts(line: String) -> String { let (cast_start, cast_end, cast_type) = find_cast(cleaned.to_string()); if let Some(cast_type) = cast_type { let cast_arg = &cleaned[cast_start + 1..cast_end-1]; - let yul_cast = format!("({}) : {}", cast_arg, cast_type); + let yul_cast = format!("({cast_arg}) : {cast_type}"); cleaned.replace_range(cast_start-cast_type.len()..=cast_end-1, &yul_cast); } @@ -272,15 +272,11 @@ fn add_resolved_events(line: String, all_resolved_events: HashMap>() - .join(", "), + resolved_event.inputs.join(", "), cleaned ) } @@ -353,7 +349,7 @@ pub fn postprocess( *line = format!( "{}{}", " ".repeat(indentation * 4), - cleaned.replace("\n", &format!("\n{}", " ".repeat(indentation * 4))) + cleaned.replace('\n', &format!("\n{}", " ".repeat(indentation * 4))) ); // indent due to opening braces diff --git a/heimdall/src/decompile/out/yul.rs b/heimdall/src/decompile/out/yul.rs index 10c628d7..ed94bc7f 100644 --- a/heimdall/src/decompile/out/yul.rs +++ b/heimdall/src/decompile/out/yul.rs @@ -305,7 +305,7 @@ pub fn output( decompiled_output.extend( DECOMPILED_SOURCE_HEADER_YUL .replace("{}", env!("CARGO_PKG_VERSION")) - .split("\n") + .split('\n') .map(|x| x.to_string()), ); diff --git a/scripts/clippy b/scripts/clippy new file mode 100644 index 00000000..af0c7d99 --- /dev/null +++ b/scripts/clippy @@ -0,0 +1 @@ +cargo clippy -- --allow clippy::new_without_default --allow clippy::redundant_field_names --allow clippy::too_many_arguments \ No newline at end of file diff --git a/scripts/execute.py b/scripts/execute.py deleted file mode 100644 index 91253f3c..00000000 --- a/scripts/execute.py +++ /dev/null @@ -1,108 +0,0 @@ -import subprocess - -x = ['0x95aBF379c57B6732d83c25Bf9FD401854BC9D26e', -'0x776b5d686C1Be032Cb11433da2D54F7c1DacCd0b', -'0xC03d0eCf195e3197585A71f6EAaF8d64017c083c', -'0xa3DE5a1A6122D277b9eb9002FE23662298561e3f', -'0x7248394e2A2E68034020fb6292D677B0f50A21bf', -'0x0Ce4b84654286d8f4Deb42f6304B6d43168DcE0D', -'0x7304Fa27c4089Ea02a50cA0856dF22AAe146fdBF', -'0x0E1581Bc43535f9d0e797409BD71F2dbdD2F47be', -'0xA27E13d4B9F18870976CECcC09C09F1A376EC3e0', -'0x4cd127DF1AeEd8b0D6237Bb8C587e417ccF45D08', -'0xb83AcE7bfB3768EAe4Af2c5E43dE7b7b97Df0551', -'0xe43317aB6B853582550B1Fe577B75dC09C780216', -'0x7F5836a6b6eAd870C385719d75Ac1156E0959312', -'0x6B1dbcDC3c790da7Fc9bCec8798AE807ff569978', -'0x7128ed210994c18fBe4B9A1cA2BDacB8CddB76E6', -'0x191441E2861fb073346D77bA9b0f6d04b0E56514', -'0xB25137cC24ff1009134d908FeF6a7eA17E3d1bb0', -'0x58f6D49F0b3f42AfB88De60C40ff4cd7A468E61c', -'0xb13cC4ED07349Fa8bdD0B78291c50F62b7E9EE5f', -'0x459895483556daD32526eFa461F75E33E458d9E9', -'0x1bAb335628CFd8FF8B0e5ebbfa4fcA386Fa90624', -'0x5654A61552bF4bA14110B526EdEC9E7a15cA3D37', -'0xa13D9C3f08f57b183Efd779F580562630C9b9D24', -'0x9bed64dAc8F963608A37C37aFf9928Ef13347D2A', -'0x9370045CE37F381500ac7D6802513bb89871e076', -'0xCfA365B7d917ce41162BA1F75162c1360666Ba08', -'0x19caF51BC94595FbE3D87d6B0725a9697Bec5C6f', -'0x6be9bd0f34B87bf9c8D6350e613b05254b58A452', -'0x43745D585bEeE747CD2D18A0748c919de96b752C', -'0x90332aB1bfeBD0bb5e47D8ecC4cB942a1B1Ef3C0', -'0x08a82B5913094E9b3363108aa2f6b1eC74546248', -'0x662275FE322b3Bb7eB09cF376550eA70f393D427', -'0xEa2511661c9B6cD551C9d2994b548cA5FA13300b', -'0x04C9eCDA31f96633C4Eb7C9E30d475065170b56D', -'0x39881156DEc525e978796A2DA7Dc6e3690188e2E', -'0xBD500bEb602fa1723966A8115f2EAa79BD037076', -'0xd5Dc9D3225d483B266E38af2a68003cdbe45ecC7', -'0xD8e3d466F78E02F195DDD979462162C7CD49E6Df', -'0x1b5abBA5fc616322da9c8B4c0721ed850D036C77', -'0xFfa60CD831374F40358de1aA0489123935a8287D', -'0x61F70DfA3d1Ffa753C308DbEDd25e966AC200F3F', -'0x3b774768E0733BfaA0e08B7a80d6e7C025307Ae1', -'0xCB0316076Ea2AbFD95531bc3620C27803AE923f0', -'0x670Ca0967fbeCf9d8D1dAe203270c551F1A8990e', -'0x02d555E9C920Ddd56B9863208956D8F2a65763f3', -'0xFF4518dB139E0C9C5067c81157Bd51D7f1841F09', -'0x69954f936292187933c48462D43259d32616F3D6', -'0x0C6822ca73dE6871f27ACD9ca05A05b99294B805', -'0x7912FCA3b78dc6c48A52A888606356E4c34B2880', -'0x69147ca3F35b3A0C93248af44c4cFa2060C3806b', -'0x139b44AC101BAeB687789432A163447779632095', -'0x2A3B0EA72CD4Ac316b5069F6bc37F1A4eE25dAf1', -'0x47C3435677A3A3d371145e651f399eF52b012c23', -'0x403C3741DFe6a6AfED0A50D68E07f1DF3B4e24c5', -'0x94e675CE9E0A3274B1bca371Ad48b34D98C9a310', -'0xDA2Eea532049d183101f38f13549AF3ec4ccA8Ee', -'0x505058ce39F940690cE6EC020cB63cD284183c9A', -'0xcBef99c9eF80106e7189c49D6D899A8Abcdcb487', -'0x908727dc3f86bB42b3d16CA5cDBdA8607811Ae41', -'0xBa2f291070Ddd49d005dbd3Ff06Cd1b986B6c542', -'0xe2D055e53cE9517365F8c4957a247763C6298B32', -'0x2EB3d994256E5aDfC966961516e930D71Ff5563a', -'0x7b8fFFB8340F304CB47687bC092a0382b9387BD7', -'0xBEb246024e9329171deC17998e4295c56aA62aC2', -'0xcD60aa27E8E96b58FF93aC8EBb84D865Ba3a7511', -'0x054560ee2f10b316746881726afAc1A71b98dDCD', -'0xdF17253AB203d308A1FACA1E507D7E7177fDA97D', -'0xb30550760A511A67A6798DE6035672e593365E03', -'0x41d061494F9D6E8A7295A17D36469f94675CA22b', -'0x6d3aEEEbBc7683fC6AaE91f09125cAc1fa3f15F6', -'0xCe82F64C4fa0BD50517F15E4FB998FA0397a3b60', -'0x959D2a68de1E58C9A641ECB672302e88E4fC7560', -'0xD45Ed1FB0D7E03d343a14971f73Db8297bC64ea6', -'0xD31A1840EA87A2101028E0203FB17FBD3E1DE02C', -'0x95C8752092B2C336585F5178F299ea40a36399Ea', -'0xaf60a6C7639e5090c21954Ff3ad453E35d364378', -'0xc4478854ED468A522B5724Db6A8A144f5A8fbe8B', -'0x4bDB521da745591c2C630e4A4553CCfa2288dA93', -'0xa925Bedb9E69e94D4DC13899dF39a46025aB63e6', -'0x7AfEdA4c714e1C0A2a1248332c100924506aC8e6', -'0x0654c99f46B49Acb08c455DA39111f772355F00a', -'0xF0214D3FFe6F49367f06065d7875536372efE37f', -'0x3c1344ba585Ba280A62519f36edA53a4b190BbFf', -'0x65B8357E592A3E39D1b2f1013FA1b35B37a828A7', -'0x0B608729ee4bD511319b9366809986135A194ba4', -'0xca54F7DEF56aee0c9AD84289E715d86445aC1b1A', -'0x9f2028FCF252bD1fa41cfafbDC15Da8a8cc42992', -'0x76c7133e59547FD398019e6442CEBeE1321546Ae', -'0x528b332B4049fBc4AE0752F9Ac30E635253A601f', -'0xF83402D554eD765E6aa3523d1F52FCD3Ef75D44e', -'0x7418e611bdaF8487Be593aCf7dDA3301909c7562', -'0x15d4D0307E9919759E7Dcaf7e3a9b6ecAa8ed9D2', -'0x52F298501913F6e5afb65C9BF382b1b4B8D4E6A3', -'0x0b440b3821257331F54873FbA9915DD664C59c39', -'0x55023f852D5bfc6A6C7660dc8C7e3f2B3e38490B', -'0x136Fc16A14837DA6B4a0c1fF54cEf6159f86C53f', -'0xACaDaF7FB6156D3c1832c27612611C735baBB495', -'0xb15d0Fb1574ecd6f14022E3F2EF1140a24730124', -'0x6749987cE65b985A1703932949aF794f561C8f7a', -'0xD97062C3B748A19fa52CB1E6B69E0c24cE71B00d',] - -for address in x: - print(address) - - # run the command 'echo "address"' with sys - print(subprocess.run(["cargo", "run", "--release", "--", "decompile", address, "-vvv", "-d", "--output", "./contracts/" + address, "--include-sol"])) \ No newline at end of file From 055bd1b83447dd23257bc7a8d787ae839e2d8744 Mon Sep 17 00:00:00 2001 From: Jonathan Becker <64037729+Jon-Becker@users.noreply.github.com> Date: Tue, 11 Apr 2023 09:38:31 -0400 Subject: [PATCH 2/8] :zap: perf: postprocess overwritten, dead assignments --- .vscode/settings.json | 5 + common/src/ether/compiler.rs | 84 +++++++++++ common/src/ether/mod.rs | 2 + common/src/ether/selectors.rs | 104 +++++++++++++ heimdall/src/cfg/mod.rs | 6 +- heimdall/src/cfg/util.rs | 108 ------------- heimdall/src/decode/mod.rs | 8 +- heimdall/src/decompile/mod.rs | 4 + .../decompile/out/postprocessers/solidity.rs | 12 +- heimdall/src/decompile/resolve.rs | 48 +----- heimdall/src/decompile/util.rs | 142 ------------------ 11 files changed, 220 insertions(+), 303 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 common/src/ether/compiler.rs create mode 100644 common/src/ether/selectors.rs diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..d8607a11 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "rust-analyzer.linkedProjects": [ + "/common/Cargo.toml" + ] +} \ No newline at end of file diff --git a/common/src/ether/compiler.rs b/common/src/ether/compiler.rs new file mode 100644 index 00000000..d1cc9b29 --- /dev/null +++ b/common/src/ether/compiler.rs @@ -0,0 +1,84 @@ +// returns the compiler version used to compile the contract. +// for example: (solc, 0.8.10) or (vyper, 0.2.16) +pub fn detect_compiler(bytecode: String) -> (String, String) { + + let mut compiler = "unknown".to_string(); + let mut version = "unknown".to_string(); + + // perfom prefix check for rough version matching + if bytecode.starts_with("363d3d373d3d3d363d73") { + compiler = "proxy".to_string(); + version = "minimal".to_string(); + } + else if bytecode.starts_with("366000600037611000600036600073") { + compiler = "proxy".to_string(); + version = "vyper".to_string(); + } + else if bytecode.starts_with("6004361015") { + compiler = "vyper".to_string(); + version = "0.2.0-0.2.4,0.2.11-0.3.3".to_string(); + } + else if bytecode.starts_with("341561000a") { + compiler = "vyper".to_string(); + version = "0.2.5-0.2.8".to_string(); + } + else if bytecode.starts_with("731bf797") { + compiler = "solc".to_string(); + version = "0.4.10-0.4.24".to_string(); + } + else if bytecode.starts_with("6080604052") { + compiler = "solc".to_string(); + version = "0.4.22+".to_string(); + } + else if bytecode.starts_with("6060604052") { + compiler = "solc".to_string(); + version = "0.4.11-0.4.21".to_string(); + } + else if bytecode.contains("7679706572") { + compiler = "vyper".to_string(); + } + else if bytecode.contains("736f6c63") { + compiler = "solc".to_string(); + } + + // perform metadata check + if compiler == "solc" { + let compiler_version = bytecode.split("736f6c6343").collect::>(); + + if compiler_version.len() > 1 { + if let Some(encoded_version) = compiler_version[1].get(0..6) { + let version_array = encoded_version.chars() + .collect::>() + .chunks(2) + .map(|c| c.iter().collect::()) + .collect::>(); + + version = String::new(); + for version_part in version_array { + version.push_str(&format!("{}.", u8::from_str_radix(&version_part, 16).unwrap())); + } + } + } + } + else if compiler == "vyper" { + let compiler_version = bytecode.split("767970657283").collect::>(); + + if compiler_version.len() > 1 { + if let Some(encoded_version) = compiler_version[1].get(0..6) { + let version_array = encoded_version.chars() + .collect::>() + .chunks(2) + .map(|c| c.iter().collect::()) + .collect::>(); + + version = String::new(); + for version_part in version_array { + version.push_str(&format!("{}.", u8::from_str_radix(&version_part, 16).unwrap())); + } + } + } + } + + + (compiler, version.trim_end_matches('.').to_string()) +} diff --git a/common/src/ether/mod.rs b/common/src/ether/mod.rs index f646bac7..84c28550 100644 --- a/common/src/ether/mod.rs +++ b/common/src/ether/mod.rs @@ -1,4 +1,6 @@ pub mod signatures; +pub mod selectors; pub mod solidity; +pub mod compiler; pub mod evm; pub mod yul; \ No newline at end of file diff --git a/common/src/ether/selectors.rs b/common/src/ether/selectors.rs new file mode 100644 index 00000000..40bd5df1 --- /dev/null +++ b/common/src/ether/selectors.rs @@ -0,0 +1,104 @@ +use std::{collections::HashMap, sync::{Arc, Mutex}, time::Duration, thread}; + +use indicatif::ProgressBar; + +use crate::io::logging::Logger; + +use super::{evm::vm::VM, signatures::{resolve_function_signature, ResolvedFunction}}; + +// find all function selectors in the given EVM assembly. +pub fn find_function_selectors(assembly: String) -> Vec { + let mut function_selectors = Vec::new(); + + // search through assembly for PUSH4 instructions, optimistically assuming that they are function selectors + let assembly: Vec = assembly + .split('\n') + .map(|line| line.trim().to_string()) + .collect(); + for line in assembly.iter() { + let instruction_args: Vec = line.split(' ').map(|arg| arg.to_string()).collect(); + + if instruction_args.len() >= 2 { + let instruction = instruction_args[1].clone(); + + if instruction == "PUSH4" { + let function_selector = instruction_args[2].clone(); + function_selectors.push(function_selector); + } + } + } + function_selectors.sort(); + function_selectors.dedup(); + function_selectors +} + +// resolve a selector's function entry point from the EVM bytecode +pub fn resolve_entry_point(evm: &VM, selector: String) -> u128 { + let mut vm = evm.clone(); + + // execute the EVM call to find the entry point for the given selector + vm.calldata = selector.clone(); + while vm.bytecode.len() >= (vm.instruction * 2 + 2) as usize { + let call = vm.step(); + + // if the opcode is an JUMPI and it matched the selector, the next jumpi is the entry point + if call.last_instruction.opcode == "57" { + let jump_condition = call.last_instruction.input_operations[1].solidify(); + let jump_taken = call.last_instruction.inputs[1].try_into().unwrap_or(1); + + if jump_condition.contains(&selector) && + jump_condition.contains("msg.data[0]") && + jump_condition.contains(" == ") && + jump_taken == 1 + { + return call.last_instruction.inputs[0].try_into().unwrap_or(0) + } + } + + if vm.exitcode != 255 || !vm.returndata.is_empty() { + break; + } + } + + 0 +} + +// resolve a function signature from the given selectors +pub fn resolve_function_selectors( + selectors: Vec, + logger: &Logger, +) -> HashMap> { + let resolved_functions: Arc>>> = Arc::new(Mutex::new(HashMap::new())); + let resolve_progress: Arc> = Arc::new(Mutex::new(ProgressBar::new_spinner())); + + let mut threads = Vec::new(); + + resolve_progress.lock().unwrap().enable_steady_tick(Duration::from_millis(100)); + resolve_progress.lock().unwrap().set_style(logger.info_spinner()); + + for selector in selectors { + let function_clone = resolved_functions.clone(); + let resolve_progress = resolve_progress.clone(); + + // create a new thread for each selector + threads.push(thread::spawn(move || { + if let Some(function) = resolve_function_signature(&selector) { + let mut _resolved_functions = function_clone.lock().unwrap(); + let mut _resolve_progress = resolve_progress.lock().unwrap(); + _resolve_progress.set_message(format!("resolved {} selectors...", _resolved_functions.len())); + _resolved_functions.insert(selector, function); + } + })); + + } + + // wait for all threads to finish + for thread in threads { + thread.join().unwrap(); + } + + resolve_progress.lock().unwrap().finish_and_clear(); + + let x = resolved_functions.lock().unwrap().clone(); + x +} \ No newline at end of file diff --git a/heimdall/src/cfg/mod.rs b/heimdall/src/cfg/mod.rs index 1afaa3ba..a03ed935 100644 --- a/heimdall/src/cfg/mod.rs +++ b/heimdall/src/cfg/mod.rs @@ -6,6 +6,8 @@ pub mod util; use heimdall_cache::read_cache; use heimdall_cache::store_cache; +use heimdall_common::ether::compiler::detect_compiler; +use heimdall_common::ether::selectors::find_function_selectors; use indicatif::ProgressBar; use std::env; use std::fs; @@ -27,9 +29,7 @@ use heimdall_common::{ use petgraph::Graph; use crate::cfg::output::build_output; -use crate::cfg::util::detect_compiler; -use crate::cfg::util::find_function_selectors; -use crate::cfg::util::map_contract; +use crate::cfg::util::{map_contract}; #[derive(Debug, Clone, Parser)] #[clap(about = "Generate a visual control flow graph for EVM bytecode", diff --git a/heimdall/src/cfg/util.rs b/heimdall/src/cfg/util.rs index f3e5c83c..3552ba8f 100644 --- a/heimdall/src/cfg/util.rs +++ b/heimdall/src/cfg/util.rs @@ -18,114 +18,6 @@ pub struct VMTrace { pub loop_detected: bool, } -// returns the compiler version used to compile the contract. -// for example: (solc, 0.8.10) or (vyper, 0.2.16) -pub fn detect_compiler(bytecode: String) -> (String, String) { - let mut compiler = "unknown".to_string(); - let mut version = "unknown".to_string(); - - // perfom prefix check for rough version matching - if bytecode.starts_with("363d3d373d3d3d363d73") { - compiler = "proxy".to_string(); - version = "minimal".to_string(); - } else if bytecode.starts_with("366000600037611000600036600073") { - compiler = "proxy".to_string(); - version = "vyper".to_string(); - } else if bytecode.starts_with("6004361015") { - compiler = "vyper".to_string(); - version = "0.2.0-0.2.4,0.2.11-0.3.3".to_string(); - } else if bytecode.starts_with("341561000a") { - compiler = "vyper".to_string(); - version = "0.2.5-0.2.8".to_string(); - } else if bytecode.starts_with("731bf797") { - compiler = "solc".to_string(); - version = "0.4.10-0.4.24".to_string(); - } else if bytecode.starts_with("6080604052") { - compiler = "solc".to_string(); - version = "0.4.".to_string(); - } else if bytecode.starts_with("6060604052") { - compiler = "solc".to_string(); - version = "0.4.11-0.4.21".to_string(); - } else if bytecode.contains("7679706572") { - compiler = "vyper".to_string(); - } else if bytecode.contains("736f6c63") { - compiler = "solc".to_string(); - } - - // perform metadata check - if compiler == "solc" { - let compiler_version = bytecode.split("736f6c6343").collect::>(); - - if compiler_version.len() > 1 { - if let Some(encoded_version) = compiler_version[1].get(0..6) { - let version_array = encoded_version - .chars() - .collect::>() - .chunks(2) - .map(|c| c.iter().collect::()) - .collect::>(); - - version = String::new(); - for version_part in version_array { - version.push_str(&format!( - "{}.", - u8::from_str_radix(&version_part, 16).unwrap() - )); - } - } - } - } else if compiler == "vyper" { - let compiler_version = bytecode.split("767970657283").collect::>(); - - if compiler_version.len() > 1 { - if let Some(encoded_version) = compiler_version[1].get(0..6) { - let version_array = encoded_version - .chars() - .collect::>() - .chunks(2) - .map(|c| c.iter().collect::()) - .collect::>(); - - version = String::new(); - for version_part in version_array { - version.push_str(&format!( - "{}.", - u8::from_str_radix(&version_part, 16).unwrap() - )); - } - } - } - } - - (compiler, version.trim_end_matches('.').to_string()) -} - -// find all function selectors in the given EVM. -pub fn find_function_selectors(assembly: String) -> Vec { - let mut function_selectors = Vec::new(); - - // search through assembly for PUSH4 instructions, optimistically assuming that they are function selectors - let assembly: Vec = assembly - .split('\n') - .map(|line| line.trim().to_string()) - .collect(); - for line in assembly.iter() { - let instruction_args: Vec = line.split(' ').map(|arg| arg.to_string()).collect(); - - if instruction_args.len() >= 2 { - let instruction = instruction_args[1].clone(); - - if instruction == "PUSH4" { - let function_selector = instruction_args[2].clone(); - function_selectors.push(function_selector); - } - } - } - function_selectors.sort(); - function_selectors.dedup(); - function_selectors -} - // build a map of function jump possibilities from the EVM bytecode pub fn map_contract(evm: &VM) -> (VMTrace, u32) { let vm = evm.clone(); diff --git a/heimdall/src/decode/mod.rs b/heimdall/src/decode/mod.rs index a6cbef04..bdbce95c 100644 --- a/heimdall/src/decode/mod.rs +++ b/heimdall/src/decode/mod.rs @@ -23,6 +23,7 @@ use strsim::normalized_damerau_levenshtein as similarity; use crate::decode::util::get_explanation; + #[derive(Debug, Clone, Parser)] #[clap(about = "Decode calldata into readable types", after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki", @@ -239,7 +240,12 @@ pub fn decode(args: DecodeArgs) { // build a trace of the calldata let decode_call = trace.add_call(0, line!(), "heimdall".to_string(), "decode".to_string(), vec![shortened_target], "()".to_string()); trace.br(decode_call); - trace.add_message(decode_call, line!(), vec![format!("selector: 0x{function_selector}")]); + trace.add_message(decode_call, line!(), vec![ + format!( + "selector: 0x{function_selector}{}", + if function_selector == "00000000" { " (fallback?)" } else { "" }, + ) + ]); trace.add_message(decode_call, line!(), vec![format!("calldata: {} bytes", calldata.len() / 2usize)]); trace.br(decode_call); diff --git a/heimdall/src/decompile/mod.rs b/heimdall/src/decompile/mod.rs index 4d996e63..1d237c06 100644 --- a/heimdall/src/decompile/mod.rs +++ b/heimdall/src/decompile/mod.rs @@ -12,6 +12,10 @@ use crate::decompile::util::*; use heimdall_cache::read_cache; use heimdall_cache::store_cache; +use heimdall_common::ether::compiler::detect_compiler; +use heimdall_common::ether::selectors::find_function_selectors; +use heimdall_common::ether::selectors::resolve_entry_point; +use heimdall_common::ether::selectors::resolve_function_selectors; use indicatif::ProgressBar; use std::collections::HashMap; use std::env; diff --git a/heimdall/src/decompile/out/postprocessers/solidity.rs b/heimdall/src/decompile/out/postprocessers/solidity.rs index b21da98c..461aa6e2 100644 --- a/heimdall/src/decompile/out/postprocessers/solidity.rs +++ b/heimdall/src/decompile/out/postprocessers/solidity.rs @@ -158,6 +158,7 @@ fn simplify_casts(line: String) -> String { } fn simplify_parentheses(line: String, paren_index: usize) -> String { + // helper function to determine if parentheses are necessary fn are_parentheses_unnecessary(expression: String) -> bool { // safely grab the first and last chars @@ -384,16 +385,23 @@ fn contains_unnecessary_assignment(line: String, lines: &Vec<&String>) -> bool { //remove unused vars for x in lines { + // break if the line contains a function definition if x.contains("function") { break; } if x.contains(" = ") { - let assignment = x.split(" = ").collect::>(); + let assignment = x.split(" = ") + .map(|x| x.trim()) + .collect::>(); + println!("var: {}, assignment: {:?}", var_name, assignment); if assignment[1].contains(var_name) { return false; } + else if assignment[0].split(" ").last() == Some(var_name) { + return true; + } } else if x.contains(var_name) { return false; } @@ -625,7 +633,7 @@ fn finalize(lines: Vec, bar: &ProgressBar) -> Vec { // only pass in lines further than the current line if !contains_unnecessary_assignment( line.trim().to_string(), - &lines[i..].iter().collect::>(), + &lines[i+1..].iter().collect::>(), ) { cleaned_lines.push(line.to_string()); } diff --git a/heimdall/src/decompile/resolve.rs b/heimdall/src/decompile/resolve.rs index eb52d93b..502ef2f5 100644 --- a/heimdall/src/decompile/resolve.rs +++ b/heimdall/src/decompile/resolve.rs @@ -1,54 +1,8 @@ -use std::{collections::HashMap, time::Duration}; -use std::sync::{Arc, Mutex}; -use std::thread; use heimdall_common::{ - ether::signatures::{resolve_function_signature, ResolvedFunction}, - io::logging::Logger, + ether::signatures::{ResolvedFunction}, }; -use indicatif::ProgressBar; - use super::util::Function; -// resolve a list of function selectors to their possible signatures -pub fn resolve_function_selectors( - selectors: Vec, - logger: &Logger, -) -> HashMap> { - let resolved_functions: Arc>>> = Arc::new(Mutex::new(HashMap::new())); - let resolve_progress: Arc> = Arc::new(Mutex::new(ProgressBar::new_spinner())); - - let mut threads = Vec::new(); - - resolve_progress.lock().unwrap().enable_steady_tick(Duration::from_millis(100)); - resolve_progress.lock().unwrap().set_style(logger.info_spinner()); - - for selector in selectors { - let function_clone = resolved_functions.clone(); - let resolve_progress = resolve_progress.clone(); - - // create a new thread for each selector - threads.push(thread::spawn(move || { - if let Some(function) = resolve_function_signature(&selector) { - let mut _resolved_functions = function_clone.lock().unwrap(); - let mut _resolve_progress = resolve_progress.lock().unwrap(); - _resolve_progress.set_message(format!("resolved {} selectors...", _resolved_functions.len())); - _resolved_functions.insert(selector, function); - } - })); - - } - - // wait for all threads to finish - for thread in threads { - thread.join().unwrap(); - } - - resolve_progress.lock().unwrap().finish_and_clear(); - - let x = resolved_functions.lock().unwrap().clone(); - x -} - // match the ResolvedFunction to a list of Function parameters pub fn match_parameters( resolved_functions: Vec, diff --git a/heimdall/src/decompile/util.rs b/heimdall/src/decompile/util.rs index d0fa0743..9e5ac7fe 100644 --- a/heimdall/src/decompile/util.rs +++ b/heimdall/src/decompile/util.rs @@ -114,148 +114,6 @@ pub struct VMTrace { pub loop_detected: bool } -// returns the compiler version used to compile the contract. -// for example: (solc, 0.8.10) or (vyper, 0.2.16) -pub fn detect_compiler(bytecode: String) -> (String, String) { - - let mut compiler = "unknown".to_string(); - let mut version = "unknown".to_string(); - - // perfom prefix check for rough version matching - if bytecode.starts_with("363d3d373d3d3d363d73") { - compiler = "proxy".to_string(); - version = "minimal".to_string(); - } - else if bytecode.starts_with("366000600037611000600036600073") { - compiler = "proxy".to_string(); - version = "vyper".to_string(); - } - else if bytecode.starts_with("6004361015") { - compiler = "vyper".to_string(); - version = "0.2.0-0.2.4,0.2.11-0.3.3".to_string(); - } - else if bytecode.starts_with("341561000a") { - compiler = "vyper".to_string(); - version = "0.2.5-0.2.8".to_string(); - } - else if bytecode.starts_with("731bf797") { - compiler = "solc".to_string(); - version = "0.4.10-0.4.24".to_string(); - } - else if bytecode.starts_with("6080604052") { - compiler = "solc".to_string(); - version = "0.4.22+".to_string(); - } - else if bytecode.starts_with("6060604052") { - compiler = "solc".to_string(); - version = "0.4.11-0.4.21".to_string(); - } - else if bytecode.contains("7679706572") { - compiler = "vyper".to_string(); - } - else if bytecode.contains("736f6c63") { - compiler = "solc".to_string(); - } - - // perform metadata check - if compiler == "solc" { - let compiler_version = bytecode.split("736f6c6343").collect::>(); - - if compiler_version.len() > 1 { - if let Some(encoded_version) = compiler_version[1].get(0..6) { - let version_array = encoded_version.chars() - .collect::>() - .chunks(2) - .map(|c| c.iter().collect::()) - .collect::>(); - - version = String::new(); - for version_part in version_array { - version.push_str(&format!("{}.", u8::from_str_radix(&version_part, 16).unwrap())); - } - } - } - } - else if compiler == "vyper" { - let compiler_version = bytecode.split("767970657283").collect::>(); - - if compiler_version.len() > 1 { - if let Some(encoded_version) = compiler_version[1].get(0..6) { - let version_array = encoded_version.chars() - .collect::>() - .chunks(2) - .map(|c| c.iter().collect::()) - .collect::>(); - - version = String::new(); - for version_part in version_array { - version.push_str(&format!("{}.", u8::from_str_radix(&version_part, 16).unwrap())); - } - } - } - } - - - (compiler, version.trim_end_matches('.').to_string()) -} - -// find all function selectors in the given EVM assembly. -pub fn find_function_selectors(assembly: String) -> Vec { - let mut function_selectors = Vec::new(); - - // search through assembly for PUSH4 instructions, optimistically assuming that they are function selectors - let assembly: Vec = assembly - .split('\n') - .map(|line| line.trim().to_string()) - .collect(); - for line in assembly.iter() { - let instruction_args: Vec = line.split(' ').map(|arg| arg.to_string()).collect(); - - if instruction_args.len() >= 2 { - let instruction = instruction_args[1].clone(); - - if instruction == "PUSH4" { - let function_selector = instruction_args[2].clone(); - function_selectors.push(function_selector); - } - } - } - function_selectors.sort(); - function_selectors.dedup(); - function_selectors -} - -// resolve a selector's function entry point from the EVM bytecode -pub fn resolve_entry_point(evm: &VM, selector: String) -> u128 { - let mut vm = evm.clone(); - - // execute the EVM call to find the entry point for the given selector - vm.calldata = selector.clone(); - while vm.bytecode.len() >= (vm.instruction * 2 + 2) as usize { - let call = vm.step(); - - // if the opcode is an JUMPI and it matched the selector, the next jumpi is the entry point - if call.last_instruction.opcode == "57" { - let jump_condition = call.last_instruction.input_operations[1].solidify(); - let jump_taken = call.last_instruction.inputs[1].try_into().unwrap_or(1); - - if jump_condition.contains(&selector) && - jump_condition.contains("msg.data[0]") && - jump_condition.contains(" == ") && - jump_taken == 1 - { - return call.last_instruction.inputs[0].try_into().unwrap_or(0) - } - } - - if vm.exitcode != 255 || !vm.returndata.is_empty() { - break; - } - } - - 0 -} - // build a map of function jump possibilities from the EVM bytecode pub fn map_selector( evm: &VM, From be1b249cacfa274fd152d26c5c0dc94e8db382a0 Mon Sep 17 00:00:00 2001 From: Jonathan Becker <64037729+Jon-Becker@users.noreply.github.com> Date: Tue, 11 Apr 2023 10:06:43 -0400 Subject: [PATCH 3/8] :sparkles: feat: include event & error declarations at contract head --- .../decompile/out/postprocessers/solidity.rs | 3 +- heimdall/src/decompile/out/solidity.rs | 51 +++++++++++++++++-- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/heimdall/src/decompile/out/postprocessers/solidity.rs b/heimdall/src/decompile/out/postprocessers/solidity.rs index 461aa6e2..d91cede8 100644 --- a/heimdall/src/decompile/out/postprocessers/solidity.rs +++ b/heimdall/src/decompile/out/postprocessers/solidity.rs @@ -395,11 +395,10 @@ fn contains_unnecessary_assignment(line: String, lines: &Vec<&String>) -> bool { let assignment = x.split(" = ") .map(|x| x.trim()) .collect::>(); - println!("var: {}, assignment: {:?}", var_name, assignment); if assignment[1].contains(var_name) { return false; } - else if assignment[0].split(" ").last() == Some(var_name) { + else if assignment[0].split(' ').last() == Some(var_name) { return true; } } else if x.contains(var_name) { diff --git a/heimdall/src/decompile/out/solidity.rs b/heimdall/src/decompile/out/solidity.rs index 9a60ac9c..52802ef5 100644 --- a/heimdall/src/decompile/out/solidity.rs +++ b/heimdall/src/decompile/out/solidity.rs @@ -16,7 +16,7 @@ use super::{super::{ }, postprocessers::solidity::postprocess}; use serde::{Deserialize, Serialize}; -#[derive(Serialize, Deserialize, PartialEq)] +#[derive(Serialize, Deserialize, PartialEq, Clone)] struct ABIToken { name: String, #[serde(rename = "internalType")] @@ -25,7 +25,7 @@ struct ABIToken { type_: String, } -#[derive(Serialize, Deserialize, PartialEq)] +#[derive(Serialize, Deserialize, PartialEq, Clone)] struct FunctionABI { #[serde(rename = "type")] type_: String, @@ -37,7 +37,7 @@ struct FunctionABI { constant: bool, } -#[derive(Serialize, Deserialize, PartialEq)] +#[derive(Serialize, Deserialize, PartialEq, Clone)] struct ErrorABI { #[serde(rename = "type")] type_: String, @@ -45,7 +45,7 @@ struct ErrorABI { inputs: Vec, } -#[derive(Serialize, Deserialize, PartialEq)] +#[derive(Serialize, Deserialize, PartialEq, Clone)] struct EventABI { #[serde(rename = "type")] type_: String, @@ -53,7 +53,7 @@ struct EventABI { inputs: Vec, } -#[derive(Serialize, Deserialize, PartialEq)] +#[derive(Serialize, Deserialize, PartialEq, Clone)] enum ABIStructure { Function(FunctionABI), Error(ErrorABI), @@ -304,6 +304,47 @@ pub fn output( // write the header to the output file decompiled_output.push(DECOMPILED_SOURCE_HEADER_SOL.replace("{}", env!("CARGO_PKG_VERSION"))); decompiled_output.push(String::from("contract DecompiledContract {")); + + // add blank line if there are events + if abi.iter().any(|x| matches!(x, ABIStructure::Event(_))) { + decompiled_output.push(String::from("")); + } + + // write the contract's events + for event in abi.iter().filter(|x| matches!(x, ABIStructure::Event(_))) { + if let ABIStructure::Event(event) = event { + decompiled_output.push(format!( + "event {}({});", + event.name, + event.inputs + .iter() + .map(|x| format!("{} {}", x.type_, x.name)) + .collect::>() + .join(", ") + )); + } + } + + // add blank line if there are errors + if abi.iter().any(|x| matches!(x, ABIStructure::Error(_))) { + decompiled_output.push(String::from("")); + } + + // write the contract's errors + for error in abi.iter().filter(|x| matches!(x, ABIStructure::Error(_))) { + if let ABIStructure::Error(error) = error { + decompiled_output.push(format!( + "error {}({});", + error.name, + error.inputs + .iter() + .map(|x| format!("{} {}", x.type_, x.name)) + .collect::>() + .join(", ") + )); + } + } + for function in functions { progress_bar.set_message(format!("writing logic for '0x{}'", function.selector)); From 9fe93987474018f51b065756cda117815130d11a Mon Sep 17 00:00:00 2001 From: Jonathan Becker <64037729+Jon-Becker@users.noreply.github.com> Date: Tue, 11 Apr 2023 13:12:06 -0400 Subject: [PATCH 4/8] :wrench: fix: dont remove storage assignments, ever --- heimdall/src/decompile/out/postprocessers/solidity.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/heimdall/src/decompile/out/postprocessers/solidity.rs b/heimdall/src/decompile/out/postprocessers/solidity.rs index d91cede8..6c9cf5ea 100644 --- a/heimdall/src/decompile/out/postprocessers/solidity.rs +++ b/heimdall/src/decompile/out/postprocessers/solidity.rs @@ -383,6 +383,11 @@ fn contains_unnecessary_assignment(line: String, lines: &Vec<&String>) -> bool { .len() - 1]; + // skip lines that contain assignments to storage + if var_name.contains("storage") { + return false; + } + //remove unused vars for x in lines { From 41d0e4281e08538fad58d32aa04f58c1eb25738d Mon Sep 17 00:00:00 2001 From: Jonathan Becker <64037729+Jon-Becker@users.noreply.github.com> Date: Tue, 11 Apr 2023 20:49:58 -0400 Subject: [PATCH 5/8] :wrench: fix: og https && new uri --- heimdall/src/decompile/constants.rs | 5 ++- .../decompile/out/postprocessers/solidity.rs | 34 +++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/heimdall/src/decompile/constants.rs b/heimdall/src/decompile/constants.rs index fed7f0db..7ca1e92f 100644 --- a/heimdall/src/decompile/constants.rs +++ b/heimdall/src/decompile/constants.rs @@ -15,7 +15,10 @@ lazy_static! { pub static ref ENCLOSED_EXPRESSION_REGEX: Regex = Regex::new(r"\(.*\)").unwrap(); // detects a memory access - pub static ref MEM_ACCESS_REGEX: Regex = Regex::new(r"memory\[.*\]").unwrap(); + pub static ref MEM_ACCESS_REGEX: Regex = Regex::new(r"memory\[.*?\]").unwrap(); + + // detects a storage access + pub static ref STORAGE_ACCESS_REGEX: Regex = Regex::new(r"storage\[.*?\]").unwrap(); // detects division by 1 pub static ref DIV_BY_ONE_REGEX: Regex = Regex::new(r" \/ 0x01(?!\d)").unwrap(); diff --git a/heimdall/src/decompile/out/postprocessers/solidity.rs b/heimdall/src/decompile/out/postprocessers/solidity.rs index 6c9cf5ea..4bff6bcc 100644 --- a/heimdall/src/decompile/out/postprocessers/solidity.rs +++ b/heimdall/src/decompile/out/postprocessers/solidity.rs @@ -1,6 +1,6 @@ use super::super::super::constants::{ AND_BITMASK_REGEX, AND_BITMASK_REGEX_2, DIV_BY_ONE_REGEX, MEM_ACCESS_REGEX, MUL_BY_ONE_REGEX, - NON_ZERO_BYTE_REGEX, + NON_ZERO_BYTE_REGEX, STORAGE_ACCESS_REGEX }; use crate::decompile::constants::ENCLOSED_EXPRESSION_REGEX; use heimdall_common::{ @@ -15,7 +15,7 @@ use heimdall_common::{ }; use indicatif::ProgressBar; use lazy_static::lazy_static; -use std::{collections::HashMap, sync::Mutex}; +use std::{collections::{HashMap, HashSet}, sync::Mutex}; lazy_static! { static ref MEM_LOOKUP_MAP: Mutex> = Mutex::new(HashMap::new()); @@ -643,6 +643,36 @@ fn finalize(lines: Vec, bar: &ProgressBar) -> Vec { } } + // get a set of all unique storage variables + let mut storage_vars: HashMap = HashMap::new(); + + for line in cleaned_lines.iter() { + + // check for STORAGE_ACCESS_REGEX + if let Some(access) = STORAGE_ACCESS_REGEX.find(&line).unwrap() { + //storage_vars.insert(access.as_str().to_string()); + + // if the line contains an assignment, we can pull the type from the assignment + if line.contains(" = ") { + let assignment = line.split(" = ") + .map(|x| x.trim()) + .collect::>(); + + // check that lhs is a storage variable + if assignment[0].contains("storage") { + println!("assignment: {assignment:?}"); + + continue; + } + } + + // otherwise, we can pull a type from a typed memory variable + + } + } + + println!("storage vars: {storage_vars:?}"); + cleaned_lines } From 2ad31ec48ffb1108766b64588a27403d2c087f59 Mon Sep 17 00:00:00 2001 From: Jonathan Becker <64037729+Jon-Becker@users.noreply.github.com> Date: Thu, 13 Apr 2023 11:47:48 -0400 Subject: [PATCH 6/8] :sparkles: feat: `PUSH0` support --- .vscode/settings.json | 5 - common/src/ether/evm/opcodes.rs | 1 + common/src/ether/evm/vm.rs | 8 ++ common/src/ether/solidity.rs | 4 +- common/src/ether/yul.rs | 14 ++- heimdall/src/decompile/constants.rs | 2 +- .../src/decompile/out/postprocessers/mod.rs | 1 + .../src/decompile/out/postprocessers/tests.rs | 108 +++++++++++++++++ heimdall/src/decompile/tests.rs | 111 +----------------- 9 files changed, 136 insertions(+), 118 deletions(-) delete mode 100644 .vscode/settings.json create mode 100644 heimdall/src/decompile/out/postprocessers/tests.rs diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index d8607a11..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "rust-analyzer.linkedProjects": [ - "/common/Cargo.toml" - ] -} \ No newline at end of file diff --git a/common/src/ether/evm/opcodes.rs b/common/src/ether/evm/opcodes.rs index d8bc8b3b..b5ba3216 100644 --- a/common/src/ether/evm/opcodes.rs +++ b/common/src/ether/evm/opcodes.rs @@ -76,6 +76,7 @@ pub fn opcode(code: &str) -> Opcode { "59" => Opcode { name: "MSIZE", mingas: 2, inputs: 0, outputs: 1 }, "5a" => Opcode { name: "GAS", mingas: 2, inputs: 0, outputs: 1 }, "5b" => Opcode { name: "JUMPDEST", mingas: 1, inputs: 0, outputs: 0 }, + "5f" => Opcode { name: "PUSH0", mingas: 3, inputs: 0, outputs: 1 }, "60" => Opcode { name: "PUSH1", mingas: 3, inputs: 0, outputs: 1 }, "61" => Opcode { name: "PUSH2", mingas: 3, inputs: 0, outputs: 1 }, "62" => Opcode { name: "PUSH3", mingas: 3, inputs: 0, outputs: 1 }, diff --git a/common/src/ether/evm/vm.rs b/common/src/ether/evm/vm.rs index 80efb0f0..2e40d38b 100644 --- a/common/src/ether/evm/vm.rs +++ b/common/src/ether/evm/vm.rs @@ -1383,6 +1383,14 @@ impl VM { ); } + // PUSH0 + if op == 0x5f { + self.stack.push( + U256::from(0u8).encode_hex().as_str(), + operation.clone(), + ); + } + // PUSH1 -> PUSH32 if (0x60..=0x7F).contains(&op) { // Get the number of bytes to push diff --git a/common/src/ether/solidity.rs b/common/src/ether/solidity.rs index 84458227..dda4d939 100644 --- a/common/src/ether/solidity.rs +++ b/common/src/ether/solidity.rs @@ -492,8 +492,10 @@ impl WrappedOpcode { "RETURNDATASIZE" => { solidified_wrapped_opcode.push_str("ret0.length"); } + "PUSH0" => { + solidified_wrapped_opcode.push_str("0"); + } opcode => { - if opcode.starts_with("PUSH") { solidified_wrapped_opcode.push_str(self.inputs[0]._solidify().as_str()); } diff --git a/common/src/ether/yul.rs b/common/src/ether/yul.rs index 69910c9f..e5bcb30b 100644 --- a/common/src/ether/yul.rs +++ b/common/src/ether/yul.rs @@ -6,7 +6,10 @@ impl WrappedOpcode { // Returns a WrappedOpcode's yul representation. pub fn yulify(&self) -> String { - if self.opcode.name.starts_with("PUSH") { + if self.opcode.name == "PUSH0" { + "0".to_string() + } + else if self.opcode.name.starts_with("PUSH") { self.inputs[0]._yulify() } else { @@ -40,6 +43,15 @@ mod tests { use super::*; use ethers::types::U256; + #[test] + fn test_push0() { + + // wraps an ADD operation with 2 raw inputs + let add_operation_wrapped = WrappedOpcode::new(0x5f, vec![]); + assert_eq!(add_operation_wrapped.yulify(), "0x00"); + + } + #[test] fn test_yulify_add() { diff --git a/heimdall/src/decompile/constants.rs b/heimdall/src/decompile/constants.rs index 7ca1e92f..60dc458e 100644 --- a/heimdall/src/decompile/constants.rs +++ b/heimdall/src/decompile/constants.rs @@ -15,7 +15,7 @@ lazy_static! { pub static ref ENCLOSED_EXPRESSION_REGEX: Regex = Regex::new(r"\(.*\)").unwrap(); // detects a memory access - pub static ref MEM_ACCESS_REGEX: Regex = Regex::new(r"memory\[.*?\]").unwrap(); + pub static ref MEM_ACCESS_REGEX: Regex = Regex::new(r"memory\[.*\]").unwrap(); // detects a storage access pub static ref STORAGE_ACCESS_REGEX: Regex = Regex::new(r"storage\[.*?\]").unwrap(); diff --git a/heimdall/src/decompile/out/postprocessers/mod.rs b/heimdall/src/decompile/out/postprocessers/mod.rs index 8110d32d..8f1a5c5f 100644 --- a/heimdall/src/decompile/out/postprocessers/mod.rs +++ b/heimdall/src/decompile/out/postprocessers/mod.rs @@ -1,2 +1,3 @@ pub mod solidity; +pub mod tests; pub mod yul; \ No newline at end of file diff --git a/heimdall/src/decompile/out/postprocessers/tests.rs b/heimdall/src/decompile/out/postprocessers/tests.rs new file mode 100644 index 00000000..1fef719e --- /dev/null +++ b/heimdall/src/decompile/out/postprocessers/tests.rs @@ -0,0 +1,108 @@ +#[cfg(test)] +mod tests { + + use std::collections::HashMap; + + use indicatif::ProgressBar; + + use crate::decompile::out::postprocessers::solidity::postprocess; + + #[test] + fn test_bitmask_conversion() { + let lines = vec![ + String::from("(0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff) & (arg0);"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("uint256(arg0);")]); + } + + #[test] + fn test_bitmask_conversion_mask_after() { + let lines = vec![ + String::from("(arg0) & (0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff);"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("uint256(arg0);")]); + } + + #[test] + fn test_bitmask_conversion_unusual_mask() { + let lines = vec![ + String::from("(arg0) & (0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff00);"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("uint248(arg0);")]); + } + + #[test] + fn test_simplify_casts() { + let lines = vec![ + String::from("uint256(uint256(arg0));"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("uint256(arg0);")]); + } + + #[test] + fn test_simplify_casts_complex() { + let lines = vec![ + String::from("ecrecover(uint256(uint256(arg0)), uint256(uint256(arg0)), uint256(uint256(uint256(arg0))));"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("ecrecover(uint256(arg0), uint256(arg0), uint256(arg0));")]); + } + + #[test] + fn test_iszero_flip() { + let lines = vec![ + String::from("if (iszero(arg0)) {"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (!arg0) {")]); + } + + #[test] + fn test_iszero_flip_complex() { + let lines = vec![ + String::from("if (iszero(iszero(arg0))) {"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (arg0) {")]); + } + + #[test] + fn test_iszero_flip_complex2() { + let lines = vec![ + String::from("if (iszero(iszero(iszero(arg0)))) {"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (!arg0) {")]); + } + + #[test] + fn test_simplify_parentheses() { + let lines = vec![ + String::from("((arg0))"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("arg0")]); + } + + #[test] + fn test_simplify_parentheses_complex() { + let lines = vec![ + String::from("if ((cast(((arg0) + 1) / 10))) {"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (cast(arg0 + 1 / 10)) {")]); + } + + #[test] + fn test_simplify_parentheses_complex2() { + let lines = vec![ + String::from("if (((((((((((((((cast(((((((((((arg0 * (((((arg1))))))))))))) + 1)) / 10)))))))))))))))) {"), + ]; + + assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (cast((arg0 * (arg1)) + 1 / 10)) {")]); + } +} \ No newline at end of file diff --git a/heimdall/src/decompile/tests.rs b/heimdall/src/decompile/tests.rs index d0f4356b..ad6e5bec 100644 --- a/heimdall/src/decompile/tests.rs +++ b/heimdall/src/decompile/tests.rs @@ -203,7 +203,7 @@ mod tests { // assert that the output is correct for line in &["function Unresolved_02751cec(address arg0, bytes memory arg1, bytes memory arg2, bytes memory arg3, address arg4, bytes memory arg5) public returns (bytes memory) {", - "(bool success, bytes memory ret0) = address(var_j).call{ value: 0 }(var_n);", + "(bool success, bytes memory ret0) = address(var_j).call{ value: 0 }(var_", "revert(\"UniswapV2Library: ZERO_ADDRESS\");", "revert(\"TransferHelper: TRANSFER_FAILED\");"] { println!("{line}"); @@ -214,113 +214,4 @@ mod tests { delete_path(&String::from("./output/tests/decompile/test4")); } -} - -#[cfg(test)] -mod postprocess_tests { - - use std::collections::HashMap; - - use indicatif::ProgressBar; - - use crate::decompile::out::postprocessers::solidity::postprocess; - - #[test] - fn test_bitmask_conversion() { - let lines = vec![ - String::from("(0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff) & (arg0);"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("uint256(arg0);")]); - } - - #[test] - fn test_bitmask_conversion_mask_after() { - let lines = vec![ - String::from("(arg0) & (0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff);"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("uint256(arg0);")]); - } - - #[test] - fn test_bitmask_conversion_unusual_mask() { - let lines = vec![ - String::from("(arg0) & (0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff00);"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("uint248(arg0);")]); - } - - #[test] - fn test_simplify_casts() { - let lines = vec![ - String::from("uint256(uint256(arg0));"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("uint256(arg0);")]); - } - - #[test] - fn test_simplify_casts_complex() { - let lines = vec![ - String::from("ecrecover(uint256(uint256(arg0)), uint256(uint256(arg0)), uint256(uint256(uint256(arg0))));"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("ecrecover(uint256(arg0), uint256(arg0), uint256(arg0));")]); - } - - #[test] - fn test_iszero_flip() { - let lines = vec![ - String::from("if (iszero(arg0)) {"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (!arg0) {")]); - } - - #[test] - fn test_iszero_flip_complex() { - let lines = vec![ - String::from("if (iszero(iszero(arg0))) {"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (arg0) {")]); - } - - #[test] - fn test_iszero_flip_complex2() { - let lines = vec![ - String::from("if (iszero(iszero(iszero(arg0)))) {"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (!arg0) {")]); - } - - #[test] - fn test_simplify_parentheses() { - let lines = vec![ - String::from("((arg0))"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("arg0")]); - } - - #[test] - fn test_simplify_parentheses_complex() { - let lines = vec![ - String::from("if ((cast(((arg0) + 1) / 10))) {"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (cast(arg0 + 1 / 10)) {")]); - } - - #[test] - fn test_simplify_parentheses_complex2() { - let lines = vec![ - String::from("if (((((((((((((((cast(((((((((((arg0 * (((((arg1))))))))))))) + 1)) / 10)))))))))))))))) {"), - ]; - - assert_eq!(postprocess(lines, HashMap::new(), HashMap::new(), &ProgressBar::new(128)), vec![String::from("if (cast((arg0 * (arg1)) + 1 / 10)) {")]); - } } \ No newline at end of file From 54e995c09524518483e889017c2ddd88cb099200 Mon Sep 17 00:00:00 2001 From: Jonathan Becker <64037729+Jon-Becker@users.noreply.github.com> Date: Fri, 14 Apr 2023 12:57:11 -0500 Subject: [PATCH 7/8] =?UTF-8?q?=E2=9C=85=20tests:=20make=20PUSH0=20tests?= =?UTF-8?q?=20pass?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- common/src/ether/solidity.rs | 9 ++++ common/src/ether/yul.rs | 3 +- heimdall/src/decompile/constants.rs | 3 ++ .../decompile/out/postprocessers/solidity.rs | 42 ++++--------------- 4 files changed, 20 insertions(+), 37 deletions(-) diff --git a/common/src/ether/solidity.rs b/common/src/ether/solidity.rs index dda4d939..412661e3 100644 --- a/common/src/ether/solidity.rs +++ b/common/src/ether/solidity.rs @@ -579,6 +579,15 @@ mod tests { use super::*; use ethers::types::U256; + #[test] + fn test_push0() { + + // wraps an ADD operation with 2 raw inputs + let add_operation_wrapped = WrappedOpcode::new(0x5f, vec![]); + assert_eq!(add_operation_wrapped.solidify(), "0"); + + } + #[test] fn test_solidify_add() { diff --git a/common/src/ether/yul.rs b/common/src/ether/yul.rs index e5bcb30b..9f96ff47 100644 --- a/common/src/ether/yul.rs +++ b/common/src/ether/yul.rs @@ -48,8 +48,7 @@ mod tests { // wraps an ADD operation with 2 raw inputs let add_operation_wrapped = WrappedOpcode::new(0x5f, vec![]); - assert_eq!(add_operation_wrapped.yulify(), "0x00"); - + assert_eq!(add_operation_wrapped.yulify(), "0"); } #[test] diff --git a/heimdall/src/decompile/constants.rs b/heimdall/src/decompile/constants.rs index 60dc458e..753fbd5e 100644 --- a/heimdall/src/decompile/constants.rs +++ b/heimdall/src/decompile/constants.rs @@ -25,6 +25,9 @@ lazy_static! { // detects multiplication by 1 pub static ref MUL_BY_ONE_REGEX: Regex = Regex::new(r"\b0x01\b\s*\*\s*| \*\s*\b0x01\b").unwrap(); + + // memory variable regex + pub static ref MEM_VAR_REGEX: Regex = Regex::new(r"var_[a-zA-Z]{1,2}").unwrap(); // extracts commas within a certain expression, not including commas within parentheses pub static ref ARGS_SPLIT_REGEX: Regex = Regex::new(r",\s*(?![^()]*\))").unwrap(); diff --git a/heimdall/src/decompile/out/postprocessers/solidity.rs b/heimdall/src/decompile/out/postprocessers/solidity.rs index 4bff6bcc..fc9be466 100644 --- a/heimdall/src/decompile/out/postprocessers/solidity.rs +++ b/heimdall/src/decompile/out/postprocessers/solidity.rs @@ -1,8 +1,8 @@ use super::super::super::constants::{ AND_BITMASK_REGEX, AND_BITMASK_REGEX_2, DIV_BY_ONE_REGEX, MEM_ACCESS_REGEX, MUL_BY_ONE_REGEX, - NON_ZERO_BYTE_REGEX, STORAGE_ACCESS_REGEX + NON_ZERO_BYTE_REGEX }; -use crate::decompile::constants::ENCLOSED_EXPRESSION_REGEX; +use crate::decompile::constants::{ENCLOSED_EXPRESSION_REGEX}; use heimdall_common::{ constants::TYPE_CAST_REGEX, ether::{ @@ -15,12 +15,13 @@ use heimdall_common::{ }; use indicatif::ProgressBar; use lazy_static::lazy_static; -use std::{collections::{HashMap, HashSet}, sync::Mutex}; +use std::{collections::{HashMap}, sync::Mutex}; lazy_static! { static ref MEM_LOOKUP_MAP: Mutex> = Mutex::new(HashMap::new()); static ref VARIABLE_MAP: Mutex> = Mutex::new(HashMap::new()); - static ref TYPE_MAP: Mutex> = Mutex::new(HashMap::new()); + static ref MEMORY_TYPE_MAP: Mutex> = Mutex::new(HashMap::new()); + static ref STORAGE_TYPE_MAP: Mutex> = Mutex::new(HashMap::new()); } fn convert_bitmask_to_casting(line: String) -> String { @@ -488,7 +489,7 @@ fn replace_expression_with_var(line: String) -> String { fn inherit_infer_type(line: String) -> String { let mut cleaned = line.clone(); - let mut type_map = TYPE_MAP.lock().unwrap(); + let mut type_map = MEMORY_TYPE_MAP.lock().unwrap(); // if the line contains a function definition, wipe the type map and get arg types if line.contains("function") { @@ -526,6 +527,7 @@ fn inherit_infer_type(line: String) -> String { if !var_type.is_empty() { type_map.insert(var_name.to_string(), var_type); } else if !line.starts_with("storage") { + // infer the type from args and vars in the expression for (var, var_type) in type_map.clone().iter() { if cleaned.contains(var) && !type_map.contains_key(var_name) && !var_type.is_empty() @@ -643,36 +645,6 @@ fn finalize(lines: Vec, bar: &ProgressBar) -> Vec { } } - // get a set of all unique storage variables - let mut storage_vars: HashMap = HashMap::new(); - - for line in cleaned_lines.iter() { - - // check for STORAGE_ACCESS_REGEX - if let Some(access) = STORAGE_ACCESS_REGEX.find(&line).unwrap() { - //storage_vars.insert(access.as_str().to_string()); - - // if the line contains an assignment, we can pull the type from the assignment - if line.contains(" = ") { - let assignment = line.split(" = ") - .map(|x| x.trim()) - .collect::>(); - - // check that lhs is a storage variable - if assignment[0].contains("storage") { - println!("assignment: {assignment:?}"); - - continue; - } - } - - // otherwise, we can pull a type from a typed memory variable - - } - } - - println!("storage vars: {storage_vars:?}"); - cleaned_lines } From 5fa679b1f53eed70ef539441eae8bb9b2a4faf21 Mon Sep 17 00:00:00 2001 From: Jonathan Becker <64037729+Jon-Becker@users.noreply.github.com> Date: Fri, 14 Apr 2023 12:58:12 -0500 Subject: [PATCH 8/8] =?UTF-8?q?=F0=9F=91=B7=20build:=20bump=20version=20to?= =?UTF-8?q?=200.4.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 8 ++++---- cache/Cargo.toml | 2 +- common/Cargo.toml | 2 +- config/Cargo.toml | 2 +- heimdall/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f13e1c51..0408818b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1477,7 +1477,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heimdall" -version = "0.4.2" +version = "0.4.3" dependencies = [ "backtrace", "clap", @@ -1502,7 +1502,7 @@ dependencies = [ [[package]] name = "heimdall-cache" -version = "0.4.2" +version = "0.4.3" dependencies = [ "bincode", "clap", @@ -1513,7 +1513,7 @@ dependencies = [ [[package]] name = "heimdall-common" -version = "0.4.2" +version = "0.4.3" dependencies = [ "async-openai", "clap", @@ -1533,7 +1533,7 @@ dependencies = [ [[package]] name = "heimdall-config" -version = "0.4.2" +version = "0.4.3" dependencies = [ "clap", "clap-verbosity-flag", diff --git a/cache/Cargo.toml b/cache/Cargo.toml index d46ebf09..450a681f 100644 --- a/cache/Cargo.toml +++ b/cache/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "heimdall-cache" -version = "0.4.2" +version = "0.4.3" edition = "2021" license = "MIT" readme = "README.md" diff --git a/common/Cargo.toml b/common/Cargo.toml index 13819c8b..f10334cf 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -5,7 +5,7 @@ keywords = ["ethereum", "web3", "decompiler", "evm", "crypto"] license = "MIT" name = "heimdall-common" readme = "README.md" -version = "0.4.2" +version = "0.4.3" [dependencies] async-openai = "0.10.0" diff --git a/config/Cargo.toml b/config/Cargo.toml index eb903b46..9e12092f 100644 --- a/config/Cargo.toml +++ b/config/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "heimdall-config" -version = "0.4.2" +version = "0.4.3" edition = "2021" license = "MIT" readme = "README.md" diff --git a/heimdall/Cargo.toml b/heimdall/Cargo.toml index d2d872a2..d26d027c 100644 --- a/heimdall/Cargo.toml +++ b/heimdall/Cargo.toml @@ -5,7 +5,7 @@ keywords = ["ethereum", "web3", "decompiler", "evm", "crypto"] license = "MIT" name = "heimdall" readme = "README.md" -version = "0.4.2" +version = "0.4.3" [dependencies] backtrace = "0.3"