Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fuzz harness interface + ELF Loader support #38

Merged
merged 15 commits into from
May 24, 2024
Merged
1 change: 0 additions & 1 deletion generators/ed25519.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import fd58
import hashlib
from test_suite.codec_utils import encode_input
import test_suite.invoke_pb2 as pb
from dataclasses import dataclass
import datetime
Expand Down
1 change: 0 additions & 1 deletion generators/secp256k1.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import fd58
import hashlib
from eth_hash.auto import keccak
from test_suite.codec_utils import encode_input
import test_suite.invoke_pb2 as pb
from dataclasses import dataclass
import datetime
Expand Down
32 changes: 32 additions & 0 deletions invoke.proto
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,35 @@ message InstrFixture {
InstrContext input = 1;
InstrEffects output = 2;
}

// Container for the bytes of an ELF binary handed to the loader harness.
message ELFBinary {
bytes data = 1;
}

// Input to the ELF loader harness: the ELF binary plus the feature set the
// loader should use.
// NOTE: the features used by the loader are currently hardcoded, so the
// `features` field is not actually consumed yet.
message ELFLoaderCtx {
ELFBinary elf = 1;
FeatureSet features = 2;
}

// Captures the results of an ELF binary load.
// Structurally similar to fd_sbpf_program_t.
// Field number 3 is intentionally left unassigned: it belongs to the
// commented-out `text` field below.
message ELFLoaderEffects {
bytes rodata = 1;
uint64 rodata_sz = 2;

// bytes text = 3; // not needed, just points to a region in rodata
uint64 text_cnt = 4;
uint64 text_off = 5;

uint64 entry_pc = 6;

repeated uint64 calldests = 7;
}

// Fixture for the ELF loader harness: a loader context (`input`) paired with
// the loader effects (`output`).
message ELFLoaderFixture {
ELFLoaderCtx input = 1;
ELFLoaderEffects output = 2;
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ dev = [
]

[build-system]
requires = ['setuptools']
requires = ["setuptools", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"
7 changes: 4 additions & 3 deletions src/test_suite/debugger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import os
from test_suite.multiprocessing_utils import (
initialize_process_output_buffers,
process_instruction,
process_target,
)
import test_suite.globals as globals


def debug_target(shared_library, test_input, pipe):
Expand All @@ -23,7 +24,7 @@ def debug_target(shared_library, test_input, pipe):

lib = ctypes.CDLL(shared_library)
lib.sol_compat_init()
process_instruction(lib, test_input)
process_target(lib, test_input)
lib.sol_compat_fini()


Expand Down Expand Up @@ -62,7 +63,7 @@ def debug_host(shared_library, instruction_context, gdb):
# As soon as the target library gets loaded, set a breakpoint
# for the newly appeared executor function
"set breakpoint pending on",
"break sol_compat_instr_execute_v1",
f"break {globals.harness_ctx.fuzz_fn_name}",
# GDB stops the process when attaching, let it continue
"continue",
# ... At this point, the child process has SIGSTOP'ed itself
Expand Down
71 changes: 35 additions & 36 deletions src/test_suite/fixture_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import fd58
from test_suite.codec_utils import encode_input, encode_output
from test_suite.constants import NATIVE_PROGRAM_MAPPING
from test_suite.multiprocessing_utils import (
build_test_results,
read_instr,
read_context,
process_single_test_case,
prune_execution_result,
)
Expand All @@ -23,9 +22,10 @@ def create_fixture(test_file: Path) -> int:
Returns:
- int: 1 on success, 0 on failure
"""
serialized_instr_context = read_instr(test_file)
results = process_single_test_case(serialized_instr_context)
pruned_results = prune_execution_result(serialized_instr_context, results)
serialized_context = read_context(test_file)
results = process_single_test_case(serialized_context)

pruned_results = prune_execution_result(serialized_context, results)

# This is only relevant when you gather results for multiple targets
if globals.only_keep_passing:
Expand All @@ -38,25 +38,25 @@ def create_fixture(test_file: Path) -> int:

serialized_instr_effects = pruned_results[globals.solana_shared_library]

if serialized_instr_context is None or serialized_instr_effects is None:
if serialized_context is None or serialized_instr_effects is None:
return 0

# Create instruction fixture
instr_context = pb.InstrContext()
instr_context.ParseFromString(serialized_instr_context)
instr_effects = pb.InstrEffects()
instr_effects.ParseFromString(serialized_instr_effects)
context = globals.harness_ctx.context_type()
context.ParseFromString(serialized_context)
effects = globals.harness_ctx.effects_type()
effects.ParseFromString(serialized_instr_effects)

fixture = pb.InstrFixture()
fixture.input.MergeFrom(instr_context)
fixture.output.MergeFrom(instr_effects)
fixture = globals.harness_ctx.fixture_type()
fixture.input.MergeFrom(context)
fixture.output.MergeFrom(effects)

return write_fixture_to_disk(
test_file.stem, fixture.SerializeToString(deterministic=True)
)


def write_fixture_to_disk(file_stem: str, serialized_instruction_fixture: str) -> int:
def write_fixture_to_disk(file_stem: str, serialized_fixture: str) -> int:
"""
Writes instruction fixtures to disk. This function outputs in binary format unless
specified otherwise with the --readable flag.
Expand All @@ -67,47 +67,46 @@ def write_fixture_to_disk(file_stem: str, serialized_instruction_fixture: str) -
Returns:
- int: 0 on failure, 1 on success
"""
if serialized_instruction_fixture is None:
if serialized_fixture is None:
return 0

output_dir = globals.output_dir

if globals.organize_fixture_dir:
instr_fixture = pb.InstrFixture()
instr_fixture.ParseFromString(serialized_instruction_fixture)
program_type = get_program_type(instr_fixture)
fixture = globals.harness_ctx.fixture_type()
fixture.ParseFromString(serialized_fixture)
program_type = get_program_type(fixture)
output_dir = output_dir / program_type
output_dir.mkdir(parents=True, exist_ok=True)

if globals.readable:
# Deserialize fixture
instr_fixture = pb.InstrFixture()
instr_fixture.ParseFromString(serialized_instruction_fixture)
fixture = pb.InstrFixture()
fixture.ParseFromString(serialized_fixture)

# Encode fields for instruction context and effects
instr_context = pb.InstrContext()
instr_context.CopyFrom(instr_fixture.input)
encode_input(instr_context)
context = globals.harness_ctx.context_type()
context.CopyFrom(fixture.input)
# encode_input(context)
globals.harness_ctx.context_human_encode_fn(context)

instr_effects = pb.InstrEffects()
instr_effects.CopyFrom(instr_fixture.output)
encode_output(instr_effects)
instr_effects = globals.harness_ctx.effects_type()
instr_effects.CopyFrom(fixture.output)
globals.harness_ctx.effects_human_encode_fn(instr_effects)

instr_fixture.input.CopyFrom(instr_context)
instr_fixture.output.CopyFrom(instr_effects)
fixture.input.CopyFrom(context)
fixture.output.CopyFrom(instr_effects)

with open(output_dir / (file_stem + ".fix.txt"), "w") as f:
f.write(
text_format.MessageToString(instr_fixture, print_unknown_fields=False)
)
f.write(text_format.MessageToString(fixture, print_unknown_fields=False))
else:
with open(output_dir / (file_stem + ".fix"), "wb") as f:
f.write(serialized_instruction_fixture)
f.write(serialized_fixture)

return 1


def extract_instr_context_from_fixture(fixture_file: Path):
def extract_context_from_fixture(fixture_file: Path):
"""
Extract the context (the fixture's `input` message) from a fixture file and write it to disk.

Expand All @@ -118,12 +117,12 @@ def extract_instr_context_from_fixture(fixture_file: Path):
- int: 1 on success, 0 on failure
"""
try:
instr_fixture = pb.InstrFixture()
fixture = globals.harness_ctx.fixture_type()
with open(fixture_file, "rb") as f:
instr_fixture.ParseFromString(f.read())
fixture.ParseFromString(f.read())

with open(globals.output_dir / (fixture_file.stem + ".bin"), "wb") as f:
f.write(instr_fixture.input.SerializeToString(deterministic=True))
f.write(fixture.input.SerializeToString(deterministic=True))
except:
return 0

Expand Down
16 changes: 16 additions & 0 deletions src/test_suite/fuzz_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from test_suite.fuzz_interface import HarnessCtx
import test_suite.invoke_pb2 as pb
import test_suite.instr.codec_utils as instr_codec


# Harness description for the ELF loader target. No human encode/decode
# callbacks are supplied, so HarnessCtx's defaults apply.
ElfHarness = HarnessCtx(
    fixture_desc=pb.ELFLoaderFixture.DESCRIPTOR,
    fuzz_fn_name="sol_compat_elf_loader_v1",
)

# Harness description for the instruction-execution target. The context and
# effects messages are rendered via the instruction codec helpers.
InstrHarness = HarnessCtx(
    fixture_desc=pb.InstrFixture.DESCRIPTOR,
    fuzz_fn_name="sol_compat_instr_execute_v1",
    effects_human_encode_fn=instr_codec.encode_output,
    context_human_decode_fn=instr_codec.decode_input,
    context_human_encode_fn=instr_codec.encode_input,
)
59 changes: 59 additions & 0 deletions src/test_suite/fuzz_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from typing import Callable, Type, TypeVar
from google.protobuf import message, descriptor, message_factory
from dataclasses import dataclass, InitVar

msg_factory = message_factory.MessageFactory()

FixtureType = TypeVar("FixtureType", bound=message.Message)
ContextType = TypeVar("ContextType", bound=message.Message)
EffectsType = TypeVar("EffectsType", bound=message.Message)

"""
Each fuzzing harness should implement this interface in fuzz_context.py

The following defines the interface:
- fuzz_fn_name: The name of the harness function to call in the fuzz target
- fixture_desc: The protobuf descriptor for the fixture message.
- A fixture message is a message that contains an input and output message.
- input: The fuzz target Context
- output: The fuzz target Effects
- diff_effect_fn: A function that compares two effects messages for equality
- human encode/decode functions for the context and effects messages to
convert the messages to/from human-readable format (in-place).
Both context and effects messages can have their own encode/decode functions.
"""


def generic_effects_diff(a: EffectsType, b: EffectsType) -> bool:
    """Report whether two effects messages compare equal.

    This is the default ``diff_effect_fn`` of a harness; it simply defers to
    the ``==`` operator of the two objects.
    """
    effects_match = a == b
    return effects_match


def generic_human_encode(obj: message.Message) -> None:
    """Default human-readable encoder: a no-op that leaves *obj* untouched."""
    return None


def generic_human_decode(obj: message.Message) -> None:
    """Default human-readable decoder: a no-op that leaves *obj* untouched."""
    return None


@dataclass
class HarnessCtx:
    """Description of a single fuzz harness.

    Construct with the harness function name and the protobuf descriptor of
    the fixture message. The fixture message must have an ``input`` field
    (the context message) and an ``output`` field (the effects message); the
    concrete message classes for all three are resolved in ``__post_init__``.

    Raises:
        KeyError: if the fixture descriptor has no ``input`` or ``output``
            field.
    """

    # Name of the harness function to call in the fuzz target.
    fuzz_fn_name: str
    # Descriptor of the fixture message; consumed by __post_init__ only and
    # not stored on the instance.
    fixture_desc: InitVar[descriptor.Descriptor]
    # Compares two effects messages for equality.
    diff_effect_fn: Callable[[EffectsType, EffectsType], bool] = generic_effects_diff
    # In-place conversions of context/effects messages to and from their
    # human-readable form. The defaults are no-ops.
    context_human_encode_fn: Callable[[ContextType], None] = generic_human_encode
    context_human_decode_fn: Callable[[ContextType], None] = generic_human_decode
    effects_human_encode_fn: Callable[[EffectsType], None] = generic_human_encode
    effects_human_decode_fn: Callable[[EffectsType], None] = generic_human_decode
    # Concrete message classes. These are always overwritten in
    # __post_init__, so values passed to the constructor have no effect.
    fixture_type: Type[FixtureType] = message.Message
    context_type: Type[ContextType] = message.Message
    effects_type: Type[EffectsType] = message.Message

    @staticmethod
    def _message_class(desc: descriptor.Descriptor) -> Type[message.Message]:
        """Return the concrete message class for the descriptor *desc*."""
        # MessageFactory.GetPrototype is deprecated and absent from newer
        # protobuf releases; prefer the module-level GetMessageClass when the
        # installed protobuf provides it, and fall back otherwise.
        get_message_class = getattr(message_factory, "GetMessageClass", None)
        if get_message_class is not None:
            return get_message_class(desc)
        return msg_factory.GetPrototype(desc)

    def __post_init__(self, fixture_desc):
        """Resolve the fixture, context and effects message classes."""
        fields = fixture_desc.fields_by_name
        self.fixture_type = self._message_class(fixture_desc)
        self.context_type = self._message_class(fields["input"].message_type)
        self.effects_type = self._message_class(fields["output"].message_type)
6 changes: 5 additions & 1 deletion src/test_suite/globals.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from test_suite.fuzz_interface import HarnessCtx

# Global variables that can be accessed from processes.

# Target libraries (for run-tests)
target_libraries = {}

# Ground truth library (for run-tests)
solana_shared_library = None
reference_shared_library = None

# Number of iterations (for check-consistency)
n_iterations = 0
Expand All @@ -27,3 +29,5 @@

# (For fixtures) Whether to only keep passing tests
only_keep_passing = False
# Harness context
harness_ctx: HarnessCtx = None
File renamed without changes.
Loading
Loading