Skip to content

Commit

Permalink
Add function to create proto from disassembled instructions (google#50)
Browse files Browse the repository at this point in the history
This patch adds a new function that refactors out some functionality
from the existing proto generation functions to create a proto directly
from disassembled instructions. This allows users to do their own
disassembly (e.g., for easy access to the LLVM MCInsts) and efficiently
create a proto afterwards.
  • Loading branch information
boomanaiden154 authored Mar 2, 2024
1 parent defb231 commit 9dffa0a
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 19 deletions.
27 changes: 17 additions & 10 deletions gematria/datasets/bhive_importer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,22 @@ BHiveImporter::BHiveImporter(const Canonicalizer* canonicalizer)
*target_machine_.getMCAsmInfo(), *target_machine_.getMCInstrInfo(),
*target_machine_.getMCRegisterInfo())) {}

// Builds a BasicBlockProto from instructions that were already disassembled.
//
// For every element of `disassembled_instructions`, in order, this appends:
//   * one `machine_instructions` entry whose address, assembly and machine
//     code are copied from the DisassembledInstruction, and
//   * one `canonicalized_instructions` entry obtained by passing the
//     instruction's MCInst through the canonicalizer and converting the result
//     with ProtoFromInstruction.
//
// NOTE: `base_address` is not read by this function; the addresses written to
// the proto are the ones stored in the individual instructions.
BasicBlockProto BHiveImporter::BasicBlockProtoFromInstructions(
    llvm::ArrayRef<DisassembledInstruction> disassembled_instructions,
    uint64_t base_address /*= 0*/) {
  BasicBlockProto block;
  for (const DisassembledInstruction& disassembled :
       disassembled_instructions) {
    // Raw view of the instruction: where it is, how it prints, and its bytes.
    MachineInstructionProto* const raw = block.add_machine_instructions();
    raw->set_address(disassembled.address);
    raw->set_assembly(disassembled.assembly);
    raw->set_machine_code(disassembled.machine_code);

    // Canonicalized view of the same instruction.
    *block.add_canonicalized_instructions() = ProtoFromInstruction(
        canonicalizer_.InstructionFromMCInst(disassembled.mc_inst));
  }
  return block;
}

absl::StatusOr<BasicBlockProto> BHiveImporter::BasicBlockProtoFromMachineCode(
llvm::ArrayRef<uint8_t> machine_code, uint64_t base_address /*= 0*/) {
BasicBlockProto basic_block_proto;
Expand All @@ -76,16 +92,7 @@ absl::StatusOr<BasicBlockProto> BHiveImporter::BasicBlockProtoFromMachineCode(
return LlvmErrorToStatus(std::move(error));
}

for (DisassembledInstruction& instruction : *instructions) {
MachineInstructionProto& machine_instruction =
*basic_block_proto.add_machine_instructions();
machine_instruction.set_address(instruction.address);
machine_instruction.set_assembly(instruction.assembly);
machine_instruction.set_machine_code(instruction.machine_code);
*basic_block_proto.add_canonicalized_instructions() = ProtoFromInstruction(
canonicalizer_.InstructionFromMCInst(instruction.mc_inst));
}
return basic_block_proto;
return BasicBlockProtoFromInstructions(*instructions);
}

absl::StatusOr<BasicBlockProto>
Expand Down
25 changes: 16 additions & 9 deletions gematria/datasets/bhive_importer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include "absl/status/statusor.h"
#include "gematria/llvm/canonicalizer.h"
#include "gematria/llvm/disassembler.h"
#include "gematria/proto/basic_block.pb.h"
#include "gematria/proto/throughput.pb.h"
#include "llvm/ADT/ArrayRef.h"
Expand All @@ -42,15 +43,21 @@ class BHiveImporter {
// Does not take ownership of the canonicalizer.
explicit BHiveImporter(const Canonicalizer* canonicalizer);

// Creates a basic block from the given block of machine code. `machine_code`
// must contain machine code of the instructions to include in the basic
// block. Expects that the `machine_code.begin()` is the first byte of the
// first instruction, and `machine_code.rbegin()` is the last byte of the last
// instruction. Uses `base_address` as the address of the first instruction;
// the addresses of following instructions are derived from `base_address` and
// the sizes of the instructions that preceded it.
// Returns an error when parts of `machine_code` do not disassemble using the
// provided canonicalizer.
// Creates a basic block from the given, already disassembled instructions.
// Each instruction contributes its address, assembly and machine code to the
// proto, together with its canonicalized form.
// NOTE(review): this comment previously stated that `base_address` is used as
// the address of the first instruction and that the following addresses are
// derived from it. The definition in bhive_importer.cc does not read
// `base_address`; it copies the address stored in each
// DisassembledInstruction. Confirm whether the parameter is still needed.
BasicBlockProto BasicBlockProtoFromInstructions(
llvm::ArrayRef<DisassembledInstruction> disassembled_instructions,
uint64_t base_address = 0);

// A version of BasicBlockProtoFromInstructions that takes raw machine code
// instead of disassembled instructions: the code is disassembled first and
// the resulting instructions are converted to a basic block. `machine_code`
// must contain machine code of the instructions to include in the basic
// block. Expects that the `machine_code.begin()` is the first byte of the
// first instruction, and `machine_code.rbegin()` is the last byte of the last
// instruction. Returns an error when parts of `machine_code` do not
// disassemble using the provided canonicalizer.
absl::StatusOr<BasicBlockProto> BasicBlockProtoFromMachineCode(
llvm::ArrayRef<uint8_t> machine_code, uint64_t base_address = 0);

Expand Down
31 changes: 31 additions & 0 deletions gematria/datasets/bhive_importer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "gematria/llvm/canonicalizer.h"
#include "gematria/llvm/llvm_architecture_support.h"
#include "gematria/testing/matchers.h"
#include "gematria/utils/string.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

Expand Down Expand Up @@ -57,6 +58,36 @@ TEST_F(BHiveImporterTest, EmptyBlock) {
})pb")));
}

// Checks that BasicBlockProtoFromInstructions converts a single instruction
// that was disassembled outside of the importer (hex "4929d2") into the
// expected basic block proto.
TEST_F(BHiveImporterTest, SingleInstructionHex) {
  // Decode the hex string into raw machine code bytes.
  const auto hex_bytes = ParseHexString("4929d2");
  ASSERT_TRUE(hex_bytes.has_value());

  // Disassemble the bytes directly, without going through the importer.
  std::unique_ptr<llvm::MCInstPrinter> printer =
      x86_llvm_->CreateMCInstPrinter(0);
  llvm::Expected<std::vector<DisassembledInstruction>> disassembled =
      DisassembleAllInstructions(
          x86_llvm_->mc_disassembler(), x86_llvm_->mc_instr_info(),
          x86_llvm_->mc_register_info(), x86_llvm_->mc_subtarget_info(),
          *printer, 0, *hex_bytes);
  ASSERT_TRUE(static_cast<bool>(disassembled));

  // Convert the disassembled instruction and compare against the golden proto.
  const auto block_proto =
      x86_bhive_importer_->BasicBlockProtoFromInstructions(*disassembled, 0);
  EXPECT_THAT(block_proto,
              EqualsProto(
                  R"pb(machine_instructions {
assembly: "\tsubq\t%rdx, %r10"
machine_code: "I)\322"
}
canonicalized_instructions {
mnemonic: "SUB"
llvm_mnemonic: "SUB64rr"
output_operands { register_name: "R10" }
input_operands { register_name: "R10" }
input_operands { register_name: "RDX" }
implicit_output_operands { register_name: "EFLAGS" }
})pb"));
}

TEST_F(BHiveImporterTest, OneInstruction) {
EXPECT_THAT(x86_bhive_importer_->ParseBHiveCsvLine(
kSourceName, "4929d2,100.000000", 0, 1, 0.5),
Expand Down

0 comments on commit 9dffa0a

Please sign in to comment.