# Patch summary (free-form preamble placed before the first `diff --git` header).
#
# Purpose: enable the MachineInstr (MIR) path of the X86 canonicalizer and
# repoint the container/test paths at /granlte.
#
# Files touched:
#   .gitignore
#     - Adds a blank line and `.vscode`; the file now ends without a newline.
#   Dockerfile
#     - WORKDIR changes from /gematria to /granlte.
#     - The trailing blank line after `RUN pip3 install -r requirements.in` is
#       dropped; the file now ends without a newline.
#   gematria/datasets/bhive_importer_test.cc
#     - MIRDatasetBasicTest loads "/granlte/sample_dataset/data.mir" instead of
#       "/u9/z277zhu/research/gematria/sample_dataset/data.mir".
#   gematria/llvm/canonicalizer.cc
#     - Adds `#include "llvm/IR/Constants.h"`.
#     - Adds Canonicalizer::GetRegisterNameOrEmpty(const llvm::MachineOperand&):
#       asserts operand.isReg() and returns the literal string "register".
#     - Drops stale TODO comments and un-comments the body of
#       X86Canonicalizer::PlatformSpecificInstructionFromMachineInstr: it pushes
#       a whole-memory alias operand when the descriptor mayLoad()/mayStore(),
#       loops over descriptor operands calling AddOperand (skipping 4 extra
#       slots after an address tuple), and records implicit defs/uses.
#     - Adds X86Canonicalizer::AddOperand(const llvm::MachineInstr&, ...):
#       skips a non-tuple register operand whose getReg() == 0; selects
#       output_operands or input_operands from is_output_operand; emits an
#       Address for the 5-tuple, a Register for isReg(), an ImmediateValue for
#       isImm()/isCImm() (CImm via getZExtValue()), an FpImmediateValue for
#       isFPImm(); otherwise prints the operand to llvm::errs() and hits
#       assert(false).
#   gematria/llvm/canonicalizer.h
#     - Declares the two new overloads (GetRegisterNameOrEmpty, AddOperand).
#
# NOTE(review): this copy of the patch appears to have lost its line breaks and
#   its template argument lists (e.g. `std::vector& operand_list`,
#   `static_cast(scaling)`, `llvm::bit_cast(operand.getFPImm())`,
#   `std::unique_ptr mcinst_printer_`) -- regenerate from the commit before
#   trying to apply it.
# NOTE(review): the MachineOperand overload of GetRegisterNameOrEmpty returns
#   "register" unconditionally, so the base/index/segment slots of a memory
#   5-tuple that AddOperand's own comment describes as possibly empty would be
#   named "register" rather than "" -- presumably it should return an empty
#   string when operand.getReg() == 0; confirm.
# NOTE(review): the address-tuple branch calls getImm() on the AddrDisp and
#   AddrScaleAmt operands without an isImm() check, and the code itself carries
#   a TODO asking whether MIR has the address computation tuple -- confirm the
#   displacement is always an immediate in MIR.
# NOTE(review): MachineOperand::getFPImm() presumably returns a ConstantFP
#   pointer (the patch adds llvm/IR/Constants.h); bit_cast-ing that result may
#   encode the pointer rather than the floating-point value -- verify.
# NOTE(review): the unsupported-operand branch relies on assert(false); in a
#   build with NDEBUG the operand would only be logged and then dropped.
# NOTE(review): `instr_info` and `register_info` used by the un-commented code
#   are not defined in any visible hunk line -- presumably they live in the
#   elided context; confirm the file compiles.
# NOTE(review): the test still uses an absolute path that matches the new
#   Dockerfile WORKDIR, so it presumably only passes inside that container;
#   also confirm the spelling "/granlte" is intended.
#
diff --git a/.gitignore b/.gitignore index 6f5b84c9..6a4cd2d4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ /requirements.txt /compile_commands.json + +.vscode \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 8fc9312d..da20d886 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,6 @@ FROM ubuntu:22.04 RUN apt-get update && apt-get install -y clang python3 python3-pip git curl ARG bazelisk_version=1.17.0 RUN curl -L https://github.com/bazelbuild/bazelisk/releases/download/v${bazelisk_version}/bazelisk-linux-amd64 > /usr/bin/bazelisk && chmod +x /usr/bin/bazelisk && ln -s /usr/bin/bazelisk /usr/bin/bazel -WORKDIR /gematria +WORKDIR /granlte COPY . . -RUN pip3 install -r requirements.in - +RUN pip3 install -r requirements.in \ No newline at end of file diff --git a/gematria/datasets/bhive_importer_test.cc b/gematria/datasets/bhive_importer_test.cc index 1094aefe..4e2a5397 100644 --- a/gematria/datasets/bhive_importer_test.cc +++ b/gematria/datasets/bhive_importer_test.cc @@ -227,7 +227,7 @@ TEST_F(BHiveImporterTest, NonStandardColumns) { } TEST_F(BHiveImporterTest, MIRDatasetBasicTest) { - EXPECT_THAT(x86_bhive_importer_->LoadMIRModule("/u9/z277zhu/research/gematria/sample_dataset/data.mir"), + EXPECT_THAT(x86_bhive_importer_->LoadMIRModule("/granlte/sample_dataset/data.mir"), IsOk()); EXPECT_THAT(x86_bhive_importer_->ParseMIRCsvLine(kSourceName, "a,b,BB_13,2.37", 2, 3, kScaling), diff --git a/gematria/llvm/canonicalizer.cc b/gematria/llvm/canonicalizer.cc index 123b94c2..2993710c 100644 --- a/gematria/llvm/canonicalizer.cc +++ b/gematria/llvm/canonicalizer.cc @@ -22,6 +22,7 @@ #include "lib/Target/X86/MCTargetDesc/X86BaseInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" @@ -67,6 +68,7 @@ void ReplaceExprOperands(llvm::MCInst& instruction) { } // TODO: Write ReplaceExprOperands for MI 
(replace unsupported operand) +// Maybe not necessary } // namespace @@ -101,6 +103,12 @@ std::string Canonicalizer::GetRegisterNameOrEmpty( return target_machine_.getMCRegisterInfo()->getName(operand.getReg()); } +std::string Canonicalizer::GetRegisterNameOrEmpty( + const llvm::MachineOperand& operand) const { + assert(operand.isReg()); + return "register"; // TODO: should we call all virtual registers just register? +} + namespace { llvm::SmallVector SplitByAny(std::string_view str, @@ -253,7 +261,6 @@ X86Canonicalizer::X86Canonicalizer(const llvm::TargetMachine* target_machine) X86Canonicalizer::~X86Canonicalizer() = default; -// TODO:PlatformSpecificInstructionFromMI(const llvm::MachineInstrunction) Instruction X86Canonicalizer::PlatformSpecificInstructionFromMachineInstr(const llvm::MachineInstr & MI) const { // NOTE (lukezhuz): For now, we assume that all memory references are aliased. // This is an overly conservative but safe choice. Note that Ithemal chose the @@ -268,47 +275,45 @@ Instruction X86Canonicalizer::PlatformSpecificInstructionFromMachineInstr(const Instruction instruction; instruction.llvm_mnemonic = target_machine_.getMCInstrInfo()->getName(MI.getOpcode()); - // TODO: Write AddX86VendorMnemonicAndPrefixes method for MI AddMIRVendorMnemonicAndPrefixes(*target_machine_.getMCSubtargetInfo(), MI, instruction); - // const llvm::MCInstrDesc& descriptor = instr_info.get(MI.getOpcode()); - // if (descriptor.mayLoad()) { - // instruction.input_operands.push_back( - // InstructionOperand::MemoryLocation(kWholeMemoryAliasGroup)); - // } - // if (descriptor.mayStore()) { - // instruction.output_operands.push_back( - // InstructionOperand::MemoryLocation(kWholeMemoryAliasGroup)); - // } - - // const int memory_operand_index = GetX86MemoryOperandPosition(descriptor); - // for (int operand_index = 0; operand_index < descriptor.getNumOperands(); - // ++operand_index) { - // const bool is_output_operand = operand_index < descriptor.getNumDefs(); - // const 
bool is_address_computation_tuple = - // operand_index == memory_operand_index; - // // TODO: Write AddOperand method for MI - // AddOperand(MI, /*operand_index=*/operand_index, - // /*is_output_operand=*/is_output_operand, - // /*is_address_computation_tuple=*/is_address_computation_tuple, - // instruction); - // if (is_address_computation_tuple) { - // // A memory reference is represented as a 5-tuple. The whole 5-tuple is - // // processed in one CanonicalizeOperand() call and we need to skip the - // // remaining 4 elements here. - // operand_index += 4; - // } - // } + const llvm::MCInstrDesc& descriptor = instr_info.get(MI.getOpcode()); + if (descriptor.mayLoad()) { + instruction.input_operands.push_back( + InstructionOperand::MemoryLocation(kWholeMemoryAliasGroup)); + } + if (descriptor.mayStore()) { + instruction.output_operands.push_back( + InstructionOperand::MemoryLocation(kWholeMemoryAliasGroup)); + } + + const int memory_operand_index = GetX86MemoryOperandPosition(descriptor); + for (int operand_index = 0; operand_index < descriptor.getNumOperands(); + ++operand_index) { + const bool is_output_operand = operand_index < descriptor.getNumDefs(); + const bool is_address_computation_tuple = + operand_index == memory_operand_index; + AddOperand(MI, /*operand_index=*/operand_index, + /*is_output_operand=*/is_output_operand, + /*is_address_computation_tuple=*/is_address_computation_tuple, + instruction); + if (is_address_computation_tuple) { + // A memory reference is represented as a 5-tuple. The whole 5-tuple is + // processed in one CanonicalizeOperand() call and we need to skip the + // remaining 4 elements here. 
+ operand_index += 4; + } + } - // for (llvm::MCPhysReg implicit_output_register : descriptor.implicit_defs()) { - // instruction.implicit_output_operands.push_back(InstructionOperand::Register( - // register_info.getName(implicit_output_register))); - // } - // for (llvm::MCPhysReg implicit_input_register : descriptor.implicit_uses()) { - // instruction.implicit_input_operands.push_back(InstructionOperand::Register( - // register_info.getName(implicit_input_register))); - // } + for (llvm::MCPhysReg implicit_output_register : descriptor.implicit_defs()) { + instruction.implicit_output_operands.push_back(InstructionOperand::Register( + register_info.getName(implicit_output_register))); + } + for (llvm::MCPhysReg implicit_input_register : descriptor.implicit_uses()) { + instruction.implicit_input_operands.push_back(InstructionOperand::Register( + register_info.getName(implicit_input_register))); + } return instruction; } @@ -427,4 +432,59 @@ void X86Canonicalizer::AddOperand(const llvm::MCInst& mcinst, int operand_index, } } +void X86Canonicalizer::AddOperand(const llvm::MachineInstr& mi, int operand_index, + bool is_output_operand, + bool is_address_computation_tuple, + Instruction& instruction) const { + assert(operand_index < mi.getNumOperands()); + assert(!is_address_computation_tuple || + (operand_index + 5 <= mi.getNumOperands())); + + const llvm::MachineOperand& operand = mi.getOperand(operand_index); + // Skip empty register operand, but not if they are part of a memory 5-tuple. + // Empty register in a memory 5-tuple is for when the address computation uses + // only a subset of components. + if (!is_address_computation_tuple && operand.isReg() && operand.getReg() == 0) + return; + + std::vector& operand_list = + is_output_operand ? 
instruction.output_operands + : instruction.input_operands; + if (is_address_computation_tuple) { // TODO: Check if MIR has address computation tuple + std::string base_register = GetRegisterNameOrEmpty( + mi.getOperand(operand_index + llvm::X86::AddrBaseReg)); + const int64_t displacement = + mi.getOperand(operand_index + llvm::X86::AddrDisp).getImm(); + std::string index_register = GetRegisterNameOrEmpty( + mi.getOperand(operand_index + llvm::X86::AddrIndexReg)); + const int64_t scaling = + mi.getOperand(operand_index + llvm::X86::AddrScaleAmt).getImm(); + std::string segment_register = GetRegisterNameOrEmpty( + mi.getOperand(operand_index + llvm::X86::AddrSegmentReg)); + operand_list.push_back(InstructionOperand::Address( + /* base_register= */ std::move(base_register), + /* displacement= */ displacement, + /* index_register= */ std::move(index_register), + /* scaling= */ static_cast(scaling), + /* segment_register= */ std::move(segment_register))); + } else if (operand.isReg()) { + operand_list.push_back( + InstructionOperand::Register(GetRegisterNameOrEmpty(operand))); + } else if (operand.isImm()) { + operand_list.push_back( + InstructionOperand::ImmediateValue(operand.getImm())); + } else if (operand.isCImm()) { + operand_list.push_back( + InstructionOperand::ImmediateValue(operand.getCImm()->getZExtValue())); + }else if (operand.isFPImm()) { + operand_list.push_back(InstructionOperand::FpImmediateValue( + llvm::bit_cast(operand.getFPImm()))); + } else { + llvm::errs() << "Unsupported operand type: "; + operand.print(llvm::errs()); + llvm::errs() << "\n"; + assert(false); + } +} + } // namespace gematria diff --git a/gematria/llvm/canonicalizer.h b/gematria/llvm/canonicalizer.h index 5cc251d8..6dcc4c51 100644 --- a/gematria/llvm/canonicalizer.h +++ b/gematria/llvm/canonicalizer.h @@ -68,6 +68,7 @@ class Canonicalizer { // the operand is an "undefined" operand. // This method must not be called when `operand.isReg()` is false. 
std::string GetRegisterNameOrEmpty(const llvm::MCOperand& operand) const; + std::string GetRegisterNameOrEmpty(const llvm::MachineOperand& operand) const; const llvm::TargetMachine& target_machine_; }; @@ -87,6 +88,9 @@ class X86Canonicalizer final : public Canonicalizer { void AddOperand(const llvm::MCInst& mcinst, int operand_index, bool is_output_operand, bool is_address_computation_tuple, Instruction& instruction) const; + void AddOperand(const llvm::MachineInstr& mi, int operand_index, + bool is_output_operand, bool is_address_computation_tuple, + Instruction& instruction) const; std::unique_ptr mcinst_printer_; };