Skip to content

Commit

Permalink
final commit
Browse files Browse the repository at this point in the history
  • Loading branch information
9Tempest committed Dec 9, 2023
1 parent 9514b4c commit 48f08df
Show file tree
Hide file tree
Showing 9 changed files with 273 additions and 16 deletions.
66 changes: 58 additions & 8 deletions gematria/basic_block/basic_block.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,14 +230,56 @@ void InstructionOperand::AddTokensToList(
break;
case OperandType::kAddress:
tokens.emplace_back(kAddressToken);
tokens.emplace_back(address().base_register.empty()
? kNoRegisterToken
: address().base_register);
tokens.emplace_back(address().index_register.empty()
? kNoRegisterToken
: address().index_register);
if (!address().segment_register.empty()) {
tokens.push_back(address().segment_register);
if (address().base_register.empty()){
tokens.emplace_back(kNoRegisterToken);
} else {
if (address().base_register_size == '%'){
tokens.emplace_back(getVREG_TOKEN(address().base_register_size));
assert(address().base_register_intefered_register.size() == address().base_register_intefered_register_sizes.size());
for (int i = 0; i < address().base_register_intefered_register.size(); ++i) {
if (address().base_register_intefered_register_sizes[i] == '%'){
tokens.emplace_back(getVREG_TOKEN(address().base_register_intefered_register_sizes[i]));
} else {
tokens.emplace_back(address().base_register_intefered_register[i]);
}
}
} else {
tokens.emplace_back(address().base_register);
}
}
if (address().index_register.empty()){
tokens.emplace_back(kNoRegisterToken);
} else {
if (address().index_register_size == '%'){
tokens.emplace_back(getVREG_TOKEN(address().index_register_size));
assert(address().index_register_intefered_register.size() == address().index_register_intefered_register_sizes.size());
for (int i = 0; i < address().index_register_intefered_register.size(); ++i) {
if (address().index_register_intefered_register_sizes[i] == '%'){
tokens.emplace_back(getVREG_TOKEN(address().index_register_intefered_register_sizes[i]));
} else {
tokens.emplace_back(address().index_register_intefered_register[i]);
}
}
} else {
tokens.emplace_back(address().index_register);
}
}
if (address().segment_register.empty()){
tokens.emplace_back(kNoRegisterToken);
} else {
if (address().segment_register_size == '%'){
tokens.emplace_back(getVREG_TOKEN(address().segment_register_size));
assert(address().segment_register_intefered_register.size() == address().segment_register_intefered_register_sizes.size());
for (int i = 0; i < address().segment_register_intefered_register.size(); ++i) {
if (address().segment_register_intefered_register_sizes[i] == '%'){
tokens.emplace_back(getVREG_TOKEN(address().segment_register_intefered_register_sizes[i]));
} else {
tokens.emplace_back(address().segment_register_intefered_register[i]);
}
}
} else {
tokens.emplace_back(address().segment_register);
}
}
if (address().displacement != 0) {
tokens.emplace_back(kDisplacementToken);
Expand All @@ -248,6 +290,14 @@ void InstructionOperand::AddTokensToList(
break;
case OperandType::kVirtualRegister:
tokens.emplace_back(getVREG_TOKEN(size()));
assert(interfered_registers_.size() == interfered_registers_size_.size());
for (int i = 0; i < interfered_registers_.size(); ++i) {
if (interfered_registers_size_[i] == '%'){
tokens.emplace_back(getVREG_TOKEN(interfered_registers_size_[i]));
} else {
tokens.emplace_back(interfered_registers_[i]);
}
}
break;
}
}
Expand Down
26 changes: 26 additions & 0 deletions gematria/datasets/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,30 @@ gematria_py_binary(
"//gematria/proto:canonicalized_instruction_py_pb2",
"//gematria/proto:throughput_py_pb2",
],
)

# Binary target for evaluate_model.py. The deps correspond to the modules that
# script imports: the basic_block bindings and proto helpers, TFRecord I/O, and
# the basic_block / canonicalized_instruction / throughput proto libraries.
gematria_py_binary(
name = "evaluate_model",
srcs = ["evaluate_model.py"],
deps = [
"//gematria/basic_block/python:basic_block_protos",
"//gematria/basic_block/python:basic_block",
"//gematria/io/python:tfrecord",
"//gematria/proto:basic_block_py_pb2",
"//gematria/proto:canonicalized_instruction_py_pb2",
"//gematria/proto:throughput_py_pb2",
],
)

# Binary target for draw.py.
# NOTE(review): this deps list is identical to the one on the "evaluate_model"
# target; confirm that draw.py actually needs every entry.
gematria_py_binary(
name = "draw",
srcs = ["draw.py"],
deps = [
"//gematria/basic_block/python:basic_block_protos",
"//gematria/basic_block/python:basic_block",
"//gematria/io/python:tfrecord",
"//gematria/proto:basic_block_py_pb2",
"//gematria/proto:canonicalized_instruction_py_pb2",
"//gematria/proto:throughput_py_pb2",
],
)
79 changes: 79 additions & 0 deletions gematria/datasets/python/evaluate_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from gematria.basic_block.python import basic_block
from gematria.basic_block.python import basic_block_protos
from gematria.proto import basic_block_pb2
from gematria.proto import throughput_pb2
from gematria.proto import canonicalized_instruction_pb2
from gematria.io.python import tfrecord

from collections.abc import Sequence

from absl import app
from absl import flags
from absl import logging

import numpy as np
# Short alias for the CanonicalizedInstructionProto message class.
# NOTE(review): main() in this file never references this alias; confirm it is
# still needed.
_CanonicalizedInstructionProto = (
canonicalized_instruction_pb2.CanonicalizedInstructionProto
)

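r"""Evaluates predicted inverse throughputs stored in a Gematria data set.
Reads basic blocks that each carry two inverse throughputs (actual and
predicted) and prints the MAE and MSE over the 10th-90th percentile range.
Usage:
evaluate_model \
--gematria_input_tfrecord=/tmp/bhive/skl.tfrecord
"""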

# Required flag: path of the TFRecord file from which main() reads
# BasicBlockWithThroughputProto records.
# NOTE(review): the help text below still talks about "tokens"; main() reads
# basic blocks with throughputs from this file.
_INPUT_TFRECORD_FILE = flags.DEFINE_string(
'gematria_input_tfrecord',
None,
'The name of the TFRecord file to read the tokens from.',
required=True,
)


def main(argv: Sequence[str]) -> None:
  """Prints MAE and MSE between two inverse throughputs stored per block.

  Reads every `BasicBlockWithThroughputProto` from the TFRecord file named by
  the `gematria_input_tfrecord` flag. Each block must carry exactly two
  `inverse_throughputs` entries: the first cycle value of entry 0 is treated as
  the actual value and the first cycle value of entry 1 as the prediction.
  Blocks whose actual value falls outside the [10th, 90th] percentile range of
  all actual values are dropped before the errors are computed.

  Args:
    argv: Command-line arguments remaining after flag parsing; anything beyond
      the program name is rejected.

  Raises:
    app.UsageError: If extra positional arguments are given.
    ValueError: If the actual and predicted arrays differ in shape.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  output_blocks = list(
      tfrecord.read_protos(
          (_INPUT_TFRECORD_FILE.value,),
          throughput_pb2.BasicBlockWithThroughputProto,
      )
  )
  y_actual = []
  y_predict = []
  for block in output_blocks:
    # Dump every record so the raw data can be inspected next to the metrics.
    print(block)
    assert len(block.inverse_throughputs) == 2
    y_actual.append(block.inverse_throughputs[0].inverse_throughput_cycles[0])
    y_predict.append(block.inverse_throughputs[1].inverse_throughput_cycles[0])

  print("y true is:")
  print(y_actual)
  print("y pred is:")
  print(y_predict)
  y_actual = np.array(y_actual)
  y_predict = np.array(y_predict)

  # Ensure that y_actual and y_predict have the same length.
  if y_actual.shape != y_predict.shape:
    raise ValueError("The shapes of y_actual and y_predict must be the same")
  # Find the 10th and 90th percentiles of y_actual.
  p10 = np.percentile(y_actual, 10)
  p90 = np.percentile(y_actual, 90)
  # Keep only the samples whose actual value lies inside [p10, p90]; the same
  # mask is applied to the predictions so the pairs stay aligned.
  filtered_indices = (y_actual >= p10) & (y_actual <= p90)
  filtered_y_actual = y_actual[filtered_indices]
  filtered_y_predict = y_predict[filtered_indices]
  # Compute MAE for the filtered data.
  mae = np.mean(np.abs(filtered_y_actual - filtered_y_predict))
  # Compute MSE for the filtered data.
  mse = np.mean((filtered_y_actual - filtered_y_predict) ** 2)
  print(f"Mean Absolute Error (MAE) for the 10th to 90th percentile range: {mae}")
  # The label used to read "Mean Absolute Error (MSE)", which misreported the
  # metric being printed.
  print(f"Mean Squared Error (MSE) for the 10th to 90th percentile range: {mse}")




if __name__ == '__main__':
app.run(main)
1 change: 1 addition & 0 deletions gematria/datasets/python/gen_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def main(argv: Sequence[str]) -> None:
)
token_set = set()
for block in output_blocks:
print(block)
for instruction in block.basic_block.canonicalized_instructions:
ginstruction = basic_block_protos.instruction_from_proto(instruction)
for token in ginstruction.as_token_list():
Expand Down
2 changes: 1 addition & 1 deletion gematria/datasets/python/import_from_mir.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def main(argv: Sequence[str]) -> None:
BB_name = line.split(",")[_MACHINE_BASIC_BLOCK_NAME_COLUMN_INDEX.value]
through_put = line.split(",")[_THROUGHPUT_COLUMN_INDEX.value]
# skip blocks with throughput -1
if float(through_put) == -1 or float(through_put) < 0.1:
if float(through_put) == -1 or float(through_put) < 0.1 or float(through_put) > 10:
num_skipped_blocks += 1
continue
block_proto = importer.ParseMIRCsvLine(
Expand Down
2 changes: 0 additions & 2 deletions gematria/granite/graph_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
#include "gematria/basic_block/basic_block.h"
#include "gematria/model/oov_token_behavior.h"

#define DEBUG

#ifdef DEBUG
#define LOG(X) std::cerr << X << "\n"
#else
Expand Down
6 changes: 3 additions & 3 deletions gematria/granite/graph_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,13 @@ enum class EdgeType {
kAddressIndexRegister = 4,
kAddressSegmentRegister = 5,
kAddressDisplacement = 6,
kInterference = 7,
// TODO(ondrasej): Remove this value after the experiments for the Granite
// paper are completed. This value is not used, but it affects the size of an
// embedding vector table; removing it would change the size of this table and
// it would invalidate existing checkpoints.
kReverseStructuralDependency = 7,
kInstructionPrefix = 8,
kInterference = 9,
kReverseStructuralDependency = 8,
kInstructionPrefix = 9,
};

std::ostream& operator<<(std::ostream& os, NodeType node_type);
Expand Down
104 changes: 104 additions & 0 deletions gematria/granite/log.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
num_graphs = 1
num_nodes = 13
num_edges = 15
num_node_tokens = 12
num_nodes_per_block = [13]
num_edges_per_block = [15]
register_nodes :
%5 -> 12
%6 -> 11
RBP -> 7
%1 -> 4
%0 -> 2
RDI -> 1
alias_group_nodes :
1 -> 8
node_types = [NodeType::kInstruction NodeType::kRegister
NodeType::kRegister NodeType::kInstruction NodeType::kRegister
NodeType::kInstruction NodeType::kAddressOperand
NodeType::kRegister NodeType::kMemoryOperand
NodeType::kInstruction NodeType::kAddressOperand
NodeType::kRegister NodeType::kRegister]
edge_senders = [10023376455811109]
edge_receivers = [02334565589910912]
edge_types = [EdgeType::kInputOperands EdgeType::kOutputOperands
EdgeType::kStructuralDependency EdgeType::kInputOperands
EdgeType::kOutputOperands EdgeType::kStructuralDependency
EdgeType::kAddressBaseRegister EdgeType::kInputOperands
EdgeType::kInputOperands EdgeType::kOutputOperands
EdgeType::kStructuralDependency EdgeType::kInputOperands
EdgeType::kAddressBaseRegister EdgeType::kInputOperands
EdgeType::kOutputOperands]


==========interference========

=====================================
num_graphs = 1
num_nodes = 14
num_edges = 22
num_node_tokens = 12
num_nodes_per_block = [14,]
num_edges_per_block = [22,]
register_nodes :
RBP -> 10
%outer -> 6
%1 -> 5
%5 -> 4
%6 -> 3
%0 -> 2
RDI -> 1
alias_group_nodes :
1 -> 11
interference_groups :
%5 -> [ %0 ]
%6 -> [ %0 ]
%1 -> [ %0 ]
%0 -> [ %outer %1 %5 %6 ]
node_types = [NodeType::kInstruction,NodeType::kRegister,NodeType::kRegister,
NodeType::kRegister,NodeType::kRegister,NodeType::kRegister,
NodeType::kRegister,NodeType::kInstruction,NodeType::kInstruction,
NodeType::kAddressOperand,NodeType::kRegister,NodeType::kMemoryOperand,
NodeType::kInstruction,NodeType::kAddressOperand,]
edge_senders = [1,0,3,4,5,6,0,2,7,2,7,10,9,5,8,8,11,3,2,13,12,2,]
edge_receivers = [0,2,2,2,2,2,7,7,5,5,8,9,8,8,11,12,12,13,3,12,4,4,]
edge_types = [EdgeType::kInputOperands,EdgeType::kOutputOperands,EdgeType::kInterference,EdgeType::kInterference,
EdgeType::kInterference,EdgeType::kInterference,EdgeType::kStructuralDependency,
EdgeType::kInputOperands,EdgeType::kOutputOperands,EdgeType::kInterference,
EdgeType::kStructuralDependency,EdgeType::kAddressBaseRegister,EdgeType::kInputOperands,
EdgeType::kInputOperands,EdgeType::kOutputOperands,EdgeType::kStructuralDependency,
EdgeType::kInputOperands,EdgeType::kAddressBaseRegister,EdgeType::kInterference,
EdgeType::kInputOperands,EdgeType::kOutputOperands,EdgeType::kInterference,]
InstructionNodeMask = [1,0,0,0,0,0,0,1,1,0,0,0,1,0,]
DeltaBlockIndex = [0,0,0,0,]



num_graphs = 1
num_nodes = 14
num_edges = 23
num_node_tokens = 12
num_nodes_per_block = [14,]
num_edges_per_block = [23,]
register_nodes :
RBP -> 10
%outer -> 6
%1 -> 5
%5 -> 4
%6 -> 3
%0 -> 2
RDI -> 1
alias_group_nodes :
1 -> 11
interference_groups :
%outer -> [ %0 ]
%1 -> [ %0 ]
%5 -> [ %0 ]
%6 -> [ %0 ]
%0 -> [ %outer %1 %5 %6 ]
node_types = [NodeType::kInstruction,NodeType::kRegister,NodeType::kRegister,NodeType::kRegister,NodeType::kRegister,NodeType::kRegister,NodeType::kRegister,NodeType::kInstruction,NodeType::kInstruction,NodeType::kAddressOperand,NodeType::kRegister,NodeType::kMemoryOperand,NodeType::kInstruction,NodeType::kAddressOperand,]
edge_senders = [1,0,3,2,4,2,5,2,6,2,0,2,7,7,10,9,5,8,8,11,3,13,12,]
edge_receivers = [0,2,2,3,2,4,2,5,2,6,7,7,5,8,9,8,8,11,12,12,13,12,4,]
edge_types = [EdgeType::kInputOperands,EdgeType::kOutputOperands,EdgeType::kInterference,EdgeType::kInterference,EdgeType::kInterference,EdgeType::kInterference,EdgeType::kInterference,EdgeType::kInterference,EdgeType::kInterference,EdgeType::kInterference,EdgeType::kStructuralDependency,EdgeType::kInputOperands,EdgeType::kOutputOperands,EdgeType::kStructuralDependency,EdgeType::kAddressBaseRegister,EdgeType::kInputOperands,EdgeType::kInputOperands,EdgeType::kOutputOperands,EdgeType::kStructuralDependency,EdgeType::kInputOperands,EdgeType::kAddressBaseRegister,EdgeType::kInputOperands,EdgeType::kOutputOperands,]
InstructionNodeMask = [1,0,0,0,0,0,0,1,1,0,0,0,1,0,]
DeltaBlockIndex = [0,0,0,0,]
3 changes: 1 addition & 2 deletions gematria/granite/python/graph_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,13 @@ PYBIND11_MODULE(graph_builder, m) {

py::enum_<EdgeType>(m, "EdgeType")
.value("STRUCTURAL_DEPENDENCY", EdgeType::kStructuralDependency)
.value("REVERSE_STRUCTURAL_DEPENDENCY",
EdgeType::kReverseStructuralDependency)
.value("INPUT_OPERANDS", EdgeType::kInputOperands)
.value("OUTPUT_OPERANDS", EdgeType::kOutputOperands)
.value("ADDRESS_BASE_REGISTER", EdgeType::kAddressBaseRegister)
.value("ADDRESS_INDEX_REGISTER", EdgeType::kAddressIndexRegister)
.value("ADDRESS_SEGMENT_REGISTER", EdgeType::kAddressSegmentRegister)
.value("ADDRESS_DISPLACEMENT", EdgeType::kAddressDisplacement)
.value("INTERFERENCE", EdgeType::kInterference)
.export_values();

py::class_<BasicBlockGraphBuilder>(m, "BasicBlockGraphBuilder")
Expand Down

0 comments on commit 48f08df

Please sign in to comment.