
Commit

backend: compiler: retain allocator to prevent early release & enable optional_reshape for int8 pattern
yifeizh2 authored and TaoLv committed Dec 28, 2021
1 parent ef99a0c commit 381a0ac
Showing 3 changed files with 83 additions and 2 deletions.
3 changes: 3 additions & 0 deletions src/backend/graph_compiler/compiler_partition_impl.cpp
@@ -319,6 +319,7 @@ compiler_compiled_partition_impl_t::compiler_compiled_partition_impl_t(
, graph_engine_(graph_engine) {
std::lock_guard<std::mutex> lock(mtx_);
partition_count_map[graph_engine_]++;
graph_engine_->allocator_->retain();
}

compiler_compiled_partition_impl_t::~compiler_compiled_partition_impl_t() {
@@ -330,6 +331,8 @@ compiler_compiled_partition_impl_t::~compiler_compiled_partition_impl_t() {
sc::release_runtime_memory(graph_engine_.get());
}
}
jit_func_ = nullptr;
graph_engine_->allocator_->release();
}

impl::status_t compiler_compiled_partition_impl_t::execute(
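
The change above follows a simple reference-counting discipline: the compiled partition retains the engine's allocator in its constructor and releases it in its destructor, so a user who releases the allocator right after compilation cannot have it freed out from under a live partition. A minimal sketch of that idea is shown below; the class names and the main() driver are hypothetical stand-ins, not the oneDNN Graph API.

// Sketch only: hypothetical types illustrating retain-in-ctor / release-in-dtor.
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <new>

class ref_counted_allocator {
public:
    void retain() { refcount_.fetch_add(1, std::memory_order_relaxed); }
    void release() {
        // Drop one reference; the last release destroys the allocator.
        if (refcount_.fetch_sub(1, std::memory_order_acq_rel) == 1) delete this;
    }
    void *allocate(std::size_t n) { return ::operator new(n); }
    void deallocate(void *p) { ::operator delete(p); }

private:
    ~ref_counted_allocator() { std::puts("allocator destroyed"); }
    std::atomic<int> refcount_ {1}; // the creator holds the first reference
};

class compiled_partition_like {
public:
    explicit compiled_partition_like(ref_counted_allocator *alloc) : alloc_(alloc) {
        alloc_->retain(); // keep the allocator alive for our whole lifetime
    }
    ~compiled_partition_like() {
        alloc_->release(); // may be the last reference; the allocator is freed then
    }
    void run() {
        void *buf = alloc_->allocate(64); // still valid after the user's early release
        alloc_->deallocate(buf);
    }

private:
    ref_counted_allocator *alloc_;
};

int main() {
    auto *alloc = new ref_counted_allocator();
    compiled_partition_like cp(alloc);
    alloc->release(); // user drops its handle early, as in the test below
    cp.run();         // safe: cp still holds a reference
}                     // cp is destroyed here and the allocator is freed
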
17 changes: 15 additions & 2 deletions src/backend/graph_compiler/patterns/mha_pattern.hpp
@@ -256,7 +256,7 @@ COMPILER_BACKEND_REGISTER_PASSES_DEF_BEGIN(int8_mha_pattern)
|
Transpose
|
-   Reshape
+   Reshape (optional)
|
Quantize
|
@@ -320,8 +320,21 @@ COMPILER_BACKEND_REGISTER_TRANSFORMATION_PASS(compiler, int8_mha_pattern)
auto transpose_output = pgraph->append_op(
impl::op_kind::StaticTranspose,
{in_edge(0, matmul_v, 0)}, "transpose_output");

auto optional_reshape_subgraph
= std::make_shared<pb_graph_t>(
"optional_reshape_subgraph");
auto optional_reshape
= optional_reshape_subgraph->append_op(
impl::op_kind::StaticReshape,
"optional_reshape");
optional_reshape_subgraph->create_input_port(
0, optional_reshape, 0);
optional_reshape_subgraph->create_output_port(
0, optional_reshape, 0);

auto reshape_output
-   = pgraph->append_op(impl::op_kind::StaticReshape,
+   = pgraph->append_optional(optional_reshape_subgraph,
{in_edge(0, transpose_output, 0)},
"reshape_output");
pgraph->append_op(impl::op_kind::Quantize,
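
The pattern change above nests the StaticReshape inside its own pb_graph_t and appends that subgraph with append_optional, so the int8 MHA pattern now matches graphs both with and without the trailing reshape. The same idiom can make any single op optional. The sketch below applies it to a hypothetical optional transpose, reusing only the builder calls visible in this diff; it is a fragment, not compilable on its own, and `pgraph`, `producer`, and the surrounding pass registration are assumed from context.

// Sketch only: same builder idiom as above, applied to a hypothetical optional transpose.
auto optional_transpose_subgraph
        = std::make_shared<pb_graph_t>("optional_transpose_subgraph");
auto optional_transpose = optional_transpose_subgraph->append_op(
        impl::op_kind::StaticTranspose, "optional_transpose");
// Expose the wrapped op's first input/output as the subgraph's ports so the
// optional block can be spliced into the outer pattern.
optional_transpose_subgraph->create_input_port(0, optional_transpose, 0);
optional_transpose_subgraph->create_output_port(0, optional_transpose, 0);

// Matches whether or not a StaticTranspose follows `producer` in the target graph.
auto maybe_transpose = pgraph->append_optional(optional_transpose_subgraph,
        {in_edge(0, producer, 0)}, "maybe_transpose");
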
65 changes: 65 additions & 0 deletions tests/cpp/unit/backend/graph_compiler/test_compile_execute.cpp
@@ -354,3 +354,68 @@ TEST(GCGraphTest, Fp32MHACompileExecutionMultiThreading) {
workers[t_num].join();
}
}

// test allocator release before compiled partition destruction
TEST(GCGraphTest, AllocatorEarlyRelease) {
REQUIRE_AVX512();
impl::graph_t agraph;
add_MHA_subgraph(&agraph, false);
agraph.build_graph();

auto &compiler_backend_ptr
= impl::compiler_impl::compiler_backend_t::get_singleton();
compiler_backend_ptr.get_partitions(agraph, impl::partition_policy::fusion);
auto partitions = agraph.get_partitions();
ASSERT_EQ(partitions.size(), 1);

impl::partition_t p;
p.init(partitions[0]);
auto partition_inputs = p.get_inputs();
auto partition_outputs = p.get_outputs();

std::vector<const impl::logical_tensor_t *> inputs;
std::vector<const impl::logical_tensor_t *> outputs;
for (auto &lt : partition_inputs) {
inputs.push_back(&lt);
}
for (auto &lt : partition_outputs) {
outputs.push_back(&lt);
}
impl::compiled_partition_t cp(p);
impl::allocator_t *allocator = impl::allocator_t::create();
impl::engine_t eng(impl::engine_kind::cpu,
0); // create a new engine rather than use the test engine here, to
// avoid releasing the default allocator of the test engine
eng.set_allocator(allocator);
ASSERT_EQ(p.compile(&cp, inputs, outputs, &eng), impl::status::success);

allocator->release(); // release the allocator

std::vector<impl::tensor_t> execution_inputs;
std::vector<impl::tensor_t> execution_outputs;
size_t size = 0;
for (auto &lt : partition_inputs) {
size += compiler_backend_ptr.get_mem_size(lt);
}
for (auto &lt : partition_outputs) {
size += compiler_backend_ptr.get_mem_size(lt);
}
test::vector<char> data(size);

size = 0;
for (auto &lt : partition_inputs) {
impl::tensor_t placeholder(lt, &eng, data.data() + size);
execution_inputs.push_back(placeholder);
size += compiler_backend_ptr.get_mem_size(lt);
}
for (auto &lt : partition_outputs) {
impl::tensor_t placeholder(lt, &eng, data.data() + size);
execution_outputs.push_back(placeholder);
size += compiler_backend_ptr.get_mem_size(lt);
}

impl::stream_t &strm = get_stream();
ASSERT_EQ(cp.execute(&strm, execution_inputs, execution_outputs),
impl::status::success);
strm.wait();
}
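
The essential ordering this test pins down, distilled from the code above (the same calls as in the test; the comments describe the guarantee added by this commit):

impl::allocator_t *allocator = impl::allocator_t::create();
eng.set_allocator(allocator);            // engine refers to the user's allocator
p.compile(&cp, inputs, outputs, &eng);   // compiled partition retains the allocator
allocator->release();                    // user's handle dropped before execution...
cp.execute(&strm, execution_inputs, execution_outputs); // ...but cp's reference keeps it valid
// when cp is destroyed it releases the last reference and the allocator is freed
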
