Cherry-pick master code onto the SDK2.2.2 branch (#559)
* Add Custom Op for Yolov3 Post Process (#512)
  * Add custom op for Yolov3
  * Reset submodule onnx
  * Reset tensorrt
  * Delete build
  * Merge odla_ops_nn
  * Modify for passing link-check

  Co-authored-by: gcuser <[email protected]>
  (cherry picked from commit 5847cd3)

* ODLA popART pipeline function (#522)
  * First runnable version with a single thread & test context
  * MNIST runnable demo to test the pipeline
  * Multiple threads put the data to the session run
  * Simple bash script to compile and run the test
  * An example of how to use the callback in the pipeline
  * Multiple threads using a local Ctx
  * Can run with the pipeline setting in the onnx file
  * Refactored and added a no-pipeline multi-thread mode
  * Move code to odla_pipeline.h/.cc
  * Make a single empty/zero data buffer, and delete the context for empty data after getting the result
  * Add a mutex to serialize the compute requests
  * Merge the changes for the attention mask & previous changes
  * Test code for timing
  * Change the CMakeLists so that pipeline.cc and the new custom op are compiled
  * Successfully run on 24L with the attention mask custom op
  * Test code for the attention_mask custom op
  * Add a name scope to each node in the model
  * Try a throughput test with the MLPerf model
  * Only set AMP on the feed-forward matmul
  * Run the online pipeline with the config hard-coded into the config reader class
  * Compile with SDK 2.2 with the online pipeline setting
  * Add a config file for the pipeline stage setting
  * Run the pipeline with performance similar to popart
  * Change some names & make AMP 0.445 everywhere
  * Add the amp parameter to the config file
  * Detach the device and clear the session on DestroyComputation
  * Make batch_per_step take effect in execution mode SEQUENCE so that enough data is passed
  * Add the new lock-free queue and logging
  * Fix a bug in the empty-data visit counter
  * Delete the empty context
  * Add some pipeline synchronization
  * Make the thread sleep for 5 ms when there is no task in the queue
  * Change LockFreeQueue's size() to tail-wait
  * [CI] Make the call from main work with npz files
  * Move the computation init to context creation
  * Add common functions to common.h and common.cc
  * Move the computation init out
  * Move common functions to the test folder
  * Test the ODLA popART config and make the no-configuration case behave as before
  * Add tests that call model.cc
  * Add FP32 saving of the result
  * Some changes to LockFreeQueue and tests
  * Fix the wrong rsqrt result, and remove std::cout/std::cerr to avoid crashes
  * Fix the accuracy problem with large batches_per_step
  * Add a thread check on context & computation holding to avoid conflicts
  * Add the batch tools to help testing: generate the model, build, and run
  * Decrease the empty-data puts
  * Temporary commit to migrate off a crashed system
  * Set pipeline information on the fly; change the mixed style of class members; add a debug setting (default false) so the opts are set by API; remove the old pipeline-setting API
  * Fix the mixed code style and remove redundant code
  * Remove the function test code of odla_popart
  * Remove some redundant code and files
  * Change CACHE STRING to CACHE PATH
  * Move ENGINE_CACHE_PATH to odla_popart.cc
  * Format the code with clang-format-9 -i
  * Move json.hpp to third_party
  * Set the virtual graph for models not using the pipeline in set_session_opts
  * Add the virtual-graph attribute when _odla_computation is constructed
  * Check the shape before extending it with batches_per_step

  Co-authored-by: gcuser <[email protected]>
  (cherry picked from commit 6095bdf)

* Fix the default configuration & computation destruction
  (cherry picked from commit 40b9fc8)

* Definitions for static variables
  (cherry picked from commit 18e0e83)

* Disable test case test_constant_popart.cc

Co-authored-by: Zars19 <[email protected]>
Co-authored-by: jackzipu <[email protected]>
Co-authored-by: gcuser <[email protected]>
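Several bullets above concern a LockFreeQueue whose size() was changed to tail-wait; its implementation is not part of the excerpted diff. Purely as an illustration of the kind of structure involved, here is a minimal single-producer/single-consumer ring buffer; all names are invented for the example, and this is not the repository's LockFreeQueue:

// Illustrative SPSC lock-free ring buffer; NOT the LockFreeQueue from this
// commit, whose implementation is not shown in the diff.
#include <atomic>
#include <cstddef>
#include <vector>

template <typename T>
class SpscQueue {
 public:
  explicit SpscQueue(std::size_t capacity) : buf_(capacity + 1) {}

  // Producer side: returns false when the ring is full.
  bool push(const T& v) {
    std::size_t t = tail_.load(std::memory_order_relaxed);
    std::size_t next = (t + 1) % buf_.size();
    if (next == head_.load(std::memory_order_acquire)) return false;  // full
    buf_[t] = v;
    tail_.store(next, std::memory_order_release);
    return true;
  }

  // Consumer side: returns false when the ring is empty.
  bool pop(T& out) {
    std::size_t h = head_.load(std::memory_order_relaxed);
    if (h == tail_.load(std::memory_order_acquire)) return false;  // empty
    out = buf_[h];
    head_.store((h + 1) % buf_.size(), std::memory_order_release);
    return true;
  }

 private:
  std::vector<T> buf_;
  std::atomic<std::size_t> head_{0};  // next slot to pop
  std::atomic<std::size_t> tail_{0};  // next slot to push
};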
Commit 7f358b9 · 1 parent 0810ace · 33 changed files with 28,498 additions and 474 deletions.
Submodule tensorrt-7.0.0 updated (6 files)
@@ -0,0 +1,21 @@
{
  "version":"1.0.0",
  "amp":0.445,
  "batch_per_step":10,
  "execution_mode":"pipeline",
  "ipu_num":2,
  "load_onnx":false,
  "load_onnx_path":"path",
  "pipeline":{
    "^embedding_" : [0, 0],
    "^layer[0-9]_" : [0, 0],
    "^layer1[0-1]_" : [0, 0],
    "^layer1[2-9]_" : [1, 1],
    "^layer2[0-3]_" : [1, 1],
    "^squad_" : [1, 1]
  },
  "queue_type":"LockFreeQueue",
  "queue_capacity":1048576,
  "save_model" : true,
  "save_model_path":"pipeline_test.onnx"
}
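Each key in the pipeline map is a regular expression matched against node-name prefixes, and the two-element value presumably assigns [pipeline stage, virtual graph]; the consumer of this file is not shown in this diff, so that reading is an assumption. A minimal sketch of how such a table could be resolved against ONNX node names (all names here are hypothetical):

// Hypothetical sketch: resolve a node name against the "pipeline" table
// above. The {stage, virtual graph} interpretation of the pair is an
// assumption, not confirmed by this diff.
#include <iostream>
#include <regex>
#include <string>
#include <vector>

struct StageRule {
  std::regex pattern;  // e.g. "^layer1[2-9]_"
  int stage;           // first element of the JSON pair
  int virtual_graph;   // second element of the JSON pair
};

static const std::vector<StageRule> kRules = {
    {std::regex("^embedding_"), 0, 0},   {std::regex("^layer[0-9]_"), 0, 0},
    {std::regex("^layer1[0-1]_"), 0, 0}, {std::regex("^layer1[2-9]_"), 1, 1},
    {std::regex("^layer2[0-3]_"), 1, 1}, {std::regex("^squad_"), 1, 1}};

// Returns the first rule whose anchored regex matches the node name.
const StageRule* FindStage(const std::string& node_name) {
  for (const auto& rule : kRules)
    if (std::regex_search(node_name, rule.pattern)) return &rule;
  return nullptr;  // node not assigned to any pipeline stage
}

int main() {
  if (const StageRule* r = FindStage("layer13_attention/matmul"))
    std::cout << "stage " << r->stage << ", vgraph " << r->virtual_graph
              << "\n";
}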
@@ -1,27 +1,31 @@
 CXX ?= g++
-CXXFLAGS = -std=c++14 -fPIC -g -DONNX_NAMESPACE=onnx -D_GLIBCXX_USE_CXX11_ABI=0
+CXXFLAGS = -std=c++14 -fPIC -g -DONNX_NAMESPACE=onnx
 LDLIBS = -shared -lpopart -lpoplar -lpopops -lpoputil
-INCLUDES = -Iinclude
+INCLUDES = -Iinclude -Ithird_party/onnx/ -Ithird_party/include
 
 BUILD_DIR = build
-SOURCES = rsqrt.cc erf.cc
+SOURCES = rsqrt.cc erf.cc postprocess.cc attention_mask.cc
 TARGET = $(BUILD_DIR)/libcustom_ops.so
 
-all: create_build_dir rsqrt_custom_op rsqrt_test erf_test
+all: create_build_dir rsqrt_custom_op rsqrt_test attention_mask_test
 
 .PHONY: create_build_dir
 create_build_dir:
 	mkdir -p $(BUILD_DIR)
 
-rsqrt_custom_op: rsqrt.cc erf.cc
+rsqrt_custom_op: ${SOURCES}
 	$(CXX) $(SOURCES) $(LDLIBS) $(CXXFLAGS) $(INCLUDES) -o $(TARGET)
 
 rsqrt_test: rsqrt_test.cc rsqrt_custom_op
-	$(CXX) -std=c++14 rsqrt_test.cc -lpopart -lpoplar -lpopops -ldl -DONNX_NAMESPACE=onnx -o rsqrt_test -D_GLIBCXX_USE_CXX11_ABI=0
+	$(CXX) -std=c++14 rsqrt_test.cc -lpopart -lpoplar -lpopops -ldl -DONNX_NAMESPACE=onnx -o rsqrt_test
 
-erf_test: erf_test.cc rsqrt_custom_op
-	$(CXX) -std=c++14 erf_test.cc -lpopart -lpoplar -lpopops -ldl -DONNX_NAMESPACE=onnx -o erf_test -D_GLIBCXX_USE_CXX11_ABI=0
+#erf_test: erf_test.cc rsqrt_custom_op
+#	$(CXX) -std=c++14 erf_test.cc -lpopart -lpoplar -lpopops -ldl -DONNX_NAMESPACE=onnx -o erf_test
+
+attention_mask_test: attention_mask_test.cc rsqrt_custom_op
+#	$(CXX) $(LDLIBS) $(CXXFLAGS) $(INCLUDES) -o attention_mask_test
+	$(CXX) -std=c++14 -fPIC -g -DONNX_NAMESPACE=onnx attention_mask_test.cc -lpopart -lpoplar -lpopops -ldl -o attention_mask_test
 
 .PHONY: clean
 clean:
-	rm -r $(BUILD_DIR) rsqrt_test erf_test
+	rm -r $(BUILD_DIR) rsqrt_test attention_mask_test
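The shared library produced by the rsqrt_custom_op target registers its ops through static OpCreator/OpxCreator initializers, so a host program only needs to load it before building a PopART session. A minimal sketch, assuming the TARGET path above:

// Load build/libcustom_ops.so so its static op registrations run before any
// popart session is constructed. Path and error handling are illustrative.
#include <dlfcn.h>
#include <cstdio>
#include <cstdlib>

int main() {
  void* handle = dlopen("build/libcustom_ops.so", RTLD_NOW | RTLD_GLOBAL);
  if (handle == nullptr) {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return EXIT_FAILURE;
  }
  // ... build the popart session here; AttentionMask et al. are now
  // registered and visible to the op manager.
  return EXIT_SUCCESS;
}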
ODLA/platforms/odla_popart/custom_ops/attention_mask.cc (144 additions, 0 deletions)
@@ -0,0 +1,144 @@
// Copyright (c) 2019 Graphcore Ltd. All rights reserved.

#include <iostream>
#include <popart/names.hpp>
#include <popart/op.hpp>
#include <popart/opmanager.hpp>
#include <popart/popx/devicex.hpp>
#include <popart/popx/opx.hpp>
#include <popart/popx/opxmanager.hpp>
#include <popart/region.hpp>
#include <popart/shapeinference.hpp>
#include <popops/Cast.hpp>
#include <popops/ElementWise.hpp>
#include <popops/Rearrange.hpp>
#include <poputil/TileMapping.hpp>
#include <random>

using namespace popart;
using namespace popart::popx;
using namespace popops::expr;

namespace CustomOperators {
const popart::OperatorIdentifier AttentionMask = {"ai.graphcore",
                                                  "AttentionMask", 1};
} // namespace CustomOperators

// Builds an additive attention mask from a sequence-index tensor. The op
// returns no grad ops, which also disconnects the flow of gradients when
// creating the backwards pass.
class AttentionMaskOp : public popart::Op {
 public:
  poplar::Type dataType;

  AttentionMaskOp(const popart::OperatorIdentifier& _opid,
                  const Op::Settings& settings_, poplar::Type& dataTypeIn)
      : Op(_opid, settings_), dataType(dataTypeIn) {}

  void setup() final {
    // input shape [B, S]
    Shape inShape = inInfo(0).shape();
    Shape refShape = inInfo(1).shape();

    // output shape [B, 1, S, S]
    Shape outShape = {inShape.at(0), 1, inShape.at(1), inShape.at(1)};

    if (dataType == poplar::HALF)
      outInfo(0) = {"FLOAT16", outShape};
    else
      outInfo(0) = {"FLOAT", outShape};
  }

  std::unique_ptr<Op> clone() const final {
    return std::make_unique<AttentionMaskOp>(*this);
  }

  float getSubgraphValue() const final { return getLowSubgraphValue(); }
};

static popart::OpDefinition attentionMaskOpDef({});

static popart::OpCreator<AttentionMaskOp> attentionMaskOpCreator(
    popart::OpDefinitions({{CustomOperators::AttentionMask,
                            attentionMaskOpDef}}),
    [](const popart::OpCreatorInfo& oci) -> std::unique_ptr<popart::Op> {
      std::string type =
          oci.attributes.getAttribute<Attributes::String>("dataType");
      poplar::Type dataType = (type == "FLOAT") ? poplar::FLOAT : poplar::HALF;

      return std::unique_ptr<AttentionMaskOp>(
          new AttentionMaskOp(oci.opid, oci.settings, dataType));
    },
    true);

class AttentionMaskOpX : public popart::popx::Opx {
 public:
  AttentionMaskOpX(popart::Op* op, popart::popx::Devicex* devicex)
      : popart::popx::Opx(op, devicex) {
    verifyOp<AttentionMaskOp>(op, CustomOperators::AttentionMask);
  }

  popart::popx::InputCreatorType getInputCreatorType(popart::InIndex) const {
    return popart::popx::InputCreatorType::CanUnwind;
  }

  poplar::Tensor unwindTensorLayout(poplar::Tensor tensor, popart::InIndex,
                                    popart::OutIndex) const {
    return tensor;
  }

  popart::view::RegMap unwindRegion(popart::InIndex, popart::OutIndex) const {
    return [this](const popart::view::Region& r) {
      return popart::view::Regions(1, r);
    };
  }

  void grow(poplar::program::Sequence& prog) const final {
    AttentionMaskOp& myOp = getOp<AttentionMaskOp>();

    poplar::Type dataType = myOp.dataType;
    poplar::Graph& graph = Opx::graph();
    // input tensor shape [B, S]
    poplar::Tensor seqIndex = getInTensor(0);
    std::size_t batchSize = seqIndex.dim(0);
    std::size_t seqLength = seqIndex.dim(1);
    seqIndex = seqIndex.reshape({batchSize, seqLength, 1});
    seqIndex = popops::cast(graph, seqIndex, dataType, prog, "input_mask_f");
    poplar::Tensor attentionMatrix = getInTensor(1);

    // seqMask[b, i, j] = tanh(|seqIndex[b, i] - seqIndex[b, j]|): close to 1
    // where the two positions carry different index values, 0 where they
    // match. The subtraction order is chosen to match the detected
    // tile-mapping dim grouping of the attention matrix.
    const auto dimOrdering =
        poputil::detectDimGroupings(graph, attentionMatrix);
    bool swapOrder = !dimOrdering.empty() && dimOrdering.front().first == 2;
    auto seqMask =
        swapOrder ? popops::sub(graph, seqIndex.dimShuffle({0, 2, 1}), seqIndex,
                                prog, "maskVal")
                        .dimShuffle({0, 2, 1})
                  : popops::sub(graph, seqIndex, seqIndex.dimShuffle({0, 2, 1}),
                                prog, "maskVal");
    popops::absInPlace(graph, seqMask, prog);
    popops::tanhInPlace(graph, seqMask, prog);

    // Create constant tensor on a randomly chosen tile.
    std::mt19937 randomEngine;
    unsigned totalTile = graph.getTarget().getTilesPerIPU();
    std::uniform_int_distribution<> distrib(0, totalTile - 1);
    int tileForConst = distrib(randomEngine);
    poplar::Tensor minValue = graph.addConstant(dataType, {}, -10000.0);
    graph.setTileMapping(minValue, tileForConst);

    // Create log mask: scale by -10000 so masked positions become large
    // negative values when added to the attention scores.
    popops::mulInPlace(graph, seqMask, minValue, prog);
    seqMask = seqMask.reshape({batchSize, 1, seqLength, seqLength});
    setOutTensor(0, seqMask);
  }
};

static popart::popx::OpxCreator<AttentionMaskOpX> attentionMaskOpxCreator(
    CustomOperators::AttentionMask);

static popart::RegisterShapeInferenceFunction AttentionMaskShapeInfer(
    CustomOperators::AttentionMask, [](ShapeInferenceContext& ctx) {
      auto B = ctx.inInfo(1).shape().at(0);
      auto S = ctx.inInfo(1).shape().at(3);
      auto dtype = ctx.inInfo(1).data_type();
      ctx.outInfo(0) = {dtype, Shape({B, 1, S, S})};
    });
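For reference, a hedged sketch of how a graph built with popart::Builder might invoke this op. The input shapes follow the comments above (a [B, S] index tensor plus a 4-D reference tensor, per the shape-inference function); attention_mask_test.cc in this commit is the authoritative usage, but its body is not shown here:

// Sketch: wire the AttentionMask custom op into a popart graph. The helper
// name and tensor arguments are illustrative assumptions.
#include <popart/builder.hpp>

popart::TensorId addAttentionMask(popart::Builder& builder,
                                  const popart::TensorId& seqIndex,    // [B, S]
                                  const popart::TensorId& attention) { // [B, 1, S, S]
  const popart::OperatorIdentifier attentionMask("ai.graphcore",
                                                 "AttentionMask", 1);
  // "dataType" selects FLOAT vs. HALF in the OpCreator above.
  auto outs = builder.customOp(attentionMask, /*opsetVersion=*/1,
                               {seqIndex, attention}, /*numOutputs=*/1,
                               {{"dataType", std::string("FLOAT")}});
  return outs.at(0);  // mask tensor of shape [B, 1, S, S]
}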