Odla_trt optimized and model test #499
base: master
@@ -20,8 +20,11 @@
#include <NvInferRuntime.h>
#include <ODLA/odla.h>
#include <bits/stdint-intn.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <time.h>
#include <ctime>
#include <cassert>
#include <cmath>
#include <cstddef>
@@ -31,10 +34,12 @@
#include <numeric>
#include <unordered_map>
#include <vector>
#include <mutex>

#include "plugins/initPlugin.h"

using namespace nvinfer1;
using namespace std;

#if !defined(ODLA_VERSION_NUMBER) || (ODLA_VERSION_NUMBER < 50)
#error This library requires minimum ODLA version 0.5
@@ -173,8 +178,7 @@ struct _odla_computation {
      network = builder->createNetworkV2(flags);
#endif
    }
  }

  }
  ~_odla_computation() {
    if (!load_engine_mode) {
      builder->destroy();
@@ -189,6 +193,8 @@ struct _odla_context {
  odla_computation comp = nullptr;
  nvinfer1::ICudaEngine* engine = nullptr;
  nvinfer1::IExecutionContext* ctx = nullptr;
  void* temp_input_ptr = nullptr;
  void* temp_output_ptr = nullptr;
#if NV_TENSORRT_MAJOR >= 7
  nvinfer1::IBuilderConfig* builder_cfg = nullptr;
  nvinfer1::IOptimizationProfile* builder_profile = nullptr;
@@ -210,6 +216,7 @@ struct _odla_context {
  std::unordered_map<std::string, InputPtrInfo> input_ptrs;

  int run_batch_size = 0;
  // CUdeviceptr cumemalloc_address;

Review comment: please remove

  _odla_context(odla_computation comp) : comp(comp) {
    if (!comp->load_engine_mode) {
#if NV_TENSORRT_MAJOR < 7
@@ -238,7 +245,6 @@ struct _odla_context {
      builder_cfg->addOptimizationProfile(builder_profile);
    }
    builder_cfg->setMaxWorkspaceSize(comp->max_workspace_size);

    if (comp->fp16_mode) {
      builder_cfg->setFlag(BuilderFlag::kFP16);
      builder_cfg->setFlag(BuilderFlag::kSTRICT_TYPES);
@@ -509,13 +515,18 @@ odla_status odla_SetContextItem(odla_context context, odla_item_type type,
  switch (type) {
    case ODLA_RUN_BATCH_SIZE:
      context->run_batch_size = *(reinterpret_cast<int*>(value));
      // odla_value_shape real_shape = value->type.shape;
      // size_t bytes =
      //     GetTotalElements(real_shape) * GetElementSize(value->type.element_type);
      // CUdeviceptr dev_ptr;
      // CHECK(cuMemAlloc(&dev_ptr, bytes));

Review comment: remove dead code

      // context->cumemalloc_address = dev_ptr;
      break;

    default:
      std::cerr << "Unsupported property type: " << type << std::endl;
      return ODLA_FAILURE;
  }

  return ODLA_SUCCESS;
}
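For context, a minimal caller-side sketch of how the new ODLA_RUN_BATCH_SIZE item would be used (illustrative only; the implementation above reads the value through reinterpret_cast<int*>, and the odla_item_value cast is an assumption about ODLA's opaque-pointer convention):

```cpp
// Hypothetical caller: announce the batch size used by subsequent
// odla_BindToArgument/odla_ExecuteComputation calls on this context.
int batch = 8;
odla_SetContextItem(ctx, ODLA_RUN_BATCH_SIZE,
                    reinterpret_cast<odla_item_value>(&batch));
```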
@@ -534,7 +545,15 @@ odla_value odla_CreateArgument(odla_value_type type, const odla_value_id id) {
  auto input = g_comp->network->addInput(name, GetNVDataType(type.element_type),
                                         GetNVDims(type.shape));
  odla_value v = CreateValue(input, type, id);
  g_comp->inputs[name] = v;
  g_comp->inputs[name] = v; // inputs[input] = v
  // odla_value_shape real_shape = v->type.shape;
  // std::cerr << "odla_value_shape:" << real_shape << "\n";
  // size_t bytes =
  //     GetTotalElements(real_shape) * GetTotalElements(v->type.element_type);
  // CHECK(cudaMalloc(&dev_ptr, bytes));

Review comment: same here

  // void* validated_data_ptr =
  //     ValidateValuePtr(value->type, const_cast<void*>(data_ptr));
  // // CHECK(cudaMemcpy(dev_ptr, ))
  g_comp->input_vals.push_back(v);
  return v;
}
@@ -576,7 +595,7 @@ odla_status odla_SetValueAsOutput(const odla_value val) {
  val->tensor->setName(name);
  g_comp->network->markOutput(*val->tensor);
  return ODLA_SUCCESS;
}
}
odla_status odla_GetNumOfOutputsFromComputation(
    const odla_computation computation, odla_uint32* num_outputs) {
  *num_outputs = computation->output_vals.size();

@@ -594,22 +613,43 @@ odla_status odla_GetOutputFromComputationByIdx(
  return ODLA_SUCCESS;
}
// Note: this runs every time a batch is executed.
odla_status odla_BindToArgument(odla_value value, const odla_void* data_ptr,
                                odla_context context) {
  // CUdeviceptr dev_ptr;
  clock_t startTime, endTime;
  void* dev_ptr = nullptr;
  odla_value_shape real_shape = value->type.shape;
  if ((g_comp && g_comp->is_dynamic_batch) || context->run_batch_size) {
    real_shape.dims[0] = context->run_batch_size;
  }
  size_t bytes =
      GetTotalElements(real_shape) * GetElementSize(value->type.element_type);
  CHECK(cudaMalloc(&dev_ptr, bytes));
  // CHECK(cuMemAlloc(&dev_ptr, bytes));
  // CHECK(cudaMalloc(&dev_ptr, bytes));
  // Check whether a device buffer was already cudaMalloc'ed; if so, copy
  // the incoming data to that existing address.
  // CUdeviceptr dev_ptr = context->cumemalloc_addres;
  // std::cerr << "context->temp_input_ptr:" << context->temp_input_ptr << "\n";
  if (context->temp_input_ptr == nullptr) {
    CHECK(cudaMalloc(&(context->temp_input_ptr), bytes));
  }
  dev_ptr = context->temp_input_ptr;
  void* validated_data_ptr =
      ValidateValuePtr(value->type, const_cast<void*>(data_ptr));
  // void* pagelocked_buffer = context->input_ptrs[value->name].host_ptr;
  // startTime = clock();
  // CHECK(cuMemcpyHtoD(dev_ptr, validated_data_ptr, bytes));
  CHECK(cudaMemcpy(dev_ptr, validated_data_ptr, bytes, cudaMemcpyHostToDevice));

  // endTime = clock();
  // std::cout << "the run time is:" << (double)(endTime - startTime) / CLOCKS_PER_SEC << "s" << std::endl;
  // std::ofstream outf;
  // outf.open("odla_cudamemcpy_times.txt", std::ios::app);
  // outf << (double)(endTime - startTime) / CLOCKS_PER_SEC << std::endl;
  // outf.close();
  // void* dev1_ptr;
  // dev1_ptr = (void*)dev_ptr;
  // CHECK(cudaMemcpy(dev_ptr, validated_data_ptr, bytes, cudaMemcpyHostToDevice));
  context->input_ptrs[value->name] = {.host_ptr = data_ptr, .dev_ptr = dev_ptr};

  return ODLA_SUCCESS;
}
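The substantive change in this hunk is the lazy reuse of a single device buffer across batches. A minimal sketch of the pattern in isolation (struct and function names are illustrative, and it assumes `bytes` never grows after the first call, since the buffer is sized exactly once):

```cpp
#include <cuda_runtime.h>
#include <cassert>
#include <cstddef>

// Stand-in for the CHECK macro used in the file above.
#define CHECK(call) assert((call) == cudaSuccess)

struct Context {
  void* temp_input_ptr = nullptr; // device buffer, allocated on first use
};

// Copy one host buffer to the device. cudaMalloc runs only on the first
// call; later batches reuse the cached allocation instead of paying a
// malloc/free pair on every run.
void* bind_input(Context* ctx, const void* host_ptr, size_t bytes) {
  if (ctx->temp_input_ptr == nullptr) {
    CHECK(cudaMalloc(&ctx->temp_input_ptr, bytes));
  }
  CHECK(cudaMemcpy(ctx->temp_input_ptr, host_ptr, bytes,
                   cudaMemcpyHostToDevice));
  return ctx->temp_input_ptr;
}
```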
@@ -623,15 +663,19 @@ odla_status odla_BindToArgumentById(const odla_value_id value_id,

odla_status odla_BindToOutput(odla_value value, odla_void* data_ptr,
                              odla_context context) {
  // CUdeviceptr dst;
  void* dst = nullptr;
  odla_value_shape real_shape = value->type.shape;
  if ((g_comp && g_comp->is_dynamic_batch) || context->run_batch_size) {
    real_shape.dims[0] = context->run_batch_size;
  }
  size_t bytes =
      GetTotalElements(real_shape) * GetElementSize(value->type.element_type);

  CHECK(cudaMalloc(&dst, bytes));
  if (context->temp_output_ptr == nullptr) {
    CHECK(cudaMalloc(&(context->temp_output_ptr), bytes));
  }
  dst = context->temp_output_ptr;
  // CHECK(cudaMalloc(&dst, bytes));

  context->output_ptrs[value->name] = {
      .host_ptr = data_ptr, .dev_ptr = dst, .len = bytes, .vt = value->type};
@@ -852,6 +896,9 @@ odla_status odla_GetValueType(const odla_value value,
odla_status odla_ExecuteComputation(odla_computation comp, odla_context context,
                                    odla_compute_mode mode,
                                    odla_device device) {

  // clock_t startTime, endTime;

  std::vector<void*> buffers;
  auto add_to_buffer = [&](const std::string& name, void* ptr) {
    int idx = context->engine->getBindingIndex(name.c_str());

@@ -863,9 +910,14 @@ odla_status odla_ExecuteComputation(odla_computation comp, odla_context context,
    }
  };
  for (auto& kv : context->input_ptrs) {
    add_to_buffer(kv.first, kv.second.dev_ptr);
    // void* kv_second_devptr;
    // kv_second_devptr = (void*)kv.second.dev_ptr;
    add_to_buffer(kv.first, kv.second.dev_ptr); // kv.first: input, kv.second.dev_ptr: 0x7f7698600000
  }
  for (auto& kv : context->output_ptrs) {
    // void* kv_second_devptr;
    // kv_second_devptr = (void*)kv.second.dev_ptr;
    // add_to_buffer(kv.first, kv_second_devptr);
    add_to_buffer(kv.first, kv.second.dev_ptr);
  }
  if (comp->is_dynamic_batch) {
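As background for add_to_buffer: TensorRT's execute/enqueue APIs consume one flat array of device pointers ordered by binding slot, so each named tensor must land at the index the engine assigns it. A short sketch of that mapping (the engine, context, and pointer names are assumed for illustration):

```cpp
// One device pointer per binding slot; slot order is fixed by the engine.
std::vector<void*> bindings(engine->getNbBindings(), nullptr);
// Resolve the slot for a tensor by the name it was given at build time.
int idx = engine->getBindingIndex("input");
bindings[idx] = input_dev_ptr; // device buffer for that slot
// The execution context then runs against this binding table:
// ctx->executeV2(bindings.data());
```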
@@ -894,12 +946,10 @@ odla_status odla_ExecuteComputation(odla_computation comp, odla_context context,
                     cudaMemcpyDeviceToHost));
    }
  }

  // copy results and free temp buffers.
  for (auto& ptr : buffers) {
    CHECK(cudaFree(ptr));
  }

  // for (auto& ptr : buffers) {
  //   CHECK(cudaFree(ptr));
  // }
  context->input_ptrs.clear();
  context->output_ptrs.clear();
  return ODLA_SUCCESS;
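With the per-run cudaFree loop commented out, the cached temp_input_ptr/temp_output_ptr allocations now outlive each execution. A hedged sketch of where they could be released, assuming the destructor is the intended owner (this change is not part of the diff):

```cpp
// Hypothetical addition to ~_odla_context(): free the cached device
// buffers exactly once, when the context itself is torn down.
~_odla_context() {
  if (temp_input_ptr != nullptr) CHECK(cudaFree(temp_input_ptr));
  if (temp_output_ptr != nullptr) CHECK(cudaFree(temp_output_ptr));
}
```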
@@ -2,7 +2,7 @@
export SRC_DIR=${PWD}/..
export BUILD_DIR=$SRC_DIR/build
export MODELS_SRC=/models

export MODELS_ROOT=/models

Review comment: why add a new variable?

export HALO_BIN=$BUILD_DIR/bin/halo
export ODLA_INC=$SRC_DIR/ODLA/include
export ODLA_LIB=$BUILD_DIR/lib
@@ -12,17 +12,20 @@ fi

if [[ $TEST_WITH_GPU -eq 1 ]]; then
echo "======== Testing with ODLA TensorRT ========"
for i in 1 2 4 8 16 32 64
do

Review comment: where is "i" used?

python3 $curr_dir/../../invoke_halo.py --model $model_file \
  --label-file $curr_dir/../1000_labels.txt --image-dir $image_dir \
  --odla tensorrt | tee $1
done
# RUN: FileCheck --input-file %t.1 %s
fi
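The reviewer's question flags a real gap: the loop body never references $i, so every iteration runs an identical command. A plausible fix, mirroring the --batch_size flag that the efficientnet script later in this PR passes to invoke_halo.py (a sketch, not the author's code):

```bash
for i in 1 2 4 8 16 32 64
do
  # Forward the loop variable so each iteration actually tests that batch size.
  python3 $curr_dir/../../invoke_halo.py --batch_size $i --model $model_file \
    --label-file $curr_dir/../1000_labels.txt --image-dir $image_dir \
    --odla tensorrt | tee $1
done
```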
# Using HALO to compile and run inference with ODLA XNNPACK
echo "======== Testing with ODLA DNNL ========"
python3 $curr_dir/../../invoke_halo.py --model $model_file \
  --label-file $curr_dir/../1000_labels.txt --image-dir $image_dir \
  --odla dnnl | tee $2
# # Using HALO to compile and run inference with ODLA XNNPACK
# echo "======== Testing with ODLA DNNL ========"
# python3 $curr_dir/../../invoke_halo.py --model $model_file \
#   --label-file $curr_dir/../1000_labels.txt --image-dir $image_dir \
#   --odla dnnl | tee $2
# RUN: FileCheck --input-file %t.2 %s

# CHECK: dog.jpg ==> "wallaby, brush kangaroo",
@@ -0,0 +1,29 @@
#!/bin/bash
# RUN: %s
model_name="alexnet"
docker_model_file="/models/vision/classification/$model_name"

Review comment: use $MODELS_ROOT

model_file="$docker_model_file/$model_name"".onnx"
image_dir="/models/vision/test_images"
curr_dir=`dirname $0`

# # Download the model if it does not exist
# if [ ! -e $model_file ]; then
#   $curr_dir/../get_cls_model_from_pytorch.py $model_name $model_file
# fi

# Download sample images if they do not exist
# $curr_dir/../../get_images.sh $image_dir

echo "=======Testing alexnet with TensorRT======="
python3 $curr_dir/../../onnx2tensorrt.py --model $model_file \
  --label-file $curr_dir/../1000_labels.txt

# if [[ $TEST_WITH_GPU -eq 1 ]]; then
#   echo "======== Testing with ODLA TensorRT ========"
#   python3 $curr_dir/../../invoke_halo.py --model $model_file --image-dir $image_dir --odla tensorrt
# fi

# # Using HALO to compile and run inference with ODLA XNNPACK
# echo "======== Testing with ODLA DNNL ========"
# python3 $curr_dir/../../invoke_halo.py --model $model_file --label-file $curr_dir/../1000_labels.txt --image-dir $image_dir --odla dnnl
This file was deleted.
@@ -0,0 +1,29 @@
#!/bin/bash
# RUN: %s
model_name="densenet"
docker_model_file="/models/vision/classification/$model_name"
model_file="$docker_model_file/$model_name""121.onnx"
image_dir="/models/vision/test_images"
curr_dir=`dirname $0`

# # Download the model if it does not exist
# if [ ! -e $model_file ]; then
#   $curr_dir/../get_cls_model_from_pytorch.py $model_name $model_file
# fi

# Download sample images if they do not exist
# $curr_dir/../../get_images.sh $image_dir
for i in 1 2 4 8 16 32 64
do
echo "=======Testing densenet with TensorRT======="
python3 $curr_dir/../../onnx2tensorrt.py --model $model_file --label-file $curr_dir/../1000_labels.txt --batch_size $i
done

# if [[ $TEST_WITH_GPU -eq 1 ]]; then
#   echo "======== Testing with ODLA TensorRT ========"
#   python3 $curr_dir/../../invoke_halo.py --model $model_file --image-dir $image_dir --odla tensorrt
# fi

# # Using HALO to compile and run inference with ODLA XNNPACK
# echo "======== Testing with ODLA DNNL ========"
# python3 $curr_dir/../../invoke_halo.py --model $model_file --label-file $curr_dir/../1000_labels.txt --image-dir $image_dir --odla dnnl
@@ -0,0 +1,35 @@
#!/bin/bash
# RUN: %s %t.1 %t.2

model_name="efficientnet"
model_file="$MODELS_ROOT/vision/classification/efficientnet/$model_name-lite4-11.onnx"
image_dir="$MODELS_ROOT/vision/test_images"
if [[ $# != 0 ]]; then
  export TEST_TEMP_DIR=`dirname $1`
fi

curr_dir=`dirname $0`

# Check whether GPU testing is enabled
if [[ $TEST_WITH_GPU -eq 1 ]]; then
echo "======== Testing with ODLA TensorRT ========"
for i in 1
do
python3 $curr_dir/../../invoke_halo.py --batch_size $i --model $model_file \
  --label-file $curr_dir/../1000_labels.txt --image-dir $image_dir \
  --odla tensorrt --convert-layout-to=nhwc | tee $1
done
# RUN: FileCheck --input-file %t.1 %s
fi

# Using HALO to compile and run inference with ODLA XNNPACK
echo "======== Testing with ODLA XNNPACK (NHWC) ========"
python3 $curr_dir/../../invoke_halo.py --model $model_file \
  --label-file $curr_dir/../1000_labels.txt --image-dir $image_dir \
  --odla xnnpack --convert-layout-to=nhwc | tee $2
# RUN: FileCheck --input-file %t.2 %s

# CHECK: dog.jpg ==> "Samoyed, Samoyede",
# CHECK-NEXT: food.jpg ==> "ice cream, icecream",
# CHECK-NEXT: plane.jpg ==> "liner, ocean liner",
# CHECK-NEXT: sport.jpg ==> "ski",
Review comment: what if there are multiple inputs/outputs?
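One way to address this, sketched below: cache one device buffer per bound value name instead of the single temp_input_ptr/temp_output_ptr pair. The struct and helper names are hypothetical, not part of this PR:

```cpp
#include <cuda_runtime.h>
#include <string>
#include <unordered_map>

// Hypothetical replacement for the single cached pointers in _odla_context:
// one lazily allocated device buffer per bound input/output name.
struct ContextBuffers {
  std::unordered_map<std::string, void*> dev_ptrs;

  // Return the cached buffer for `name`, allocating it on first use.
  void* get_or_alloc(const std::string& name, size_t bytes) {
    auto it = dev_ptrs.find(name);
    if (it == dev_ptrs.end()) {
      void* p = nullptr;
      cudaMalloc(&p, bytes); // error handling omitted for brevity
      it = dev_ptrs.emplace(name, p).first;
    }
    return it->second;
  }

  // Release every cached buffer when the context goes away.
  ~ContextBuffers() {
    for (auto& kv : dev_ptrs) cudaFree(kv.second);
  }
};
```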