Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
Merge pull request #35 from ROCmSoftwarePlatform/rocrand
Browse files: browse the repository at this point in the history
Rocrand
  • Loading branch information
sunway513 authored Feb 8, 2018
2 parents 2bab3f0 + 117f540 commit 0a68b83
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 468 deletions.
4 changes: 3 additions & 1 deletion include/caffe/internal_thread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace caffe {
*/
class InternalThread {
public:
InternalThread() : thread_() {}
InternalThread() : device_(0), thread_() {}
virtual ~InternalThread();

/**
Expand All @@ -34,6 +34,8 @@ class InternalThread {
bool is_started() const;

protected:
int device_;

/* Override hook: implement this method in your subclass
with the code you want your thread to run.
The default implementation provided here has an empty body, so a subclass
that does not override it contributes no work through this method. */
virtual void InternalThreadEntry() {}
Expand Down
37 changes: 10 additions & 27 deletions include/caffe/layers/cudnn_conv_layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ namespace caffe {
template <typename Dtype>
class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
public:
explicit CuDNNConvolutionLayer(const LayerParameter& param)
: ConvolutionLayer<Dtype>(param), handles_setup_(false) {}
explicit CuDNNConvolutionLayer(const LayerParameter& param);

virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
Expand All @@ -43,49 +43,32 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);


bool handles_setup_;

#ifdef USE_MIOPEN
miopenHandle_t* handle_;
hipStream_t* stream_;

// algorithms for forward and backwards convolutions
miopenConvFwdAlgorithm_t* fwd_algo_;
miopenConvBwdWeightsAlgorithm_t* bwd_weight_algo_;
miopenConvBwdDataAlgorithm_t* bwd_data_algo_;
vector<miopenConvFwdAlgorithm_t> fwd_algo_;
vector<miopenConvBwdWeightsAlgorithm_t> bwd_weight_algo_;
vector<miopenConvBwdDataAlgorithm_t> bwd_data_algo_;

vector<miopenTensorDescriptor_t> bottom_descs_, top_descs_;
miopenTensorDescriptor_t bias_desc_;
miopenTensorDescriptor_t filter_desc_;
vector<miopenConvolutionDescriptor_t> conv_descs_;

int N_, C_, W_, H_;
#endif

#ifdef USE_CUDNN
cudnnHandle_t* handle_;
cudaStream_t* stream_;

// algorithms for forward and backwards convolutions
cudnnConvolutionFwdAlgo_t *fwd_algo_;
cudnnConvolutionBwdFilterAlgo_t *bwd_filter_algo_;
cudnnConvolutionBwdDataAlgo_t *bwd_data_algo_;

vector<cudnnTensorDescriptor_t> bottom_descs_, top_descs_;
cudnnTensorDescriptor_t bias_desc_;
cudnnFilterDescriptor_t filter_desc_;
vector<cudnnConvolutionDescriptor_t> conv_descs_;
miopenHandle_t handle_;
#endif

int bottom_offset_, top_offset_, bias_offset_;

size_t *workspace_fwd_sizes_;
size_t *workspace_bwd_data_sizes_;
size_t *workspace_bwd_filter_sizes_;
vector<size_t> workspace_fwd_sizes_;
vector<size_t> workspace_bwd_filter_sizes_;
vector<size_t> workspace_bwd_data_sizes_;
size_t workspaceSizeInBytes; // size of underlying storage
void *workspaceData; // underlying storage
void **workspace; // aliases into workspaceData
vector<void*> workspace; // aliases into workspaceData
};
#endif

Expand Down
8 changes: 3 additions & 5 deletions src/caffe/internal_thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,15 @@ bool InternalThread::must_stop() {

void InternalThread::StartInternalThread() {
CHECK(!is_started()) << "Threads should persist and not be restarted.";
LOG(INFO) << "Starting internal thread on device " << device_;

int device = 0;
#ifndef CPU_ONLY
HIP_CHECK(hipGetDevice(&device));
#endif
Caffe::Brew mode = Caffe::mode();
int rand_seed = caffe_rng_rand();
int solver_count = Caffe::solver_count();
bool root_solver = Caffe::root_solver();

try {
thread_.reset(new boost::thread(&InternalThread::entry, this, device, mode,
thread_.reset(new boost::thread(&InternalThread::entry, this, device_, mode,
rand_seed, solver_count, root_solver));
} catch (std::exception& e) {
LOG(FATAL) << "Thread exception: " << e.what();
Expand All @@ -40,6 +37,7 @@ void InternalThread::StartInternalThread() {

void InternalThread::entry(int device, Caffe::Brew mode, int rand_seed,
int solver_count, bool root_solver) {
LOG(INFO) << "Started internal thread on device " << device;
#ifndef CPU_ONLY
HIP_CHECK(hipSetDevice(device));
#endif
Expand Down
8 changes: 3 additions & 5 deletions src/caffe/layers/base_data_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,7 @@ void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
hipStream_t stream;
if (Caffe::mode() == Caffe::GPU) {
HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
}
hipStream_t stream = nullptr;
#endif

try {
Expand All @@ -98,7 +95,8 @@ void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
}
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
HIP_CHECK(hipStreamDestroy(stream));
if (stream != nullptr)
HIP_CHECK(hipStreamDestroy(stream));
}
#endif
}
Expand Down
Loading

0 comments on commit 0a68b83

Please sign in to comment.