Skip to content

Commit

Permalink
Add param for high pri cuda stream for SingleGPUExecutor (#1501)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #1501

Adds a GFlag to select a high-priority CUDA stream for SingleGPUExecutor.

Differential Revision: D51222730

Privacy Context Container: L1138451

fbshipit-source-id: a8e6ec224b20c93bfa1675d4b80050549be729a7
  • Loading branch information
Ivan Kobzarev authored and facebook-github-bot committed Nov 12, 2023
1 parent 6fbeafe commit 9db35bb
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ class SingleGPUExecutor {
std::shared_ptr<ISingleGPUExecutorObserver> observer =
std::make_shared<EmptySingleGPUExecutorObserver>(),
c10::Device resultDevice = c10::kCPU,
size_t numProcessThreads = 1u);
size_t numProcessThreads = 1u,
bool useHighPriCudaStream = false);

// Moveable only
SingleGPUExecutor(SingleGPUExecutor&& executor) noexcept = default;
Expand All @@ -50,6 +51,7 @@ class SingleGPUExecutor {
const ExecInfos execInfos_;
const size_t numGpu_;
const size_t numProcessThreads_;
const bool useHighPriCudaStream_;
const c10::Device resultDevice_;
std::shared_ptr<ISingleGPUExecutorObserver> observer_;
folly::MPMCQueue<std::shared_ptr<PredictionBatch>> requests_;
Expand Down
8 changes: 5 additions & 3 deletions torchrec/inference/src/SingleGPUExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ SingleGPUExecutor::SingleGPUExecutor(
size_t numGpu,
std::shared_ptr<ISingleGPUExecutorObserver> observer,
c10::Device resultDevice,
size_t numProcessThreads)
size_t numProcessThreads,
bool useHighPriCudaStream)
: manager_(manager),
execInfos_(std::move(execInfos)),
numGpu_(numGpu),
numProcessThreads_(numProcessThreads),
useHighPriCudaStream_(useHighPriCudaStream),
resultDevice_(resultDevice),
observer_(observer),
requests_(kQUEUE_CAPACITY),
Expand Down Expand Up @@ -104,8 +106,8 @@ void SingleGPUExecutor::process() {
c10::InferenceMode inferenceModeGuard;
std::vector<c10::cuda::CUDAStream> streams;
for (size_t i = 0; i < numGpu_; ++i) {
streams.push_back(at::cuda::getStreamFromPool(
false /* isHighPriority */, i /* device */));
streams.push_back(
at::cuda::getStreamFromPool(useHighPriCudaStream_, i /* device */));
}
at::cuda::CUDAMultiStreamGuard streamGuard(streams);

Expand Down

0 comments on commit 9db35bb

Please sign in to comment.