Skip to content

Commit

Permalink
Add rocm reserve for gfx10*+ (#2643)
Browse files Browse the repository at this point in the history
  • Loading branch information
jayfurmanek authored Sep 3, 2024
1 parent 4e7b1c1 commit 44bf582
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions tensorflow/compiler/xla/stream_executor/rocm/rocm_driver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1272,13 +1272,19 @@ static tsl::StatusOr<T> GetSimpleAttribute(hipDevice_t device,
// (e.g. rocBLAS alone needs~200 MB to put its kernels as of ROCm 4.1)
const uint64_t RESERVED_GFX908 = 1048576 * 512;
const uint64_t RESERVED_GFX9_X = 1048576 * 1024;
const uint64_t RESERVED_GFX10_X = 1048576 * 512;
if (gcnArchName.substr(0, 6) == "gfx908") {
*reserve = RESERVED_GFX908;
} else if (gcnArchName.substr(0, 6) == "gfx90a" ||
gcnArchName.substr(0, 6) == "gfx940" ||
gcnArchName.substr(0, 6) == "gfx941" ||
gcnArchName.substr(0, 6) == "gfx942" ) {
*reserve = RESERVED_GFX9_X;
} else if (gcnArchName.substr(0, 6) == "gfx1030" ||
gcnArchName.substr(0, 6) == "gfx1100" ||
gcnArchName.substr(0, 6) == "gfx1200" ||
gcnArchName.substr(0, 6) == "gfx1201" ) {
*reserve = RESERVED_GFX10_X;
}
return true;
}
Expand Down

0 comments on commit 44bf582

Please sign in to comment.