From 7e6a57af3867a58993380dfc8485529a0f62467e Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 29 May 2024 09:15:02 +0200 Subject: [PATCH] Fix and test the legacy memory pool. (#2402) [skip julia] [skip cuda] [skip downstream] [skip subpackages] --- .buildkite/pipeline.yml | 19 +++++++++++++++++++ Project.toml | 2 +- src/memory.jl | 24 ++++++++++++++++-------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 9dedfd20d2..3e48faaf60 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -280,6 +280,25 @@ steps: if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip special\]/ && !build.pull_request.draft timeout_in_minutes: 30 + - label: "Legacy memory allocator" + plugins: + - JuliaCI/julia#v1: + version: "1.10" + - JuliaCI/julia-test#v1: + test_args: "--quickfail core base" + - JuliaCI/julia-coverage#v1: + dirs: + - src + - lib + - examples + agents: + queue: "juliagpu" + cuda: "*" + env: + JULIA_CUDA_MEMORY_POOL: 'none' + if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip special\]/ && !build.pull_request.draft + timeout_in_minutes: 30 + - label: "CuArray with {{matrix.memory}} memory" plugins: - JuliaCI/julia#v1: diff --git a/Project.toml b/Project.toml index ee731d97b3..db06456975 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "CUDA" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "5.4.1" +version = "5.4.2" [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" diff --git a/src/memory.jl b/src/memory.jl index dd0da9cd81..b2910a0a58 100644 --- a/src/memory.jl +++ b/src/memory.jl @@ -627,28 +627,36 @@ end @inline function _pool_alloc(::Type{DeviceMemory}, sz) state = active_state() - if stream_ordered(state.device) + mem = if stream_ordered(state.device) pool_mark!(state.device, true) pool = pool_create(state.device) - end - mem = let pool = pool # closure capture bug retry_reclaim(isnothing) do memory_limit_exceeded(sz) && return nothing # try the actual allocation try - if stream_ordered(state.device) - alloc(DeviceMemory, sz; async=true, state.stream, pool) - else - alloc(DeviceMemory, sz; async=false) - end + alloc(DeviceMemory, sz; async=true, state.stream, pool) + catch err + isa(err, OutOfGPUMemoryError) || rethrow() + return nothing + end + end + else + retry_reclaim(isnothing) do + memory_limit_exceeded(sz) && return nothing + + # try the actual allocation + try + alloc(DeviceMemory, sz; async=false) catch err isa(err, OutOfGPUMemoryError) || rethrow() return nothing end end end + # NOTE: the `retry_reclaim` body is duplicated to work around + # closure capture issues with the `pool` variable mem === nothing && throw(OutOfGPUMemoryError(sz)) account!(memory_stats(state.device), sz)