From 8a6a7d399ea99eb105a289061a02d3ef63512334 Mon Sep 17 00:00:00 2001 From: Roger-luo Date: Fri, 21 Oct 2022 21:30:08 +0000 Subject: [PATCH 1/6] start working on CUDA patch --- lib/BloqadeCUDA/Project.toml | 2 ++ lib/BloqadeCUDA/src/patch.jl | 3 +++ lib/BloqadeCUDA/src/sample.jl | 2 ++ lib/BloqadeCUDA/test/subspace.jl | 41 ++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+) create mode 100644 lib/BloqadeCUDA/src/sample.jl create mode 100644 lib/BloqadeCUDA/test/subspace.jl diff --git a/lib/BloqadeCUDA/Project.toml b/lib/BloqadeCUDA/Project.toml index 615dc94b2..64443424c 100644 --- a/lib/BloqadeCUDA/Project.toml +++ b/lib/BloqadeCUDA/Project.toml @@ -5,8 +5,10 @@ version = "0.1.1" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CuYao = "b48ca7a8-dd42-11e8-2b8e-1b7706800275" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +YaoSubspaceArrayReg = "bd27d05e-4ce1-5e79-84dd-c5d7d508ade2" [compat] Adapt = "3" diff --git a/lib/BloqadeCUDA/src/patch.jl b/lib/BloqadeCUDA/src/patch.jl index 413328919..c1342fd3d 100644 --- a/lib/BloqadeCUDA/src/patch.jl +++ b/lib/BloqadeCUDA/src/patch.jl @@ -1,3 +1,4 @@ +@static if !hasmethod(+, Tuple{Diagonal{T,<:CuArray}, CuSparseMatrixCSC{T}} where T) # https://github.com/JuliaGPU/CUDA.jl/issues/1469 for SparseMatrixType in [:CuSparseMatrixCSC, :CuSparseMatrixCSR] @@ -10,3 +11,5 @@ for SparseMatrixType in [:CuSparseMatrixCSC, :CuSparseMatrixCSR] end end end + +end \ No newline at end of file diff --git a/lib/BloqadeCUDA/src/sample.jl b/lib/BloqadeCUDA/src/sample.jl new file mode 100644 index 000000000..b764c8108 --- /dev/null +++ b/lib/BloqadeCUDA/src/sample.jl @@ -0,0 +1,2 @@ +function sample(weights::CuVector) +end diff --git a/lib/BloqadeCUDA/test/subspace.jl b/lib/BloqadeCUDA/test/subspace.jl new file mode 100644 index 000000000..73d430865 --- /dev/null +++ b/lib/BloqadeCUDA/test/subspace.jl @@ -0,0 +1,41 @@ +using Test +using Random +using CUDA +using YaoSubspaceArrayReg +using CuYao + +function sample(rng::AbstractRNG, wv::AbstractWeights) + 1 == firstindex(wv) || + throw(ArgumentError("non 1-based arrays are not supported")) + t = rand(rng) * sum(wv) + n = length(wv) + i = 1 + cw = wv[1] + while cw < t && i < n + i += 1 + @inbounds cw += wv[i] + end + return i +end + +function sample(rng::Union{RNG,GPUArrays.RNG}, weights::CuVector) + dices = rand(rng, length(weights)) + slots = cumsum(weights) + return map(dices) do d + # TODO: use binary search + findfirst(d, slots) + searchsortedfirst + end +end + + + +space = Subspace(10, sort(randperm(1 << 10)[1:76] .- 1)) +r = SubspaceArrayReg(randn(ComplexF64, 76), space) + +dr = cu(r) +@which measure(r) + +@which measure(ComputationalBasis(), r, AllLocs(); nshots=10) + +measure(dr) From 7db5152fa8729bf329d8893e41a3e216e2095c33 Mon Sep 17 00:00:00 2001 From: weinbe58 Date: Mon, 14 Nov 2022 16:41:06 -0500 Subject: [PATCH 2/6] patching CuYao.cu for SubspaceArrayReg --- lib/BloqadeCUDA/Project.toml | 3 +++ lib/BloqadeCUDA/src/BloqadeCUDA.jl | 4 ++++ lib/BloqadeCUDA/src/patch.jl | 12 ++++++++++++ lib/BloqadeExpr/src/space.jl | 4 ++++ lib/YaoSubspaceArrayReg/src/type.jl | 2 +- 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/BloqadeCUDA/Project.toml b/lib/BloqadeCUDA/Project.toml index 64443424c..b9fd85c5e 100644 --- a/lib/BloqadeCUDA/Project.toml +++ b/lib/BloqadeCUDA/Project.toml @@ -6,8 +6,11 @@ version = "0.1.1" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CuYao = "b48ca7a8-dd42-11e8-2b8e-1b7706800275" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +YaoArrayRegister = "e600142f-9330-5003-8abb-0ebd767abc51" YaoSubspaceArrayReg = "bd27d05e-4ce1-5e79-84dd-c5d7d508ade2" [compat] diff --git a/lib/BloqadeCUDA/src/BloqadeCUDA.jl b/lib/BloqadeCUDA/src/BloqadeCUDA.jl index e481cf0b4..66125779e 100644 --- a/lib/BloqadeCUDA/src/BloqadeCUDA.jl +++ b/lib/BloqadeCUDA/src/BloqadeCUDA.jl @@ -6,7 +6,11 @@ using CUDA using LinearAlgebra using CUDA.CUSPARSE using CUDA.CUSPARSE: CuSparseMatrixCSC, CuSparseMatrixCSR, AbstractCuSparseMatrix +using CuYao +using YaoSubspaceArrayReg include("patch.jl") +export cu + end diff --git a/lib/BloqadeCUDA/src/patch.jl b/lib/BloqadeCUDA/src/patch.jl index c1342fd3d..bdaff51c9 100644 --- a/lib/BloqadeCUDA/src/patch.jl +++ b/lib/BloqadeCUDA/src/patch.jl @@ -12,4 +12,16 @@ for SparseMatrixType in [:CuSparseMatrixCSC, :CuSparseMatrixCSR] end end +end + +function CuYao.cu(reg::SubspaceArrayReg{D}) where {D} + println("here") + natoms = reg.natoms + new_state = CuArray(reg.state) + new_subspace = Subspace( + reg.subspace.nqubits, + reg.subspace.map, + CuArray(reg.subspace.subspace_v) + ) + return SubspaceArrayReg{D}(new_state,new_subspace) end \ No newline at end of file diff --git a/lib/BloqadeExpr/src/space.jl b/lib/BloqadeExpr/src/space.jl index 6e2812477..d3859810f 100644 --- a/lib/BloqadeExpr/src/space.jl +++ b/lib/BloqadeExpr/src/space.jl @@ -112,3 +112,7 @@ Base.to_index(ss::Subspace) = ss.subspace_v .+ 1 function Base.:(==)(x::Subspace, y::Subspace) return (x.nqubits == y.nqubits) && (x.map == y.map) && (x.subspace_v == y.subspace_v) end + +function Adapt.adapt_structure(to, x::Subspace{T, S <: AbstractVector{T}}) where T + return Subspace{T,S}(x.nqubits,x.map, Adapt.adapt_structure(to, x.subspace_v)) +end diff --git a/lib/YaoSubspaceArrayReg/src/type.jl b/lib/YaoSubspaceArrayReg/src/type.jl index d4640446f..03af6839f 100644 --- a/lib/YaoSubspaceArrayReg/src/type.jl +++ b/lib/YaoSubspaceArrayReg/src/type.jl @@ -42,7 +42,7 @@ YaoArrayRegister.relaxedvec(reg::SubspaceArrayReg) = reg.state YaoArrayRegister.datatype(reg::SubspaceArrayReg) = eltype(reg.state) function Adapt.adapt_structure(to, x::SubspaceArrayReg{D}) where D - return SubspaceArrayReg{D}(Adapt.adapt(to, x.state), x.subspace) + return SubspaceArrayReg{D}(Adapt.adapt(to, x.state), Adapt.adapt_structure(to, x.subspace)) end """ From 8ecd0257c427c2ef25a721424d452b50616499f0 Mon Sep 17 00:00:00 2001 From: weinbe58 Date: Mon, 14 Nov 2022 16:42:22 -0500 Subject: [PATCH 3/6] removing print --- lib/BloqadeCUDA/src/patch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/BloqadeCUDA/src/patch.jl b/lib/BloqadeCUDA/src/patch.jl index bdaff51c9..2a4920419 100644 --- a/lib/BloqadeCUDA/src/patch.jl +++ b/lib/BloqadeCUDA/src/patch.jl @@ -15,7 +15,7 @@ end end function CuYao.cu(reg::SubspaceArrayReg{D}) where {D} - println("here") + natoms = reg.natoms new_state = CuArray(reg.state) new_subspace = Subspace( From e638027fd9a5f356e56ac0665cd21dfd17a70ac6 Mon Sep 17 00:00:00 2001 From: weinbe58 Date: Mon, 14 Nov 2022 16:49:48 -0500 Subject: [PATCH 4/6] prototype of sampler on GPU. --- lib/BloqadeCUDA/test/subspace.jl | 72 +++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/lib/BloqadeCUDA/test/subspace.jl b/lib/BloqadeCUDA/test/subspace.jl index 73d430865..dde0c5189 100644 --- a/lib/BloqadeCUDA/test/subspace.jl +++ b/lib/BloqadeCUDA/test/subspace.jl @@ -2,40 +2,64 @@ using Test using Random using CUDA using YaoSubspaceArrayReg +using YaoArrayRegister +using StatsBase using CuYao +using GPUArrays +using Adapt +using BloqadeCUDA -function sample(rng::AbstractRNG, wv::AbstractWeights) - 1 == firstindex(wv) || - throw(ArgumentError("non 1-based arrays are not supported")) - t = rand(rng) * sum(wv) - n = length(wv) - i = 1 - cw = wv[1] - while cw < t && i < n - i += 1 - @inbounds cw += wv[i] - end - return i + +struct Sampler{A,B} + cum_prob::A + values::B +end + +struct CuWeights{T<:Real} + values::CuVector{T} + sum::T +end + + +function Adapt.adapt_structure(to, sampler::Sampler) + cum_prob = Adapt.adapt_structure(to, sampler.cum_prob) + values = Adapt.adapt_structure(to, sampler.values) + Sampler(cum_prob,values) +end + + +function StatsBase.Weights(values::CuVector{T}) where {T<:Real} + return CuWeights{eltype(values)}(values,sum(values)) end -function sample(rng::Union{RNG,GPUArrays.RNG}, weights::CuVector) - dices = rand(rng, length(weights)) - slots = cumsum(weights) - return map(dices) do d - # TODO: use binary search - findfirst(d, slots) - searchsortedfirst - end +function (sampler::Sampler)(x) + i = searchsortedfirst(sampler.cum_prob, x) + i = clamp(i, firstindex(sampler.values), lastindex(sampler.values)) + @inbounds sampler.values[i] +end + + +function sample(rng::AbstractRNG, subspace_v::CuVector, weights::CuWeights,nshots::Integer) + dices = rand(rng, nshots) + sampler = Sampler(cumsum(weights.values),subspace_v) + + Array(sampler.(dices)) end space = Subspace(10, sort(randperm(1 << 10)[1:76] .- 1)) r = SubspaceArrayReg(randn(ComplexF64, 76), space) - +normalize!(r) dr = cu(r) -@which measure(r) -@which measure(ComputationalBasis(), r, AllLocs(); nshots=10) -measure(dr) +weights = Weights(abs2.(relaxedvec(dr))) +subspace_v = vec(dr.subspace) + +samples = sample(CURAND.default_rng(),subspace_v,weights,1000) + +# weights = Weights(abs2.(relaxedvec(dr))) +# @which measure(r) + +# measure(dr) \ No newline at end of file From 489d3dea303afc4391a597e2db0a31d90d84da0f Mon Sep 17 00:00:00 2001 From: weinbe58 Date: Mon, 14 Nov 2022 22:06:27 -0500 Subject: [PATCH 5/6] removing 'cu' overload --- lib/BloqadeCUDA/src/BloqadeCUDA.jl | 2 +- lib/BloqadeCUDA/src/patch.jl | 4 +++- lib/YaoSubspaceArrayReg/src/type.jl | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/BloqadeCUDA/src/BloqadeCUDA.jl b/lib/BloqadeCUDA/src/BloqadeCUDA.jl index 66125779e..4e5a4d05b 100644 --- a/lib/BloqadeCUDA/src/BloqadeCUDA.jl +++ b/lib/BloqadeCUDA/src/BloqadeCUDA.jl @@ -11,6 +11,6 @@ using YaoSubspaceArrayReg include("patch.jl") -export cu +# export cu end diff --git a/lib/BloqadeCUDA/src/patch.jl b/lib/BloqadeCUDA/src/patch.jl index 2a4920419..c14c31fcc 100644 --- a/lib/BloqadeCUDA/src/patch.jl +++ b/lib/BloqadeCUDA/src/patch.jl @@ -14,6 +14,7 @@ end end +#= function CuYao.cu(reg::SubspaceArrayReg{D}) where {D} natoms = reg.natoms @@ -24,4 +25,5 @@ function CuYao.cu(reg::SubspaceArrayReg{D}) where {D} CuArray(reg.subspace.subspace_v) ) return SubspaceArrayReg{D}(new_state,new_subspace) -end \ No newline at end of file +end +=# \ No newline at end of file diff --git a/lib/YaoSubspaceArrayReg/src/type.jl b/lib/YaoSubspaceArrayReg/src/type.jl index 03af6839f..d4640446f 100644 --- a/lib/YaoSubspaceArrayReg/src/type.jl +++ b/lib/YaoSubspaceArrayReg/src/type.jl @@ -42,7 +42,7 @@ YaoArrayRegister.relaxedvec(reg::SubspaceArrayReg) = reg.state YaoArrayRegister.datatype(reg::SubspaceArrayReg) = eltype(reg.state) function Adapt.adapt_structure(to, x::SubspaceArrayReg{D}) where D - return SubspaceArrayReg{D}(Adapt.adapt(to, x.state), Adapt.adapt_structure(to, x.subspace)) + return SubspaceArrayReg{D}(Adapt.adapt(to, x.state), x.subspace) end """ From b0741ee41884d9b70e6bf7cd6d6eddbf38e38f85 Mon Sep 17 00:00:00 2001 From: weinbe58 Date: Mon, 14 Nov 2022 22:07:23 -0500 Subject: [PATCH 6/6] two use cases for sampling. --- lib/BloqadeCUDA/Project.toml | 1 + lib/BloqadeCUDA/test/subspace.jl | 80 +++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 27 deletions(-) diff --git a/lib/BloqadeCUDA/Project.toml b/lib/BloqadeCUDA/Project.toml index b9fd85c5e..c8d155421 100644 --- a/lib/BloqadeCUDA/Project.toml +++ b/lib/BloqadeCUDA/Project.toml @@ -4,6 +4,7 @@ version = "0.1.1" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CuYao = "b48ca7a8-dd42-11e8-2b8e-1b7706800275" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" diff --git a/lib/BloqadeCUDA/test/subspace.jl b/lib/BloqadeCUDA/test/subspace.jl index dde0c5189..42afd0d8b 100644 --- a/lib/BloqadeCUDA/test/subspace.jl +++ b/lib/BloqadeCUDA/test/subspace.jl @@ -9,57 +9,83 @@ using GPUArrays using Adapt using BloqadeCUDA - -struct Sampler{A,B} - cum_prob::A +# TODO: use Alias sampler +struct SamplerWithValues{T,A<:AbstractVector{T},B<:AbstractVector} + total::T + cumulative_weights::A values::B end -struct CuWeights{T<:Real} - values::CuVector{T} - sum::T +function SamplerWithValues(weights,values) + length(weights) != length(values) && error("Sampler expects values with same length as weights.") + total = sum(weights) + cumulative_weights = cumsum(weights) + return SamplerWithValues(total,cumulative_weights,values) end - -function Adapt.adapt_structure(to, sampler::Sampler) - cum_prob = Adapt.adapt_structure(to, sampler.cum_prob) +function Adapt.adapt_structure(to, sampler::SamplerWithValues) + cumulative_weights = Adapt.adapt_structure(to, sampler.cumulative_weights) values = Adapt.adapt_structure(to, sampler.values) - Sampler(cum_prob,values) + SamplerWithValues(sampler.total,cumulative_weights,values) +end + +function (sampler::SamplerWithValues)(x) + i = searchsortedfirst(sampler.cumulative_weights, x) + i = clamp(i, firstindex(sampler.cumulative_weights), lastindex(sampler.cumulative_weights)) + @inbounds sampler.values[i] end +# TODO: use Alias sampler +struct Sampler{T,A<:AbstractVector{T}} + total::T + cumulative_weights::A +end -function StatsBase.Weights(values::CuVector{T}) where {T<:Real} - return CuWeights{eltype(values)}(values,sum(values)) +function Sampler(weights) + total = sum(weights) + cumulative_weights = cumsum(weights) + return Sampler(total,cumulative_weights) +end + +function Adapt.adapt_structure(to, sampler::Sampler) + cumulative_weights = Adapt.adapt_structure(to, sampler.cumulative_weights) + Sampler(sampler.total,cumulative_weights) end function (sampler::Sampler)(x) - i = searchsortedfirst(sampler.cum_prob, x) - i = clamp(i, firstindex(sampler.values), lastindex(sampler.values)) - @inbounds sampler.values[i] + i = searchsortedfirst(sampler.cumulative_weights, x) + clamp(i, firstindex(sampler.cumulative_weights), lastindex(sampler.cumulative_weights)) end -function sample(rng::AbstractRNG, subspace_v::CuVector, weights::CuWeights,nshots::Integer) - dices = rand(rng, nshots) - sampler = Sampler(cumsum(weights.values),subspace_v) +function sample(rng::AbstractRNG, weights::CuVector,nshots::Integer) + sampler = Sampler(weights) + + dices = sampler.total .* rand(rng, nshots) - Array(sampler.(dices)) + sampler.(dices) end +function sample(rng::AbstractRNG, weights::CuVector,nshots::Integer, obs::CuVector) + sampler = SamplerWithValues(weights,obs) + + dices = sampler.total .* rand(rng, nshots) + + sampler.(dices) +end -space = Subspace(10, sort(randperm(1 << 10)[1:76] .- 1)) -r = SubspaceArrayReg(randn(ComplexF64, 76), space) +N = 20 +ntot = 2^(N-4) +space = Subspace(N, sort(randperm(1 << N)[1:ntot] .- 1)) +r = SubspaceArrayReg(randn(ComplexF64, ntot), space) normalize!(r) dr = cu(r) -weights = Weights(abs2.(relaxedvec(dr))) -subspace_v = vec(dr.subspace) +weights = abs2.(relaxedvec(dr)) -samples = sample(CURAND.default_rng(),subspace_v,weights,1000) +using BenchmarkTools -# weights = Weights(abs2.(relaxedvec(dr))) -# @which measure(r) +@benchmark CUDA.@sync sample(CURAND.default_rng(),weights,10000) -# measure(dr) \ No newline at end of file