From 5658207ccf1b5dbb7bd028f50fd114ad2d90eed7 Mon Sep 17 00:00:00 2001 From: nHackel Date: Tue, 16 Jul 2024 14:16:48 +0200 Subject: [PATCH 01/18] Use Adapt.jl for moving SF to GPU --- Project.toml | 2 ++ src/MPIReco.jl | 1 + src/SystemMatrix/SystemMatrix.jl | 4 +--- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index b4ded33..895f00e 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Tobias Knopp "] version = "0.6.0" [deps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" DistributedArrays = "aaf54ef3-cdf8-58ed-94cc-d582ad619b94" @@ -25,6 +26,7 @@ Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" [compat] AbstractImageReconstruction = "0.3" +Adapt = "3, 4" DSP = "0.6, 0.7" Distributed = "1" DistributedArrays = "0.6" diff --git a/src/MPIReco.jl b/src/MPIReco.jl index 9d80cb8..a915eb4 100644 --- a/src/MPIReco.jl +++ b/src/MPIReco.jl @@ -5,6 +5,7 @@ module MPIReco @reexport using MPIFiles const shape = MPIFiles.shape using AbstractImageReconstruction + using Adapt @reexport using DSP using ProgressMeter using Unitful diff --git a/src/SystemMatrix/SystemMatrix.jl b/src/SystemMatrix/SystemMatrix.jl index 1b9375a..5acc348 100644 --- a/src/SystemMatrix/SystemMatrix.jl +++ b/src/SystemMatrix/SystemMatrix.jl @@ -67,9 +67,7 @@ end function process(t::Type{<:AbstractMPIRecoAlgorithm}, params::DenseSystemMatixLoadingParameter, sf::MPIFile, frequencies::Vector{CartesianIndex{2}}) S, grid = getSF(sf, frequencies, nothing; toKwargs(params)...) @info "Loading SM" - if !isa(S, params.arrayType) - S = params.arrayType(S) - end + S = adapt(params.arrayType, S) return S, grid end From 824776ff67a8bbf3accba4367c305b6113374ca9 Mon Sep 17 00:00:00 2001 From: nHackel Date: Wed, 17 Jul 2024 11:03:36 +0200 Subject: [PATCH 02/18] Add GPU support for sparse single patch recos --- .../SinglePatchAlgorithm.jl | 20 ++++++------ .../SinglePatchBGEstimationAlgorithm.jl | 15 +++++---- ...glePatchTemporalRegularizationAlgorithm.jl | 15 +++++---- src/SystemMatrix/SystemMatrix.jl | 26 ++++++++++++---- test/ReconstructionGPU.jl | 31 ++++++++++++++++--- 5 files changed, 75 insertions(+), 32 deletions(-) diff --git a/src/Algorithms/SinglePatchAlgorithms/SinglePatchAlgorithm.jl b/src/Algorithms/SinglePatchAlgorithms/SinglePatchAlgorithm.jl index 8f7c192..c7236d4 100644 --- a/src/Algorithms/SinglePatchAlgorithms/SinglePatchAlgorithm.jl +++ b/src/Algorithms/SinglePatchAlgorithms/SinglePatchAlgorithm.jl @@ -1,19 +1,21 @@ Base.@kwdef struct SinglePatchReconstructionParameter{L<:AbstractSystemMatrixLoadingParameter, SL<:AbstractLinearSolver, - SP<:AbstractSolverParameters{SL}, R<:AbstractRegularization, W<:AbstractWeightingParameters} <: AbstractSinglePatchReconstructionParameters + matT <: AbstractArray, SP<:AbstractSolverParameters{SL}, R<:AbstractRegularization, W<:AbstractWeightingParameters} <: AbstractSinglePatchReconstructionParameters # File sf::MPIFile sfLoad::Union{L, ProcessResultCache{L}} + arrayType::Type{matT} = Array # Solver solverParams::SP reg::Vector{R} = AbstractRegularization[] weightingParams::W = NoWeightingParameters() end -Base.@kwdef mutable struct SinglePatchReconstructionAlgorithm{P} <: AbstractSinglePatchReconstructionAlgorithm where {P<:AbstractSinglePatchAlgorithmParameters} +Base.@kwdef mutable struct SinglePatchReconstructionAlgorithm{P, matT <: AbstractArray} <: AbstractSinglePatchReconstructionAlgorithm where 
{P<:AbstractSinglePatchAlgorithmParameters} params::P # Could also do reconstruction progress meter here sf::Union{MPIFile, Vector{MPIFile}} S::AbstractArray + arrayType::Type{matT} grid::RegularGridPositions freqs::Vector{CartesianIndex{2}} output::Channel{Any} @@ -23,16 +25,16 @@ function SinglePatchReconstruction(params::SinglePatchParameters{<:AbstractMPIPr return SinglePatchReconstructionAlgorithm(params) end function SinglePatchReconstructionAlgorithm(params::SinglePatchParameters{<:AbstractMPIPreProcessingParameters, R, PT}) where {R<:AbstractSinglePatchReconstructionParameters, PT <:AbstractMPIPostProcessingParameters} - freqs, S, grid = prepareSystemMatrix(params.reco) - return SinglePatchReconstructionAlgorithm(params, params.reco.sf, S, grid, freqs, Channel{Any}(Inf)) + freqs, S, grid, arrayType = prepareSystemMatrix(params.reco) + return SinglePatchReconstructionAlgorithm(params, params.reco.sf, S, arrayType, grid, freqs, Channel{Any}(Inf)) end recoAlgorithmTypes(::Type{SinglePatchReconstruction}) = SystemMatrixBasedAlgorithm() AbstractImageReconstruction.parameter(algo::SinglePatchReconstructionAlgorithm) = algo.params function prepareSystemMatrix(reco::SinglePatchReconstructionParameter{L,S}) where {L<:AbstractSystemMatrixLoadingParameter, S<:AbstractLinearSolver} freqs, sf, grid = process(AbstractMPIRecoAlgorithm, reco.sfLoad, reco.sf) - sf, grid = prepareSF(S, sf, grid) - return freqs, sf, grid + sf, grid = process(AbstractMPIRecoAlgorithm, reco.sfLoad, S, sf, grid, reco.arrayType) + return freqs, sf, grid, reco.arrayType end @@ -44,13 +46,13 @@ function process(algo::SinglePatchReconstructionAlgorithm, params::Union{A, Proc @warn "System matrix and measurement have different element data type. Mapping measurment data to system matrix element type." 
result = map(eltype(algo.S),result) end - result = copyto!(similar(algo.S, size(result)...), result) + result = adapt(algo.arrayType, result) return result end function process(algo::SinglePatchReconstructionAlgorithm, params::SinglePatchReconstructionParameter, u) - weights = process(algo, params.weightingParams, u) + weights = adapt(algo.arrayType, process(algo, params.weightingParams, u)) B = getLinearOperator(algo, params) @@ -68,5 +70,5 @@ function getLinearOperator(algo::SinglePatchReconstructionAlgorithm, params::Sin end function getLinearOperator(algo::SinglePatchReconstructionAlgorithm, params::SinglePatchReconstructionParameter{<:SparseSystemMatrixLoadingParameter, S}) where {S} - return process(algo, params.sfLoad, eltype(algo.S), tuple(shape(algo.grid)...)) + return process(algo, params.sfLoad, eltype(algo.S), algo.arrayType, tuple(shape(algo.grid)...)) end \ No newline at end of file diff --git a/src/Algorithms/SinglePatchAlgorithms/SinglePatchBGEstimationAlgorithm.jl b/src/Algorithms/SinglePatchAlgorithms/SinglePatchBGEstimationAlgorithm.jl index d17cd7d..67e6bf4 100644 --- a/src/Algorithms/SinglePatchAlgorithms/SinglePatchBGEstimationAlgorithm.jl +++ b/src/Algorithms/SinglePatchAlgorithms/SinglePatchBGEstimationAlgorithm.jl @@ -1,9 +1,10 @@ export SinglePatchBGEstimationAlgorithm, SinglePatchBGEstimationReconstructionParameter Base.@kwdef struct SinglePatchBGEstimationReconstructionParameter{L<:DenseSystemMatixLoadingParameter, - SP<:AbstractSolverParameters} <: AbstractSinglePatchReconstructionParameters + SP<:AbstractSolverParameters, matT <: AbstractArray} <: AbstractSinglePatchReconstructionParameters # File sf::MPIFile sfLoad::Union{L, ProcessResultCache{L}} + arrayType::Type{matT} = Array # Solver solverParams::SP λ::Float32 @@ -12,11 +13,12 @@ Base.@kwdef struct SinglePatchBGEstimationReconstructionParameter{L<:DenseSystem bgDict::BGDictParameter end -Base.@kwdef mutable struct SinglePatchBGEstimationAlgorithm{P} <: AbstractSinglePatchReconstructionAlgorithm where {P<:AbstractSinglePatchAlgorithmParameters} +Base.@kwdef mutable struct SinglePatchBGEstimationAlgorithm{P, matT <: AbstractArray} <: AbstractSinglePatchReconstructionAlgorithm where {P<:AbstractSinglePatchAlgorithmParameters} params::P # Could also do reconstruction progress meter here sf::Union{MPIFile,Vector{MPIFile}} S::AbstractArray + arrayType::Type{matT} bgDict::AbstractArray grid::RegularGridPositions freqs::Vector{CartesianIndex{2}} @@ -27,16 +29,16 @@ function SinglePatchReconstruction(params::SinglePatchParameters{<:AbstractMPIPr return SinglePatchBGEstimationAlgorithm(params) end function SinglePatchBGEstimationAlgorithm(params::SinglePatchParameters{<:AbstractMPIPreProcessingParameters,R,PT}) where {R<:SinglePatchBGEstimationReconstructionParameter,PT<:AbstractMPIPostProcessingParameters} - freqs, S, grid = prepareSystemMatrix(params.reco) - return SinglePatchBGEstimationAlgorithm(params, params.reco.sf, S, process(SinglePatchBGEstimationAlgorithm, freqs, params.reco.bgDict), grid, freqs, Channel{Any}(Inf)) + freqs, S, grid, arrayType = prepareSystemMatrix(params.reco) + return SinglePatchBGEstimationAlgorithm(params, params.reco.sf, S, arrayType, process(SinglePatchBGEstimationAlgorithm, freqs, params.reco.bgDict), grid, freqs, Channel{Any}(Inf)) end recoAlgorithmTypes(::Type{SinglePatchBGEstimationAlgorithm}) = SystemMatrixBasedAlgorithm() AbstractImageReconstruction.parameter(algo::SinglePatchBGEstimationAlgorithm) = algo.origParam function 
prepareSystemMatrix(reco::SinglePatchBGEstimationReconstructionParameter{L}) where {L<:AbstractSystemMatrixLoadingParameter} freqs, sf, grid = process(AbstractMPIRecoAlgorithm, reco.sfLoad, reco.sf) - sf, grid = prepareSF(Kaczmarz, sf, grid) - return freqs, sf, grid + sf, grid = process(AbstractMPIRecoAlgorithm, reco.sfLoad, Kaczmarz, sf, grid, reco.arrayType) + return freqs, sf, grid, reco.arrayType end AbstractImageReconstruction.take!(algo::SinglePatchBGEstimationAlgorithm) = Base.take!(algo.output) @@ -47,6 +49,7 @@ function process(algo::SinglePatchBGEstimationAlgorithm, params::AbstractMPIPreP @warn "System matrix and measurement have different element data type. Mapping measurment data to system matrix element type." result = map(eltype(algo.S), result) end + result = adapt(algo.arrayType, result) return result end diff --git a/src/Algorithms/SinglePatchAlgorithms/SinglePatchTemporalRegularizationAlgorithm.jl b/src/Algorithms/SinglePatchAlgorithms/SinglePatchTemporalRegularizationAlgorithm.jl index d9c8b01..9208b3f 100644 --- a/src/Algorithms/SinglePatchAlgorithms/SinglePatchTemporalRegularizationAlgorithm.jl +++ b/src/Algorithms/SinglePatchAlgorithms/SinglePatchTemporalRegularizationAlgorithm.jl @@ -1,9 +1,10 @@ export SinglePatchTemporalRegularizationAlgorithm, SinglePatchTemporalRegularizationReconstructionParameter Base.@kwdef struct SinglePatchTemporalRegularizationReconstructionParameter{L<:DenseSystemMatixLoadingParameter, - SP<:AbstractSolverParameters} <: AbstractSinglePatchReconstructionParameters + SP<:AbstractSolverParameters, matT <: AbstractArray} <: AbstractSinglePatchReconstructionParameters # File sf::MPIFile sfLoad::Union{L, ProcessResultCache{L}} + arrayType::Type{matT} = Array # Solver solverParams::SP λ::Float32 @@ -14,10 +15,11 @@ Base.@kwdef struct SinglePatchTemporalRegularizationReconstructionParameter{L<:D bgDict::BGDictParameter end -Base.@kwdef mutable struct SinglePatchTemporalRegularizationAlgorithm{P} <: AbstractSinglePatchReconstructionAlgorithm where {P<:AbstractSinglePatchAlgorithmParameters} +Base.@kwdef mutable struct SinglePatchTemporalRegularizationAlgorithm{P, matT <: AbstractArray} <: AbstractSinglePatchReconstructionAlgorithm where {P<:AbstractSinglePatchAlgorithmParameters} params::P sf::Union{MPIFile,Vector{MPIFile}} S::AbstractArray + arrayType::Type{matT} bgDict::AbstractArray idxFG::Union{Nothing, UnitRange{Int64}, Vector{Int64}} = nothing idxFG::Union{Nothing, UnitRange{Int64}, Vector{Int64}} = nothing @@ -30,8 +32,8 @@ function SinglePatchReconstruction(params::SinglePatchParameters{<:AbstractMPIPr return SinglePatchTemporalRegularizationAlgorithm(params) end function SinglePatchTemporalRegularizationAlgorithm(params::SinglePatchParameters{<:AbstractMPIPreProcessingParameters,R,PT}) where {R<:SinglePatchTemporalRegularizationReconstructionParameter,PT<:AbstractMPIPostProcessingParameters} - freqs, S, grid = prepareSystemMatrix(params.reco) - return SinglePatchTemporalRegularizationAlgorithm(params, params.reco.sf, S, process(SinglePatchTemporalRegularizationAlgorithm, params.reco.bgDict, freqs) + freqs, S, grid, arrayType = prepareSystemMatrix(params.reco) + return SinglePatchTemporalRegularizationAlgorithm(params, params.reco.sf, S, arrayType, process(SinglePatchTemporalRegularizationAlgorithm, params.reco.bgDict, freqs) ,params.reco.idxFG, params.reco.idxBG, grid, freqs, Channel{Any}(Inf)) end recoAlgorithmTypes(::Type{SinglePatchTemporalRegularizationAlgorithm}) = SystemMatrixBasedAlgorithm() @@ -39,8 +41,8 @@ 
AbstractImageReconstruction.parameter(algo::SinglePatchTemporalRegularizationAlg function prepareSystemMatrix(reco::SinglePatchTemporalRegularizationReconstructionParameter{L}) where {L<:AbstractSystemMatrixLoadingParameter} freqs, sf, grid = process(AbstractMPIRecoAlgorithm, reco.sfLoad, reco.sf) - sf, grid = prepareSF(Kaczmarz, sf, grid) - return freqs, sf, grid + sf, grid = process(AbstractMPIRecoAlgorithm, reco.sfLoad, Kaczmarz, sf, grid, reco.arrayType) + return freqs, sf, grid, reco.arrayType end AbstractImageReconstruction.take!(algo::SinglePatchTemporalRegularizationAlgorithm) = Base.take!(algo.output) @@ -51,6 +53,7 @@ function process(algo::SinglePatchTemporalRegularizationAlgorithm, params::Abstr @warn "System matrix and measurement have different element data type. Mapping measurment data to system matrix element type." result = map(eltype(algo.S), result) end + result = adapt(algo.arrayType, result) return result end diff --git a/src/SystemMatrix/SystemMatrix.jl b/src/SystemMatrix/SystemMatrix.jl index 5acc348..c9aff3d 100644 --- a/src/SystemMatrix/SystemMatrix.jl +++ b/src/SystemMatrix/SystemMatrix.jl @@ -67,7 +67,6 @@ end function process(t::Type{<:AbstractMPIRecoAlgorithm}, params::DenseSystemMatixLoadingParameter, sf::MPIFile, frequencies::Vector{CartesianIndex{2}}) S, grid = getSF(sf, frequencies, nothing; toKwargs(params)...) @info "Loading SM" - S = adapt(params.arrayType, S) return S, grid end @@ -90,8 +89,8 @@ function process(t::Type{<:AbstractMPIRecoAlgorithm}, params::SparseSystemMatrix S, grid = getSF(sf, frequencies, params.sparseTrafo; toKwargs(params)...) return S, grid end -function process(t::Type{<:AbstractMPIRecoAlgorithm}, params::SparseSystemMatrixLoadingParameter, elType::Type{<:Number}, shape::NTuple{N, Int64}) where N - return createLinearOperator(params.sparseTrafo, elType; shape) +function process(t::Type{<:AbstractMPIRecoAlgorithm}, params::SparseSystemMatrixLoadingParameter, elType::Type{<:Number}, arrayType, shape::NTuple{N, Int64}) where N + return createLinearOperator(params.sparseTrafo, elType; shape, S = typeof(arrayType{elType}(undef, 0))) end function converttoreal(S::AbstractArray{Complex{T}},f) where T @@ -123,15 +122,30 @@ function getSF(bSF, frequencies, sparseTrafo, solver::AbstractString; kargs...) end end getSF(bSF, frequencies, sparseTrafo, solver::AbstractLinearSolver; kargs...) = getSF(bSF, frequencies, sparseTrafo, typeof(solver); kargs...) -function getSF(bSF, frequencies, sparseTrafo, solver::Type{<:AbstractLinearSolver}; kargs...) +function getSF(bSF, frequencies, sparseTrafo, solver::Type{<:AbstractLinearSolver}; arrayType = Array, kargs...) SF, grid = getSF(bSF, frequencies, sparseTrafo; kargs...) - return prepareSF(solver, SF, grid) + return prepareSF(solver, SF, grid, arrayType) end + +function AbstractImageReconstruction.process(::Type{<:AbstractMPIRecoAlgorithm}, params::AbstractSystemMatrixLoadingParameter, solver::Type{<:AbstractLinearSolver}, sf, grid, arrayType) + return prepareSF(solver, sf, grid, arrayType) +end +function prepareSF(solver::Type{<:AbstractLinearSolver}, SF, grid, arrayType) + SF, grid = prepareSF(solver, SF, grid) + # adapt(Array, Sparse-CPU) results in dense array + return arrayType != Array ? 
adapt(arrayType, SF) : SF, grid +end prepareSF(solver::Type{Kaczmarz}, SF, grid) = transpose(SF), grid prepareSF(solver::Type{PseudoInverse}, SF, grid) = SVD(svd(transpose(SF))...), grid -prepareSF(solver::Union{Type{<:RegularizedLeastSquares.AbstractLinearSolver}}, SF, grid) = copy(transpose(SF)), grid prepareSF(solver::Type{DirectSolver}, SF, grid) = RegularizedLeastSquares.tikhonovLU(copy(transpose(SF))), grid +prepareSF(solver::Type{Kaczmarz}, SF::AbstractSparseArray, grid) = transpose(SF), grid +prepareSF(solver::Type{PseudoInverse}, SF::AbstractSparseArray, grid) = SVD(svd(transpose(SF))...), grid +prepareSF(solver::Type{DirectSolver}, SF::AbstractSparseArray, grid) = RegularizedLeastSquares.tikhonovLU(copy(transpose(SF))), grid + +prepareSF(solver, SF , grid) = copy(transpose(SF)), grid +prepareSF(solver, SF::AbstractSparseArray, grid) = sparse(transpose(SF)), grid + prepareNormalSF(solver::AbstractLinearSolver, SF) = prepareNormalSF(typeof(solver), SF) prepareNormalSF(solver::Type{<:RegularizedLeastSquares.AbstractLinearSolver}, SF) = nothing diff --git a/test/ReconstructionGPU.jl b/test/ReconstructionGPU.jl index 489a0b4..43f1bbb 100644 --- a/test/ReconstructionGPU.jl +++ b/test/ReconstructionGPU.jl @@ -1,5 +1,5 @@ for arrayType in arrayTypes - @testset "Reconstruction $arrayType" begin + @testset "Single Patch Reconstruction: $arrayType" begin bSF = MPIFile(joinpath(datadir, "calibrations", "12.mdf")) b = MPIFile(joinpath(datadir, "measurements", "20211226_203916_MultiPatch", "1.mdf")) names = (:color, :x, :y, :z, :time) @@ -22,12 +22,33 @@ for arrayType in arrayTypes params[:solver] = CGNR c1 = reconstruct("SinglePatch", b; params...) - @test axisnames(c1) == names - @test axisvalues(c1) == values c2 = reconstruct("SinglePatch", b; params..., arrayType = arrayType) - @test axisnames(c2) == names - @test axisvalues(c2) == values + + @test isapprox(arraydata(c1), arraydata(c2)) + end + + @testset "Sparse Single Patch Reconstruction: $arrayType" begin + bSF = MPIFile(joinpath(datadir, "calibrations", "7.mdf")) + b = MPIFile(joinpath(datadir, "measurements","20211226_204612_Dice", "1.mdf")) + redFactor = 0.01 + + params = Dict{Symbol, Any}() + params[:SNRThresh] = 2 + params[:frames] = 1:100 + params[:minFreq] = 80e3 + params[:recChannels] = 1:2 + params[:iterations] = 3 + params[:spectralLeakageCorrection] = false + params[:sf] = bSF + params[:reg] = [L2Regularization(0.1f0)] + params[:numAverages] = 100 + params[:solver] = CGNR + params[:redFactor] = redFactor + + c1 = reconstruct("SinglePatchSparse", b; params..., sparseTrafo = "FFT", useDFFoV = false) + + c2 = reconstruct("SinglePatchSparse", b; params..., sparseTrafo = "FFT", useDFFoV = false, arrayType = arrayType) @test isapprox(arraydata(c1), arraydata(c2)) end From 15315b1b617a6065f2a2593527f307ad6119eed0 Mon Sep 17 00:00:00 2001 From: nHackel Date: Wed, 17 Jul 2024 11:20:32 +0200 Subject: [PATCH 03/18] Comment out AMD cards for now --- .buildkite/pipeline.yml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 44c9a3f..5fb2854 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -17,21 +17,21 @@ steps: include("test/gpu/cuda.jl")' timeout_in_minutes: 30 - - label: "AMD GPUs -- MPIReco.jl" - plugins: - - JuliaCI/julia#v1: - version: "1.10" - agents: - queue: "juliagpu" - rocm: "*" - rocmgpu: "*" - command: | - julia --color=yes --project -e ' - using Pkg - Pkg.add("TestEnv") - using TestEnv - 
TestEnv.activate(); - Pkg.add("AMDGPU") - Pkg.instantiate() - include("test/gpu/rocm.jl")' - timeout_in_minutes: 30 \ No newline at end of file + # - label: "AMD GPUs -- MPIReco.jl" + # plugins: + # - JuliaCI/julia#v1: + # version: "1.10" + # agents: + # queue: "juliagpu" + # rocm: "*" + # rocmgpu: "*" + # command: | + # julia --color=yes --project -e ' + # using Pkg + # Pkg.add("TestEnv") + # using TestEnv + # TestEnv.activate(); + # Pkg.add("AMDGPU") + # Pkg.instantiate() + # include("test/gpu/rocm.jl")' + # timeout_in_minutes: 30 \ No newline at end of file From 38b0ac78fd23b92e495bf02122f47afa3818ebb1 Mon Sep 17 00:00:00 2001 From: nHackel Date: Thu, 18 Jul 2024 18:35:37 +0200 Subject: [PATCH 04/18] Add DenseMultiPatchOperator and multi patch operator hierarchy Still missing some array functionality and show --- src/MultiPatch.jl | 70 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/src/MultiPatch.jl b/src/MultiPatch.jl index e2d4cd8..1c4e08f 100644 --- a/src/MultiPatch.jl +++ b/src/MultiPatch.jl @@ -79,20 +79,43 @@ abstract type AbstractMultiPatchOperatorParameter <: AbstractMPIRecoParameters e # MultiPatchOperator is a type that acts as the MPI system matrix but exploits # its sparse structure. # Its very important to keep this type typestable -mutable struct MultiPatchOperator{T, V <: AbstractMatrix{T}, U<:Positions} +export AbstractMultiPatchOperator, MultiPatchOperator, DenseMultiPatchOperator +abstract type AbstractMultiPatchOperator{T, V} <: AbstractArray{T, 2} end +mutable struct MultiPatchOperator{T, V <: AbstractMatrix{T}, U<:Positions, I <: Integer, vecI <: AbstractVector{I}, matI <: AbstractMatrix{I}} <: AbstractMultiPatchOperator{T, V} S::Vector{V} grid::U - N::Int - M::Int - RowToPatch::Vector{Int} - xcc::Vector{Vector{Int}} - xss::Vector{Vector{Int}} - sign::Matrix{Int} - nPatches::Int - patchToSMIdx::Vector{Int} + N::I + M::I + RowToPatch::vecI + xcc::Vector{vecI} + xss::Vector{vecI} + sign::matI + nPatches::I + patchToSMIdx::vecI +end + +mutable struct DenseMultiPatchOperator{T, V <: AbstractArray{T, 3}, U<:Positions, I <: Integer, vecI <: AbstractVector{I}, matI <: AbstractMatrix{I}} <: AbstractMultiPatchOperator{T, V} + S::V + grid::U + N::I + M::I + RowToPatch::vecI + xcc::matI + xss::matI + sign::matI + nPatches::I + patchToSMIdx::vecI +end + +function Base.convert(::Type{DenseMultiPatchOperator}, op::MultiPatchOperator) + S = stack(op.S) + xcc = stack(op.xcc) + xss = stack(op.xss) + return DenseMultiPatchOperator(S, op.grid, op.N, op.M, op.RowToPatch, xcc, xss, op.sign, op.nPatches, op.patchToSMIdx) end eltype(FFOp::MultiPatchOperator) = eltype(FFOp.S[1]) +eltype(FFOp::DenseMultiPatchOperator) = eltype(FFOp.S) function MultiPatchOperatorHighLevel(SF::MPIFile, bMeas, freq, bgCorrection::Bool; kargs...) return MultiPatchOperatorHighLevel(MultiMPIFile([SF]), bMeas, freq, bgCorrection; kargs...) 
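A minimal usage sketch for the conversion added above (illustrative; `op`
stands for any MultiPatchOperator whose patches share a single system-matrix
size, which is what the `stack` calls require):

    dop = convert(DenseMultiPatchOperator, op)
    # S, xcc and xss are now contiguous arrays that a kernel can index as
    # S[row, col, smIdx], xss[i, patch] and xcc[i, patch], without chasing
    # per-patch vectors.
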
@@ -294,7 +317,7 @@ function MultiPatchOperatorExpliciteMapping(SFs::MultiMPIFile, freq; bgCorrectio # now that we have all grids we can calculate the indices within the recoGrid xcc, xss = calculateLUT(grids, recoGrid) - return MultiPatchOperator{eltype(first(S)), reduce(promote_type, typeof.(S)), typeof(recoGrid)}(S, recoGrid, length(recoGrid), M*numPatches, + return MultiPatchOperator{eltype(first(S)), reduce(promote_type, typeof.(S)), typeof(recoGrid), eltype(patchToSMIdx), typeof(patchToSMIdx), typeof(sign)}(S, recoGrid, length(recoGrid), M*numPatches, RowToPatch, xcc, xss, sign, numPatches, patchToSMIdx) end @@ -411,7 +434,7 @@ function MultiPatchOperatorRegular(SFs::MultiMPIFile, freq; bgCorrection::Bool, sign = ones(Int, M, numPatches) - return MultiPatchOperator{eltype(first(S)), reduce(promote_type, typeof.(S)), typeof(recoGrid)}(S, recoGrid, length(recoGrid), M*numPatches, + return MultiPatchOperator{eltype(first(S)), reduce(promote_type, typeof.(S)), typeof(recoGrid), eltype(patchToSMIdx), typeof(patchToSMIdx), typeof(sign)}(S, recoGrid, length(recoGrid), M*numPatches, RowToPatch, xcc, xss, sign, numPatches, patchToSMIdx) end @@ -442,8 +465,27 @@ function size(FFOp::MultiPatchOperator,i::Int) end size(FFOp::MultiPatchOperator) = (FFOp.M,FFOp.N) +size(FFTOp::DenseMultiPatchOperator) = (FFTOp.M,FFTOp.N) length(FFOp::MultiPatchOperator) = size(FFOp,1)*size(FFOp,2) +function getindex(op::MultiPatchOperator, i, j) + p = op.RowToPatch[i] + xs = op.xss[p] + row = mod1(i,div(op.M,op.nPatches)) + A = op.S[op.patchToSMIdx[p]] + sign = op.sign[j,op.patchToSMIdx[p]] + # TODO or zero + return sign*A[row,xs[j]] +end + +function LinearAlgebra.mul!(b::AbstractVector{T}, op::MultiPatchOperator{T}, x::AbstractVector{T}) where T + b[:] .= zero(T) + for i in 1:size(op, 1) + b[i] = dot_with_matrix_row(op, x, i) + end + return b +end + ### The following is intended to use the standard kaczmarz method ### function RegularizedLeastSquares.normalize(norm::SystemMatrixBasedNormalization, op::MultiPatchOperator, b) @@ -458,6 +500,12 @@ function RegularizedLeastSquares.normalize(norm::SystemMatrixBasedNormalization, return trace end +function RegularizedLeastSquares.normalize(norm::SystemMatrixBasedNormalization, op::DenseMultiPatchOperator, b) + trace = sum([RegularizedLeastSquares.normalize(norm, view(S, :, :, i), b)*size(S, 2) for (i, S) in axes(op.S, 3)]) + trace/=size(op, 2) + return trace +end + function calculateTraceOfNormalMatrix(Op::MultiPatchOperator, weights) if length(Op.S) == 1 trace = calculateTraceOfNormalMatrix(Op.S[1],weights) From 78fad07097241a68008eeac79ff97712786f20dd Mon Sep 17 00:00:00 2001 From: nHackel Date: Thu, 18 Jul 2024 18:36:18 +0200 Subject: [PATCH 05/18] Add KernelAbstractionsExt and custom kernel for DenseMultiPatchOps mul! 
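The kernel follows a one-workgroup-per-operator-row pattern: a grid-stride
loop accumulates partial products per thread, followed by a shared-memory
tree reduction. A stripped-down sketch of that pattern for a generic dense
matrix A (illustrative only; the actual kernel below additionally resolves
the patch, sign, xss and xcc indirections):

    @kernel function row_mul!(b, @Const(A), @Const(x))
        row = @index(Group, Linear)       # one workgroup per row
        localIdx = @index(Local, Linear)
        stride = prod(@groupsize())
        shared = @localmem eltype(b) 256  # must match the launch group size

        acc = zero(eltype(b))
        i = localIdx
        while i <= size(A, 2)             # grid-stride loop over the columns
            acc += A[row, i] * x[i]
            i += stride
        end
        shared[localIdx] = acc
        @synchronize

        s = stride ÷ 2                    # power-of-two tree reduction
        while s > 0
            if localIdx <= s
                shared[localIdx] += shared[localIdx + s]
            end
            s ÷= 2
            @synchronize
        end
        localIdx == 1 && (b[row] = shared[1])
    end

    # launch: row_mul!(backend, 256)(b, A, x; ndrange = (256, size(A, 1)))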
--- Project.toml | 9 +++ .../MPIRecoKernelAbstractionsExt.jl | 8 +++ .../MultiPatch.jl | 69 +++++++++++++++++++ 3 files changed, 86 insertions(+) create mode 100644 ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl create mode 100644 ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl diff --git a/Project.toml b/Project.toml index 895f00e..8d407d6 100644 --- a/Project.toml +++ b/Project.toml @@ -31,9 +31,11 @@ DSP = "0.6, 0.7" Distributed = "1" DistributedArrays = "0.6" FFTW = "1.3" +GPUArrays = "8, 9, 10" ImageUtils = "0.2" IniFile = "0.5" JLArrays = "0.1" +KernelAbstractions = "0.8, 0.9" LinearAlgebra = "1" LinearOperators = "2.3" LinearOperatorCollection = "2" @@ -58,5 +60,12 @@ ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1" ImageQualityIndexes = "2996bd0c-7a13-11e9-2da2-2f5ce47296a9" Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" +[weakdeps] +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" + [targets] test = ["Test", "HTTP", "FileIO", "LazyArtifacts", "Scratch", "ImageMagick", "ImageQualityIndexes", "Unitful", "JLArrays"] + +[extensions] +MPIRecoKernelAbstractionsExt = ["KernelAbstractions", "GPUArrays"] \ No newline at end of file diff --git a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl new file mode 100644 index 0000000..cf0f4fb --- /dev/null +++ b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl @@ -0,0 +1,8 @@ +module MPIRecoKernelAbstractionsExt + +using MPIReco, MPIReco.Adapt, MPIReco.LinearAlgebra +using KernelAbstractions, GPUArrays + +include("MultiPatch.jl") + +end \ No newline at end of file diff --git a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl new file mode 100644 index 0000000..95f3ff4 --- /dev/null +++ b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl @@ -0,0 +1,69 @@ +function Adapt.adapt_structure(::Type{arrT}, op::MultiPatchOperator) where {arrT <: AbstractGPUArray} + validSMs = all(x -> size(x) == size(op.S[1]), op.S) + validXCC = all(x -> length(x) == length(op.xcc[1]), op.xcc) + validXSS = all(x -> length(x) == length(op.xss[1]), op.xss) + + # Ideally we create a DenseMultiPatchOperator on the GPU + if validSMs && validXCC && validXSS + S = adapt(arrT, stack(op.S)) + # We want to use Int32 for better GPU performance + xcc = Int32.(adapt(arrT, stack(op.xcc))) + xss = Int32.(adapt(arrT, stack(op.xss))) + sign = Int32.(adapt(arrT, op.sign)) + RowToPatch = Int32.(adapt(arrT, op.RowToPatch)) + patchToSMIdx = Int32.(adapt(arrT, op.patchToSMIdx)) + return DenseMultiPatchOperator(S, op.grid, Int32(op.N), Int32(op.M), RowToPatch, xcc, xss, sign, Int32(op.nPatches), patchToSMIdx) + else + throw(ArgumentError("Cannot adapt MultiPatchOperator to $arrT, since it cannot be represented as a DenseMultiPatchOperator")) + end +end + +@kernel function dense_mul!(b, @Const(x), @Const(S), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx)) + # Each group/block handles a single row of the operator + operator_row = @index(Group, Linear) # k + patch = RowToPatch[operator_row] # p + patch_row = mod1(operator_row, M) # j + smIdx = patchToSMIdx[patch] + sign = signs[patch_row, smIdx] + grid_stride = prod(@groupsize()) + N = size(xss, 1) + + # We want to use a grid-stride loop to perform the sparse matrix-vector product. + # Each thread performs a single element-wise multiplication and reduction in its shared spot. 
+ # Afterwards we reduce over the shared memory. + localIdx = @index(Local, Linear) + shared = @localmem eltype(b) prod(@groupsize()) + shared[localIdx] = zero(eltype(b)) + + # First we iterate over the sparse indices + i = localIdx + while i <= N + shared[localIdx] = shared[localIdx] + sign * S[patch_row, xss[i, patch], smIdx] * x[xcc[i, patch]] + i += grid_stride + end + @synchronize + + # Now we need to reduce the shared memory to get the final result + @private s = div(min(grid_stride, N), Int32(2)) + while s > Int32(0) + if localIdx <= s + shared[localIdx] = shared[localIdx] + shared[localIdx + s] + end + s >>= 1 + @synchronize + end + + # Write the result out to b + if localIdx == 1 + b[operator_row] = shared[localIdx] + end +end + +function LinearAlgebra.mul!(b::AbstractVector{T}, op::DenseMultiPatchOperator{T, V}, x::AbstractVector{T}) where {T, V} + b[:] .= zero(T) + backend = get_backend(b) + kernel = dense_mul!(backend, 256) + kernel(b, x, op.S, op.xcc, op.xss, op.sign, div(op.M, op.nPatches), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) + synchronize(backend) + return b +end \ No newline at end of file From 2c527c3985a5c32ec4df303f500896e3fbf55c31 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 19 Jul 2024 10:41:55 +0200 Subject: [PATCH 06/18] Improve mul! MultiPatch performance --- .../MPIRecoKernelAbstractionsExt.jl | 1 + ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl | 11 ++++------- src/MultiPatch.jl | 1 - 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl index cf0f4fb..2217625 100644 --- a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl +++ b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl @@ -2,6 +2,7 @@ module MPIRecoKernelAbstractionsExt using MPIReco, MPIReco.Adapt, MPIReco.LinearAlgebra using KernelAbstractions, GPUArrays +using KernelAbstractions.Extras: @unroll include("MultiPatch.jl") diff --git a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl index 95f3ff4..908b04e 100644 --- a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl +++ b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl @@ -18,13 +18,13 @@ function Adapt.adapt_structure(::Type{arrT}, op::MultiPatchOperator) where {arrT end end -@kernel function dense_mul!(b, @Const(x), @Const(S), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx)) +@kernel cpu = false inbounds = true function dense_mul!(b, @Const(x), @Const(S), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx)) # Each group/block handles a single row of the operator operator_row = @index(Group, Linear) # k patch = RowToPatch[operator_row] # p patch_row = mod1(operator_row, M) # j smIdx = patchToSMIdx[patch] - sign = signs[patch_row, smIdx] + sign = eltype(b)(signs[patch_row, smIdx]) grid_stride = prod(@groupsize()) N = size(xss, 1) @@ -32,14 +32,12 @@ end # Each thread performs a single element-wise multiplication and reduction in its shared spot. # Afterwards we reduce over the shared memory. 
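  # A grid-stride loop lets the fixed-size workgroup cover any row length:
  # thread t accumulates elements t, t + grid_stride, t + 2*grid_stride, ...
  # into its own shared-memory slot before the tree reduction below.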
localIdx = @index(Local, Linear) - shared = @localmem eltype(b) prod(@groupsize()) + shared = @localmem eltype(b) grid_stride shared[localIdx] = zero(eltype(b)) # First we iterate over the sparse indices - i = localIdx - while i <= N + @unroll for i = localIdx:grid_stride:N shared[localIdx] = shared[localIdx] + sign * S[patch_row, xss[i, patch], smIdx] * x[xcc[i, patch]] - i += grid_stride end @synchronize @@ -60,7 +58,6 @@ end end function LinearAlgebra.mul!(b::AbstractVector{T}, op::DenseMultiPatchOperator{T, V}, x::AbstractVector{T}) where {T, V} - b[:] .= zero(T) backend = get_backend(b) kernel = dense_mul!(backend, 256) kernel(b, x, op.S, op.xcc, op.xss, op.sign, div(op.M, op.nPatches), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) diff --git a/src/MultiPatch.jl b/src/MultiPatch.jl index 1c4e08f..a15ec69 100644 --- a/src/MultiPatch.jl +++ b/src/MultiPatch.jl @@ -479,7 +479,6 @@ function getindex(op::MultiPatchOperator, i, j) end function LinearAlgebra.mul!(b::AbstractVector{T}, op::MultiPatchOperator{T}, x::AbstractVector{T}) where T - b[:] .= zero(T) for i in 1:size(op, 1) b[i] = dot_with_matrix_row(op, x, i) end From 4ecc85ed2ef0073730dcaddbae604ab78eadac09 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 19 Jul 2024 13:47:07 +0200 Subject: [PATCH 07/18] Implement getindex for MultiPatchOperator --- src/MultiPatch.jl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/MultiPatch.jl b/src/MultiPatch.jl index a15ec69..b2d7719 100644 --- a/src/MultiPatch.jl +++ b/src/MultiPatch.jl @@ -468,14 +468,19 @@ size(FFOp::MultiPatchOperator) = (FFOp.M,FFOp.N) size(FFTOp::DenseMultiPatchOperator) = (FFTOp.M,FFTOp.N) length(FFOp::MultiPatchOperator) = size(FFOp,1)*size(FFOp,2) -function getindex(op::MultiPatchOperator, i, j) +function getindex(op::MultiPatchOperator, i::I, j::I) where I <: Integer p = op.RowToPatch[i] xs = op.xss[p] + xc = op.xcc[p] row = mod1(i,div(op.M,op.nPatches)) A = op.S[op.patchToSMIdx[p]] - sign = op.sign[j,op.patchToSMIdx[p]] - # TODO or zero - return sign*A[row,xs[j]] + sign = op.sign[row,op.patchToSMIdx[p]] + index = findfirst(isequal(j), xc) + if !isnothing(index) + return sign*A[row,xs[index]] + else + return zero(eltype(op)) + end end function LinearAlgebra.mul!(b::AbstractVector{T}, op::MultiPatchOperator{T}, x::AbstractVector{T}) where T From dd6c8ecf8f251ea3c21ab09e272d858ff5205507 Mon Sep 17 00:00:00 2001 From: nHackel Date: Fri, 19 Jul 2024 14:05:28 +0000 Subject: [PATCH 08/18] Init (untested) GPU adjoint MultiPatch --- .../MPIRecoKernelAbstractionsExt.jl | 1 + .../MultiPatch.jl | 34 ++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl index 2217625..dbf5a5c 100644 --- a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl +++ b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl @@ -3,6 +3,7 @@ module MPIRecoKernelAbstractionsExt using MPIReco, MPIReco.Adapt, MPIReco.LinearAlgebra using KernelAbstractions, GPUArrays using KernelAbstractions.Extras: @unroll +using KernelAbstractions.Atomix include("MultiPatch.jl") diff --git a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl index 908b04e..78de48e 100644 --- a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl +++ b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl @@ -57,10 +57,42 @@ end end end -function 
LinearAlgebra.mul!(b::AbstractVector{T}, op::DenseMultiPatchOperator{T, V}, x::AbstractVector{T}) where {T, V} +function LinearAlgebra.mul!(b::AbstractVector{T}, op::DenseMultiPatchOperator{T, V}, x::AbstractVector{T}) where {T, V <: AbstractGPUArray} backend = get_backend(b) kernel = dense_mul!(backend, 256) kernel(b, x, op.S, op.xcc, op.xss, op.sign, div(op.M, op.nPatches), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) synchronize(backend) return b +end + +@kernel cpu = false function dense_mul_adj!(res, @Const(t), @Const(S), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx)) + # Each group/block handles a single column of the adjoint(operator) + # i.e. a row of the operator + localIdx = @index(Local, Linear) + groupIdx = @index(Group, Linear) # k + patch = RowToPatch[groupIdx] # p + patch_row = mod1(operator_row, M) # j + smIdx = patchToSMIdx[patch] + sign = eltype(b)(signs[patch_row, smIdx]) + grid_stride = prod(@groupsize()) + N = size(xss, 1) + + + # Each thread within the block will add the same value of t + val = t[groupIdx] + + # Since we go along the columns during a matrix-vector product, + # we have a race condition with other threads writing to the same result. + for i = localIdx:grid_stride:N + Atomix.@atomic res[xcc[i, patch]] += sign * S[patch_row, xss[i, patch], smIdx] * val + end +end + +function LinearAlgebra.mul!(res::AbstractVector{T}, adj::Adjoint{T, OP}, t::AbstractVector{T}) where {T, V <: AbstractGPUArray, OP <: DenseMultiPatchOperator{T, V}} + backend = get_backend(res) + op = adj.parent + kernel = dense_mul_adj!(backend, 256) + kernel(res, t, op.S, op.xcc, op.xss, op.sign, div(op.M, op.nPatches), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) + synchronize(backend) + return res end \ No newline at end of file From 328ddcd3734feb3f437f7be29a863ba6cdf7d68c Mon Sep 17 00:00:00 2001 From: nHackel Date: Mon, 22 Jul 2024 11:24:11 +0200 Subject: [PATCH 09/18] Fix adjoint for GPU MultiPatch --- Project.toml | 4 +++- .../MPIRecoKernelAbstractionsExt.jl | 2 +- ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 8d407d6..05dd6db 100644 --- a/Project.toml +++ b/Project.toml @@ -27,6 +27,7 @@ Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" [compat] AbstractImageReconstruction = "0.3" Adapt = "3, 4" +Atomix = "0.1" DSP = "0.6, 0.7" Distributed = "1" DistributedArrays = "0.6" @@ -61,6 +62,7 @@ ImageQualityIndexes = "2996bd0c-7a13-11e9-2da2-2f5ce47296a9" Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" [weakdeps] +Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" @@ -68,4 +70,4 @@ GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" test = ["Test", "HTTP", "FileIO", "LazyArtifacts", "Scratch", "ImageMagick", "ImageQualityIndexes", "Unitful", "JLArrays"] [extensions] -MPIRecoKernelAbstractionsExt = ["KernelAbstractions", "GPUArrays"] \ No newline at end of file +MPIRecoKernelAbstractionsExt = ["Atomix","KernelAbstractions", "GPUArrays"] \ No newline at end of file diff --git a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl index dbf5a5c..750cef1 100644 --- a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl +++ b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl @@ -3,7 +3,7 @@ module 
MPIRecoKernelAbstractionsExt using MPIReco, MPIReco.Adapt, MPIReco.LinearAlgebra using KernelAbstractions, GPUArrays using KernelAbstractions.Extras: @unroll -using KernelAbstractions.Atomix +using Atomix include("MultiPatch.jl") diff --git a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl index 78de48e..ac7dc68 100644 --- a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl +++ b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl @@ -84,7 +84,7 @@ end # Since we go along the columns during a matrix-vector product, # we have a race condition with other threads writing to the same result. for i = localIdx:grid_stride:N - Atomix.@atomic res[xcc[i, patch]] += sign * S[patch_row, xss[i, patch], smIdx] * val + Atomix.@atomic res[xcc[i, patch]] += adjoint(sign * S[patch_row, xss[i, patch], smIdx]) * val end end From e555e9644967b721ea539273bebb386d26d516d7 Mon Sep 17 00:00:00 2001 From: nHackel Date: Mon, 22 Jul 2024 15:23:19 +0200 Subject: [PATCH 10/18] Fix atomic issues for MultiPatch GPU adjoint with hacky reinterpret --- ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl index ac7dc68..babc036 100644 --- a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl +++ b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl @@ -65,15 +65,15 @@ function LinearAlgebra.mul!(b::AbstractVector{T}, op::DenseMultiPatchOperator{T, return b end -@kernel cpu = false function dense_mul_adj!(res, @Const(t), @Const(S), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx)) +@kernel function dense_mul_adj!(res, @Const(t), @Const(S), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx)) # Each group/block handles a single column of the adjoint(operator) # i.e. a row of the operator localIdx = @index(Local, Linear) groupIdx = @index(Group, Linear) # k patch = RowToPatch[groupIdx] # p - patch_row = mod1(operator_row, M) # j + patch_row = mod1(groupIdx, M) # j smIdx = patchToSMIdx[patch] - sign = eltype(b)(signs[patch_row, smIdx]) + sign = eltype(res)(signs[patch_row, smIdx]) grid_stride = prod(@groupsize()) N = size(xss, 1) @@ -84,15 +84,19 @@ end # Since we go along the columns during a matrix-vector product, # we have a race condition with other threads writing to the same result. 
for i = localIdx:grid_stride:N - Atomix.@atomic res[xcc[i, patch]] += adjoint(sign * S[patch_row, xss[i, patch], smIdx]) * val + tmp = sign * adjoint(S[patch_row, xss[i, patch], smIdx]) * val + # @atomic is not supported for ComplexF32 numbers + Atomix.@atomic res[1, xcc[i, patch]] += tmp.re + Atomix.@atomic res[2, xcc[i, patch]] += tmp.im end end -function LinearAlgebra.mul!(res::AbstractVector{T}, adj::Adjoint{T, OP}, t::AbstractVector{T}) where {T, V <: AbstractGPUArray, OP <: DenseMultiPatchOperator{T, V}} +function LinearAlgebra.mul!(res::AbstractVector{T}, adj::Adjoint{T, OP}, t::AbstractVector{T}) where {T <: Complex, V <: AbstractArray, OP <: DenseMultiPatchOperator{T, V}} backend = get_backend(res) op = adj.parent kernel = dense_mul_adj!(backend, 256) - kernel(res, t, op.S, op.xcc, op.xss, op.sign, div(op.M, op.nPatches), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) + # We have to reinterpret the result as a real array, because atomic operations on Complex numbers are not supported + kernel(reinterpret(reshape, real(eltype(res)), res), t, op.S, op.xcc, op.xss, op.sign, div(op.M, op.nPatches), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) synchronize(backend) return res end \ No newline at end of file From 6ea911922754b2fddafc34361ad35da6099d4b58 Mon Sep 17 00:00:00 2001 From: nHackel Date: Mon, 22 Jul 2024 17:20:46 +0200 Subject: [PATCH 11/18] Fix errors of multi patch op interfacing with LinearOperators and Adapt --- ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl | 13 +++++++------ src/MPIReco.jl | 1 + src/MultiPatch.jl | 15 ++++++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl index babc036..21fbfbf 100644 --- a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl +++ b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl @@ -12,7 +12,7 @@ function Adapt.adapt_structure(::Type{arrT}, op::MultiPatchOperator) where {arrT sign = Int32.(adapt(arrT, op.sign)) RowToPatch = Int32.(adapt(arrT, op.RowToPatch)) patchToSMIdx = Int32.(adapt(arrT, op.patchToSMIdx)) - return DenseMultiPatchOperator(S, op.grid, Int32(op.N), Int32(op.M), RowToPatch, xcc, xss, sign, Int32(op.nPatches), patchToSMIdx) + return DenseMultiPatchOperator(S, op.grid, op.N, op.M, RowToPatch, xcc, xss, sign, Int32(op.nPatches), patchToSMIdx) else throw(ArgumentError("Cannot adapt MultiPatchOperator to $arrT, since it cannot be represented as a DenseMultiPatchOperator")) end @@ -26,7 +26,7 @@ end smIdx = patchToSMIdx[patch] sign = eltype(b)(signs[patch_row, smIdx]) grid_stride = prod(@groupsize()) - N = size(xss, 1) + N = Int32(size(xss, 1)) # We want to use a grid-stride loop to perform the sparse matrix-vector product. # Each thread performs a single element-wise multiplication and reduction in its shared spot. 
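The layout behind the reinterpret workaround from the previous commit can be
checked on the CPU, assuming nothing beyond Base (atomics elided; this only
demonstrates the re/im split):

    res  = zeros(ComplexF32, 4)
    resr = reinterpret(reshape, Float32, res)  # 2×4 view: row 1 = re, row 2 = im
    tmp  = 1.0f0 + 2.0f0im
    resr[1, 3] += tmp.re                       # Atomix.@atomic in the kernel
    resr[2, 3] += tmp.im
    res[3] == tmp                              # true
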
@@ -60,12 +60,12 @@ end function LinearAlgebra.mul!(b::AbstractVector{T}, op::DenseMultiPatchOperator{T, V}, x::AbstractVector{T}) where {T, V <: AbstractGPUArray} backend = get_backend(b) kernel = dense_mul!(backend, 256) - kernel(b, x, op.S, op.xcc, op.xss, op.sign, div(op.M, op.nPatches), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) + kernel(b, x, op.S, op.xcc, op.xss, op.sign, Int32(div(op.M, op.nPatches)), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) synchronize(backend) return b end -@kernel function dense_mul_adj!(res, @Const(t), @Const(S), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx)) +@kernel inbounds = true function dense_mul_adj!(res, @Const(t), @Const(S), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx)) # Each group/block handles a single column of the adjoint(operator) # i.e. a row of the operator localIdx = @index(Local, Linear) @@ -75,7 +75,7 @@ end smIdx = patchToSMIdx[patch] sign = eltype(res)(signs[patch_row, smIdx]) grid_stride = prod(@groupsize()) - N = size(xss, 1) + N = Int32(size(xss, 1)) # Each thread within the block will add the same value of t @@ -94,9 +94,10 @@ end function LinearAlgebra.mul!(res::AbstractVector{T}, adj::Adjoint{T, OP}, t::AbstractVector{T}) where {T <: Complex, V <: AbstractArray, OP <: DenseMultiPatchOperator{T, V}} backend = get_backend(res) op = adj.parent + res .= zero(T) # We need to zero the result, because we are using += in the kernel kernel = dense_mul_adj!(backend, 256) # We have to reinterpret the result as a real array, because atomic operations on Complex numbers are not supported - kernel(reinterpret(reshape, real(eltype(res)), res), t, op.S, op.xcc, op.xss, op.sign, div(op.M, op.nPatches), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) + kernel(reinterpret(reshape, real(eltype(res)), res), t, op.S, op.xcc, op.xss, op.sign, Int32(div(op.M, op.nPatches)), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1))) synchronize(backend) return res end \ No newline at end of file diff --git a/src/MPIReco.jl b/src/MPIReco.jl index a915eb4..df1ad2e 100644 --- a/src/MPIReco.jl +++ b/src/MPIReco.jl @@ -1,6 +1,7 @@ module MPIReco using Reexport @reexport using RegularizedLeastSquares + using RegularizedLeastSquares.LinearOperators @reexport using ImageUtils @reexport using MPIFiles const shape = MPIFiles.shape diff --git a/src/MultiPatch.jl b/src/MultiPatch.jl index b2d7719..d4882e0 100644 --- a/src/MultiPatch.jl +++ b/src/MultiPatch.jl @@ -84,8 +84,8 @@ abstract type AbstractMultiPatchOperator{T, V} <: AbstractArray{T, 2} end mutable struct MultiPatchOperator{T, V <: AbstractMatrix{T}, U<:Positions, I <: Integer, vecI <: AbstractVector{I}, matI <: AbstractMatrix{I}} <: AbstractMultiPatchOperator{T, V} S::Vector{V} grid::U - N::I - M::I + N::Int64 + M::Int64 RowToPatch::vecI xcc::Vector{vecI} xss::Vector{vecI} @@ -97,8 +97,8 @@ end mutable struct DenseMultiPatchOperator{T, V <: AbstractArray{T, 3}, U<:Positions, I <: Integer, vecI <: AbstractVector{I}, matI <: AbstractMatrix{I}} <: AbstractMultiPatchOperator{T, V} S::V grid::U - N::I - M::I + N::Int64 + M::Int64 RowToPatch::vecI xcc::matI xss::matI @@ -107,6 +107,11 @@ mutable struct DenseMultiPatchOperator{T, V <: AbstractArray{T, 3}, U<:Positions patchToSMIdx::vecI end +Adapt.adapt_structure(::Type{Array}, op::AbstractMultiPatchOperator) = op +LinearOperators.storage_type(op::MultiPatchOperator) = LinearOperators.storage_type(first(op.S)) 
+LinearOperators.storage_type(op::DenseMultiPatchOperator) = typeof(similar(op.S, 0)) + + function Base.convert(::Type{DenseMultiPatchOperator}, op::MultiPatchOperator) S = stack(op.S) xcc = stack(op.xcc) @@ -505,7 +510,7 @@ function RegularizedLeastSquares.normalize(norm::SystemMatrixBasedNormalization, end function RegularizedLeastSquares.normalize(norm::SystemMatrixBasedNormalization, op::DenseMultiPatchOperator, b) - trace = sum([RegularizedLeastSquares.normalize(norm, view(S, :, :, i), b)*size(S, 2) for (i, S) in axes(op.S, 3)]) + trace = sum([RegularizedLeastSquares.normalize(norm, view(op.S, :, :, i), b)*size(op.S, 2) for i in axes(op.S, 3)]) trace/=size(op, 2) return trace end From 2d7e3a64c50e2b1155f1c27de0f5fa5e2b3df3b0 Mon Sep 17 00:00:00 2001 From: nHackel Date: Mon, 22 Jul 2024 17:21:55 +0200 Subject: [PATCH 12/18] Add solver multiple solver and GPU support to multi patch recos --- config/MultiPatch.toml | 2 +- .../MultiPatchAlgorithm.jl | 26 ++++++++++++------- test/MultiPatch.jl | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/config/MultiPatch.toml b/config/MultiPatch.toml index 2d12da3..db00167 100644 --- a/config/MultiPatch.toml +++ b/config/MultiPatch.toml @@ -26,7 +26,7 @@ _module = "MPIReco" _module = "MPIReco" [parameter.reco.solverParams] - _type = "RecoPlan{SimpleSolverParameters}" + _type = "RecoPlan{ElaborateSolverParameters}" _module = "MPIReco" [parameter.pre] diff --git a/src/Algorithms/MultiPatchAlgorithms/MultiPatchAlgorithm.jl b/src/Algorithms/MultiPatchAlgorithms/MultiPatchAlgorithm.jl index 2209ff0..6f0a79e 100644 --- a/src/Algorithms/MultiPatchAlgorithms/MultiPatchAlgorithm.jl +++ b/src/Algorithms/MultiPatchAlgorithms/MultiPatchAlgorithm.jl @@ -1,5 +1,6 @@ export MultiPatchReconstructionAlgorithm, MultiPatchReconstructionParameter -Base.@kwdef struct MultiPatchReconstructionParameter{F<:AbstractFrequencyFilterParameter,O<:AbstractMultiPatchOperatorParameter, S<:AbstractSolverParameters, FF<:AbstractFocusFieldPositions, FFSF<:AbstractFocusFieldPositions} <: AbstractMultiPatchReconstructionParameters +Base.@kwdef struct MultiPatchReconstructionParameter{matT <: AbstractArray,F<:AbstractFrequencyFilterParameter,O<:AbstractMultiPatchOperatorParameter, S<:AbstractSolverParameters, FF<:AbstractFocusFieldPositions, FFSF<:AbstractFocusFieldPositions, R <: AbstractRegularization} <: AbstractMultiPatchReconstructionParameters + arrayType::Type{matT} = Array # File sf::MultiMPIFile freqFilter::F @@ -7,16 +8,17 @@ Base.@kwdef struct MultiPatchReconstructionParameter{F<:AbstractFrequencyFilterP ffPos::FF = DefaultFocusFieldPositions() ffPosSF::FFSF = DefaultFocusFieldPositions() solverParams::S - λ::Float32 + reg::Vector{R} = AbstractRegularization[] # weightingType::WeightingType = WeightingType.None end -Base.@kwdef mutable struct MultiPatchReconstructionAlgorithm{P} <: AbstractMultiPatchReconstructionAlgorithm where {P<:AbstractMultiPatchAlgorithmParameters} +Base.@kwdef mutable struct MultiPatchReconstructionAlgorithm{P, matT <: AbstractArray} <: AbstractMultiPatchReconstructionAlgorithm where {P<:AbstractMultiPatchAlgorithmParameters} params::P # Could also do reconstruction progress meter here opParams::Union{AbstractMultiPatchOperatorParameter, Nothing} = nothing sf::MultiMPIFile - ffOp::Union{Nothing, MultiPatchOperator} + arrayType::Type{matT} + ffOp::Union{Nothing, AbstractMultiPatchOperator} ffPos::Union{Nothing,AbstractArray} ffPosSF::Union{Nothing,AbstractArray} freqs::Vector{CartesianIndex{2}} @@ -38,7 +40,7 @@ function 
MultiPatchReconstructionAlgorithm(params::MultiPatchParameters{<:Abstra ffPosSF = [vec(ffPos(SF))[l] for l=1:L, SF in reco.sf] end - return MultiPatchReconstructionAlgorithm(params, reco.opParams, reco.sf, nothing, ffPos_, ffPosSF, freqs, Channel{Any}(Inf)) + return MultiPatchReconstructionAlgorithm(params, reco.opParams, reco.sf, reco.arrayType, nothing, ffPos_, ffPosSF, freqs, Channel{Any}(Inf)) end recoAlgorithmTypes(::Type{MultiPatchReconstruction}) = SystemMatrixBasedAlgorithm() AbstractImageReconstruction.parameter(algo::MultiPatchReconstructionAlgorithm) = algo.origParam @@ -76,8 +78,14 @@ function process(algo::MultiPatchReconstructionAlgorithm, params::AbstractMultiP ffPosSF = algo.ffPosSF - return MultiPatchOperator(algo.sf, frequencies; toKwargs(params)..., - gradient = gradient, FFPos = ffPos_, FFPosSF = ffPosSF) + operator = MultiPatchOperator(algo.sf, frequencies; toKwargs(params)..., gradient = gradient, FFPos = ffPos_, FFPosSF = ffPosSF) + return adapt(algo.arrayType, operator) +end + +function process(algo::MultiPatchReconstructionAlgorithm, params::Union{A, ProcessResultCache{<:A}}, f::MPIFile, args...) where A <: AbstractMPIPreProcessingParameters + result = process(typeof(algo), params, f, args...) + result = adapt(algo.arrayType, result) + return result end function process(t::Type{<:MultiPatchReconstructionAlgorithm}, params::CommonPreProcessingParameters{NoBackgroundCorrectionParameters}, f::MPIFile, frequencies::Union{Vector{CartesianIndex{2}}, Nothing} = nothing) @@ -114,8 +122,8 @@ function process(::Type{<:MultiPatchReconstructionAlgorithm}, params::ExternalPr return data end -function process(algo::MultiPatchReconstructionAlgorithm, params::MultiPatchReconstructionParameter, u::Array) - solver = LeastSquaresParameters(S = algo.ffOp, reg = [L2Regularization(params.λ)], solverParams = params.solverParams) +function process(algo::MultiPatchReconstructionAlgorithm, params::MultiPatchReconstructionParameter, u::AbstractArray) + solver = LeastSquaresParameters(S = algo.ffOp, reg = params.reg, solverParams = params.solverParams) result = process(algo, solver, u) diff --git a/test/MultiPatch.jl b/test/MultiPatch.jl index 03b6be3..3eeb64b 100644 --- a/test/MultiPatch.jl +++ b/test/MultiPatch.jl @@ -22,7 +22,7 @@ using MPIReco params[:iterations] = 1 params[:spectralLeakageCorrection] = false params[:sf] = bSF - params[:λ] = 0 + params[:reg] = L2Regularization(0.0f0) params[:tfCorrection] = false c1 = reconstruct("MultiPatch", b; params...) @test axisnames(c1) == names From 269c16a33036b07e0bdcd7bc1bf90db56dcfc4ed Mon Sep 17 00:00:00 2001 From: nHackel Date: Mon, 22 Jul 2024 18:00:20 +0200 Subject: [PATCH 13/18] Update tests for Multipatch --- test/MultiPatch.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/MultiPatch.jl b/test/MultiPatch.jl index 3eeb64b..f67a7e7 100644 --- a/test/MultiPatch.jl +++ b/test/MultiPatch.jl @@ -22,8 +22,9 @@ using MPIReco params[:iterations] = 1 params[:spectralLeakageCorrection] = false params[:sf] = bSF - params[:reg] = L2Regularization(0.0f0) + params[:reg] = [L2Regularization(0.0f0)] params[:tfCorrection] = false + params[:solver] = Kaczmarz c1 = reconstruct("MultiPatch", b; params...) 
@test axisnames(c1) == names @test axisvalues(c1) == values1 From 5d65a55bea53d951919e7461164f636c285a86be Mon Sep 17 00:00:00 2001 From: nHackel Date: Tue, 23 Jul 2024 10:18:38 +0200 Subject: [PATCH 14/18] Add efficient single-threaded adjoint for multipatch operator --- src/MultiPatch.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/MultiPatch.jl b/src/MultiPatch.jl index d4882e0..5a19e3e 100644 --- a/src/MultiPatch.jl +++ b/src/MultiPatch.jl @@ -495,6 +495,26 @@ function LinearAlgebra.mul!(b::AbstractVector{T}, op::MultiPatchOperator{T}, x:: return b end +function LinearAlgebra.mul!(res::AbstractVector{T}, adj::Adjoint{T, OP}, t::AbstractVector{T}) where {T, V <: AbstractArray, OP <: MultiPatchOperator{T, V}} + op = adj.parent + res .= zero(T) + + for i in 1:size(op, 1) + val = t[i] + + p = op.RowToPatch[i] + xs = op.xss[p] + xc = op.xcc[p] + row = mod1(i,div(op.M,op.nPatches)) + A = op.S[op.patchToSMIdx[p]] + sign = op.sign[row,op.patchToSMIdx[p]] + + for j in 1:length(xs) + res[xc[j]] += adjoint(sign*A[row,xs[j]]) * val + end + end + return res +end ### The following is intended to use the standard kaczmarz method ### function RegularizedLeastSquares.normalize(norm::SystemMatrixBasedNormalization, op::MultiPatchOperator, b) From 093cebf418293d640ae4fc2d1d40a7a942b026e8 Mon Sep 17 00:00:00 2001 From: nHackel Date: Tue, 23 Jul 2024 11:05:08 +0200 Subject: [PATCH 15/18] Add GPU multi patch test --- test/MultiPatch.jl | 1 + test/ReconstructionGPU.jl | 62 +++++++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/test/MultiPatch.jl b/test/MultiPatch.jl index f67a7e7..2a91af5 100644 --- a/test/MultiPatch.jl +++ b/test/MultiPatch.jl @@ -53,6 +53,7 @@ using MPIReco # flexible multi-patch reconstruction dirs = ["8.mdf", "9.mdf", "10.mdf", "11.mdf"] bSFs = MultiMPIFile(joinpath.(datadir, "calibrations", dirs)) + params[:sf] = bSFs mapping = [1,2,3,4] freq = filterFrequencies(bSFs, SNRThresh=5, minFreq=80e3) S = [getSF(SF,freq,nothing,Kaczmarz, bgcorrection=false)[1] for SF in bSFs] diff --git a/test/ReconstructionGPU.jl b/test/ReconstructionGPU.jl index 43f1bbb..13f3c18 100644 --- a/test/ReconstructionGPU.jl +++ b/test/ReconstructionGPU.jl @@ -22,10 +22,13 @@ for arrayType in arrayTypes params[:solver] = CGNR c1 = reconstruct("SinglePatch", b; params...) - c2 = reconstruct("SinglePatch", b; params..., arrayType = arrayType) - @test isapprox(arraydata(c1), arraydata(c2)) + + params[:weightingParams] = WhiteningWeightingParameters(whiteningMeas = bSF) + c3 = reconstruct("SinglePatch", b; params...) + c4 = reconstruct("SinglePatch", b; params..., arrayType = arrayType) + @test isapprox(arraydata(c3), arraydata(c4)) end @testset "Sparse Single Patch Reconstruction: $arrayType" begin @@ -45,11 +48,60 @@ for arrayType in arrayTypes params[:numAverages] = 100 params[:solver] = CGNR params[:redFactor] = redFactor + params[:sparseTrafo] = "FFT" + params[:useDFFoV] = false - c1 = reconstruct("SinglePatchSparse", b; params..., sparseTrafo = "FFT", useDFFoV = false) + c1 = reconstruct("SinglePatchSparse", b; params...) + c2 = reconstruct("SinglePatchSparse", b; params..., arrayType = arrayType) + @test isapprox(arraydata(c1), arraydata(c2)) + + params[:weightingParams] = WhiteningWeightingParameters(whiteningMeas = bSF) + c3 = reconstruct("SinglePatchSparse", b; params...) 
From 093cebf418293d640ae4fc2d1d40a7a942b026e8 Mon Sep 17 00:00:00 2001
From: nHackel
Date: Tue, 23 Jul 2024 11:05:08 +0200
Subject: [PATCH 15/18] Add GPU multi patch test

---
 test/MultiPatch.jl        |  1 +
 test/ReconstructionGPU.jl | 62 +++++++++++++++++++++++++++++++++++----
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/test/MultiPatch.jl b/test/MultiPatch.jl
index f67a7e7..2a91af5 100644
--- a/test/MultiPatch.jl
+++ b/test/MultiPatch.jl
@@ -53,6 +53,7 @@ using MPIReco
   # flexible multi-patch reconstruction
   dirs = ["8.mdf", "9.mdf", "10.mdf", "11.mdf"]
   bSFs = MultiMPIFile(joinpath.(datadir, "calibrations", dirs))
+  params[:sf] = bSFs
   mapping = [1,2,3,4]
   freq = filterFrequencies(bSFs, SNRThresh=5, minFreq=80e3)
   S = [getSF(SF,freq,nothing,Kaczmarz, bgcorrection=false)[1] for SF in bSFs]
diff --git a/test/ReconstructionGPU.jl b/test/ReconstructionGPU.jl
index 43f1bbb..13f3c18 100644
--- a/test/ReconstructionGPU.jl
+++ b/test/ReconstructionGPU.jl
@@ -22,10 +22,13 @@ for arrayType in arrayTypes
     params[:solver] = CGNR
 
     c1 = reconstruct("SinglePatch", b; params...)
-
     c2 = reconstruct("SinglePatch", b; params..., arrayType = arrayType)
-
     @test isapprox(arraydata(c1), arraydata(c2))
+
+    params[:weightingParams] = WhiteningWeightingParameters(whiteningMeas = bSF)
+    c3 = reconstruct("SinglePatch", b; params...)
+    c4 = reconstruct("SinglePatch", b; params..., arrayType = arrayType)
+    @test isapprox(arraydata(c3), arraydata(c4))
   end
 
   @testset "Sparse Single Patch Reconstruction: $arrayType" begin
@@ -45,11 +48,60 @@ for arrayType in arrayTypes
     params[:numAverages] = 100
     params[:solver] = CGNR
     params[:redFactor] = redFactor
+    params[:sparseTrafo] = "FFT"
+    params[:useDFFoV] = false
 
-    c1 = reconstruct("SinglePatchSparse", b; params..., sparseTrafo = "FFT", useDFFoV = false)
+    c1 = reconstruct("SinglePatchSparse", b; params...)
+    c2 = reconstruct("SinglePatchSparse", b; params..., arrayType = arrayType)
+    @test isapprox(arraydata(c1), arraydata(c2))
+
+    params[:weightingParams] = WhiteningWeightingParameters(whiteningMeas = bSF)
+    c3 = reconstruct("SinglePatchSparse", b; params...)
+    c4 = reconstruct("SinglePatchSparse", b; params..., arrayType = arrayType)
+    @test isapprox(arraydata(c3), arraydata(c4))
+  end
+
+  @testset "Multi Patch Reconstruction: $arrayType" begin
+    dirs = ["1.mdf", "2.mdf", "3.mdf", "4.mdf"]
+    b = MultiMPIFile(joinpath.(datadir, "measurements", "20211226_203916_MultiPatch", dirs))
 
-    c2 = reconstruct("SinglePatchSparse", b; params..., sparseTrafo = "FFT", useDFFoV = false, arrayType = arrayType)
+    dirs = ["8.mdf", "9.mdf", "10.mdf", "11.mdf"]
+    bSFs = MultiMPIFile(joinpath.(datadir, "calibrations", dirs))
 
-    @test isapprox(arraydata(c1), arraydata(c2))
+
+    mapping = [1,2,3,4]
+    freq = filterFrequencies(bSFs, SNRThresh=5, minFreq=80e3)
+    S = [getSF(SF,freq,nothing,Kaczmarz, bgcorrection=false)[1] for SF in bSFs]
+    SFGridCenter = zeros(3,4)
+
+    FFPos = zeros(3,4)
+    FFPos[:,1] = [-0.008, 0.008, 0.0]
+    FFPos[:,2] = [-0.008, -0.008, 0.0]
+    FFPos[:,3] = [0.008, 0.008, 0.0]
+    FFPos[:,4] = [0.008, -0.008, 0.0]
+    ffPos = CustomFocusFieldPositions(FFPos)
+
+
+    params = Dict{Symbol, Any}()
+    params[:SNRThresh] = 5
+    params[:frames] = [1]
+    params[:minFreq] = 80e3
+    params[:recChannels] = 1:2
+    params[:iterations] = 50
+    params[:spectralLeakageCorrection] = true
+    params[:sf] = bSFs
+    params[:reg] = [L2Regularization(0.0f0)]
+    params[:tfCorrection] = false
+    params[:solver] = CGNR
+
+    opParams = ExplicitMultiPatchParameter(;tfCorrection = false, systemMatrices = S, SFGridCenter = SFGridCenter, mapping = mapping)
+    params[:opParams] = opParams
+    params[:ffPos] = ffPos
+    params[:ffPosSF] = ffPos
+
+    c1 = reconstruct("MultiPatch", b; params...)
+    c2 = reconstruct("MultiPatch", b; params..., arrayType = arrayType)
+    @test isapprox(arraydata(c1), arraydata(c2), rtol = 0.02)
   end
+
 end
\ No newline at end of file
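The next patch accumulates complex values atomically inside a GPU kernel even though Atomix only provides atomics for real element types; the workaround is to reinterpret the complex result vector as a 2×N real matrix and update real and imaginary parts separately. The idea in isolation, on the host:

    res = zeros(ComplexF32, 4)
    resr = reinterpret(reshape, Float32, res)  # 2×4 view: row 1 = real, row 2 = imaginary
    resr[1, 2] += 1.0f0                        # accumulates into real(res[2])
    resr[2, 2] += 2.0f0                        # accumulates into imag(res[2])
    res[2]                                     # 1.0f0 + 2.0f0im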
From 83431dc44e710d46d83927ac39a391f1cc802482 Mon Sep 17 00:00:00 2001
From: nHackel
Date: Tue, 23 Jul 2024 16:32:11 +0200
Subject: [PATCH 16/18] Add GPU support for Kaczmarz MultiPatch

---
 .../MPIRecoKernelAbstractionsExt.jl |  2 +-
 .../MultiPatch.jl                   | 44 ++++++++++++++-
 src/MultiPatch.jl                   | 53 +++++----------------
 test/ReconstructionGPU.jl           | 21 +++++++-
 4 files changed, 70 insertions(+), 50 deletions(-)

diff --git a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl
index 750cef1..e3d43e2 100644
--- a/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl
+++ b/ext/MPIRecoKernelAbstractionsExt/MPIRecoKernelAbstractionsExt.jl
@@ -1,6 +1,6 @@
 module MPIRecoKernelAbstractionsExt
 
-using MPIReco, MPIReco.Adapt, MPIReco.LinearAlgebra
+using MPIReco, MPIReco.Adapt, MPIReco.LinearAlgebra, MPIReco.RegularizedLeastSquares
 using KernelAbstractions, GPUArrays
 using KernelAbstractions.Extras: @unroll
 using Atomix
diff --git a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl
index 21fbfbf..f99af94 100644
--- a/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl
+++ b/ext/MPIRecoKernelAbstractionsExt/MultiPatch.jl
@@ -84,14 +84,14 @@ end
   # Since we go along the columns during a matrix-vector product,
   # we have a race condition with other threads writing to the same result.
   for i = localIdx:grid_stride:N
-    tmp = sign * adjoint(S[patch_row, xss[i, patch], smIdx]) * val
+    tmp = sign * conj(S[patch_row, xss[i, patch], smIdx]) * val
     # @atomic is not supported for ComplexF32 numbers
     Atomix.@atomic res[1, xcc[i, patch]] += tmp.re
     Atomix.@atomic res[2, xcc[i, patch]] += tmp.im
   end
 end
 
-function LinearAlgebra.mul!(res::AbstractVector{T}, adj::Adjoint{T, OP}, t::AbstractVector{T}) where {T <: Complex, V <: AbstractArray, OP <: DenseMultiPatchOperator{T, V}}
+function LinearAlgebra.mul!(res::AbstractVector{T}, adj::Adjoint{T, OP}, t::AbstractVector{T}) where {T <: Complex, V <: AbstractGPUArray, OP <: DenseMultiPatchOperator{T, V}}
   backend = get_backend(res)
   op = adj.parent
   res .= zero(T) # We need to zero the result, because we are using += in the kernel
@@ -100,4 +100,44 @@ function LinearAlgebra.mul!(res::AbstractVector{T}, adj::Adjoint{T, OP}, t::Abst
   kernel(reinterpret(reshape, real(eltype(res)), res), t, op.S, op.xcc, op.xss, op.sign, Int32(div(op.M, op.nPatches)), op.RowToPatch, op.patchToSMIdx; ndrange = (256, size(op, 1)))
   synchronize(backend)
   return res
+end
+
+# Kaczmarz specific functions
+function RegularizedLeastSquares.dot_with_matrix_row(op::DenseMultiPatchOperator{T, V}, x::AbstractArray{T}, k::Int) where {T, V <: AbstractGPUArray}
+  patch = @allowscalar op.RowToPatch[k]
+  patch_row = mod1(k, div(op.M,op.nPatches))
+  smIdx = @allowscalar op.patchToSMIdx[patch]
+  sign = @allowscalar op.sign[patch_row, smIdx]
+  S = op.S
+  # Inplace reduce-broadcast: https://github.com/JuliaLang/julia/pull/31020
+  return sum(Broadcast.instantiate(Base.broadcasted(view(op.xss, :, patch), view(op.xcc, :, patch)) do xs, xc
+    @inbounds sign * S[patch_row, xs, smIdx] * x[xc]
+  end))
+end
+
+function RegularizedLeastSquares.rownorm²(op::DenseMultiPatchOperator{T, V}, row::Int64) where {T, V <: AbstractGPUArray}
+  patch = @allowscalar op.RowToPatch[row]
+  patch_row = mod1(row, div(op.M,op.nPatches))
+  smIdx = @allowscalar op.patchToSMIdx[patch]
+  sign = @allowscalar op.sign[patch_row, smIdx]
+  S = op.S
+  return mapreduce(xs -> abs2(sign * S[patch_row, xs, smIdx]), +, view(op.xss, :, patch))
+end
+
+@kernel cpu = false function kaczmarz_update_kernel!(x, @Const(S), @Const(row), @Const(beta), @Const(xcc), @Const(xss), @Const(signs), @Const(M), @Const(RowToPatch), @Const(patchToSMIdx))
+  # Each thread handles one element of the kaczmarz update
+  idx = @index(Global, Linear)
+  patch = RowToPatch[row]
+  patch_row = mod1(row, M)
+  smIdx = patchToSMIdx[patch]
+  sign = eltype(x)(signs[patch_row, smIdx])
+  x[xcc[idx, patch]] += beta * conj(sign * S[patch_row, xss[idx, patch], smIdx])
+end
+
+function RegularizedLeastSquares.kaczmarz_update!(op::DenseMultiPatchOperator{T, V}, x::vecT, row, beta) where {T, vecT <: AbstractGPUVector{T}, V <: AbstractGPUArray{T}}
+  backend = get_backend(x)
+  kernel = kaczmarz_update_kernel!(backend, 256)
+  kernel(x, op.S, row, beta, op.xcc, op.xss, op.sign, Int32(div(op.M, op.nPatches)), op.RowToPatch, op.patchToSMIdx; ndrange = size(op.xss, 1))
+  synchronize(backend)
+  return x
+end
\ No newline at end of file
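dot_with_matrix_row, rownorm², and kaczmarz_update! are the three row primitives that RegularizedLeastSquares builds its Kaczmarz iteration from, which is why specializing them is enough to move the solver onto the GPU. Their roles are easiest to see on a dense matrix; one regularized sweep looks roughly like this (illustrative helper, not the solver's actual code):

    function kaczmarz_sweep!(x, A, b, λ)
      for i in axes(A, 1)
        r = b[i] - sum(A[i, j] * x[j] for j in axes(A, 2))  # dot_with_matrix_row
        β = r / (sum(abs2, view(A, i, :)) + λ)              # rownorm² plus regularization
        x .+= β .* conj.(view(A, i, :))                     # kaczmarz_update!
      end
      return x
    end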
diff --git a/src/MultiPatch.jl b/src/MultiPatch.jl
index 5a19e3e..cbb059c 100644
--- a/src/MultiPatch.jl
+++ b/src/MultiPatch.jl
@@ -586,52 +586,13 @@ function kaczmarz_update_!(A,x,beta,xs,xc,j,sign)
   end
 end
 
-# TODO implement for ProdOp{WeightingOp, MultiPatchOperator}
-function initkaczmarz(Op::MultiPatchOperator{T},λ) where T
-  denom = T[] #zeros(T,Op.M)
-  rowindex = Int64[] #zeros(Int64,Op.M)
-
-  MSub = div(Op.M,Op.nPatches)
-
-  if length(Op.S) == 1
-    for i=1:MSub
-      s² = rownorm²(Op.S[1],i)#*weights[i]^2
-      if s²>0
-        for l=1:Op.nPatches
-          k = i+MSub*(l-1)
-          push!(denom,1/(s²+λ)) #denom[k] = weights[i]^2/(s²+λ)
-          push!(rowindex,k) #rowindex[k] = k
-        end
-      end
-    end
-  else
-    for l=1:Op.nPatches
-      for i=1:MSub
-        s² = rownorm²(Op.S[Op.patchToSMIdx[l]],i)#*weights[i]^2
-        if s²>0
-          k = i+MSub*(l-1)
-          push!(denom,1/(s²+λ)) #denom[k] = weights[i]^2/(s²+λ)
-          push!(rowindex,k) #rowindex[k] = k
-        end
-      end
-    end
-  end
-
-  Op, denom, rowindex
-end
-
-function RegularizedLeastSquares.rowProbabilities(Op::MultiPatchOperator, rowindex)
-  p = zeros(length(rowindex))
+function RegularizedLeastSquares.rownorm²(op::MultiPatchOperator, row::Int64)
+  p = op.RowToPatch[row]
+  xs = op.xss[p]
 
-  for i=1:length(rowindex)
-    k = rowindex[i]
-    j = mod1(k, div(Op.M,Op.nPatches))
-    patch = Op.RowToPatch[k]
-    A = Op.S[Op.patchToSMIdx[patch]]
-    p[i] = rownorm²(A, j)
-  end
-  p ./= sum(p)
+  j = mod1(row,div(op.M,op.nPatches))
+  A = op.S[op.patchToSMIdx[p]]
+  sign = op.sign[j,op.patchToSMIdx[p]]
 
-  return p
+  return mapreduce(x -> abs2(sign*A[j,x]), +, xs)
 end
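Dropping the bespoke initkaczmarz works because the generic setup only needs per-row norms: the denominators and row index list that the deleted method precomputed by hand amount to (conceptually, with the unexported RegularizedLeastSquares.rownorm² written as rownorm² here; not the library's literal code):

    denom    = [1 / (rownorm²(op, k) + λ) for k in 1:size(op, 1)]
    rowindex = filter(k -> rownorm²(op, k) > 0, 1:size(op, 1))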
diff --git a/test/ReconstructionGPU.jl b/test/ReconstructionGPU.jl
index 13f3c18..d2dda8a 100644
--- a/test/ReconstructionGPU.jl
+++ b/test/ReconstructionGPU.jl
@@ -88,7 +88,7 @@ for arrayType in arrayTypes
     params[:minFreq] = 80e3
     params[:recChannels] = 1:2
     params[:iterations] = 50
-    params[:spectralLeakageCorrection] = true
+    params[:spectralLeakageCorrection] = false
     params[:sf] = bSFs
     params[:reg] = [L2Regularization(0.0f0)]
     params[:tfCorrection] = false
@@ -102,6 +102,25 @@ for arrayType in arrayTypes
     c1 = reconstruct("MultiPatch", b; params...)
     c2 = reconstruct("MultiPatch", b; params..., arrayType = arrayType)
     @test isapprox(arraydata(c1), arraydata(c2), rtol = 0.02)
+
+    # Test Kaczmarz since it uses specific functions and not just mul!
+    bSF = MultiMPIFile([joinpath(datadir, "calibrations", "12.mdf")])
+    dirs = ["1.mdf", "2.mdf", "3.mdf", "4.mdf"]
+    b = MultiMPIFile(joinpath.(datadir, "measurements", "20211226_203916_MultiPatch", dirs))
+    params = Dict{Symbol, Any}()
+    params[:SNRThresh] = 5
+    params[:frames] = [1]
+    params[:minFreq] = 80e3
+    params[:recChannels] = 1:2
+    params[:iterations] = 1
+    params[:spectralLeakageCorrection] = false
+    params[:sf] = bSF
+    params[:reg] = [L2Regularization(0.0f0)]
+    params[:tfCorrection] = false
+    params[:solver] = Kaczmarz
+    c3 = reconstruct("MultiPatch", b; params...)
+    c4 = reconstruct("MultiPatch", b; params..., arrayType = arrayType)
+    @test isapprox(arraydata(c3), arraydata(c4))
   end
 
 end
\ No newline at end of file

From 8b25dd8ba2ac1ece64bb65818ad14d8a2af8e5ce Mon Sep 17 00:00:00 2001
From: nHackel
Date: Tue, 23 Jul 2024 17:34:21 +0200
Subject: [PATCH 17/18] Fix bugs in multi-patch tests

---
 src/Algorithms/MultiPatchAlgorithms/MultiPatchPeriodicMotion.jl | 2 +-
 src/MultiPatch.jl                                               | 2 +-
 test/MultiGradient.jl                                           | 2 +-
 test/SMExtrapolation.jl                                         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Algorithms/MultiPatchAlgorithms/MultiPatchPeriodicMotion.jl b/src/Algorithms/MultiPatchAlgorithms/MultiPatchPeriodicMotion.jl
index 4cdaccd..97dcf56 100644
--- a/src/Algorithms/MultiPatchAlgorithms/MultiPatchPeriodicMotion.jl
+++ b/src/Algorithms/MultiPatchAlgorithms/MultiPatchPeriodicMotion.jl
@@ -24,7 +24,7 @@ end
 function MultiPatchReconstructionAlgorithm(params::MultiPatchParameters{<:PeriodicMotionPreProcessing,<:PeriodicMotionReconstructionParameter,<:AbstractMPIPostProcessingParameters})
   reco = params.reco
   freqs = process(MultiPatchReconstructionAlgorithm, reco.freqFilter, reco.sf)
-  return MultiPatchReconstructionAlgorithm(params, nothing, reco.sf, nothing, nothing, nothing, freqs, Channel{Any}(Inf))
+  return MultiPatchReconstructionAlgorithm(params, nothing, reco.sf, Array, nothing, nothing, nothing, freqs, Channel{Any}(Inf))
 end
 function AbstractImageReconstruction.put!(algo::MultiPatchReconstructionAlgorithm{MultiPatchParameters{PT, R, T}}, data::MPIFile) where {R, T, PT <: PeriodicMotionPreProcessing}
diff --git a/src/MultiPatch.jl b/src/MultiPatch.jl
index cbb059c..00fcff8 100644
--- a/src/MultiPatch.jl
+++ b/src/MultiPatch.jl
@@ -222,7 +222,7 @@ function MultiPatchOperatorExpliciteMapping(SFs::MultiMPIFile, freq; bgCorrectio
                   gridsize = hcat(calibSize.(SFs)...),
                   fov = hcat(calibFov.(SFs)...),
                   grid = nothing,
-                  tfCorrection = true,
+                  tfCorrection = rxHasTransferFunction(SFs),
                   kargs...)
 
 
diff --git a/test/MultiGradient.jl b/test/MultiGradient.jl
index 9a31280..e3cd379 100644
--- a/test/MultiGradient.jl
+++ b/test/MultiGradient.jl
@@ -15,7 +15,7 @@ using MPIReco
   params[:recChannels] = 1:2
   params[:iterations] = 3
   params[:sf] = bSFs
-  params[:λ] = 0.003
+  params[:reg] = [L2Regularization(0.003)]
   params[:tfCorrection] = false
   params[:roundPatches] = false
 
diff --git a/test/SMExtrapolation.jl b/test/SMExtrapolation.jl
index 987d73f..6941970 100644
--- a/test/SMExtrapolation.jl
+++ b/test/SMExtrapolation.jl
@@ -56,7 +56,7 @@ using MPIReco
   params[:iterations] = 1
   params[:spectralLeakageCorrection] = false
   params[:sf] = bSFs
-  params[:λ] = 0
+  params[:reg] = [L2Regularization(0.0f0)]
   params[:tfCorrection] = false
 
   c2 = reconstruct("MultiPatch", b; params...)

From 62a6813e75d30778b05abf0bdd95e7ed249a78e1 Mon Sep 17 00:00:00 2001
From: nHackel
Date: Tue, 23 Jul 2024 17:34:32 +0200
Subject: [PATCH 18/18] Set Kaczmarz as default solver

---
 src/LeastSquares.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/LeastSquares.jl b/src/LeastSquares.jl
index 17d3c96..e7b1b67 100644
--- a/src/LeastSquares.jl
+++ b/src/LeastSquares.jl
@@ -27,7 +27,7 @@ Base.@kwdef struct ConstraintMaskedSolverParameters{S, P<:AbstractSolverParamete
 end
 export ElaborateSolverParameters
 Base.@kwdef mutable struct ElaborateSolverParameters{SL} <: AbstractSolverParameters{SL}
-  solver::Type{SL}
+  solver::Type{SL} = Kaczmarz
   iterations::Int64 = 10
   enforceReal::Bool = true
   enforcePositive::Bool = true
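With the whole series applied, switching a reconstruction between CPU and GPU is a single keyword argument. A usage sketch, assuming CUDA.jl is loaded and b/bSF are measurement and calibration files opened as in the tests above:

    using MPIReco, CUDA

    params = Dict{Symbol, Any}(
        :SNRThresh => 5, :frames => [1], :minFreq => 80e3, :recChannels => 1:2,
        :iterations => 1, :sf => bSF, :reg => [L2Regularization(0.0f0)],
        :tfCorrection => false)  # :solver can now be omitted and defaults to Kaczmarz

    c_cpu = reconstruct("MultiPatch", b; params...)
    c_gpu = reconstruct("MultiPatch", b; params..., arrayType = CuArray)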