From eb0291cf2eb57a92d6a066ef7e0ba67ff1b51f4f Mon Sep 17 00:00:00 2001 From: kamesy Date: Thu, 25 Aug 2022 17:00:31 -0700 Subject: [PATCH] Add multi-threaded map --- src/bgremove/ismv.jl | 4 +- src/bgremove/lbv.jl | 10 +-- src/bgremove/pdf.jl | 2 +- src/bgremove/sharp.jl | 6 +- src/inversion/direct.jl | 2 +- src/inversion/nltv.jl | 2 +- src/inversion/tv.jl | 2 +- src/utils/kernels.jl | 14 ++-- src/utils/lsmr.jl | 22 ++---- src/utils/poisson_solver/mgpcg.jl | 10 +-- .../poisson_solver/multigrid/transfer.jl | 3 +- src/utils/poisson_solver/poisson_solver.jl | 14 ++-- src/utils/r2star.jl | 18 ++--- src/utils/utils.jl | 67 ++++++++++++------- 14 files changed, 91 insertions(+), 85 deletions(-) diff --git a/src/bgremove/ismv.jl b/src/bgremove/ismv.jl index d848e3b..48a8ec5 100644 --- a/src/bgremove/ismv.jl +++ b/src/bgremove/ismv.jl @@ -91,8 +91,8 @@ function _ismv!( _δ = one(eltype(s)) - sqrt(eps(eltype(s))) # erode mask - s = _tcopyto!(s, m) # in-place type conversion, reuse smv var - m0 = _tcopyto!(m0, m) + s = tcopyto!(s, m) # in-place type conversion, reuse smv var + m0 = tcopyto!(m0, m) F̂ = mul!(F̂, P, s) @inbounds @batch for I in eachindex(F̂) diff --git a/src/bgremove/lbv.jl b/src/bgremove/lbv.jl index 80b4ec1..bbde24e 100644 --- a/src/bgremove/lbv.jl +++ b/src/bgremove/lbv.jl @@ -104,16 +104,16 @@ function _lbv!( flc = fl else mc = similar(mask, szc) - mc = _tcopyto!(mc, @view(mask[Rc])) + mc = tcopyto!(mc, @view(mask[Rc])) if N == 3 fc = @view(f[Rc]) flc = similar(fl, szc) - flc = _tcopyto!(flc, @view(fl[Rc])) + flc = tcopyto!(flc, @view(fl[Rc])) else fc = @view(f[Rc,:]) flc = similar(fl, (szc..., size(fl, 4))) - flc = _tcopyto!(flc, @view(fl[Rc,:])) + flc = tcopyto!(flc, @view(fl[Rc,:])) end end @@ -147,9 +147,9 @@ function _lbv!( if _crop if N == 3 - _tcopyto!(@view(fl[Rc]), flc) + tcopyto!(@view(fl[Rc]), flc) else - _tcopyto!(@view(fl[Rc,:]), flc) + tcopyto!(@view(fl[Rc,:]), flc) end end diff --git a/src/bgremove/pdf.jl b/src/bgremove/pdf.jl index 824468c..cf2a03d 100644 --- a/src/bgremove/pdf.jl +++ b/src/bgremove/pdf.jl @@ -133,7 +133,7 @@ function _pdf!( # pre-compute mask*weights if W === nothing # no weights - MW = _tcopyto!(MW, m) + MW = tcopyto!(MW, m) elseif ndims(W) == 3 # same weights for all echoes diff --git a/src/bgremove/sharp.jl b/src/bgremove/sharp.jl index caec75c..95e7ff9 100644 --- a/src/bgremove/sharp.jl +++ b/src/bgremove/sharp.jl @@ -86,7 +86,7 @@ function _sharp!( _δ = one(eltype(s)) - sqrt(eps(eltype(s))) # erode mask - s = _tcopyto!(s, m) # in-place type conversion, reuse smv var + s = tcopyto!(s, m) # in-place type conversion, reuse smv var F̂ = mul!(F̂, P, s) @inbounds @batch for I in eachindex(F̂) @@ -232,7 +232,7 @@ function _sharp!( _δ = one(eltype(s)) - sqrt(eps(eltype(s))) # fft of original mask - s = _tcopyto!(s, mr) # in-place type conversion + s = tcopyto!(s, mr) # in-place type conversion M̂ = mul!(M̂, P, s) @inbounds for (i, r) in enumerate(rs) @@ -344,7 +344,7 @@ function _sharp!( _δ = one(eltype(s)) - sqrt(eps(eltype(s))) # fft of original mask - s = _tcopyto!(s, mr) # in-place type conversion + s = tcopyto!(s, mr) # in-place type conversion M̂ = mul!(M̂, P, s) @inbounds for (i, r) in enumerate(rs) diff --git a/src/inversion/direct.jl b/src/inversion/direct.jl index eafa739..d008bf1 100644 --- a/src/inversion/direct.jl +++ b/src/inversion/direct.jl @@ -270,7 +270,7 @@ function _kdiv_ikernel!( elseif reg == :laplacian Γ = _laplace_kernel!(Γ, F, f, vsz, P) - Γ = _tcopyto!(abs2, Γ, Γ) + Γ = tmap!(abs2, Γ) end @inbounds @batch for I in eachindex(D) diff --git a/src/inversion/nltv.jl b/src/inversion/nltv.jl index c0f81cd..d192d47 100644 --- a/src/inversion/nltv.jl +++ b/src/inversion/nltv.jl @@ -260,7 +260,7 @@ function _nltv!( end if W !== nothing - F̂ = _tcopyto!(F̂, X̂) # real ifft overwrites input + F̂ = tcopyto!(F̂, X̂) # real ifft overwrites input end xp = mul!(xp, iP, X̂) diff --git a/src/inversion/tv.jl b/src/inversion/tv.jl index c71212b..057e91a 100644 --- a/src/inversion/tv.jl +++ b/src/inversion/tv.jl @@ -272,7 +272,7 @@ function _tv!( end if W !== nothing - F̂ = _tcopyto!(F̂, X̂) # real ifft overwrites input + F̂ = tcopyto!(F̂, X̂) # real ifft overwrites input end xp = mul!(xp, iP, X̂) diff --git a/src/utils/kernels.jl b/src/utils/kernels.jl index a29b896..6f189f9 100644 --- a/src/utils/kernels.jl +++ b/src/utils/kernels.jl @@ -136,7 +136,7 @@ function _dipole_kernel!( d = _dipole_kernel!(d, sz, vsz, bdir, :i, shift=true) D̂ = mul!(D̂, P, d) - D = _tcopyto!(real, D, D̂) + D = tmap!(real, D, D̂) return D end @@ -152,7 +152,7 @@ function _dipole_kernel!( D̂ = _dipole_kernel!(D̂, sz, vsz, bdir, :i, shift=true) D̂ = P*D̂ - D = _tcopyto!(real, D, D̂) + D = tmap!(real, D, D̂) return D end @@ -321,7 +321,7 @@ function _smv_kernel!( # normalizing a = inv(sum(s)) - s = _tcopyto!(x -> a*x, s, s) + s = tmap!(x -> a*x, s) return s end @@ -341,7 +341,7 @@ function _smv_kernel!( # fft, discard imaginary (even function -> imag = 0), and normalize Ŝ = mul!(Ŝ, P, s) - S = _tcopyto!(x -> a*real(x), S, Ŝ) + S = tmap!(x -> a*real(x), S, Ŝ) return S end @@ -360,7 +360,7 @@ function _smv_kernel!( # fft, discard imaginary (even function -> imag = 0), and normalize Ŝ = P*Ŝ - S = _tcopyto!(x -> a*real(x), S, Ŝ) + S = tmap!(x -> a*real(x), S, Ŝ) return S end @@ -498,7 +498,7 @@ function _laplace_kernel!( Δ = _laplace_kernel!(Δ, vsz, negative=negative, shift=true) L̂ = mul!(L̂, P, Δ) - L = _tcopyto!(real, L, L̂) + L = tmap!(real, L, L̂) return L end @@ -513,7 +513,7 @@ function _laplace_kernel!( L̂ = _laplace_kernel!(L̂, vsz, negative=negative, shift=true) L̂ = P*L̂ - L = _tcopyto!(real, L, L̂) + L = tmap!(real, L, L̂) return L end diff --git a/src/utils/lsmr.jl b/src/utils/lsmr.jl index 190d385..ed60498 100644 --- a/src/utils/lsmr.jl +++ b/src/utils/lsmr.jl @@ -39,7 +39,7 @@ end function LSMRWorkspace(x::AbstractArray, A, b::AbstractArray) T = typeof(one(eltype(b)) / one(eltype(A))) - u = isa(b, Array) ? tcopy(b) : copy(b) + u = tcopy(b) v = similar(x, T) h = similar(x, T) @@ -163,23 +163,9 @@ function lsmr!( cbar = one(Tr) sbar = zero(Tr) - if isa(x, Array) - tfill!(x, 0) - else - fill!(x, 0) - end - - if isa(h, Array) && isa(v, Array) - _tcopyto!(h, v) - else - copyto!(h, v) - end - - if isa(hbar, Array) - tfill!(hbar, 0) - else - fill!(hbar, 0) - end + tfill!(x, 0) + tfill!(hbar, 0) + tcopyto!(h, v) # Initialize variables for estimation of ||r||. βdd = β diff --git a/src/utils/poisson_solver/mgpcg.jl b/src/utils/poisson_solver/mgpcg.jl index c00502a..ad6e59f 100644 --- a/src/utils/poisson_solver/mgpcg.jl +++ b/src/utils/poisson_solver/mgpcg.jl @@ -62,10 +62,10 @@ function mgpcg!( maxlevel = maxlevel ) - q = tzero(b, size(A)) p = similar(b, size(A)) + q = similar(b, size(A)) - M.workspace.x[1] = q + M.workspace.x[1] = tfill!(q, zero(T)) if N == 3 M.workspace.b[1] = tcopy(b) @@ -76,12 +76,12 @@ function mgpcg!( M.workspace.b[1] = similar(b, size(A)) for t in axes(b, 4) - _tcopyto!(xt, @view(x[:,:,:,t])) - _tcopyto!(M.workspace.b[1], @view(b[:,:,:,t])) + tcopyto!(xt, @view(x[:,:,:,t])) + tcopyto!(M.workspace.b[1], @view(b[:,:,:,t])) xt = _mgpcg!(xt, M, p, cycle, ncycles, atol, rtol, maxit, verbose) - _tcopyto!(@view(x[:,:,:,t]), xt) + tcopyto!(@view(x[:,:,:,t]), xt) end end diff --git a/src/utils/poisson_solver/multigrid/transfer.jl b/src/utils/poisson_solver/multigrid/transfer.jl index 1487416..bf801d0 100644 --- a/src/utils/poisson_solver/multigrid/transfer.jl +++ b/src/utils/poisson_solver/multigrid/transfer.jl @@ -10,7 +10,8 @@ end function restrict(interior::AbstractArray{Bool, 3}) sz = size(interior) szc = restrict_size(sz) - return restrict!(tzero(interior, szc), interior) + interiorc = tfill!(similar(interior, szc), zero(Bool)) + return restrict!(interiorc, interior) end function restrict!(mc::AbstractArray{Bool, 3}, m::AbstractArray{Bool, 3}) diff --git a/src/utils/poisson_solver/poisson_solver.jl b/src/utils/poisson_solver/poisson_solver.jl index e0c80ac..25538e9 100644 --- a/src/utils/poisson_solver/poisson_solver.jl +++ b/src/utils/poisson_solver/poisson_solver.jl @@ -47,16 +47,16 @@ function solve_poisson_mgpcg!( uc = u else mc = similar(mask, szc) - mc = _tcopyto!(mc, @view(mask[Rc])) + mc = tcopyto!(mc, @view(mask[Rc])) if N == 3 d2uc = @view(d2u[Rc]) uc = similar(u, szc) - uc = _tcopyto!(uc, @view(u[Rc])) + uc = tcopyto!(uc, @view(u[Rc])) else d2uc = @view(d2u[Rc,:]) uc = similar(u, (szc..., size(u, 4))) - uc = _tcopyto!(uc, @view(u[Rc,:])) + uc = tcopyto!(uc, @view(u[Rc,:])) end end @@ -66,9 +66,9 @@ function solve_poisson_mgpcg!( if _crop if N == 3 - _tcopyto!(@view(u[Rc]), uc) + tcopyto!(@view(u[Rc]), uc) else - _tcopyto!(@view(u[Rc,:]), uc) + tcopyto!(@view(u[Rc,:]), uc) end end @@ -160,7 +160,7 @@ function solve_poisson_fft!( iP = inv(P) d2û = P*d2u else - d2û = _tcopyto!(similar(d2u, complex(eltype(d2u))), d2u) + d2û = tcopyto!(similar(d2u, complex(eltype(d2u))), d2u) P = plan_fft!(d2û, 1:3) iP = inv(P) d2û = P*d2û @@ -188,7 +188,7 @@ function solve_poisson_fft!( u = mul!(u, iP, d2û) else d2û = iP*d2û - u = _tcopyto!(real, u, d2û) + u = tmap!(real, u, d2û) end return u diff --git a/src/utils/r2star.jl b/src/utils/r2star.jl index 8c92980..6d78b79 100644 --- a/src/utils/r2star.jl +++ b/src/utils/r2star.jl @@ -26,7 +26,8 @@ function r2star_ll( size(mag, N) == NT || throw(DimensionMismatch()) mask === nothing || length(mask) == length(mag) ÷ NT || throw(DimensionMismatch()) - r2s = tzero(mag, size(mag)[1:N-1]) + r2s = similar(mag, size(mag)[1:N-1]) + r2s = tfill!(r2s, zero(T)) vmag = reshape(mag, :, NT) vr2s = vec(r2s) @@ -34,10 +35,8 @@ function r2star_ll( A = qr(Matrix{T}([-[TEs...] ones(NT)])) if mask === nothing - b = tcopy(log, transpose(vmag)) - + b = tmap(log, transpose(vmag)) x̂ = ldiv!(A, b) - @inbounds @batch for I in eachindex(vr2s) vr2s[I] = x̂[1,I] end @@ -110,7 +109,8 @@ function r2star_arlo( all((≈)(TEs[2]-TEs[1]), TEs[2:end].-TEs[1:end-1]) || throw(DomainError("ARLO requires equidistant echoes")) - r2s = tzero(mag, size(mag)[1:N-1]) + r2s = similar(mag, size(mag)[1:N-1]) + r2s = tfill!(r2s, zero(T)) vmag = reshape(mag, :, NT) vr2s = vec(r2s) @@ -238,8 +238,9 @@ function r2star_crsi( sigma === nothing || NR == N-1 || throw(DimensionMismatch()) M > 0 || throw(ArgumentError("interpolation factor M must be greater than 0")) - P = tcopy(x -> x*x, mag) - r2s = tzero(mag, size(mag)[1:N-1]) + P = tmap(x -> x*x, mag) + r2s = similar(mag, size(mag)[1:N-1]) + r2s = tfill!(r2s, zero(T)) vP = reshape(P, :, NT) vr2s = vec(r2s) @@ -376,7 +377,8 @@ function r2star_numart2s( size(mag, N) == NT || throw(DimensionMismatch()) mask === nothing || length(mask) == length(mag) ÷ NT || throw(DimensionMismatch()) - r2s = tzero(mag, size(mag)[1:N-1]) + r2s = similar(mag, size(mag)[1:N-1]) + r2s = tfill!(r2s, zero(T)) vmag = reshape(mag, :, NT) vr2s = vec(r2s) diff --git a/src/utils/utils.jl b/src/utils/utils.jl index c9ab520..06a3f36 100644 --- a/src/utils/utils.jl +++ b/src/utils/utils.jl @@ -153,7 +153,7 @@ function padarray!( all(szp .>= sz) || throw(DimensionMismatch()) if szp == sz - return _tcopyto!(xp, x) + return tcopyto!(xp, x) end valT = convert(Txp, val) @@ -286,7 +286,7 @@ Crop array to mask. function crop_mask(x::AbstractArray, m::AbstractArray{T} = x; out = zero(T)) where {T} checkshape(x, m, (:x, :m)) Rc = crop_indices(m, out) - xc = _tcopyto!(similar(x, size(Rc)), @view(x[Rc])) + xc = tcopyto!(similar(x, size(Rc)), @view(x[Rc])) return xc end @@ -387,7 +387,7 @@ function erode_mask!( checkshape(m1, m0, (:emask, :mask)) if iter < 1 - return _tcopyto!(m1, m0) + return tcopyto!(m1, m0) end if iter > 1 @@ -405,7 +405,7 @@ function erode_mask!( end if t < iter - _tcopyto!(m0, m1) + tcopyto!(m0, m1) end end @@ -467,7 +467,7 @@ function psf2otf( if szk == sz _kp = k else - _kp = tzero(k, sz) + _kp = tfill!(similar(k, sz), zero(T)) @inbounds @batch minbatch=1024 for I in CartesianIndices(k) _kp[I] = k[I] end @@ -485,7 +485,7 @@ function psf2otf( # discard imaginary part if within roundoff error nops = length(k)*sum(log2, szk) if maximum(x -> abs(imag(x)), K) / maximum(abs2, K) ≤ nops*eps(T) - _tcopyto!(real, K, K) + tmap!(real, K) end return K @@ -496,38 +496,55 @@ end ##### Multi-threaded Base utilities ##### -function tzero(x::AbstractArray{T}, sz::NTuple{N, Integer} = size(x)) where {T, N} - return tfill!(similar(x, sz), zero(T)) -end +tzero(x) = zero(x) +tzero(x::AbstractArray{T}) where {T} = tfill!(similar(x, typeof(zero(T))), zero(T)) -function tcopy(x::AbstractArray) - return _tcopyto!(similar(x), x) -end -function tcopy(f::Function, x::AbstractArray) - return _tcopyto!(f, similar(x), x) -end +tfill!(A, x) = fill!(A, x) function tfill!(A::AbstractArray{T}, x) where {T} xT = convert(T, x) - @inbounds @batch minbatch=1024 for I in eachindex(A) - A[I] = xT + @batch minbatch=1024 for I in eachindex(A) + @inbounds A[I] = xT end return A end -function _tcopyto!(y, x) - @inbounds @batch minbatch=1024 for I in eachindex(y, x) - y[I] = x[I] + +tmap(f, iters...) = map(f, iters...) + +function tmap(f, A::AbstractArray) + isempty(A) && return similar(A, 0) + dest = similar(A, typeof(f(A[1]))) + return tmap!(f, dest, A) +end + + +tmap!(f, dest, iters...) = map!(f, dest, iters...) +tmap!(f, A::AbstractArray) = tmap!(f, A, A) + +function tmap!(f::F, dest::AbstractArray, A::AbstractArray) where {F} + checkshape(Bool, dest, A) || return map!(f, dest, A) + @batch minbatch=1024 for I in eachindex(dest, A) + val = f(@inbounds A[I]) + @inbounds dest[I] = val end - return y + return dest end -function _tcopyto!(f::Function, y, x) - @inbounds @batch minbatch=1024 for I in eachindex(y, x) - y[I] = f(x[I]) + +tcopy(x) = copy(x) +tcopy(x::AbstractArray) = tcopyto!(similar(x), x) + + +tcopyto!(dest, src) = copyto!(dest, src) + +function tcopyto!(dest::AbstractArray, src::AbstractArray) + checkshape(Bool, dest, src) || return copyto!(dest, src) + @batch minbatch=1024 for I in eachindex(dest, src) + @inbounds dest[I] = src[I] end - return y + return dest end