diff --git a/src/bgremove/ismv.jl b/src/bgremove/ismv.jl
index d848e3b..48a8ec5 100644
--- a/src/bgremove/ismv.jl
+++ b/src/bgremove/ismv.jl
@@ -91,8 +91,8 @@ function _ismv!(
     _δ = one(eltype(s)) - sqrt(eps(eltype(s)))
 
     # erode mask
-    s = _tcopyto!(s, m) # in-place type conversion, reuse smv var
-    m0 = _tcopyto!(m0, m)
+    s = tcopyto!(s, m) # in-place type conversion, reuse smv var
+    m0 = tcopyto!(m0, m)
 
     F̂ = mul!(F̂, P, s)
     @inbounds @batch for I in eachindex(F̂)
diff --git a/src/bgremove/lbv.jl b/src/bgremove/lbv.jl
index 80b4ec1..bbde24e 100644
--- a/src/bgremove/lbv.jl
+++ b/src/bgremove/lbv.jl
@@ -104,16 +104,16 @@ function _lbv!(
         flc = fl
     else
         mc = similar(mask, szc)
-        mc = _tcopyto!(mc, @view(mask[Rc]))
+        mc = tcopyto!(mc, @view(mask[Rc]))
 
         if N == 3
             fc = @view(f[Rc])
             flc = similar(fl, szc)
-            flc = _tcopyto!(flc, @view(fl[Rc]))
+            flc = tcopyto!(flc, @view(fl[Rc]))
         else
             fc = @view(f[Rc,:])
             flc = similar(fl, (szc..., size(fl, 4)))
-            flc = _tcopyto!(flc, @view(fl[Rc,:]))
+            flc = tcopyto!(flc, @view(fl[Rc,:]))
         end
     end
 
@@ -147,9 +147,9 @@ function _lbv!(
 
     if _crop
         if N == 3
-            _tcopyto!(@view(fl[Rc]), flc)
+            tcopyto!(@view(fl[Rc]), flc)
         else
-            _tcopyto!(@view(fl[Rc,:]), flc)
+            tcopyto!(@view(fl[Rc,:]), flc)
         end
     end
 
diff --git a/src/bgremove/pdf.jl b/src/bgremove/pdf.jl
index 824468c..cf2a03d 100644
--- a/src/bgremove/pdf.jl
+++ b/src/bgremove/pdf.jl
@@ -133,7 +133,7 @@ function _pdf!(
     # pre-compute mask*weights
     if W === nothing
         # no weights
-        MW = _tcopyto!(MW, m)
+        MW = tcopyto!(MW, m)
 
     elseif ndims(W) == 3
         # same weights for all echoes
diff --git a/src/bgremove/sharp.jl b/src/bgremove/sharp.jl
index caec75c..95e7ff9 100644
--- a/src/bgremove/sharp.jl
+++ b/src/bgremove/sharp.jl
@@ -86,7 +86,7 @@ function _sharp!(
     _δ    = one(eltype(s)) - sqrt(eps(eltype(s)))
 
     # erode mask
-    s = _tcopyto!(s, m) # in-place type conversion, reuse smv var
+    s = tcopyto!(s, m) # in-place type conversion, reuse smv var
 
     F̂ = mul!(F̂, P, s)
     @inbounds @batch for I in eachindex(F̂)
@@ -232,7 +232,7 @@ function _sharp!(
     _δ    = one(eltype(s)) - sqrt(eps(eltype(s)))
 
     # fft of original mask
-    s = _tcopyto!(s, mr) # in-place type conversion
+    s = tcopyto!(s, mr) # in-place type conversion
     M̂ = mul!(M̂, P, s)
 
     @inbounds for (i, r) in enumerate(rs)
@@ -344,7 +344,7 @@ function _sharp!(
     _δ    = one(eltype(s)) - sqrt(eps(eltype(s)))
 
     # fft of original mask
-    s = _tcopyto!(s, mr) # in-place type conversion
+    s = tcopyto!(s, mr) # in-place type conversion
     M̂ = mul!(M̂, P, s)
 
     @inbounds for (i, r) in enumerate(rs)
diff --git a/src/inversion/direct.jl b/src/inversion/direct.jl
index eafa739..d008bf1 100644
--- a/src/inversion/direct.jl
+++ b/src/inversion/direct.jl
@@ -270,7 +270,7 @@ function _kdiv_ikernel!(
 
         elseif reg == :laplacian
             Γ = _laplace_kernel!(Γ, F, f, vsz, P)
-            Γ = _tcopyto!(abs2, Γ, Γ)
+            Γ = tmap!(abs2, Γ)
         end
 
         @inbounds @batch for I in eachindex(D)
diff --git a/src/inversion/nltv.jl b/src/inversion/nltv.jl
index c0f81cd..d192d47 100644
--- a/src/inversion/nltv.jl
+++ b/src/inversion/nltv.jl
@@ -260,7 +260,7 @@ function _nltv!(
             end
 
             if W !== nothing
-                F̂ = _tcopyto!(F̂, X̂) # real ifft overwrites input
+                F̂ = tcopyto!(F̂, X̂) # real ifft overwrites input
             end
 
             xp = mul!(xp, iP, X̂)
diff --git a/src/inversion/tv.jl b/src/inversion/tv.jl
index c71212b..057e91a 100644
--- a/src/inversion/tv.jl
+++ b/src/inversion/tv.jl
@@ -272,7 +272,7 @@ function _tv!(
             end
 
             if W !== nothing
-                F̂ = _tcopyto!(F̂, X̂) # real ifft overwrites input
+                F̂ = tcopyto!(F̂, X̂) # real ifft overwrites input
             end
 
             xp = mul!(xp, iP, X̂)
diff --git a/src/utils/kernels.jl b/src/utils/kernels.jl
index a29b896..6f189f9 100644
--- a/src/utils/kernels.jl
+++ b/src/utils/kernels.jl
@@ -136,7 +136,7 @@ function _dipole_kernel!(
     d = _dipole_kernel!(d, sz, vsz, bdir, :i, shift=true)
 
     D̂ = mul!(D̂, P, d)
-    D = _tcopyto!(real, D, D̂)
+    D = tmap!(real, D, D̂)
 
     return D
 end
@@ -152,7 +152,7 @@ function _dipole_kernel!(
     D̂ = _dipole_kernel!(D̂, sz, vsz, bdir, :i, shift=true)
 
     D̂ = P*D̂
-    D = _tcopyto!(real, D, D̂)
+    D = tmap!(real, D, D̂)
 
     return D
 end
@@ -321,7 +321,7 @@ function _smv_kernel!(
 
     # normalizing
     a = inv(sum(s))
-    s = _tcopyto!(x -> a*x, s, s)
+    s = tmap!(x -> a*x, s)
 
     return s
 end
@@ -341,7 +341,7 @@ function _smv_kernel!(
 
     # fft, discard imaginary (even function -> imag = 0), and normalize
     Ŝ = mul!(Ŝ, P, s)
-    S = _tcopyto!(x -> a*real(x), S, Ŝ)
+    S = tmap!(x -> a*real(x), S, Ŝ)
 
     return S
 end
@@ -360,7 +360,7 @@ function _smv_kernel!(
 
     # fft, discard imaginary (even function -> imag = 0), and normalize
     Ŝ = P*Ŝ
-    S = _tcopyto!(x -> a*real(x), S, Ŝ)
+    S = tmap!(x -> a*real(x), S, Ŝ)
 
     return S
 end
@@ -498,7 +498,7 @@ function _laplace_kernel!(
     Δ = _laplace_kernel!(Δ, vsz, negative=negative, shift=true)
 
     L̂ = mul!(L̂, P, Δ)
-    L = _tcopyto!(real, L, L̂)
+    L = tmap!(real, L, L̂)
 
     return L
 end
@@ -513,7 +513,7 @@ function _laplace_kernel!(
     L̂ = _laplace_kernel!(L̂, vsz, negative=negative, shift=true)
 
     L̂ = P*L̂
-    L = _tcopyto!(real, L, L̂)
+    L = tmap!(real, L, L̂)
 
     return L
 end
diff --git a/src/utils/lsmr.jl b/src/utils/lsmr.jl
index 190d385..ed60498 100644
--- a/src/utils/lsmr.jl
+++ b/src/utils/lsmr.jl
@@ -39,7 +39,7 @@ end
 function LSMRWorkspace(x::AbstractArray, A, b::AbstractArray)
     T = typeof(one(eltype(b)) / one(eltype(A)))
 
-    u = isa(b, Array) ? tcopy(b) : copy(b)
+    u = tcopy(b)
 
     v = similar(x, T)
     h = similar(x, T)
@@ -163,23 +163,9 @@ function lsmr!(
     cbar = one(Tr)
     sbar = zero(Tr)
 
-    if isa(x, Array)
-        tfill!(x, 0)
-    else
-        fill!(x, 0)
-    end
-
-    if isa(h, Array) && isa(v, Array)
-        _tcopyto!(h, v)
-    else
-        copyto!(h, v)
-    end
-
-    if isa(hbar, Array)
-        tfill!(hbar, 0)
-    else
-        fill!(hbar, 0)
-    end
+    tfill!(x, 0)
+    tfill!(hbar, 0)
+    tcopyto!(h, v)
 
     # Initialize variables for estimation of ||r||.
     βdd = β
diff --git a/src/utils/poisson_solver/mgpcg.jl b/src/utils/poisson_solver/mgpcg.jl
index c00502a..ad6e59f 100644
--- a/src/utils/poisson_solver/mgpcg.jl
+++ b/src/utils/poisson_solver/mgpcg.jl
@@ -62,10 +62,10 @@ function mgpcg!(
         maxlevel = maxlevel
     )
 
-    q = tzero(b, size(A))
     p = similar(b, size(A))
+    q = similar(b, size(A))
 
-    M.workspace.x[1] = q
+    M.workspace.x[1] = tfill!(q, zero(T))
 
     if N == 3
         M.workspace.b[1] = tcopy(b)
@@ -76,12 +76,12 @@ function mgpcg!(
         M.workspace.b[1] = similar(b, size(A))
 
         for t in axes(b, 4)
-            _tcopyto!(xt, @view(x[:,:,:,t]))
-            _tcopyto!(M.workspace.b[1], @view(b[:,:,:,t]))
+            tcopyto!(xt, @view(x[:,:,:,t]))
+            tcopyto!(M.workspace.b[1], @view(b[:,:,:,t]))
 
             xt = _mgpcg!(xt, M, p, cycle, ncycles, atol, rtol, maxit, verbose)
 
-            _tcopyto!(@view(x[:,:,:,t]), xt)
+            tcopyto!(@view(x[:,:,:,t]), xt)
         end
     end
 
diff --git a/src/utils/poisson_solver/multigrid/transfer.jl b/src/utils/poisson_solver/multigrid/transfer.jl
index 1487416..bf801d0 100644
--- a/src/utils/poisson_solver/multigrid/transfer.jl
+++ b/src/utils/poisson_solver/multigrid/transfer.jl
@@ -10,7 +10,8 @@ end
 function restrict(interior::AbstractArray{Bool, 3})
     sz = size(interior)
     szc = restrict_size(sz)
-    return restrict!(tzero(interior, szc), interior)
+    interiorc = tfill!(similar(interior, szc), zero(Bool))
+    return restrict!(interiorc, interior)
 end
 
 function restrict!(mc::AbstractArray{Bool, 3}, m::AbstractArray{Bool, 3})
diff --git a/src/utils/poisson_solver/poisson_solver.jl b/src/utils/poisson_solver/poisson_solver.jl
index e0c80ac..25538e9 100644
--- a/src/utils/poisson_solver/poisson_solver.jl
+++ b/src/utils/poisson_solver/poisson_solver.jl
@@ -47,16 +47,16 @@ function solve_poisson_mgpcg!(
         uc = u
     else
         mc = similar(mask, szc)
-        mc = _tcopyto!(mc, @view(mask[Rc]))
+        mc = tcopyto!(mc, @view(mask[Rc]))
 
         if N == 3
             d2uc = @view(d2u[Rc])
             uc = similar(u, szc)
-            uc = _tcopyto!(uc, @view(u[Rc]))
+            uc = tcopyto!(uc, @view(u[Rc]))
         else
             d2uc = @view(d2u[Rc,:])
             uc = similar(u, (szc..., size(u, 4)))
-            uc = _tcopyto!(uc, @view(u[Rc,:]))
+            uc = tcopyto!(uc, @view(u[Rc,:]))
         end
     end
 
@@ -66,9 +66,9 @@ function solve_poisson_mgpcg!(
 
     if _crop
         if N == 3
-            _tcopyto!(@view(u[Rc]), uc)
+            tcopyto!(@view(u[Rc]), uc)
         else
-            _tcopyto!(@view(u[Rc,:]), uc)
+            tcopyto!(@view(u[Rc,:]), uc)
         end
     end
 
@@ -160,7 +160,7 @@ function solve_poisson_fft!(
         iP = inv(P)
         d2û = P*d2u
     else
-        d2û = _tcopyto!(similar(d2u, complex(eltype(d2u))), d2u)
+        d2û = tcopyto!(similar(d2u, complex(eltype(d2u))), d2u)
         P = plan_fft!(d2û, 1:3)
         iP = inv(P)
         d2û = P*d2û
@@ -188,7 +188,7 @@ function solve_poisson_fft!(
         u = mul!(u, iP, d2û)
     else
         d2û = iP*d2û
-        u = _tcopyto!(real, u, d2û)
+        u = tmap!(real, u, d2û)
     end
 
     return u
diff --git a/src/utils/r2star.jl b/src/utils/r2star.jl
index 8c92980..6d78b79 100644
--- a/src/utils/r2star.jl
+++ b/src/utils/r2star.jl
@@ -26,7 +26,8 @@ function r2star_ll(
     size(mag, N) == NT || throw(DimensionMismatch())
     mask === nothing || length(mask) == length(mag) ÷ NT || throw(DimensionMismatch())
 
-    r2s = tzero(mag, size(mag)[1:N-1])
+    r2s = similar(mag, size(mag)[1:N-1])
+    r2s = tfill!(r2s, zero(T))
 
     vmag = reshape(mag, :, NT)
     vr2s = vec(r2s)
@@ -34,10 +35,8 @@ function r2star_ll(
     A = qr(Matrix{T}([-[TEs...] ones(NT)]))
 
     if mask === nothing
-        b = tcopy(log, transpose(vmag))
-
+        b = tmap(log, transpose(vmag))
         x̂ = ldiv!(A, b)
-
         @inbounds @batch for I in eachindex(vr2s)
             vr2s[I] = x̂[1,I]
         end
@@ -110,7 +109,8 @@ function r2star_arlo(
     all((≈)(TEs[2]-TEs[1]), TEs[2:end].-TEs[1:end-1]) ||
         throw(DomainError("ARLO requires equidistant echoes"))
 
-    r2s = tzero(mag, size(mag)[1:N-1])
+    r2s = similar(mag, size(mag)[1:N-1])
+    r2s = tfill!(r2s, zero(T))
 
     vmag = reshape(mag, :, NT)
     vr2s = vec(r2s)
@@ -238,8 +238,9 @@ function r2star_crsi(
     sigma === nothing || NR == N-1 || throw(DimensionMismatch())
     M > 0 || throw(ArgumentError("interpolation factor M must be greater than 0"))
 
-    P = tcopy(x -> x*x, mag)
-    r2s = tzero(mag, size(mag)[1:N-1])
+    P = tmap(x -> x*x, mag)
+    r2s = similar(mag, size(mag)[1:N-1])
+    r2s = tfill!(r2s, zero(T))
 
     vP = reshape(P, :, NT)
     vr2s = vec(r2s)
@@ -376,7 +377,8 @@ function r2star_numart2s(
     size(mag, N) == NT || throw(DimensionMismatch())
     mask === nothing || length(mask) == length(mag) ÷ NT || throw(DimensionMismatch())
 
-    r2s = tzero(mag, size(mag)[1:N-1])
+    r2s = similar(mag, size(mag)[1:N-1])
+    r2s = tfill!(r2s, zero(T))
 
     vmag = reshape(mag, :, NT)
     vr2s = vec(r2s)
diff --git a/src/utils/utils.jl b/src/utils/utils.jl
index c9ab520..06a3f36 100644
--- a/src/utils/utils.jl
+++ b/src/utils/utils.jl
@@ -153,7 +153,7 @@ function padarray!(
     all(szp .>= sz) || throw(DimensionMismatch())
 
     if szp == sz
-        return _tcopyto!(xp, x)
+        return tcopyto!(xp, x)
     end
 
     valT = convert(Txp, val)
@@ -286,7 +286,7 @@ Crop array to mask.
 function crop_mask(x::AbstractArray, m::AbstractArray{T} = x; out = zero(T)) where {T}
     checkshape(x, m, (:x, :m))
     Rc = crop_indices(m, out)
-    xc = _tcopyto!(similar(x, size(Rc)), @view(x[Rc]))
+    xc = tcopyto!(similar(x, size(Rc)), @view(x[Rc]))
     return xc
 end
 
@@ -387,7 +387,7 @@ function erode_mask!(
     checkshape(m1, m0, (:emask, :mask))
 
     if iter < 1
-        return _tcopyto!(m1, m0)
+        return tcopyto!(m1, m0)
     end
 
     if iter > 1
@@ -405,7 +405,7 @@ function erode_mask!(
         end
 
         if t < iter
-            _tcopyto!(m0, m1)
+            tcopyto!(m0, m1)
         end
     end
 
@@ -467,7 +467,7 @@ function psf2otf(
     if szk == sz
         _kp = k
     else
-        _kp = tzero(k, sz)
+        _kp = tfill!(similar(k, sz), zero(T))
         @inbounds @batch minbatch=1024 for I in CartesianIndices(k)
             _kp[I] = k[I]
         end
@@ -485,7 +485,7 @@ function psf2otf(
     # discard imaginary part if within roundoff error
     nops = length(k)*sum(log2, szk)
     if maximum(x -> abs(imag(x)), K) / maximum(abs2, K) ≤ nops*eps(T)
-        _tcopyto!(real, K, K)
+        tmap!(real, K)
     end
 
     return K
@@ -496,38 +496,55 @@ end
 ##### Multi-threaded Base utilities
 #####
 
-function tzero(x::AbstractArray{T}, sz::NTuple{N, Integer} = size(x)) where {T, N}
-    return tfill!(similar(x, sz), zero(T))
-end
+tzero(x) = zero(x)
+tzero(x::AbstractArray{T}) where {T} = tfill!(similar(x, typeof(zero(T))), zero(T))
 
-function tcopy(x::AbstractArray)
-    return _tcopyto!(similar(x), x)
-end
 
-function tcopy(f::Function, x::AbstractArray)
-    return _tcopyto!(f, similar(x), x)
-end
+tfill!(A, x) = fill!(A, x)
 
 function tfill!(A::AbstractArray{T}, x) where {T}
     xT = convert(T, x)
-    @inbounds @batch minbatch=1024 for I in eachindex(A)
-        A[I] = xT
+    @batch minbatch=1024 for I in eachindex(A)
+        @inbounds A[I] = xT
     end
     return A
 end
 
-function _tcopyto!(y, x)
-    @inbounds @batch minbatch=1024 for I in eachindex(y, x)
-        y[I] = x[I]
+
+tmap(f, iters...) = map(f, iters...)
+
+function tmap(f, A::AbstractArray)
+    isempty(A) && return similar(A, 0)
+    dest = similar(A, typeof(f(A[1])))
+    return tmap!(f, dest, A)
+end
+
+
+tmap!(f, dest, iters...) = map!(f, dest, iters...)
+tmap!(f, A::AbstractArray) = tmap!(f, A, A)
+
+function tmap!(f::F, dest::AbstractArray, A::AbstractArray) where {F}
+    checkshape(Bool, dest, A) || return map!(f, dest, A)
+    @batch minbatch=1024 for I in eachindex(dest, A)
+        val = f(@inbounds A[I])
+        @inbounds dest[I] = val
     end
-    return y
+    return dest
 end
 
-function _tcopyto!(f::Function, y, x)
-    @inbounds @batch minbatch=1024 for I in eachindex(y, x)
-        y[I] = f(x[I])
+
+tcopy(x) = copy(x)
+tcopy(x::AbstractArray) = tcopyto!(similar(x), x)
+
+
+tcopyto!(dest, src) = copyto!(dest, src)
+
+function tcopyto!(dest::AbstractArray, src::AbstractArray)
+    checkshape(Bool, dest, src) || return copyto!(dest, src)
+    @batch minbatch=1024 for I in eachindex(dest, src)
+        @inbounds dest[I] = src[I]
     end
-    return y
+    return dest
 end