Skip to content

Commit

Permalink
implement mapreduce
Browse files Browse the repository at this point in the history
  • Loading branch information
tiemvanderdeure committed Nov 5, 2024
1 parent 251bbbd commit b08543f
Showing 1 changed file with 94 additions and 0 deletions.
94 changes: 94 additions & 0 deletions src/skipmissing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,97 @@ function Base.show(io::IO, s::SkipMissingVal)
show(io, s.x)
print(io, ')')
end


## This is adapted from https://github.com/JuliaLang/julia/blob/50713ee4a82eb1b5613647cd74b027315f665080/base/missing.jl#L273-L362
## It ensures more precise calculation of reductions like sum. See https://github.com/rafaqz/Rasters.jl/issues/812
Base.mapreduce(f, op, itr::SkipMissingVal) =
Base._mapreduce(f, op, IndexStyle(itr.x), itr)

function Base._mapreduce(f, op, ::IndexLinear, itr::SkipMissingVal)
A = itr.x
ai = missing
inds = LinearIndices(A)
i = first(inds)
ilast = last(inds)
for outer i in i:ilast
@inbounds ai = A[i]
!_missing(ai, itr) && break
end
_missing(ai, itr) && return mapreduce_empty(f, op, eltype(itr))
a1::eltype(itr) = ai
i == typemax(typeof(i)) && return mapreduce_first(f, op, a1)
i += 1
ai = missing
for outer i in i:ilast
@inbounds ai = A[i]
!_missing(ai, itr) && break
end
_missing(ai, itr) && return mapreduce_first(f, op, a1)
# We know A contains at least two non-missing entries: the result cannot be nothing
something(Base.mapreduce_impl(f, op, itr, first(inds), last(inds)))
end

Base._mapreduce(f, op, ::IndexCartesian, itr::SkipMissingVal) = Base.mapfoldl(f, op, itr)

Base.mapreduce_impl(f, op, A::SkipMissingVal, ifirst::Integer, ilast::Integer) =
Base.mapreduce_impl(f, op, A, ifirst, ilast, Base.pairwise_blocksize(f, op))

# Returns nothing when the input contains only missing values, and Some(x) otherwise
@noinline function Base.mapreduce_impl(f, op, itr::SkipMissingVal,
ifirst::Integer, ilast::Integer, blksize::Int)
A = itr.x
if ifirst > ilast
return nothing
elseif ifirst == ilast
@inbounds a1 = A[ifirst]
if ismissing(a1)
return nothing
else
return Some(mapreduce_first(f, op, a1))
end
elseif ilast - ifirst < blksize
# sequential portion
ai = missing
i = ifirst
for outer i in i:ilast
@inbounds ai = A[i]
!_missing(ai, itr) && break
end
_missing(ai, itr) && return nothing
a1 = ai::eltype(itr)
i == typemax(typeof(i)) && return Some(mapreduce_first(f, op, a1))
i += 1
ai = missing
for outer i in i:ilast
@inbounds ai = A[i]
!_missing(ai, itr) && break
end
_missing(ai, itr) && return Some(mapreduce_first(f, op, a1))
a2 = ai::eltype(itr)
i == typemax(typeof(i)) && return Some(op(f(a1), f(a2)))
i += 1
v = op(f(a1), f(a2))
@simd for i = i:ilast
@inbounds ai = A[i]
if !_missing(ai, itr)
v = op(v, f(ai))
end
end
return Some(v)
else
# pairwise portion
imid = ifirst + (ilast - ifirst) >> 1
v1 = Base.mapreduce_impl(f, op, itr, ifirst, imid, blksize)
v2 = Base.mapreduce_impl(f, op, itr, imid+1, ilast, blksize)
if v1 === nothing && v2 === nothing
return nothing
elseif v1 === nothing
return v2
elseif v2 === nothing
return v1
else
return Some(op(something(v1), something(v2)))
end
end
end

0 comments on commit b08543f

Please sign in to comment.