Skip to content

Commit

Permalink
use BitArray for burning buffer (#533)
Browse files Browse the repository at this point in the history
* optimise rasterize

* threading is `false` by default

* simplify locking

* update threaded docstring

* remove lock arg

* bugfix rasterize and test threading
  • Loading branch information
rafaqz authored Oct 21, 2023
1 parent 10c6c08 commit 88af714
Show file tree
Hide file tree
Showing 12 changed files with 184 additions and 243 deletions.
4 changes: 2 additions & 2 deletions ext/RastersArchGDALExt/warp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ end

function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", kw...)
filename = RA._maybe_add_suffix(filename, suffix)
flagvect = reduce([flags...]; init=[]) do acc, (key, val)
flagvect = reduce([flags...]; init=String[]) do acc, (key, val)
append!(acc, String[_asflag(key), _stringvect(val)...])
end
tempfile = isnothing(filename) ? nothing : tempname() * ".tif"
Expand All @@ -84,7 +84,7 @@ function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", kw..
# Either read the MEM dataset, or get the filename as a FileArray
# And permute the dimensions back to what they were in A
p_raster = _maybe_permute_from_gdal(raster, dims(A))
# Either read the MEM dataset to an Array, or keep a filename base raster lazy
# Either read the MEM dataset to an Array, or keep the raster lazy
return isnothing(filename) ? read(p_raster) : p_raster
end
end
Expand Down
1 change: 0 additions & 1 deletion src/Rasters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ include("read.jl")
include("write.jl")
include("show.jl")
include("plotrecipes.jl")
include("sectorlock.jl")

include("methods/burning/edges.jl")
include("methods/burning/allocs.jl")
Expand Down
14 changes: 5 additions & 9 deletions src/methods/burning/array_init.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Like `create` but without disk writes, mostly for Bool/Union{Missing,Boo},
# and uses `similar` where possible
# TODO merge this with `create` somehow
_init_bools(to; kw...) = _init_bools(to, Bool; kw...)
_init_bools(to; kw...) = _init_bools(to, BitArray; kw...)
_init_bools(to, T::Type; kw...) = _init_bools(to, T, nothing; kw...)
_init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...)
_init_bools(to::AbstractRasterStack, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...)
Expand Down Expand Up @@ -31,15 +31,11 @@ function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Not
end
end

function _alloc_bools(to, dims::DimTuple, ::Type{Bool}; missingval=false, metadata=NoMetadata(), kw...)
if length(dims) > 2
# Use a BitArray
return Raster(falses(size(dims)), dims; missingval, metadata) # Use a BitArray
else
return Raster(zeros(Bool, size(dims)), dims; missingval, metadata) # Use a BitArray
end
function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; missingval=false, metadata=NoMetadata(), kw...)
# Use a BitArray
return Raster(falses(size(dims)), dims; missingval, metadata)
end
function _alloc_bools(to, dims::DimTuple, ::Type{T}; missingval=false, metadata=NoMetadata(), kw...) where T
function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; missingval=false, metadata=NoMetadata(), kw...) where T
# Use an `Array`
data = fill!(Raster{T}(undef, dims), missingval)
return rebuild(data; missingval, metadata)
Expand Down
2 changes: 1 addition & 1 deletion src/methods/burning/geometry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ function _burn_geometry!(B::AbstractRaster, ::GI.AbstractFeatureCollectionTrait,
end
# Where geoms is an iterator
function _burn_geometry!(B::AbstractRaster, trait::Nothing, geoms;
collapse::Union{Bool,Nothing}=nothing, lock=SectorLocks(), verbose=true, progress=true, threaded=true,
collapse::Union{Bool,Nothing}=nothing, lock=Threads.SpinLock(), verbose=true, progress=true, threaded=true,
allocs=_burning_allocs(B; threaded), kw...
)::Bool
range = _geomindices(geoms)
Expand Down
2 changes: 1 addition & 1 deletion src/methods/burning/point.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ end
_fill_index!(x, fill, I)
else
sector = CartesianIndices(map(i -> i:i, I))
Base.lock(lock, sector)
Base.lock(lock)
_fill_index!(x, fill, I)
Base.unlock(lock)
end
Expand Down
31 changes: 17 additions & 14 deletions src/methods/coverage.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const COVERAGE_KEYWORDS = """
10 x 10 or 100 points that contribute to coverage. Using `100` means 10,000 points
contribute. Performance will decline as `scale` increases. Memory use will grow
by `scale^2` when `mode=:union`.
$THREADED_KEYWORD
$PROGRESS_KEYWORD
$VERBOSE_KEYWORD
"""
Expand Down Expand Up @@ -92,11 +93,12 @@ function _coverage!(A::AbstractRaster, ::GI.AbstractGeometryTrait, geom, r; scal
return A
end
function _coverage!(A::AbstractRaster, ::Nothing, geoms, r::Rasterizer; mode, scale)
n = _nthreads()
n = r.threaded ? _nthreads() : 1
buffers = (
allocs = _burning_allocs(A; threaded=r.threaded),
linebuffer = [_init_bools(A, Bool; missingval=false) for _ in 1:n],
centerbuffer = [_init_bools(A, Bool; missingval=false) for _ in 1:n],
# We always need a vector of threads, but just
allocs = _burning_allocs(A; nthreads=n, threaded=true),
linebuffer = [_init_bools(A, BitArray; missingval=false) for _ in 1:n],
centerbuffer = [_init_bools(A, BitArray; missingval=false) for _ in 1:n],
block_crossings = [[Vector{Float64}(undef, 0) for _ in 1:scale] for _ in 1:n],
burnstatus=[fill(BurnStatus(), scale) for _ in 1:n],
subbuffer = [fill!(Array{Bool}(undef, scale, scale), false) for _ in 1:n],
Expand All @@ -116,11 +118,11 @@ end

# Combines coverage at the sub-pixel level for a final value 0-1
function _union_coverage!(A::AbstractRaster, geoms, buffers;
scale, subpixel_dims, progress=true, threaded=true
scale, subpixel_dims, progress=true, threaded=false
)
n = _nthreads()
centeracc = [_init_bools(A, Bool; missingval=false) for _ in 1:n]
lineacc = [_init_bools(A, Bool; missingval=false) for _ in 1:n]
centeracc = [_init_bools(A, BitArray; missingval=false) for _ in 1:n]
lineacc = [_init_bools(A, BitArray; missingval=false) for _ in 1:n]
subpixel_buffer = [falses(size(A) .* scale) for _ in 1:n]

allbuffers = merge(buffers, (; centeracc, lineacc, subpixel_buffer))
Expand All @@ -129,6 +131,7 @@ function _union_coverage!(A::AbstractRaster, geoms, buffers;
_run(range, threaded, progress, "Calculating coverage buffers...") do i
geom = _getgeom(geoms, i)
idx = Threads.threadid()
# Get buffers for each thread as a NamedTuple
thread_buffers = map(b -> b[idx], allbuffers)
_union_coverage!(A, geom; scale, subpixel_buffer, thread_buffers...)
fill!(thread_buffers.linebuffer, false)
Expand Down Expand Up @@ -170,12 +173,12 @@ function _union_coverage!(A::AbstractRaster, geoms, buffers;
end
function _union_coverage!(A::AbstractRaster, geom;
scale,
linebuffer=_init_bools(A, Bool; missingval=false),
centerbuffer=_init_bools(A, Bool; missingval=false),
linebuffer=_init_bools(A, BitArray; missingval=false),
centerbuffer=_init_bools(A, BitArray; missingval=false),
allocs=Allocs(linebuffer),
subpixel_buffer=falses(size(A) .* scale),
centeracc=_init_bools(A, Bool; missingval=false),
lineacc=_init_bools(A, Bool; missingval=false),
centeracc=_init_bools(A, BitArray; missingval=false),
lineacc=_init_bools(A, BitArray; missingval=false),
burnstatus=[BurnStatus() for _ in 1:scale],
block_crossings=[Vector{Float64}(undef, 0) for _ in 1:scale],
subbuffer=falses(scale, scale),
Expand Down Expand Up @@ -251,7 +254,7 @@ end

# Sums all coverage
function _sum_coverage!(A::AbstractRaster, geoms, buffers;
scale, subpixel_dims, verbose=true, progress=true, threaded=true
scale, subpixel_dims, verbose=true, progress=true, threaded=false
)
n = _nthreads()
coveragebuffers = [fill!(similar(A), 0.0) for _ in 1:n]
Expand All @@ -276,8 +279,8 @@ function _sum_coverage!(A::AbstractRaster, geoms, buffers;
end
function _sum_coverage!(A::AbstractRaster, geom;
scale,
linebuffer=_init_bools(A, Bool; missingval=false),
centerbuffer=_init_bools(A, Bool; missingval=false),
linebuffer=_init_bools(A, BitArray; missingval=false),
centerbuffer=_init_bools(A, BitArray; missingval=false),
allocs=Allocs(linebuffer),
block_crossings=[Vector{Float64}(undef, 0) for _ in 1:scale],
subbuffer=falses(scale, scale),
Expand Down
14 changes: 8 additions & 6 deletions src/methods/mask.jl
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,11 @@ The array returned from calling `boolmask` on a `AbstractRaster` is a
- `missingval`: The missing value of the source array, with default `missingval(raster)`.
# Geometry keywords
# Keywords
$GEOM_KEYWORDS
$THREADED_KEYWORD
$PROGRESS_KEYWORD
And specifically for `shape=:polygon`:
Expand Down Expand Up @@ -245,14 +247,14 @@ function boolmask end
boolmask(series::AbstractRasterSeries; kw...) = boolmask(first(series); kw...)
boolmask(stack::AbstractRasterStack; kw...) = boolmask(first(stack); kw...)
function boolmask(source::AbstractRaster; kw...)
dest = _init_bools(source, Bool, nothing; kw..., missingval=false)
dest = _init_bools(source, BitArray, nothing; kw..., missingval=false)
return boolmask!(dest, source; kw...)
end
function boolmask(x; to=nothing, kw...)
if to isa Union{AbstractDimArray,AbstractDimStack,DimTuple}
to = dims(to, DEFAULT_POINT_ORDER)
end
A = _init_bools(to, Bool, x; kw..., missingval=false)
A = _init_bools(to, BitArray, x; kw..., missingval=false)
return boolmask!(A, x; kw...)
end

Expand All @@ -264,7 +266,7 @@ function boolmask!(dest::AbstractRaster, src::AbstractRaster;
end
end
function boolmask!(dest::AbstractRaster, geoms;
allocs=nothing, lock=nothing, progress=true, threaded=true, kw...
allocs=nothing, lock=nothing, progress=true, threaded=false, kw...
)
if isnothing(allocs)
allocs = _burning_allocs(dest; threaded)
Expand Down Expand Up @@ -322,11 +324,11 @@ $EXPERIMENTAL
missingmask(series::AbstractRasterSeries; kw...) = missingmask(first(series); kw...)
missingmask(stack::AbstractRasterStack; kw...) = missingmask(first(stack); kw...)
function missingmask(source::AbstractRaster; kw...)
dest = _init_bools(source, Union{Missing,Bool}, nothing; kw..., missingval=missing)
dest = _init_bools(source, Array{Union{Missing,Bool}}, nothing; kw..., missingval=missing)
return missingmask!(dest, source; kw...)
end
function missingmask(x; to=nothing, kw...)
B = _init_bools(to, Union{Missing,Bool}, x; kw..., missingval=missing)
B = _init_bools(to, Array{Union{Missing,Bool}}, x; kw..., missingval=missing)
return missingmask!(B, x; kw...)
end

Expand Down
20 changes: 8 additions & 12 deletions src/methods/rasterize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct Rasterizer{T,G,F,R,O,I,M}
op::O
init::I
missingval::M
lock::Union{SectorLocks,Nothing}
lock::Union{Threads.SpinLock,Nothing}
shape::Symbol
boundary::Symbol
verbose::Bool
Expand All @@ -93,7 +93,7 @@ function Rasterizer(geom, fill, fillitr;
filename=nothing,
verbose=true,
progress=true,
threaded=true,
threaded=false,
kw...
)
# A single geometry does not need a reducing function
Expand Down Expand Up @@ -138,7 +138,7 @@ function Rasterizer(geom, fill, fillitr;
@warn "currently `:points` rasterization of multiple non-`PointTrait` geometries may be innaccurate for `reducer` methods besides $stable_reductions. Make a Rasters.jl github issue if you need this to work"
end
eltype, missingval = get_eltype_missingval(eltype, missingval, fillitr, init, filename, op, reducer)
lock = threaded ? SectorLocks() : nothing
lock = threaded ? Threads.SpinLock() : nothing

return Rasterizer(eltype, geom, fillitr, reducer, op, init, missingval, lock, shape, boundary, verbose, progress, threaded)
end
Expand Down Expand Up @@ -323,7 +323,7 @@ const RASTERIZE_KEYWORDS = """
- `progress`: show a progress bar, `true` by default, `false` to hide..
- `verbose`: print information and warnings whne there are problems with the rasterisation.
`true` by default.
- `threaded`: run operations in parallel. `true` by default.
$THREADED_KEYWORD
"""

const RASTERIZE_ARGUMENTS = """
Expand Down Expand Up @@ -569,12 +569,12 @@ function _rasterize!(A, ::GI.AbstractGeometryTrait, geom, fill, r::Rasterizer; a
V = view(A, Touches(ext))
length(V) > 0 || return false

bools = _init_bools(commondims(V, DEFAULT_POINT_ORDER), Bool; metadata=metadata(A))
bools = _init_bools(commondims(V, DEFAULT_POINT_ORDER), BitArray; metadata=metadata(A))
boolmask!(bools, geom; allocs, lock, shape, boundary, verbose, progress)
hasburned = any(bools)
if hasburned
# Avoid race conditions with a SectorLock
isnothing(lock) || Base.lock(lock, V)
# Avoid race conditions
isnothing(lock) || Base.lock(lock)
_fill!(V, bools, fill, op, init, missingval)
isnothing(lock) || Base.unlock(lock)
end
Expand All @@ -584,11 +584,7 @@ end
# Fill points
function _rasterize!(A, trait::GI.AbstractPointTrait, point, fill, r::Rasterizer; allocs=nothing)
# Avoid race conditions whern Point is in a mixed set of Geometries
# isnothing(r.lock) || Base.lock(r.lock, A)
hasburned = _fill_point!(A, trait, point; fill, r.lock)
# isnothing(r.lock) || Base.unlock(r.lock)
# for all points we avoid parallel rasterization completely - this method should not be hit often
return hasburned
return _fill_point!(A, trait, point; fill, r.lock)
end
function _rasterize!(A, trait::Nothing, geoms, fill, r::Rasterizer; allocs=nothing)
if r.shape === :point
Expand Down
9 changes: 9 additions & 0 deletions src/methods/shared_docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ const SHAPE_KEYWORDS = """
The default is `:center`.
"""

const THREADED_KEYWORD = """
- `threaded`: run operations in parallel, `false` by default. In some circumstances `threaded`
can give large speedups over single-threaded operation. This can be true for complicated
geometries written into low-resolution rasters, but may not be for simple geometries with
high-resolution rasters. With very large rasters threading may be counter productive due
to excessing memory use. Caution should also be used: `threaded` should not be used in in-place
functions wrinting to `BitArray` or other arrays where race conditions can occur.
"""

const GEOM_KEYWORDS = """
$TO_KEYWORD
$RES_KEYWORD
Expand Down
83 changes: 0 additions & 83 deletions src/sectorlock.jl

This file was deleted.

Loading

0 comments on commit 88af714

Please sign in to comment.