Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better extract #565

Merged
merged 17 commits into from
Dec 2, 2023
152 changes: 108 additions & 44 deletions src/methods/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ sliced arrays or stacks will be returned instead of single values.

# Keywords

- `geometry`: include a `:geometry` column with the corresponding points for each value, `true` by default.
- `index`: include an `:index` column of the `CartesianIndex` for each value, `false` by default.
- `names`: `Tuple` of `Symbol` corresponding to layers of a `RasterStack`. All layers by default.
- `skipmissing`: skip missing points automatically.
- `atol`: a tolorerance for floating point lookup values for when the `LookupArray`
contains `Points`. `atol` is ignored for `Intervals`.

Expand All @@ -32,12 +36,9 @@ st = RasterStack(WorldClim{BioClim}, (1, 3, 5, 7, 12)) |> replace_missing
# Download some occurrence data
obs = GBIF2.occurrence_search("Burramys parvus"; limit=5, year="2009")

# Convert observations to points
pnts = collect((o.decimalLongitude, o.decimalLatitude) for o in obs if !ismissing(o.decimalLongitude))

# use `extract` to get values for all layers at each observation point.
# We `collect` to get a `Vector` from the lazy iterator.
collect(extract(st, pnts))
collect(extract(st, pnts; skipmissing=true))
Copy link
Contributor

@tiemvanderdeure tiemvanderdeure Nov 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No point in using collect on a Vector, right?

Suggested change
# use `extract` to get values for all layers at each observation point.
# We `collect` to get a `Vector` from the lazy iterator.
collect(extract(st, pnts))
collect(extract(st, pnts; skipmissing=true))
# use `extract` to get values for all layers at each observation point.
extract(st, pnts; skipmissing=true)


# output
5-element Vector{NamedTuple{(:geometry, :bio1, :bio3, :bio5, :bio7, :bio12)}}:
Expand All @@ -47,66 +48,129 @@ collect(extract(st, pnts))
(geometry = (0.52, 40.37), bio1 = missing, bio3 = missing, bio5 = missing, bio7 = missing, bio12 = missing)
(geometry = (0.32, 40.24), bio1 = 16.321388f0, bio3 = 41.659454f0, bio5 = 30.029825f0, bio7 = 25.544561f0, bio12 = 480.0f0)
```

Note: passing in arrays, geometry collections or feature collections
containing a mix of points and other geometries has undefined results.
"""
function extract end
function extract(x::RasterStackOrArray, data;
dims=DD.dims(x, DEFAULT_POINT_ORDER), kw...
dims=DD.dims(x, DEFAULT_POINT_ORDER), names=_names(x), kw...
)
_extract(x, data; dims, names=_names(x), kw...)
_extract(x, data; dims, names=NamedTuple{names}(names), kw...)
end
_extract(A::RasterStackOrArray, point::Missing; kw...) = missing
function _extract(A::RasterStackOrArray, geom; kw...)
_extract(A, GI.geomtrait(geom), geom; kw...)

function _extract(A::RasterStackOrArray, geom::Missing; names, kw...)
[_maybe_add_fields(A, map(_ -> missing, names), missing, missing; kw...)]
end
function _extract(A::RasterStackOrArray, ::Nothing, geoms; kw...)
geom1 = first(skipmissing(geoms))
if GI.isgeometry(geom1) || GI.isfeature(geom1) || GI.isfeaturecollection(geom1)
(_extract(A, g; kw...) for g in geoms)
_extract(A::RasterStackOrArray, geom; kw...) = _extract(A, GI.geomtrait(geom), geom; kw...)
function _extract(A::RasterStackOrArray, ::Nothing, geoms; names, skipmissing=false, kw...)
# Handle empty / all missing cases
(length(geoms) > 0 && any(!ismissing, geoms)) || return typeof(_maybe_add_fields(A, map(_ -> missing, names), missing, missing; kw...))[]
geom1 = first(Base.skipmissing(geoms)) # TODO: will fail if `geoms` is empty or all missing
rafaqz marked this conversation as resolved.
Show resolved Hide resolved
trait1 = GI.trait(geom1)
# We need to split out points from other geoms
# TODO this will fail with mixed point/geom vectors
if trait1 isa GI.PointTrait
rows = (_extract_point(A, g; names, kw...) for g in geoms)
return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows)
else
throw(ArgumentError("`data` does not contain geomety objects"))
# This will be a list of vectors, we need to flatten it into one
rows = Iterators.flatten(_extract(A, g; names, skipmissing, kw...) for g in geoms)
return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows)
end
end
function _extract(A::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...)
_extract(A, GI.geometry(feature); kw...)
end
function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; kw...)
(_extract(A, p; kw...) for p in GI.getpoint(geom))
function _extract(A::RasterStackOrArray, ::GI.FeatureCollectionTrait, fc; kw...)
# Fall back to the Array/iterator method for feature collections
_extract(A, [GI.geometry(f) for f in GI.getfeature(fc)]; kw...)
end
function _extract(A::RasterStackOrArray, ::GI.AbstractGeometryTrait, geom; names, kw...)
B = boolmask(geom; to=dims(A, DEFAULT_POINT_ORDER), kw...)
pts = DimPoints(B)
dis = DimIndices(B)
((; geometry=_geom_nt(dims(B), pts[I]), _prop_nt(A, I, names)...) for I in CartesianIndices(B) if B[I])
function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; skipmissing=false, kw...)
rows = (_extract_point(A, g; kw...) for g in GI.getpoint(geom))
return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows)
end
_geom_nt(dims::DimTuple, pts) = NamedTuple{map(dim2key, dims)}(pts)
_prop_nt(st::AbstractRasterStack, I, names::NamedTuple{K}) where K = NamedTuple{K}(values(st[I]))
_prop_nt(A::AbstractRaster, I, names::NamedTuple{K}) where K = NamedTuple{K}((A[I],))

function _extract(x::RasterStackOrArray, ::GI.PointTrait, point; dims, names, atol=nothing)
# Get the actual dimensions available in the object
coords = map(DD.commondims(x, dims)) do d
_dimcoord(d, point)
end
function _extract(A::RasterStackOrArray, ::GI.AbstractGeometryTrait, geom;
names, geometry=true, index=false, skipmissing=false, kw...
)
# Make a raster mask of the geometry
B = boolmask(geom; to=commondims(A, DEFAULT_POINT_ORDER), kw...)
# Add a row for each pixel that is `true` in the mask
rows = (_maybe_add_fields(A, _prop_nt(A, I, names), I; index, geometry) for I in CartesianIndices(B) if B[I])
# Maybe skip missing rows
return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows)
end
_extract(x::RasterStackOrArray, trait::GI.PointTrait, point; kw...) =
_extract_point(x, point; kw...)

@inline _skip_missing_rows(rows) = Iterators.filter(row -> !any(ismissing, row), rows)

@inline _prop_nt(st::AbstractRasterStack, I, names::NamedTuple{K}) where K = t[I][K]
@inline _prop_nt(A::AbstractRaster, I, names::NamedTuple{K}) where K = NamedTuple{K}((A[I],))

# Extract the values
if any(map(ismissing, coords))
# TODO test this branch somehow
geometry = map(_ -> missing, coords)
# Extract a single point
function _extract_point(x::RasterStackOrArray, point;
dims, names::NamedTuple{K}, atol=nothing, geometry=true, index=false, kw...
) where K
# The point itself might be missing, so return missing for every field
if ismissing(point)
layer_vals = map(_ -> missing, names)
geom = missing
I = missing
else
selectors = map(dims, coords) do d, c
_at_or_contains(d, c, atol)
# Get the actual dimensions available in the object
coords = map(DD.commondims(x, dims)) do d
_dimcoord(d, point)
end
# If any are coordinates missing, also return missing for everything
if any(map(ismissing, coords))
layer_vals = map(_ -> missing, names)
geom = missing
I = missing
else
selectors = map(dims, coords) do d, c
_at_or_contains(d, c, atol)
end
# Check the selector is in bounds / actually selectable
if DD.hasselection(x, selectors)
I = DD.dims2indices(x, selectors)
layer_vals = x isa Raster ? NamedTuple{K}((x[I...],)) : x[I...][K]
else
# Otherwise return `missing` for everything
I = missing
layer_vals = map(_ -> missing, names)
end
# Return the point for the geometry, either way
geom = point
end
layer_vals = if DD.hasselection(x, selectors)
x isa Raster ? (x[selectors...],) : x[selectors...]
end

return _maybe_add_fields(x, layer_vals, geom, I; geometry, index)
end
function _extract_point(A::RasterStackOrArray, point::Missing; names, kw...)
# Missing points return a single row
return _maybe_add_fields(A, map(_ -> missing, names), missing, missing; kw...)
end

# Maybe add optional fields
@inline function _maybe_add_fields(A, layer_vals::NamedTuple, I; kw...)
_maybe_add_fields(A, layer_vals, DimPoints(A)[I], I; kw...)
end
@inline function _maybe_add_fields(A, layer_vals::NamedTuple, point, I; geometry=true, index=false, kw...)
if geometry
if index
merge((; geometry=point, index=I), layer_vals)
else
merge((; geometry=point), layer_vals)
end
else
if index
merge((; index=I), layer_vals)
else
map(_ -> missing, names)
layer_vals
end
geometry = point
end
properties = NamedTuple{keys(names)}(layer_vals)
return (; geometry, properties...)
end

_names(A::AbstractRaster) = NamedTuple{(Symbol(name(A)),)}((Symbol(name(A)),))
_names(A::AbstractRasterStack) = NamedTuple{keys(A)}(keys(A))
_names(A::AbstractRaster) = (Symbol(name(A)),)
_names(A::AbstractRasterStack) = keys(A)
130 changes: 112 additions & 18 deletions test/methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -256,35 +256,129 @@ end
# Polygon(::Val{:ArchGDAL}, values) = ArchGDAL.createpolygon(values)

createpoint(args...) = ArchGDAL.createpoint(args...)
createfeature(x::Tuple{<:Any,<:Any}) = NamedTuple{(:geometry,:test)}(x)
createfeature(x::Tuple{<:Any,<:Any,<:Any}) = NamedTuple{(:geometry,:test,:test2)}(x)
createfeature(::Missing) = missing

@testset "extract" begin
dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2))
ga = Raster([1 2; 3 4], dimz; name=:test, missingval=missing)
ga2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=missing)
st = RasterStack(ga, ga2)
rast = Raster([1 2; 3 4], dimz; name=:test, missingval=missing)
rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=missing)
rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing)
st = RasterStack(rast, rast2)
@testset "from Raster" begin
# Tuple points
@test all(extract(ga, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) .===
createfeature.([missing, ((9.0, 0.1), 1), ((9.0, 0.2), 2), ((10.0, 0.3), missing)]))
@test all(extract(rast, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) .=== [
(geometry = missing, test = missing)
(geometry = (9.0, 0.1), test = 1)
(geometry = (9.0, 0.2), test = 2)
(geometry = (10.0, 0.3), test = missing)
])
@test all(extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true) .=== [
(geometry = (9.0, 0.1), test = 1)
(geometry = (9.0, 0.2), test = 2)
])
@test all(extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) .=== [
(test = 1,)
(test = 2,)
])
@test all(extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) .=== [
(index = (1, 1), test = 1,)
(index = (1, 2), test = 2,)
])
# NamedTuple (reversed) points
@test all(extract(ga, [missing, (Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) |> collect .===
createfeature.([missing, ((Y=0.1, X=9.0), 1), ((Y=0.2, X=10.0), 4), ((Y=0.3, X=10.0), missing)]))
@test all(extract(rast, [missing, (Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) .=== [
(geometry = missing, test = missing)
(geometry = (Y = 0.1, X = 9.0), test = 1)
(geometry = (Y = 0.2, X = 10.0), test = 4)
(geometry = (Y = 0.3, X = 10.0), test = missing)
])
# Vector points
@test all(extract(ga, [[9.0, 0.1], [10.0, 0.2]]) .== createfeature.([([9.0, 0.1], 1), ([10.0, 0.2], 4)]))
# ArchGDAL equality is broken
# @test all(extract(ga, ArchGDAL.createmultipoint([[0.1, 9.0], [0.2, 10.0], [0.3, 10.0]])) .==
# createfeature.([(createpoint(0.1, 9.0), 1), (createpoint(0.2, 10.0), 4), (createpoint(0.3, 10.0), missing)]))
@test all(extract(rast, [[9.0, 0.1], [10.0, 0.2]]) .== [
(geometry = [9.0, 0.1], test = 1)
(geometry = [10.0, 0.2], test = 4)
])
# Extract a polygon
p = ArchGDAL.createpolygon([[[8.0, 0.0], [11.0, 0.0], [11.0, 0.4], [8.0, 0.0]]])
@test all(extract(ga, p) .===
createfeature.([((X=9.0, Y=0.1), 1), ((X=10.0, Y=0.1), 3), ((X=10.0, Y=0.2), 4)]))
@test all(extract(rast_m, p) .=== [
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
(geometry = (10.0, 0.2), test = missing)
])
# Extract a vector of polygons
@test all(extract(rast_m, [p, p]) .=== [
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
(geometry = (10.0, 0.2), test = missing)
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
(geometry = (10.0, 0.2), test = missing)
])
# Test all the keyword combinations
@test all(extract(rast_m, p) .=== [
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
(geometry = (10.0, 0.2), test = missing)
])
@test all(extract(rast_m, p; geometry=false) .=== [
(test = 1,)
(test = 3,)
(test = missing,)
])
@test all(extract(rast_m, p; geometry=false, index=true) .=== [
(index = CartesianIndex(1, 1), test = 1)
(index = CartesianIndex(2, 1), test = 3)
(index = CartesianIndex(2, 2), test = missing)
])
@test all(extract(rast_m, p; index=true) .=== [
(geometry = (9.0, 0.1), index = CartesianIndex(1, 1), test = 1)
(geometry = (10.0, 0.1), index = CartesianIndex(2, 1), test = 3)
(geometry = (10.0, 0.2), index = CartesianIndex(2, 2), test = missing)
])
@test extract(rast_m, p; skipmissing=true) == [
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
]
@test extract(rast_m, p; skipmissing=true, geometry=false) == [
(test = 1,)
(test = 3,)
]
@test extract(rast_m, p; skipmissing=true, geometry=false, index=true) == [
(index = CartesianIndex(1, 1), test = 1)
(index = CartesianIndex(2, 1), test = 3)
]
@test extract(rast_m, p; skipmissing=true, index=true) == [
(geometry = (9.0, 0.1), index = CartesianIndex(1, 1), test = 1)
(geometry = (10.0, 0.1), index = CartesianIndex(2, 1), test = 3)
]
# Empty geoms
@test extract(rast, []) == NamedTuple{(:geometry, :test),Tuple{Missing,Missing}}[]
@test extract(rast, []; geometry=false) == NamedTuple{(:test,),Tuple{Missing}}[]
end

@testset "from stack" begin
@test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) |> collect .===
createfeature.([missing, ((9.0, 0.1), 1, 5), ((10.0, 0.2), 4, 8), ((10.0, 0.3), missing, missing)]))
@test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) .=== [
(geometry = missing, test = missing, test2 = missing)
(geometry = (9.0, 0.1), test = 1, test2 = 5)
(geometry = (10.0, 0.2), test = 4, test2 = 8)
(geometry = (10.0, 0.3), test = missing, test2 = missing)
])
@test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true) == [
(geometry = (9.0, 0.1), test = 1, test2 = 5)
(geometry = (10.0, 0.2), test = 4, test2 = 8)
]
@test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) == [
(test = 1, test2 = 5)
(test = 4, test2 = 8)
]
@test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) == [
(index = (1, 1), test = 1, test2 = 5)
(index = (2, 2), test = 4, test2 = 8)
]
# Subset with `names`
@test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; names=(:test2,)) .=== [
(geometry = missing, test2 = missing)
(geometry = (9.0, 0.1), test2 = 5)
(geometry = (10.0, 0.2), test2 = 8)
(geometry = (10.0, 0.3), test2 = missing)
])
end
end

Expand Down
Loading