diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 67086681e..c88d0fadc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,8 +17,8 @@ jobs: - '1' os: - ubuntu-latest - - macOS-latest - # - windows-latest + # - macOS-latest + - windows-latest arch: - x64 steps: diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 79c90078e..029e967ab 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -15,6 +15,10 @@ sliced arrays or stacks will be returned instead of single values. # Keywords +- `geometry`: include a `:geometry` column with the corresponding points for each value, `true` by default. +- `index`: include an `:index` column of the `CartesianIndex` for each value, `false` by default. +- `names`: `Tuple` of `Symbol` corresponding to layers of a `RasterStack`. All layers by default. +- `skipmissing`: skip missing points automatically. - `atol`: a tolorerance for floating point lookup values for when the `LookupArray` contains `Points`. `atol` is ignored for `Intervals`. @@ -32,12 +36,9 @@ st = RasterStack(WorldClim{BioClim}, (1, 3, 5, 7, 12)) |> replace_missing # Download some occurrence data obs = GBIF2.occurrence_search("Burramys parvus"; limit=5, year="2009") -# Convert observations to points -pnts = collect((o.decimalLongitude, o.decimalLatitude) for o in obs if !ismissing(o.decimalLongitude)) - # use `extract` to get values for all layers at each observation point. # We `collect` to get a `Vector` from the lazy iterator. -collect(extract(st, pnts)) +extract(st, obs; skipmissing=true) # output 5-element Vector{NamedTuple{(:geometry, :bio1, :bio3, :bio5, :bio7, :bio12)}}: @@ -47,66 +48,144 @@ collect(extract(st, pnts)) (geometry = (0.52, 40.37), bio1 = missing, bio3 = missing, bio5 = missing, bio7 = missing, bio12 = missing) (geometry = (0.32, 40.24), bio1 = 16.321388f0, bio3 = 41.659454f0, bio5 = 30.029825f0, bio7 = 25.544561f0, bio12 = 480.0f0) ``` + +Note: passing in arrays, geometry collections or feature collections +containing a mix of points and other geometries has undefined results. """ function extract end function extract(x::RasterStackOrArray, data; - dims=DD.dims(x, DEFAULT_POINT_ORDER), kw... + dims=DD.dims(x, DEFAULT_POINT_ORDER), names=_names(x), geometry=true, index=false, kw... ) - _extract(x, data; dims, names=_names(x), kw...) + T = if geometry + keys = index ? (:geometry, :index, names...,) : (:geometry, names...,) + NamedTuple{keys} + else + keys = index ? (:index, names...,) : (names...,) + NamedTuple{keys} + end + names = NamedTuple{names}(names) + if Tables.istable(data) + geomcolnames = GI.geometrycolumns(data) + isnothing(geomcolnames) && throw(ArgumentError("No `:geometry` column and `GeoInterface.geometrycolums(::$(typeof(data)))` does not define alternate columns")) + geometries = Tables.getcolumn(Tables.columns(data), first(geomcolnames)) + _extract(T, x, geometries; dims, names, kw...) + else + _extract(T, x, data; dims, names, kw...) + end end -_extract(A::RasterStackOrArray, point::Missing; kw...) = missing -function _extract(A::RasterStackOrArray, geom; kw...) - _extract(A, GI.geomtrait(geom), geom; kw...) + +function _extract(T, A::RasterStackOrArray, geom::Missing; names, kw...) + [_maybe_add_fields(T, A, map(_ -> missing, names), missing, missing)] end -function _extract(A::RasterStackOrArray, ::Nothing, geoms; kw...) - geom1 = first(skipmissing(geoms)) - if GI.isgeometry(geom1) || GI.isfeature(geom1) || GI.isfeaturecollection(geom1) - (_extract(A, g; kw...) for g in geoms) +_extract(T, A::RasterStackOrArray, geom; kw...) = + _extract(T, A, GI.geomtrait(geom), geom; kw...) +_extract(T, A::RasterStackOrArray, ::Nothing, geom; kw...) = + throw(ArgumentError("$geom is not a valid GeoInterface.jl geometry")) +function _extract(T, A::RasterStackOrArray, ::Nothing, geoms::AbstractArray; names, skipmissing=false, kw...) + # Handle empty / all missing cases + (length(geoms) > 0 && any(!ismissing, geoms)) || return T[] + + # Handle cases with some invalid geometries + invalid_geom_idx = findfirst(g -> !ismissing(g) && GI.geomtrait(g) === nothing, geoms) + invalid_geom_idx === nothing || throw(ArgumentError("$(geoms[invalid_geom_idx]) is not a valid GeoInterface.jl geometry")) + + geom1 = first(Base.skipmissing(geoms)) + trait1 = GI.trait(geom1) + # We need to split out points from other geoms + # TODO this will fail with mixed point/geom vectors + if trait1 isa GI.PointTrait + rows = (_extract_point(T, A, g; names, kw...) for g in geoms) + return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows) else - throw(ArgumentError("`data` does not contain geomety objects")) + # This will be a list of vectors, we need to flatten it into one + rows = Iterators.flatten(_extract(T, A, g; names, skipmissing, kw...) for g in geoms) + return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows) end end -function _extract(A::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...) - _extract(A, GI.geometry(feature); kw...) +function _extract(T, A::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...) + _extract(T, A, GI.geometry(feature); kw...) end -function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; kw...) - (_extract(A, p; kw...) for p in GI.getpoint(geom)) +function _extract(T, A::RasterStackOrArray, ::GI.FeatureCollectionTrait, fc; kw...) + # Fall back to the Array/iterator method for feature collections + _extract(T, A, [GI.geometry(f) for f in GI.getfeature(fc)]; kw...) end -function _extract(A::RasterStackOrArray, ::GI.AbstractGeometryTrait, geom; names, kw...) - B = boolmask(geom; to=dims(A, DEFAULT_POINT_ORDER), kw...) - pts = DimPoints(B) - dis = DimIndices(B) - ((; geometry=_geom_nt(dims(B), pts[I]), _prop_nt(A, I, names)...) for I in CartesianIndices(B) if B[I]) +function _extract(T, A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; skipmissing=false, kw...) + rows = (_extract_point(T, A, g; kw...) for g in GI.getpoint(geom)) + return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows) end -_geom_nt(dims::DimTuple, pts) = NamedTuple{map(dim2key, dims)}(pts) -_prop_nt(st::AbstractRasterStack, I, names::NamedTuple{K}) where K = NamedTuple{K}(values(st[I])) -_prop_nt(A::AbstractRaster, I, names::NamedTuple{K}) where K = NamedTuple{K}((A[I],)) - -function _extract(x::RasterStackOrArray, ::GI.PointTrait, point; dims, names, atol=nothing) - # Get the actual dimensions available in the object - coords = map(DD.commondims(x, dims)) do d - _dimcoord(d, point) - end +function _extract(T, A::RasterStackOrArray, ::GI.AbstractGeometryTrait, geom; + names, skipmissing=false, kw... +) + # Make a raster mask of the geometry + B = boolmask(geom; to=commondims(A, DEFAULT_POINT_ORDER), kw...) + # Add a row for each pixel that is `true` in the mask + rows = (_maybe_add_fields(T, A, _prop_nt(A, I, names), I) for I in CartesianIndices(B) if B[I]) + # Maybe skip missing rows + return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows) +end +_extract(T, x::RasterStackOrArray, trait::GI.PointTrait, point; kw...) = + _extract_point(T, x, point; kw...) - # Extract the values - if any(map(ismissing, coords)) - # TODO test this branch somehow - geometry = map(_ -> missing, coords) +@inline _skip_missing_rows(rows) = Iterators.filter(row -> !any(ismissing, row), rows) + +@inline _prop_nt(st::AbstractRasterStack, I, names::NamedTuple{K}) where K = t[I][K] +@inline _prop_nt(A::AbstractRaster, I, names::NamedTuple{K}) where K = NamedTuple{K}((A[I],)) + +# Extract a single point +function _extract_point(T, x::RasterStackOrArray, point; + dims, names::NamedTuple{K}, atol=nothing, kw... +) where K + # The point itself might be missing, so return missing for every field + if ismissing(point) layer_vals = map(_ -> missing, names) + geom = missing + I = missing else - selectors = map(dims, coords) do d, c - _at_or_contains(d, c, atol) + # Get the actual dimensions available in the object + coords = map(DD.commondims(x, dims)) do d + _dimcoord(d, point) end - layer_vals = if DD.hasselection(x, selectors) - x isa Raster ? (x[selectors...],) : x[selectors...] + # If any are coordinates missing, also return missing for everything + if any(map(ismissing, coords)) + layer_vals = map(_ -> missing, names) + geom = missing + I = missing else - map(_ -> missing, names) + selectors = map(dims, coords) do d, c + _at_or_contains(d, c, atol) + end + # Check the selector is in bounds / actually selectable + if DD.hasselection(x, selectors) + I = DD.dims2indices(x, selectors) + layer_vals = x isa Raster ? NamedTuple{K}((x[I...],)) : x[I...][K] + else + # Otherwise return `missing` for everything + I = missing + layer_vals = map(_ -> missing, names) + end + # Return the point for the geometry, either way + geom = point end - geometry = point end - properties = NamedTuple{keys(names)}(layer_vals) - return (; geometry, properties...) + + return _maybe_add_fields(T, x, layer_vals, geom, I) +end +function _extract_point(T, A::RasterStackOrArray, point::Missing; names, kw...) + # Missing points return a single row + return _maybe_add_fields(T, A, map(_ -> missing, names), missing, missing) +end + +# Maybe add optional fields +@inline function _maybe_add_fields(T, A, layer_vals::NamedTuple, I) + _maybe_add_fields(T, A, layer_vals, DimPoints(A)[I], I) +end +@inline function _maybe_add_fields(::Type{T}, A, layer_vals::NamedTuple, point, I)::T where {T<:NamedTuple{K}} where K + if :geometry in K + :index in K ? merge((; geometry=point, index=I), layer_vals) : merge((; geometry=point), layer_vals) + else + :index in K ? merge((; index=I), layer_vals) : layer_vals + end end -_names(A::AbstractRaster) = NamedTuple{(Symbol(name(A)),)}((Symbol(name(A)),)) -_names(A::AbstractRasterStack) = NamedTuple{keys(A)}(keys(A)) +_names(A::AbstractRaster) = (Symbol(name(A)),) +_names(A::AbstractRasterStack) = keys(A) diff --git a/test/methods.jl b/test/methods.jl index aef73bde2..59779f372 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -256,35 +256,157 @@ end # Polygon(::Val{:ArchGDAL}, values) = ArchGDAL.createpolygon(values) createpoint(args...) = ArchGDAL.createpoint(args...) -createfeature(x::Tuple{<:Any,<:Any}) = NamedTuple{(:geometry,:test)}(x) -createfeature(x::Tuple{<:Any,<:Any,<:Any}) = NamedTuple{(:geometry,:test,:test2)}(x) -createfeature(::Missing) = missing @testset "extract" begin dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) - ga = Raster([1 2; 3 4], dimz; name=:test, missingval=missing) - ga2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=missing) - st = RasterStack(ga, ga2) + rast = Raster([1 2; 3 4], dimz; name=:test, missingval=missing) + rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=missing) + rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing) + table = (geometry=[missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)], foo=zeros(4)) + st = RasterStack(rast, rast2) @testset "from Raster" begin # Tuple points - @test all(extract(ga, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) .=== - createfeature.([missing, ((9.0, 0.1), 1), ((9.0, 0.2), 2), ((10.0, 0.3), missing)])) + ex = extract(rast, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) + @test eltype(ex) == NamedTuple{(:geometry,:test)} + @test all(ex .=== [ + (geometry = missing, test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.3), test = missing) + ]) + ex = extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true) + @test eltype(ex) == NamedTuple{(:geometry,:test),Tuple{Tuple{Float64,Float64},Int}} + @test all(ex .=== [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)]) + ex = extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) + @test eltype(ex) == NamedTuple{(:test,),Tuple{Int}} + @test all(ex .=== [(test = 1,), (test = 2,)]) + @test all(extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) .=== [ + (index = (1, 1), test = 1,) + (index = (1, 2), test = 2,) + ]) # NamedTuple (reversed) points - @test all(extract(ga, [missing, (Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) |> collect .=== - createfeature.([missing, ((Y=0.1, X=9.0), 1), ((Y=0.2, X=10.0), 4), ((Y=0.3, X=10.0), missing)])) + @test all(extract(rast, [missing, (Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) .=== [ + (geometry = missing, test = missing) + (geometry = (Y = 0.1, X = 9.0), test = 1) + (geometry = (Y = 0.2, X = 10.0), test = 4) + (geometry = (Y = 0.3, X = 10.0), test = missing) + ]) # Vector points - @test all(extract(ga, [[9.0, 0.1], [10.0, 0.2]]) .== createfeature.([([9.0, 0.1], 1), ([10.0, 0.2], 4)])) - # ArchGDAL equality is broken - # @test all(extract(ga, ArchGDAL.createmultipoint([[0.1, 9.0], [0.2, 10.0], [0.3, 10.0]])) .== - # createfeature.([(createpoint(0.1, 9.0), 1), (createpoint(0.2, 10.0), 4), (createpoint(0.3, 10.0), missing)])) + @test all(extract(rast, [[9.0, 0.1], [10.0, 0.2]]) .== [ + (geometry = [9.0, 0.1], test = 1) + (geometry = [10.0, 0.2], test = 4) + ]) # Extract a polygon p = ArchGDAL.createpolygon([[[8.0, 0.0], [11.0, 0.0], [11.0, 0.4], [8.0, 0.0]]]) - @test all(extract(ga, p) .=== - createfeature.([((X=9.0, Y=0.1), 1), ((X=10.0, Y=0.1), 3), ((X=10.0, Y=0.2), 4)])) + @test all(extract(rast_m, p) .=== [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + # Extract a vector of polygons + ex = extract(rast_m, [p, p]) + @test eltype(ex) == NamedTuple{(:geometry,:test)} + @test all(ex .=== [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + # Test all the keyword combinations + @test all(extract(rast_m, p) .=== [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + @test all(extract(rast_m, p; geometry=false) .=== [ + (test = 1,) + (test = 3,) + (test = missing,) + ]) + @test all(extract(rast_m, p; geometry=false, index=true) .=== [ + (index = CartesianIndex(1, 1), test = 1) + (index = CartesianIndex(2, 1), test = 3) + (index = CartesianIndex(2, 2), test = missing) + ]) + @test all(extract(rast_m, p; index=true) .=== [ + (geometry = (9.0, 0.1), index = CartesianIndex(1, 1), test = 1) + (geometry = (10.0, 0.1), index = CartesianIndex(2, 1), test = 3) + (geometry = (10.0, 0.2), index = CartesianIndex(2, 2), test = missing) + ]) + @test extract(rast_m, p; skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + ] + @test extract(rast_m, p; skipmissing=true, geometry=false) == [ + (test = 1,) + (test = 3,) + ] + @test extract(rast_m, p; skipmissing=true, geometry=false, index=true) == [ + (index = CartesianIndex(1, 1), test = 1) + (index = CartesianIndex(2, 1), test = 3) + ] + @test extract(rast_m, p; skipmissing=true, index=true) == [ + (geometry = (9.0, 0.1), index = CartesianIndex(1, 1), test = 1) + (geometry = (10.0, 0.1), index = CartesianIndex(2, 1), test = 3) + ] + # Empty geoms + @test extract(rast, []) == NamedTuple{(:geometry, :test),Tuple{Missing,Missing}}[] + @test extract(rast, []; geometry=false) == NamedTuple{(:test,),Tuple{Missing}}[] + # Missing coord errors + @test_throws ArgumentError extract(rast, [(0.0, missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) + @test_throws ArgumentError extract(rast, [(9.0, 0.1), (0.0, missing), (9.0, 0.2), (10.0, 0.3)]) + @test_throws ArgumentError extract(rast, [(X=0.0, Y=missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) end + + @testset "with table" begin + @test all(extract(rast, table) .=== [ + (geometry = missing, test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.3), test = missing) + ]) + @test extract(rast, table; skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + ] + @test extract(rast, table; skipmissing=true, geometry=false) == [ + (test = 1,) + (test = 2,) + ] + @test extract(rast, table; skipmissing=true, geometry=false, index=true) == [ + (index = (1, 1), test = 1,) + (index = (1, 2), test = 2,) + ] + end + @testset "from stack" begin - @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) |> collect .=== - createfeature.([missing, ((9.0, 0.1), 1, 5), ((10.0, 0.2), 4, 8), ((10.0, 0.3), missing, missing)])) + @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) .=== [ + (geometry = missing, test = missing, test2 = missing) + (geometry = (9.0, 0.1), test = 1, test2 = 5) + (geometry = (10.0, 0.2), test = 4, test2 = 8) + (geometry = (10.0, 0.3), test = missing, test2 = missing) + ]) + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1, test2 = 5) + (geometry = (10.0, 0.2), test = 4, test2 = 8) + ] + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) == [ + (test = 1, test2 = 5) + (test = 4, test2 = 8) + ] + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) == [ + (index = (1, 1), test = 1, test2 = 5) + (index = (2, 2), test = 4, test2 = 8) + ] + # Subset with `names` + @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; names=(:test2,)) .=== [ + (geometry = missing, test2 = missing) + (geometry = (9.0, 0.1), test2 = 5) + (geometry = (10.0, 0.2), test2 = 8) + (geometry = (10.0, 0.3), test2 = missing) + ]) end end