Skip to content

Commit

Permalink
Added filter_nan kwarg to filter out missing data when rasterizing (#11)
Browse files Browse the repository at this point in the history
* Added filter_nan kwarg to filter out missing data when rasterizing

* set default nodata to NaN for rasterization
  • Loading branch information
snowman2 authored Dec 12, 2019
1 parent 4cc1705 commit f388690
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 36 deletions.
6 changes: 4 additions & 2 deletions geocube/api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"""
GeoCube client core functionality
"""
import numpy

from geocube.geo_utils.geobox import GeoBoxMaker
from geocube.vector_to_cube import VectorToCube

Expand All @@ -15,7 +17,7 @@ def make_geocube(
align=None,
geom=None,
like=None,
fill=None,
fill=numpy.nan,
group_by=None,
interpolate_na_method=None,
categorical_enums=None,
Expand Down Expand Up @@ -59,7 +61,7 @@ def make_geocube(
gcds = make_geocube(vector_data='my_vector.geopackage', like=other_gcds)
fill: float, optional
The value to fill in the grid with for nodata. Default is -9999.0.
The value to fill in the grid with for nodata. Default is NaN.
group_by: str, optional
When specified, perform basic combining/reducing of the data on this column.
interpolate_na_method: {‘linear’, ‘nearest’, ‘cubic’}, optional
Expand Down
4 changes: 3 additions & 1 deletion geocube/cli/commands/make_geocube.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import click
import numpy
import xarray

from geocube.api import core
Expand All @@ -27,7 +28,8 @@
"-f",
"--fill",
type=float,
help="The value to fill in the grid with for nodata. Default is -9999.0.",
help="The value to fill in the grid with for nodata. Default is NaN.",
default=numpy.nan,
required=False,
)
@click.option(
Expand Down
73 changes: 48 additions & 25 deletions geocube/rasterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,26 @@
from geocube.logger import get_logger


def _remove_missing_data(data_values, geometry_array):
"""
Missing data causes issues with interpolation of point data
https://github.com/corteva/geocube/issues/9
This filters the data so those issues don't cause problems.
"""
not_missing_data = ~pandas.isnull(data_values)
geometry_array = geometry_array[not_missing_data]
data_values = data_values[not_missing_data]
return data_values, geometry_array


def rasterize_image(
geometry_array,
data_values,
geobox,
fill=-9999.0,
fill,
merge_alg=MergeAlg.replace,
filter_nan=False,
**ignored_kwargs
):
"""
Expand All @@ -33,10 +47,13 @@ def rasterize_image(
Data values associated with the list of geojson shapes
geobox: :obj:`datacube.utils.geometry.GeoBox`
Transform of the resulting image.
fill: float, optional
The value to fill in the grid with for nodata. Default is -9999.0.
fill: float
The value to fill in the grid with for nodata.
merge_alg: `rasterio.enums.MergeAlg`, optional
The algorithm for merging values into one cell. Default is `MergeAlg.replace`.
filter_nan: bool, optional
If True, will remove nodata values from the data before rasterization.
Default is False.
**ignored_kwargs:
These are there to be flexible with additional rasterization methods and
will be ignored.
Expand All @@ -50,6 +67,10 @@ def rasterize_image(
logger = get_logger()

try:
if filter_nan:
data_values, geometry_array = _remove_missing_data(
data_values, geometry_array
)
image = rasterio.features.rasterize(
zip(geometry_array.apply(mapping).values, data_values),
out_shape=(geobox.height, geobox.width),
Expand All @@ -66,26 +87,14 @@ def rasterize_image(
raise


def _remove_missing_data(data_values, geometry_array):
"""
Missing data causes issues with interpolation of point data
https://github.com/corteva/geocube/issues/9
This filters the data so those issues don't cause problems.
"""
not_missing_data = ~pandas.isnull(data_values)
geometry_array = geometry_array[not_missing_data]
data_values = data_values[not_missing_data]
return data_values, geometry_array


def rasterize_points_griddata(
geometry_array,
data_values,
grid_coords,
fill=-9999.0,
fill,
method="nearest",
rescale=False,
filter_nan=False,
**ignored_kwargs
):
"""
Expand All @@ -100,12 +109,15 @@ def rasterize_points_griddata(
Data values associated with the list of geojson shapes
grid_coords: dict
Output from `rioxarray.rioxarray.affine_to_coords`
fill: float, optional
The value to fill in the grid with for nodata. Default is -9999.0.
fill: float
The value to fill in the grid with for nodata.
method: {‘linear’, ‘nearest’, ‘cubic’}, optional
The method to use for interpolation in `scipy.interpolate.griddata`.
rescale: bool, optional
Rescale points to unit cube before performing interpolation. Default is false.
filter_nan: bool, optional
If True, will remove nodata values from the data before rasterization.
Default is False.
**ignored_kwargs:
These are there to be flexible with additional rasterization methods and
will be ignored.
Expand All @@ -118,7 +130,10 @@ def rasterize_points_griddata(
if data_values.dtype == object:
return None
try:
data_values, geometry_array = _remove_missing_data(data_values, geometry_array)
if filter_nan:
data_values, geometry_array = _remove_missing_data(
data_values, geometry_array
)
return griddata(
points=(geometry_array.x, geometry_array.y),
values=data_values,
Expand All @@ -134,7 +149,12 @@ def rasterize_points_griddata(


def rasterize_points_radial(
geometry_array, data_values, grid_coords, method="linear", **ignored_kwargs
geometry_array,
data_values,
grid_coords,
method="linear",
filter_nan=False,
**ignored_kwargs
):
"""
This method uses scipy.interpolate.Rbf to interpolate point data
Expand All @@ -148,13 +168,13 @@ def rasterize_points_radial(
Data values associated with the list of geojson shapes
grid_coords: dict
Output from `rioxarray.rioxarray.affine_to_coords`
fill: float, optional
The value to fill in the grid with for nodata. Default is -9999.0.
method: str, optional
The function to use for interpolation in `scipy.interpolate.Rbf`.
{'multiquadric', 'inverse', 'gaussian', 'linear',
'cubic', 'quintic', 'thin_plate'}
filter_nan: bool, optional
If True, will remove nodata values from the data before rasterization.
Default is False.
**ignored_kwargs:
These are there to be flexible with additional rasterization methods and
will be ignored.
Expand All @@ -167,7 +187,10 @@ def rasterize_points_radial(
logger = get_logger()

try:
data_values, geometry_array = _remove_missing_data(data_values, geometry_array)
if filter_nan:
data_values, geometry_array = _remove_missing_data(
data_values, geometry_array
)
interp = Rbf(geometry_array.x, geometry_array.y, data_values, function=method)
return interp(*numpy.meshgrid(grid_coords["x"], grid_coords["y"]))
except ValueError as ter:
Expand Down
4 changes: 2 additions & 2 deletions geocube/vector_to_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def __init__(self, vector_data, geobox_maker, fill=None, categorical_enums=None)
geobox_maker: :obj:`geocube.geo_utils.geobox.GeoBoxMaker`
The geobox for the grid to be generated from the vector data.
fill: float, optional
The value to fill in the grid with for nodata. Default is -9999.0.
The value to fill in the grid with for nodata. Default is NaN.
categorical_enums: dict, optional
A dictionary of all categories for the table columns containing
categorical data.
Expand All @@ -74,7 +74,7 @@ def __init__(self, vector_data, geobox_maker, fill=None, categorical_enums=None)
self.grid_coords = affine_to_coords(
self.geobox.affine, self.geobox.width, self.geobox.height
)
self.fill = fill if fill is not None else -9999.0
self.fill = fill if fill is not None else numpy.nan
if categorical_enums is not None:
for column_name, categories in categorical_enums.items():
category_type = pandas.api.types.CategoricalDtype(
Expand Down
3 changes: 2 additions & 1 deletion sphinx/history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ History

0.0.10
------
- Filter out missing data when interpolating from point data (issue #9)
- Added filter_nan kwarg to filter out missing data when rasterizing (issue #9)
- Changed default fill value to NaN when rasterizing (pull #11)

0.0.9
-----
Expand Down
Loading

0 comments on commit f388690

Please sign in to comment.