Skip to content

Commit

Permalink
Merge pull request #64 from rom-py/raf-bnd
Browse files Browse the repository at this point in the history
Raf bnd
  • Loading branch information
tomdurrant authored Jan 15, 2024
2 parents 04555bd + 13268fb commit 709546d
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 176 deletions.
194 changes: 77 additions & 117 deletions rompy/core/boundary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,8 @@
import xarray as xr
from pydantic import Field, model_validator

from rompy.core.data import (
DataGrid,
SourceBase,
SourceDatamesh,
SourceDataset,
SourceFile,
SourceIntake,
)
from rompy.core.data import (DataGrid, SourceBase, SourceDatamesh,
SourceDataset, SourceFile, SourceIntake)
from rompy.core.grid import RegularGrid
from rompy.core.time import TimeRange

Expand Down Expand Up @@ -124,131 +118,61 @@ class DataBoundary(DataGrid):
description="Model type discriminator",
)
id: str = Field(description="Unique identifier for this data source")
spacing: Optional[float] = Field(
spacing: Optional[Union[float, Literal["parent"]]] = Field(
default=None,
description=(
"Spacing between boundary points, by default defined as the minimum "
"distance between points in the dataset"
"Spacing between points along the grid boundary to retrieve data for. If "
"None (default), points are defined from the the actual grid object "
"passed to the `get` method. If 'parent', the resolution of the parent "
"dataset is used to define the spacing."
),
gt=0.0,
)
sel_method: Literal["nearest", "interp"] = Field(
default="nearest",
sel_method: Literal["sel", "interp"] = Field(
default="sel",
description=(
"Wavespectra method to use for selecting boundary points from the dataset. Only sel_method or interpolate_method can be set, not both."
"Xarray method to use for selecting boundary points from the dataset"
),
)
sel_method_kwargs: dict = Field(
default={}, description="Keyword arguments for sel_method passed to wavespectra"
)
interpolate_method: Literal[
"nearest", "zero", "slinear", "quadratic", "cubic", "polynomial"
] = Field(
default=None,
description=(
"Wavespectra method to use for selecting boundary points from the dataset. Only sel_method or interpolate_method can be set, not both."
),
)
interp_method_kwargs: dict = Field(
default={}, description="Keyword arguments for sel_method passed to wavespectra"
default={}, description="Keyword arguments for sel_method"
)
crop_data: bool = Field(
default=True,
description="Update crop filter from Time object if passed to get method",
)

# validator that ensures that only one of sel_method and interpolate_method is set
@model_validator(mode="after")
def assert_sel_method(cls, v):
if v.sel_method is not None and v.interpolate_method is not None:
raise ValueError("Only one of sel_method and interpolate_method can be set")
if v.sel_method is None and v.interpolate_method is None:
raise ValueError("Either sel_method or interpolate_method must be set")
return v

def _filter_grid(self, *args, **kwargs):
"""Overwrite DataGrid's which assumes a regular grid."""
pass

def _boundary_resolutions(self, grid):
"""Boundary resolution based on the shortest distance between points.
def _source_grid_spacing(self) -> float:
"""Return the lowest grid spacing in the source dataset.
The boundary resolution should be based on the dataset resolution instead of
the grid resolution to avoid creating points unecessarily. Here we find the
minimum distance between points in the dataset and use that to define the
boundary resolution ensuring the grid sizes are divisible by the resolution.
In a gridded dataset this is defined as the lowest spacing between adjacent
points in the dataset. In other dataset types such as a station dataset this
method needs to be overriden to return the lowest spacing between points.
"""
# Find out the minimum distance between points in the original dataset
buffer = 2 * min(grid.dx, grid.dy)
x0, y0, x1, y1 = grid.bbox(buffer=buffer)
# Select dataset points just outside the actual grid to optimise the search
ds = self.ds.spec.sel([x0, x1], [y0, y1], method="bbox")
points = list(zip(ds.lon.values, ds.lat.values))
min_distance = find_minimum_distance(points)
# Calculate resolutions ensuring at least 3 points per side
xlen = grid.maxx - grid.minx
nx = max(xlen // min_distance, 3)
dx = xlen / nx
ylen = grid.maxy - grid.miny
ny = max(ylen // min_distance, 3)
dy = ylen / ny
return dx, dy

def _boundary_points(self, grid):
"""Coordinates of boundary points based on grid bbox and dataset resolution."""
if issubclass(grid.__class__, RegularGrid):
if self.spacing is None:
dx, dy = self._boundary_resolutions(grid)
spacing = min(dx, dy)
else:
spacing = self.spacing
points = grid.points_along_boundary(spacing=spacing)
if len(points.geoms) < 4:
logger.warning(
f"There are only {len(points)} boundary points (less than 1 point per grid side), "
f"consider setting a smaller spacing (the current spacing is {spacing})"
)
xbnd = np.array([p.x for p in points.geoms])
ybnd = np.array([p.y for p in points.geoms])
elif grid.__class__.__name__ == "SCHISMGrid":
xbnd, ybnd = grid.ocean_boundary()
return xbnd, ybnd

def _sel_boundary(self, grid):
"""Select the boundary points from the dataset."""
xbnd, ybnd = self._boundary_points(grid)
xbnd = xr.DataArray(xbnd, dims=("site",))
ybnd = xr.DataArray(ybnd, dims=("site",))
if self.sel_method != None:
ds = self.ds.sel(
{self.coords.x: xbnd, self.coords.y: ybnd},
method=self.sel_method,
**self.sel_method_kwargs,
)
dx = np.diff(sorted(self.ds[self.coords.x].values)).min()
dy = np.diff(sorted(self.ds[self.coords.y].values)).min()
return min(dx, dy)

def _set_spacing(self) -> float:
"""Define spacing from the parent dataset if required."""
if self.spacing == "parent":
return self._source_grid_spacing()
else:
ds = self.ds.interp(
{self.coords.x: xbnd, self.coords.y: ybnd},
method=self.interpolate_method,
**self.interp_method_kwargs,
)
return ds

def plot(self, model_grid=None, cmap="turbo", fscale=10, ax=None, **kwargs):
return scatter_plot(
self, model_grid=model_grid, cmap=cmap, fscale=fscale, ax=ax, **kwargs
)
return self.spacing

def plot_boundary(self, grid=None, fscale=10, ax=None, **kwargs):
"""Plot the boundary points on a map."""
ds = self._sel_boundary(grid)
fig, ax = grid.plot(ax=ax, fscale=fscale, **kwargs)
return scatter_plot(
self,
ds=ds,
fscale=fscale,
ax=ax,
**kwargs,
)
def _sel_boundary(self, grid) -> xr.Dataset:
"""Select the boundary points from the dataset."""
xbnd, ybnd = grid.boundary_points(spacing=self._set_spacing())
coords = {
self.coords.x: xr.DataArray(xbnd, dims=("site",)),
self.coords.y: xr.DataArray(ybnd, dims=("site",)),
}
return getattr(self.ds, self.sel_method)(coords, **self.sel_method_kwargs)

def get(
self, destdir: str | Path, grid: RegularGrid, time: Optional[TimeRange] = None
Expand Down Expand Up @@ -277,23 +201,40 @@ def get(
ds.to_netcdf(outfile)
return outfile

def plot(self, model_grid=None, cmap="turbo", fscale=10, ax=None, **kwargs):
return scatter_plot(
self, model_grid=model_grid, cmap=cmap, fscale=fscale, ax=ax, **kwargs
)

def plot_boundary(self, grid=None, fscale=10, ax=None, **kwargs):
"""Plot the boundary points on a map."""
ds = self._sel_boundary(grid)
fig, ax = grid.plot(ax=ax, fscale=fscale, **kwargs)
return scatter_plot(
self,
ds=ds,
fscale=fscale,
ax=ax,
**kwargs,
)


class BoundaryWaveStation(DataBoundary):
"""Wave boundary data from station datasets.
Notes
-----
Note
----
The `tolerance` behaves differently with sel_methods `idw` and `nearest`; in `idw`
sites with no enough neighbours within `tolerance` are masked whereas in `nearest`
an exception is raised (see wavespectra documentation for more details).
Note
----
Be aware that when using `idw` missing values will be returned for sites with less
than 2 neighbours within `tolerance` in the original dataset. This is okay for land
mask areas but could cause boundary issues when on an open boundary location. To
avoid this either use `nearest` or increase `tolerance` to include more neighbours.
TODO: Allow specifying resolutions along x and y instead of a single value.
"""

grid_type: Literal["boundary_wave_station"] = Field(
Expand All @@ -307,7 +248,6 @@ class BoundaryWaveStation(DataBoundary):
),
discriminator="model_type",
)

sel_method: Literal["idw", "nearest"] = Field(
default="idw",
description=(
Expand All @@ -322,9 +262,29 @@ def assert_has_wavespectra_accessor(self) -> "BoundaryWaveStation":
raise ValueError(f"Wavespectra compatible source is required")
return self

def _sel_boundary(self, grid):
def _source_grid_spacing(self, grid) -> float:
"""Return the lowest spacing between points in the source dataset."""
# Select dataset points just outside the actual grid to optimise the search
xbnd, ybnd = grid.boundary().exterior.coords.xy
dx = np.diff(xbnd).min()
dy = np.diff(ybnd).min()
buffer = 2 * min(dx, dy)
x0, y0, x1, y1 = grid.bbox(buffer=buffer)
ds = self.ds.spec.sel([x0, x1], [y0, y1], method="bbox")
# Return the closest distance between adjacent points in cropped dataset
points = list(zip(ds.lon.values, ds.lat.values))
return find_minimum_distance(points)

def _set_spacing(self, grid) -> float:
"""Define spacing from the parent dataset if required."""
if self.spacing == "parent":
return self._source_grid_spacing(grid)
else:
return self.spacing

def _sel_boundary(self, grid) -> xr.Dataset:
"""Select the boundary points from the dataset."""
xbnd, ybnd = self._boundary_points(grid)
xbnd, ybnd = grid.boundary_points(spacing=self._set_spacing(grid))
ds = self.ds.spec.sel(
lons=xbnd,
lats=ybnd,
Expand Down
73 changes: 33 additions & 40 deletions rompy/core/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,66 +63,59 @@ def bbox(self, buffer=0.0) -> Bbox:
bbox = [ll_x, ll_y, ur_x, ur_y]
return bbox

def _get_boundary(self, tolerance=0.2) -> Polygon:
def _get_convex_hull(self, tolerance=0.2) -> Polygon:
xys = list(zip(self.x.flatten(), self.y.flatten()))
polygon = MultiPoint(xys).convex_hull
polygon = polygon.simplify(tolerance=tolerance)

return polygon

def boundary(self, tolerance=0.2):
"""
Returns a boundary polygon as a Shapely Polygon object. Sub-classes can override the private method Grid._get_boundary() but must return a Shapely polygon object.
def boundary(self, tolerance=0.2) -> Polygon:
"""Returns the convex hull boundary polygon from the grid.
Parameters
----------
tolerance : float
See https://shapely.readthedocs.io/en/stable/manual.html#object.simplify
tolerance: float
Simplify polygon shape based on maximum distance from original geometry,
see https://shapely.readthedocs.io/en/stable/manual.html#object.simplify.
Returns
-------
polygon : shapely.Polygon see https://shapely.readthedocs.io/en/stable/manual.html#Polygon
polygon: shapely.Polygon
See https://shapely.readthedocs.io/en/stable/manual.html#Polygon
"""
return self._get_boundary(tolerance=tolerance)
return self._get_convex_hull(tolerance=tolerance)

def boundary_points(self, tolerance=0.2):
"""
Convenience function to convert boundary Shapely Polygon
to arrays of coordinates
def boundary_points(self, spacing=None, tolerance=0.2) -> tuple:
"""Returns array of coordinates from boundary polygon.
Parameters
----------
tolerance : float
Passed to Grid.boundary
See https://shapely.readthedocs.io/en/stable/manual.html#object.simplify
tolerance: float
Simplify polygon shape based on maximum distance from original geometry,
see https://shapely.readthedocs.io/en/stable/manual.html#object.simplify.
spacing: float
If specified, points are returned evenly spaced along the boundary at the
specified spacing, otherwise all points are returned.
Returns:
--------
points: tuple
Tuple of x and y coordinates of the boundary points.
"""
polygon = self.boundary(tolerance=tolerance)
hull_x, hull_y = polygon.exterior.coords.xy
return hull_x, hull_y

def points_along_boundary(self, spacing):
"""Points evenly spaced along the grid boundary.
Parameters
----------
spacing : float
The spacing between points along the boundary
Returns
-------
points : MultiPoint
A Shapely MultiPoint object containing the points along the boundary.
"""
polygon = self.boundary(tolerance=0)
perimeter = polygon.length
if perimeter < spacing:
raise ValueError(f"Spacing = {spacing} > grid perimeter = {perimeter}")
num_points = int(np.ceil(perimeter / spacing))
points = [polygon.boundary.interpolate(i * spacing) for i in range(num_points)]
return MultiPoint(points)
if spacing is None:
xpts, ypts = polygon.exterior.coords.xy
else:
perimeter = polygon.length
if perimeter < spacing:
raise ValueError(f"Spacing = {spacing} > grid perimeter = {perimeter}")
npts = int(np.ceil(perimeter / spacing))
points = [polygon.boundary.interpolate(i * spacing) for i in range(npts)]
xpts = [point.x for point in points]
ypts = [point.y for point in points]
return np.array(xpts), np.array(ypts)

def _figsize(self, x0, x1, y0, y1, fscale):
xlen = abs(x1 - x0)
Expand Down
Loading

0 comments on commit 709546d

Please sign in to comment.