Skip to content

Commit

Permalink
Merge pull request #303 from Deltares/from-structured-dataset
Browse files Browse the repository at this point in the history
From structured dataset
  • Loading branch information
Huite authored Oct 21, 2024
2 parents 93bd34f + e111a64 commit d12fd75
Show file tree
Hide file tree
Showing 6 changed files with 332 additions and 119 deletions.
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ UgridDataset
UgridDataset
UgridDataset.ugrid
UgridDataset.from_geodataframe
UgridDataset.from_structured

UGRID Accessor
--------------
Expand Down Expand Up @@ -348,7 +349,6 @@ UGRID2D Topology
Ugrid2d.to_dataset
Ugrid2d.from_geodataframe
Ugrid2d.from_structured
Ugrid2d.from_structured_multicoord
Ugrid2d.from_structured_bounds
Ugrid2d.from_structured_intervals1d
Ugrid2d.from_structured_intervals2d
Expand Down
24 changes: 22 additions & 2 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,26 @@ All notable changes to this project will be documented in this file.
The format is based on `Keep a Changelog`_, and this project adheres to
`Semantic Versioning`_.

Unreleased
----------

Changed
~~~~~~~

- :meth:`xugrid.UgridDataArrayAccessor.from_structured` previously required the
literal dimensions ``("y", "x")``. This requirement has been relaxed, it will
now infer the dimensions from the provided coordinates.
- :meth:`xugrid.Ugrid2d.from_structured` previously only supported 1D
coordinates; it now detects whether coordinates are 1D or 2D automatically.
Accordingly, :meth:`xugrid.Ugrid2d.from_structured_multicoord` should no
longer be used, and calling it will give a FutureWarning.

Added
~~~~~

- :meth:`xugrid.UgridDataset.from_structured` has been added to create
UgriDatasets from xarray Datasets.

[0.12.1] 2024-09-09
-------------------

Expand Down Expand Up @@ -78,7 +98,7 @@ Changed
- Selection operations such as :meth:`UgridDataArrayAccessor.sel_points` will
now also return points that are located on the edges of 2D topologies.
- :attr:`xugrid.Ugrid1d.dimensions` and :attr:`xugrid.Ugrid2d.dimensions` now
raise a FutureWarning; use ``.dims`` or ``.sizes`` instead.
give a FutureWarning; use ``.dims`` or ``.sizes`` instead.
- Improved performance of :func:`xugrid.open_dataset` and
:func:`xugrid.merge_partitions` when handling datasets with a large number
of variables (>100).
Expand Down Expand Up @@ -827,4 +847,4 @@ Added
------------------

.. _Keep a Changelog: https://keepachangelog.com/en/1.0.0/
.. _Semantic Versioning: https://semver.org/spec/v2.0.0.html
.. _Semantic Versioning: https://semver.org/spec/v2.0.0.html
4 changes: 2 additions & 2 deletions tests/test_ugrid2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,7 +1217,7 @@ def test_from_structured():
assert grid.n_face == 4


def test_form_structured_multicoord():
def test_from_structured_multicoord():
da = xr.DataArray(
data=np.ones((2, 2)),
coords={
Expand All @@ -1226,7 +1226,7 @@ def test_form_structured_multicoord():
},
dims=("y", "x"),
)
grid = xugrid.Ugrid2d.from_structured_multicoord(da, x="xc", y="yc")
grid = xugrid.Ugrid2d._from_structured_multicoord(da, x="xc", y="yc")
assert isinstance(grid, xugrid.Ugrid2d)
assert grid.n_face == 4

Expand Down
148 changes: 107 additions & 41 deletions tests/test_ugrid_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,47 +162,6 @@ def test_rename(self):
renamed = self.uda.ugrid.rename("renamed")
assert "renamed_nFaces" in renamed.dims

def test_from_structured(self):
da = xr.DataArray([0.0, 1.0, 2.0], {"x": [5.0, 10.0, 15.0]}, ["x"])
with pytest.raises(ValueError, match="Last two dimensions of da"):
xugrid.UgridDataArray.from_structured(da)

da = xr.DataArray(
data=np.arange(2 * 3 * 4).reshape((2, 3, 4)),
coords={"layer": [1, 2], "y": [5.0, 10.0, 15.0], "x": [2.0, 4.0, 6.0, 8.0]},
dims=["layer", "y", "x"],
name="grid",
)
uda = xugrid.UgridDataArray.from_structured(da)
assert isinstance(uda, xugrid.UgridDataArray)
assert uda.name == "grid"
assert uda.dims == ("layer", "mesh2d_nFaces")
assert uda.shape == (2, 12)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])
# Check whether flipping the y-axis doesn't cause any problems
flipped = da.isel(y=slice(None, None, -1))
uda = xugrid.UgridDataArray.from_structured(flipped)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])

def test_from_structured_multicoord(self):
da = xr.DataArray(
data=[[0, 1], [2, 3]],
coords={
"yc": (("y", "x"), [[12.0, 11.0], [12.0, 11.0]]),
"xc": (("y", "x"), [[10.0, 12.0], [10.0, 12.0]]),
},
dims=("y", "x"),
)
uda = xugrid.UgridDataArray.from_structured(da)
assert isinstance(uda, xugrid.UgridDataArray)
assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [-0.5, 0.5, 1.5])
assert np.array_equal(uda.data, [0, 1, 2, 3])

uda = xugrid.UgridDataArray.from_structured(da, x="xc", y="yc")
assert isinstance(uda, xugrid.UgridDataArray)
assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [9.0, 11.0, 13.0])
assert np.array_equal(uda.data, [0, 1, 2, 3])

def test_unary_op(self):
alltrue = self.uda.astype(bool)
allfalse = alltrue.copy()
Expand Down Expand Up @@ -806,6 +765,113 @@ def test_reindex_like(self):
assert isinstance(back, xugrid.UgridDataset)


class TestFromStructured:
@pytest.fixture(autouse=True)
def setup(self):
self.da1d = xr.DataArray(
[0.0, 1.0, 2.0, 3.0], {"x": [2.0, 4.0, 6.0, 8.0]}, ["x"]
)
self.da2d = xr.DataArray(
data=np.arange(2 * 3 * 4).reshape((2, 3, 4)),
coords={"layer": [1, 2], "y": [5.0, 10.0, 15.0], "x": [2.0, 4.0, 6.0, 8.0]},
dims=["layer", "y", "x"],
name="grid",
)
self.da_coords2d = xr.DataArray(
data=[[0, 1], [2, 3]],
coords={
"yc": (("y", "x"), [[12.0, 11.0], [12.0, 11.0]]),
"xc": (("y", "x"), [[10.0, 12.0], [10.0, 12.0]]),
},
dims=("y", "x"),
)
self.ds = xr.Dataset(
{
"a": self.da2d,
"b": self.da1d,
"c": 1.0,
}
)

def test_error_1d(self):
with pytest.raises(
ValueError, match="DataArray must have at least two spatial dimensions"
):
xugrid.UgridDataArray.from_structured(self.da1d)

def test_error_x_xor_y(self):
with pytest.raises(ValueError, match="Provide both x and y, or neither."):
xugrid.UgridDataArray.from_structured(self.da2d, x="this")

def test_missing_xy(self):
with pytest.raises(ValueError, match="Coordinates xc and yc are not present."):
xugrid.UgridDataArray.from_structured(self.da2d, x="xc", y="yc")

def test_from_dataarray(self):
uda = xugrid.UgridDataArray.from_structured(self.da2d)
assert isinstance(uda, xugrid.UgridDataArray)
assert uda.name == "grid"
assert uda.dims == ("layer", "mesh2d_nFaces")
assert uda.shape == (2, 12)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])
# Check whether flipping the y-axis doesn't cause any problems
flipped = self.da2d.isel(y=slice(None, None, -1))
uda = xugrid.UgridDataArray.from_structured(flipped)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])

# And test transposed.
daT = self.da2d.transpose()
uda = xugrid.UgridDataArray.from_structured(daT)
assert isinstance(uda, xugrid.UgridDataArray)
assert uda.name == "grid"
assert uda.dims == ("layer", "mesh2d_nFaces")
assert uda.shape == (2, 12)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])

def test_from_multicoord(self):
uda = xugrid.UgridDataArray.from_structured(self.da_coords2d)
assert isinstance(uda, xugrid.UgridDataArray)
assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [-0.5, 0.5, 1.5])
assert np.array_equal(uda.data, [0, 1, 2, 3])

uda = xugrid.UgridDataArray.from_structured(self.da_coords2d, x="xc", y="yc")
assert isinstance(uda, xugrid.UgridDataArray)
assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [9.0, 11.0, 13.0])
assert np.array_equal(uda.data, [0, 1, 2, 3])

def test_from_dataset(self):
uds = xugrid.UgridDataset.from_structured(self.ds)
assert isinstance(uds, xugrid.UgridDataset)
assert set(uds.data_vars) == {"a", "b", "c"}
assert uds["a"].dims == ("layer", "mesh2d_nFaces")
assert uds["b"].dims == ("x",)
assert uds["c"].dims == ()
uda = uds["a"]
assert uda.shape == (2, 12)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])

def test_from_multicoord_dataset(self):
ds = self.ds.copy()
da = self.da_coords2d.rename({"x": "x1", "y": "y1"})
ds["d"] = da
# Unspecified: it'll only infer x and y.
uds = xugrid.UgridDataset.from_structured(ds)
assert isinstance(uds, xugrid.UgridDataset)
assert uds["a"].dims == ("layer", "mesh2d_nFaces")
assert uds["d"].dims == ("y1", "x1")
assert len(uds.ugrid.grids) == 1
# Now specify separate topologies.
uds = xugrid.UgridDataset.from_structured(
ds, {"mesh2d_0": ("x", "y"), "mesh2d_1": ("xc", "yc")}
)
assert isinstance(uds, xugrid.UgridDataset)
assert uds["a"].dims == ("layer", "mesh2d_0_nFaces")
assert uds["b"].dims == ("x",)
assert uds["c"].dims == ()
assert uds["d"].dims == ("mesh2d_1_nFaces",)
assert len(uds.ugrid.grids) == 2


def test_multiple_coordinates():
grid = GRID()
ds = UGRID_DS()
Expand Down
134 changes: 106 additions & 28 deletions xugrid/core/wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,21 @@ def from_structured(
The spatial dimensions are flattened into a single UGRID face dimension.
By default, this method looks for the "x" and "y" coordinates and assumes
they are one-dimensional. To convert rotated or curvilinear coordinates,
provide the names of the x and y coordinates.
By default, this method looks for:
1. ``"x"`` and ``"y"`` dimensions.
2. ``"longitude"`` and ``"latitude"`` dimensions.
3. ``"axis"`` attributes of "X" or "Y" on coordinates.
4. ``"standard_name"`` attributes of "longitude", "latitude",
"projection_x_coordinate", or "project_y_coordinate" on coordinate
variables.
Specify the x and y coordinate names explicitly otherwise.
Parameters
----------
da: xr.DataArray
Last two dimensions must be ``("y", "x")``.
Last two dimensions must be the y and x dimension (in that order!).
x: str, default: None
Which coordinate to use as the UGRID x-coordinate.
y: str, default: None
Expand All @@ -260,30 +267,15 @@ def from_structured(
-------
unstructured: UgridDataArray
"""
if da.dims[-2:] != ("y", "x"):
raise ValueError('Last two dimensions of da must be ("y", "x")')
if (x is None) ^ (y is None):
raise ValueError("Provide both x and y, or neither.")
if x is None:
grid = Ugrid2d.from_structured(da)
else:
# Find out if it's multi-dimensional
xdim = da[x].ndim
if xdim == 1:
grid = Ugrid2d.from_structured(da, x=x, y=y)
elif xdim == 2:
grid = Ugrid2d.from_structured_multicoord(da, x=x, y=y)
else:
raise ValueError(f"x and y must be 1D or 2D. Found: {xdim}")

dims = da.dims[:-2]
coords = {k: da.coords[k] for k in dims}
face_da = xr.DataArray(
da.data.reshape(*da.shape[:-2], -1),
coords=coords,
dims=[*dims, grid.face_dimension],
name=da.name,
)
if da.ndim < 2:
raise ValueError(
"DataArray must have at least two spatial dimensions. "
f"Found: {da.dims}."
)
grid, stackdims = Ugrid2d.from_structured(da, x, y, return_dims=True)
face_da = da.stack( # noqa: PD013
{grid.face_dimension: stackdims}, create_index=False
).drop_vars(stackdims, errors="ignore")
return UgridDataArray(face_da, grid)

@staticmethod
Expand Down Expand Up @@ -421,3 +413,89 @@ def from_geodataframe(geodataframe: "geopandas.GeoDataFrame"): # type: ignore #
grid = grid_from_geodataframe(geodataframe)
ds = xr.Dataset.from_dataframe(geodataframe.drop("geometry", axis=1))
return UgridDataset(ds, [grid])

@staticmethod
def from_structured(
dataset: xr.Dataset, topology: dict | None = None
) -> "UgridDataset":
"""
Create a UgridDataset from a (structured) xarray Dataset.
The spatial dimensions are flattened into a single UGRID face dimension.
By default, this method looks for:
1. ``"x"`` and ``"y"`` dimensions.
2. ``"longitude"`` and ``"latitude"`` dimensions.
3. ``"axis"`` attributes of "X" or "Y" on coordinates.
4. ``"standard_name"`` attributes of "longitude", "latitude",
"projection_x_coordinate", or "project_y_coordinate" on coordinate
variables.
Specify the x and y coordinate names explicitly otherwise, see the
examples.
Parameters
----------
dataset: xr.Dataset
topology: dict, optional, default is None.
Mapping of topology name to x and y coordinate variables.
If None, defaults to ``{"mesh2d": (None, None)}``.
Returns
-------
unstructured: UgridDataset
Examples
--------
By default, this method will look for ``"x"`` and ``"y"``
coordinates and returns a UgriDataset with a Ugrid topology named
mesh2d:
>>> uds = xugrid.UgridDataset.from_structured(dataset)
In case of other names, the name of the resulting UGRID topology and
the x and y coordinates must be specified:
>>> uds = xugrid.UgridDataset.from_structured(
>>> dataset,
>>> topology={"my_mesh2d": ("xc", "yc")},
>>> )
In case of multiple grid topologies in a single dataset, the names must
be specified as well:
>>> uds = xugrid.UgridDataset.from_structured(
>>> dataset,
>>> topology={"mesh2d_xy": ("x", "y"), "mesh2d_lonlat": {"lon", "lat"},
>>> )
"""
if topology is None:
topology = {"mesh2d": (None, None)}

grids = []
dss = []
for name, (x, y) in topology.items():
grid, stackdims = Ugrid2d.from_structured(
dataset, x=x, y=y, name=name, return_dims=True
)
# Use subset to check that ALL dims of stackdims are present in the
# variable.
checkdims = set(stackdims)
ugrid_vars = [
name
for name, var in dataset.data_vars.items()
if checkdims.issubset(var.dims)
]
dss.append(
dataset[ugrid_vars] # noqa: PD013
.stack({grid.face_dimension: stackdims})
.drop_vars(stackdims + (grid.face_dimension,))
)
grids.append(grid)
# Add the original dataset to include all non-UGRID variables.
dss.append(dataset)
# Then merge with compat="override". This'll pick the first available
# variable: i.e. it will prioritize the UGRID form.
merged = xr.merge(dss, compat="override")
return UgridDataset(merged, grids)
Loading

0 comments on commit d12fd75

Please sign in to comment.