Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partial collapse of multi-dim string coords: take 2 #5955

Merged
merged 13 commits into from
Aug 30, 2024
3 changes: 3 additions & 0 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ This document explains the changes made to Iris for this release

#. N/A

#. `@rcomer`_ enabled partial collapse of multi-dimensional string coordinates,
fixing :issue:`3653`. (:pull:`5955`)


💣 Incompatible Changes
=======================
Expand Down
41 changes: 29 additions & 12 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -2115,22 +2115,39 @@ def collapsed(self, dims_to_collapse=None):
if np.issubdtype(self.dtype, np.str_):
# Collapse the coordinate by serializing the points and
# bounds as strings.
def serialize(x):
return "|".join([str(i) for i in x.flatten()])
def serialize(x, axis):
    """Join the string form of the elements of ``x`` with "|" along ``axis``.

    Parameters
    ----------
    x : numpy.ndarray
        Array whose elements are converted with ``str`` and joined. Masked
        arrays are accepted.
    axis : None, int or iterable of int
        Dimension(s) of ``x`` to collapse. ``None`` joins every element of
        ``x`` into one string.

    Returns
    -------
    str or numpy.ndarray
        A single ``str`` when ``axis`` is ``None``; otherwise a string array
        with the shape of ``x`` minus the collapsed dimensions.

    """
    if axis is None:
        return "|".join(str(i) for i in x.flatten())

    # Accept a single dimension as well as any iterable of dimensions.
    if isinstance(axis, (int, np.integer)):
        axis = (axis,)
    else:
        axis = tuple(axis)
    n_collapsed = len(axis)

    # np.apply_along_axis combined with str.join will truncate strings in
    # some cases (https://github.com/numpy/numpy/issues/8352), so we need to
    # loop through the array directly. First move the (possibly multiple) axes
    # of interest to the trailing dim(s), then make a 2D array we can loop
    # through.
    work_array = np.moveaxis(x, axis, tuple(range(-n_collapsed, 0)))

    # Split on a non-negative index: shape[:-0] would wrongly select nothing
    # when no axes are being collapsed.
    n_kept = work_array.ndim - n_collapsed
    out_shape = work_array.shape[:n_kept]
    join_shape = work_array.shape[n_kept:]

    # Give both lengths explicitly: reshape(n, -1) cannot infer the second
    # length when n is zero.
    work_array = work_array.reshape(
        np.prod(out_shape, dtype=int), np.prod(join_shape, dtype=int)
    )

    joined = [serialize(arr_slice, None) for arr_slice in work_array]

    # dtype=str keeps a string dtype even when there is nothing to join.
    return np.array(joined, dtype=str).reshape(out_shape)

bounds = None
if self.has_bounds():
shape = self._bounds_dm.shape[1:]
bounds = []
for index in np.ndindex(shape):
index_slice = (slice(None),) + tuple(index)
bounds.append(serialize(self.bounds[index_slice]))
stephenworsley marked this conversation as resolved.
Show resolved Hide resolved
dtype = np.dtype("U{}".format(max(map(len, bounds))))
bounds = np.array(bounds, dtype=dtype).reshape((1,) + shape)
points = serialize(self.points)
dtype = np.dtype("U{}".format(len(points)))
# Express dims_to_collapse as non-negative integers.
if dims_to_collapse is None:
dims_to_collapse = range(self.ndim)
else:
dims_to_collapse = tuple(
dim % self.ndim for dim in dims_to_collapse
)
bounds = serialize(self.bounds, dims_to_collapse)

points = serialize(self.points, dims_to_collapse)
# Create the new collapsed coordinate.
coord = self.copy(points=np.array(points, dtype=dtype), bounds=bounds)
coord = self.copy(points=np.array(points), bounds=bounds)
stephenworsley marked this conversation as resolved.
Show resolved Hide resolved
else:
# Collapse the coordinate by calculating the bounded extremes.
if self.ndim > 1:
Expand Down
107 changes: 107 additions & 0 deletions lib/iris/tests/unit/coords/test_Coord.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import cf_units
import dask.array as da
import numpy as np
import numpy.ma as ma
import pytest

import iris
Expand Down Expand Up @@ -701,6 +702,112 @@ def test_lazy_3_bounds(self):
self.assertArrayAlmostEqual(collapsed_coord.points, da.array([2.0]))
self.assertArrayAlmostEqual(collapsed_coord.bounds, da.array([[0.0, 4.0]]))

def test_string_masked(self):
    """Collapse a string coordinate whose points contain a masked entry.

    The masked entry is expected to appear as "--" in the joined string.
    """
    points = ma.array(["foo", "bar", "bing"], mask=[0, 1, 0], dtype=str)
    coord = AuxCoord(points)

    collapsed_coord = coord.collapsed(0)

    # Compare as arrays, as the other string-collapse tests do: assertEqual
    # on an ndarray only works through single-element truthiness.
    expected = ["foo|--|bing"]
    self.assertArrayEqual(collapsed_coord.points, expected)

def test_string_nd_first(self):
    """Collapse only dimension 0 of a multi-dimensional string coordinate."""
    self.setupTestArrays((3, 4))
    string_coord = AuxCoord(self.pts_real.astype(str))

    result = string_coord.collapsed(0)

    # One "|"-joined string is expected per entry along the kept dimension.
    self.assertArrayEqual(
        result.points,
        [
            "0.0|40.0|80.0",
            "10.0|50.0|90.0",
            "20.0|60.0|100.0",
            "30.0|70.0|110.0",
        ],
    )

def test_string_nd_second(self):
    """Collapse only dimension 1 of a multi-dimensional string coordinate."""
    self.setupTestArrays((3, 4))
    string_coord = AuxCoord(self.pts_real.astype(str))

    result = string_coord.collapsed(1)

    # One "|"-joined string is expected per entry along the kept dimension.
    self.assertArrayEqual(
        result.points,
        [
            "0.0|10.0|20.0|30.0",
            "40.0|50.0|60.0|70.0",
            "80.0|90.0|100.0|110.0",
        ],
    )

def test_string_nd_both(self):
    """Collapse a multi-dimensional string coordinate over all dimensions."""
    self.setupTestArrays((3, 4))
    string_coord = AuxCoord(self.pts_real.astype(str))

    # No dimensions given: every point is joined into a single string.
    result = string_coord.collapsed()

    self.assertArrayEqual(
        result.points,
        ["0.0|10.0|20.0|30.0|40.0|50.0|60.0|70.0|80.0|90.0|100.0|110.0"],
    )

def test_string_nd_bounds_first(self):
    """Collapse dimension 0 of a bounded string coordinate; check bounds."""
    self.setupTestArrays((3, 4))
    bounded_coord = AuxCoord(
        self.pts_real.astype(str), bounds=self.bds_real.astype(str)
    )

    result = bounded_coord.collapsed(0)

    # Points are covered by the unbounded tests, so only the lower and upper
    # bounds are compared here.
    lower_expected = [
        "-2.0|38.0|78.0",
        "8.0|48.0|88.0",
        "18.0|58.0|98.0",
        "28.0|68.0|108.0",
    ]
    upper_expected = [
        "2.0|42.0|82.0",
        "12.0|52.0|92.0",
        "22.0|62.0|102.0",
        "32.0|72.0|112.0",
    ]
    self.assertArrayEqual(result.bounds[:, 0], lower_expected)
    self.assertArrayEqual(result.bounds[:, 1], upper_expected)

def test_string_nd_bounds_second(self):
    """Collapse dimension 1 of a bounded string coordinate; check bounds."""
    self.setupTestArrays((3, 4))
    bounded_coord = AuxCoord(
        self.pts_real.astype(str), bounds=self.bds_real.astype(str)
    )

    result = bounded_coord.collapsed(1)

    # Points are covered by the unbounded tests, so only the lower and upper
    # bounds are compared here.
    lower_expected = [
        "-2.0|8.0|18.0|28.0",
        "38.0|48.0|58.0|68.0",
        "78.0|88.0|98.0|108.0",
    ]
    upper_expected = [
        "2.0|12.0|22.0|32.0",
        "42.0|52.0|62.0|72.0",
        "82.0|92.0|102.0|112.0",
    ]
    self.assertArrayEqual(result.bounds[:, 0], lower_expected)
    self.assertArrayEqual(result.bounds[:, 1], upper_expected)

def test_string_nd_bounds_both(self):
    """Collapse a bounded string coordinate over all dimensions; check bounds."""
    self.setupTestArrays((3, 4))
    bounded_coord = AuxCoord(
        self.pts_real.astype(str), bounds=self.bds_real.astype(str)
    )

    # No dimensions given: the coordinate is collapsed over every dimension.
    result = bounded_coord.collapsed()

    # Points are covered by the unbounded tests, so only the lower and upper
    # bounds are compared here.
    lower_expected = [
        "-2.0|8.0|18.0|28.0|38.0|48.0|58.0|68.0|78.0|88.0|98.0|108.0"
    ]
    upper_expected = [
        "2.0|12.0|22.0|32.0|42.0|52.0|62.0|72.0|82.0|92.0|102.0|112.0"
    ]
    self.assertArrayEqual(result.bounds[:, 0], lower_expected)
    self.assertArrayEqual(result.bounds[:, 1], upper_expected)


class Test_is_compatible(tests.IrisTest):
def setUp(self):
Expand Down
Loading