Skip to content

Commit

Permalink
Adding cov() functionality with tests (#1129)
Browse files Browse the repository at this point in the history
* Adding cov() functionality with tests. Base function largely taken from numpy's cov() implementation Signed-off-by: Joseph Guman <[email protected]>

* Update cunumeric/module.py

Co-authored-by: Manolis Papadakis <[email protected]>

* Update cunumeric/module.py

Co-authored-by: Manolis Papadakis <[email protected]>

* Update cunumeric/module.py

Co-authored-by: Manolis Papadakis <[email protected]>

* Update cunumeric/module.py

Co-authored-by: Manolis Papadakis <[email protected]>

* Update cunumeric/module.py

Co-authored-by: Manolis Papadakis <[email protected]>

* Update cunumeric/module.py

Co-authored-by: Manolis Papadakis <[email protected]>

* Additional minor changes for style and functionality signed-off-by: Joseph Guman <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fixing unresolvable style issue signed-off-by: Joseph Guman <[email protected]>

* Use clip to avoid creating an intermediate

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Joseph Thomas Guman <[email protected]>
Co-authored-by: Manolis Papadakis <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
4 people authored Mar 8, 2024
1 parent 5838fa6 commit 119dd6d
Show file tree
Hide file tree
Showing 4 changed files with 273 additions and 0 deletions.
14 changes: 14 additions & 0 deletions cunumeric/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2257,6 +2257,20 @@ def clip(
Multiple GPUs, Multiple CPUs
"""
min = (
min
if min is not None
else np.iinfo(self.dtype).min
if self.dtype.kind == "i"
else -np.inf
)
max = (
max
if max is not None
else np.iinfo(self.dtype).max
if self.dtype.kind == "i"
else np.inf
)
args = (
np.array(min, dtype=self.dtype),
np.array(max, dtype=self.dtype),
Expand Down
162 changes: 162 additions & 0 deletions cunumeric/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -7545,6 +7545,168 @@ def var(
)


@add_boilerplate("m", "y", "fweights", "aweights")
def cov(
m: ndarray,
y: Optional[ndarray] = None,
rowvar: bool = True,
bias: bool = False,
ddof: Optional[int] = None,
fweights: Optional[ndarray] = None,
aweights: Optional[ndarray] = None,
*,
dtype: Optional[np.dtype[Any]] = None,
) -> ndarray:
"""
Estimate a covariance matrix, given data and weights.
Covariance indicates the level to which two variables vary together.
If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`,
then the covariance matrix element :math:`C_{ij}` is the covariance of
:math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance
of :math:`x_i`.
Parameters
----------
m : array_like
A 1-D or 2-D array containing multiple variables and observations.
Each row of `m` represents a variable, and each column a single
observation of all those variables. Also see `rowvar` below.
y : array_like, optional
An additional set of variables and observations. `y` has the same form
as that of `m`.
rowvar : bool, optional
If `rowvar` is True (default), then each row represents a
variable, with observations in the columns. Otherwise, the relationship
is transposed: each column represents a variable, while the rows
contain observations.
bias : bool, optional
Default normalization (False) is by ``(N - 1)``, where ``N`` is the
number of observations given (unbiased estimate). If `bias` is True,
then normalization is by ``N``. These values can be overridden by using
the keyword ``ddof``.
ddof : int, optional
If not ``None`` the default value implied by `bias` is overridden.
Note that ``ddof=1`` will return the unbiased estimate, even if both
`fweights` and `aweights` are specified, and ``ddof=0`` will return
the simple average. The default value is ``None``.
fweights : array_like, int, optional
1-D array of integer frequency weights; the number of times each
observation vector should be repeated.
aweights : array_like, optional
1-D array of observation vector weights. These relative weights are
typically large for observations considered "important" and smaller for
observations considered less "important". If ``ddof=0`` the array of
weights can be used to assign probabilities to observation vectors.
dtype : data-type, optional
Data-type of the result. By default, the return data-type will have
at least `float64` precision.
Returns
-------
out : ndarray
The covariance matrix of the variables.
See Also
--------
numpy.cov
Availability
--------
Multiple GPUs, Multiple CPUs
"""
# Check inputs
if ddof is not None and type(ddof) is not int:
raise ValueError("ddof must be integer")

# Handles complex arrays too
if m.ndim > 2:
raise ValueError("m has more than 2 dimensions")

if y is not None and y.ndim > 2:
raise ValueError("y has more than 2 dimensions")

if dtype is None:
if y is None:
dtype = np.result_type(m.dtype, np.float64)
else:
dtype = np.result_type(m.dtype, y.dtype, np.float64)

X = array(m, ndmin=2, dtype=dtype)
if not rowvar and X.shape[0] != 1:
X = X.T
if X.shape[0] == 0:
return empty((0, 0))
if y is not None:
y = array(y, copy=False, ndmin=2, dtype=dtype)
if not rowvar and y.shape[0] != 1:
y = y.T
# TODO(mpapadakis): Could have saved on an intermediate copy of X in
# this case, if it was already of the right shape.
X = concatenate((X, y), axis=0)

if ddof is None:
if not bias:
ddof = 1
else:
ddof = 0

# Get the product of frequencies and weights
w: Union[ndarray, None] = None
if fweights is not None:
if fweights.ndim > 1:
raise RuntimeError("cannot handle multidimensional fweights")
if fweights.shape[0] != X.shape[1]:
raise RuntimeError("incompatible numbers of samples and fweights")
if any(fweights < 0):
raise ValueError("fweights cannot be negative")
w = fweights
if aweights is not None:
if aweights.ndim > 1:
raise RuntimeError("cannot handle multidimensional aweights")
if aweights.shape[0] != X.shape[1]:
raise RuntimeError("incompatible numbers of samples and aweights")
if any(aweights < 0):
raise ValueError("aweights cannot be negative")
if w is None:
w = aweights
else:
# Cannot be done in-place with *= when aweights.dtype != w.dtype
w = w * aweights

# TODO upon merge of average() replace this code
w_sum: Union[ndarray, int] = 0
if w is None:
avg = mean(X, axis=1)
w_sum = X.shape[1]
else:
w_sum = sum(w)
avg = sum(X * w, axis=1) / w_sum

# Determine the normalization
fact: Union[ndarray, float] = 0.0
if w is None:
fact = X.shape[1] - ddof
elif ddof == 0:
fact = w_sum
elif aweights is None:
fact = w_sum - ddof
else:
fact = w_sum - ddof * sum(w * aweights) / w_sum

fact = clip(fact, 0.0, None)

X -= avg[:, None]
if w is None:
X_T = X.T
else:
X_T = (X * w).T
c = dot(X, X_T.conj())
# Cannot be done in-place with /= when the dtypes differ
c = c / fact
return c.squeeze()


# Histograms


Expand Down
1 change: 1 addition & 0 deletions docs/cunumeric/source/api/statistics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Averages and variances
mean
nanmean
var
cov


Histograms
Expand Down
96 changes: 96 additions & 0 deletions tests/integration/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,102 @@ def test_var_xfail(dtype, ddof, axis, shape):
check_op(op_np, op_num, np_in, dtype, negative_test=True)


@pytest.mark.parametrize("dtype", dtypes)
@pytest.mark.parametrize("rowvar", [True, False])
@pytest.mark.parametrize("ddof", [None, 0, 1])
def test_cov(dtype, rowvar, ddof):
np_in = get_op_input(astype=dtype)
num_in = num.array(np_in)

np_out = np.cov(np_in, rowvar=rowvar, ddof=ddof)
num_out = num.cov(num_in, rowvar=rowvar, ddof=ddof)
if dtype == dtypes[0]:
assert allclose(np_out, num_out, atol=1e-2)
else:
assert allclose(np_out, num_out)


fweights_base = [[9, 2, 1, 2, 3], [1, 1, 3, 2, 4], None]
np_aweights_base = [
np.abs(get_op_input(astype=dtype, shape=(5,))) for dtype in dtypes
] + [[0.03, 0.04, 01.01, 0.02, 0.08], None]


@pytest.mark.parametrize("dtype", dtypes)
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("ddof", [None, 0, 1])
@pytest.mark.parametrize("fweights", fweights_base)
@pytest.mark.parametrize("np_aweights", np_aweights_base)
def test_cov_full(dtype, bias, ddof, fweights, np_aweights):
np_in = get_op_input(astype=dtype, shape=(4, 5))
num_in = num.array(np_in)
if fweights is not None:
np_fweights = np.array(fweights)
num_fweights = num.array(fweights)
else:
np_fweights = None
num_fweights = None
if isinstance(np_aweights, np.ndarray):
num_aweights = num.array(np_aweights)
else:
num_aweights = np_aweights
# num_aweights = None
# np_aweights = None

np_out = np.cov(
np_in, bias=bias, ddof=ddof, fweights=np_fweights, aweights=np_aweights
)
num_out = num.cov(
num_in,
bias=bias,
ddof=ddof,
fweights=num_fweights,
aweights=num_aweights,
)
# if dtype == dtypes[0]:
# assert allclose(np_out, num_out, atol=1e-2)
# else:
# assert allclose(np_out, num_out)
assert allclose(np_out, num_out, atol=1e-2)


@pytest.mark.parametrize("ddof", [None, 0, 1])
@pytest.mark.parametrize("fweights", fweights_base)
@pytest.mark.parametrize("np_aweights", np_aweights_base)
def test_cov_dtype_scaling(ddof, fweights, np_aweights):
np_in = np.array(
[
[1 + 3j, 1 - 1j, 2 + 2j, 4 + 3j, -1 + 2j],
[1 + 3j, 1 - 1j, 2 + 2j, 4 + 3j, -1 + 2j],
]
)
num_in = num.array(np_in)
if fweights is not None:
np_fweights = np.array(fweights)
num_fweights = num.array(fweights)
else:
np_fweights = None
num_fweights = None
if isinstance(np_aweights, np.ndarray):
num_aweights = num.array(np_aweights)
else:
num_aweights = np_aweights

np_out = np.cov(
np_in,
ddof=ddof,
fweights=np_fweights,
aweights=np_aweights,
)
num_out = num.cov(
num_in,
ddof=ddof,
fweights=num_fweights,
aweights=num_aweights,
)
assert allclose(np_out, num_out, atol=1e-2)


if __name__ == "__main__":
import sys

Expand Down

0 comments on commit 119dd6d

Please sign in to comment.