Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat): [WIP] support physical quantities via Pint #1481

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ dependencies = [
"exceptiongroup; python_version<'3.11'",
"natsort",
"packaging>=20.0",
"pint",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might lean towards putting this in its own optional dependency section.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@flying-sheep thoughts?

# array-api-compat 1.5 has https://github.com/scverse/anndata/issues/1410
"array_api_compat>1.4,!=1.5",
]
Expand Down
3 changes: 2 additions & 1 deletion src/anndata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Backport package for exception groups
import exceptiongroup # noqa: F401

from ._core.anndata import AnnData
from ._core.anndata import AnnData, units
from ._core.merge import concat
from ._core.raw import Raw
from ._io import (
Expand Down Expand Up @@ -81,4 +81,5 @@ def read(*args, **kwargs):
"ExperimentalFeatureWarning",
"experimental",
"settings",
"units",
]
3 changes: 3 additions & 0 deletions src/anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from natsort import natsorted
from numpy import ma
from pandas.api.types import infer_dtype
from pint import UnitRegistry
from scipy import sparse
from scipy.sparse import issparse

Expand Down Expand Up @@ -66,6 +67,8 @@
if TYPE_CHECKING:
from os import PathLike

units = UnitRegistry()


class StorageType(Enum):
Array = np.ndarray
Expand Down
23 changes: 22 additions & 1 deletion src/anndata/_io/specs/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from scipy import sparse

import anndata as ad
from anndata import AnnData, Raw
from anndata import AnnData, Raw, units
from anndata._core import views
from anndata._core.index import _normalize_indices
from anndata._core.merge import intersect_keys
Expand Down Expand Up @@ -293,6 +293,27 @@
_writer.write_elem(g, "varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)


########
# Pint #
########


@_REGISTRY.register_read(H5Group, IOSpec("pint.Quantity", "0.1.0"))
@_REGISTRY.register_read(ZarrGroup, IOSpec("pint.Quantity", "0.1.0"))
def read_quantity(elem, _reader):
v_magnitude = _reader.read_elem(elem["magnitude"])
v_units = units[_reader.read_elem(elem["units"])]
return v_magnitude * v_units

Check warning on line 306 in src/anndata/_io/specs/methods.py

View check run for this annotation

Codecov / codecov/patch

src/anndata/_io/specs/methods.py#L304-L306

Added lines #L304 - L306 were not covered by tests


@_REGISTRY.register_write(H5Group, units.Quantity, IOSpec("pint.Quantity", "0.1.0"))
@_REGISTRY.register_write(ZarrGroup, units.Quantity, IOSpec("pint.Quantity", "0.1.0"))
def write_quantity(f, k, v, _writer, dataset_kwargs=MappingProxyType({})):
g = f.require_group(k)
_writer.write_elem(g, "magnitude", v.magnitude, dataset_kwargs=dataset_kwargs)
_writer.write_elem(g, "units", str(v.units), dataset_kwargs=dataset_kwargs)

Check warning on line 314 in src/anndata/_io/specs/methods.py

View check run for this annotation

Codecov / codecov/patch

src/anndata/_io/specs/methods.py#L312-L314

Added lines #L312 - L314 were not covered by tests
Comment on lines +313 to +314
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the units should be written into the attrs instead of being given their own array. It is just a short string, right? This would also make it easier for interoperability purposes. Arrays with this extra metadata wouldn't break existing readers.



############
# Mappings #
############
Expand Down
14 changes: 14 additions & 0 deletions src/anndata/tests/test_readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,17 @@ def test_io_dtype(tmp_path, diskfmt, dtype):
curr = read(pth)

assert curr.X.dtype == dtype


def test_readwrite_units(read, write, name, tmp_path):
X_arr = np.array(X_list)
adata = ad.AnnData(
X=X_arr,
uns={"size": 100 * ad.units["um"]},
obsm={"X_spatial": X_arr * ad.units.mm},
)
write(tmp_path / name, adata)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd follow the pattern from above, unless I'm missing a good reason not to i.e.,

read = lambda pth: getattr(ad, f"read_{diskfmt}")(pth)
write = lambda adata, pth: getattr(adata, f"write_{diskfmt}")(pth)

and the corresponding fixtures for the diskfmt

ad_read = read(tmp_path / name)

assert adata.uns["spot_size"] == ad_read.uns["spot_size"]
assert (adata.obsm["X_spatial_units"] == ad_read.obsm["X_spatial_units"]).all()
Loading