Skip to content

Commit

Permalink
Merge branch 'docs' of https://github.com/andrewgsavage/pint-pandas i…
Browse files Browse the repository at this point in the history
…nto docs
  • Loading branch information
andrewgsavage committed Sep 5, 2023
2 parents 6f94a94 + 64017fc commit 19322e5
Show file tree
Hide file tree
Showing 5 changed files with 304 additions and 62 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
matrix:
python-version: [3.9, "3.10", "3.11"]
numpy: ["numpy>=1.20.3,<2.0.0"]
pandas: ["pandas==2.0.2", ]
pandas: ["pandas==2.0.2", "pandas==2.1.0rc0" ]
pint: ["pint>=0.21.1", "pint==0.22"]

runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ pint-pandas Changelog
----------------

- ReadTheDocs Documentation created.
- Support for Pandas version 2.1.0. #196
- Support for dtype-preserving `PintArray.map` for both Pandas 2.0.2 and Pandas 2.1. #196
- Support for <NA> values in columns with integer magnitudes
- Support for magnitudes of any type, such as complex128 or tuples #146
- Support for pandas 2.0, allowing `.cumsum, .cummax, .cummin` methods for `Series` and `DataFrame`. #186
- Minimum Pint version is 0.21
- Minimum Pandas version is 2.0
Expand Down
126 changes: 97 additions & 29 deletions pint_pandas/pint_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import re
import warnings
from collections import OrderedDict
from importlib.metadata import version

import numpy as np
import pandas as pd
import pint
from pandas import DataFrame, Series
from pandas import DataFrame, Series, Index
from pandas.api.extensions import (
ExtensionArray,
ExtensionDtype,
Expand All @@ -27,6 +28,11 @@
# quantify/dequantify
NO_UNIT = "No Unit"

# Version string of the installed pandas distribution, read from package metadata.
pandas_version = version("pandas")
# Dot-separated components of that string. Components made only of digits are
# converted to int so the tuple can be compared with e.g. ``(2, 1)``; any other
# component (such as a pre-release suffix) is kept as the original string.
_version_pieces = pandas_version.split(".")
pandas_version_info = tuple(
    int(piece) if piece.isdigit() else piece for piece in _version_pieces
)


class PintType(ExtensionDtype):
"""
Expand Down Expand Up @@ -65,7 +71,7 @@ def __new__(cls, units=None):
if not isinstance(units, _Unit):
units = cls._parse_dtype_strict(units)
# ureg.unit returns a quantity with a magnitude of 1
# eg 1 mm. Initialising a quantity and taking it's unit
# eg 1 mm. Initialising a quantity and taking its unit
# TODO: Separate units from quantities in pint
# to simplify this bit
units = cls.ureg.Quantity(1, units).units
Expand Down Expand Up @@ -185,6 +191,12 @@ def __repr__(self):
return self.name


# Pick the pandas dtype class used to wrap plain NumPy dtypes: ``PandasDtype``
# for pandas releases before 2.1, ``NumpyEADtype`` from 2.1 onwards.
if pandas_version_info < (2, 1):
    _NumpyEADtype = pd.core.dtypes.dtypes.PandasDtype
else:
    _NumpyEADtype = pd.core.dtypes.dtypes.NumpyEADtype

dtypemap = {
int: pd.Int64Dtype(),
np.int64: pd.Int64Dtype(),
Expand All @@ -195,8 +207,8 @@ def __repr__(self):
float: pd.Float64Dtype(),
np.float64: pd.Float64Dtype(),
np.float32: pd.Float32Dtype(),
np.complex128: pd.core.dtypes.dtypes.PandasDtype("complex128"),
np.complex64: pd.core.dtypes.dtypes.PandasDtype("complex64"),
np.complex128: _NumpyEADtype("complex128"),
np.complex64: _NumpyEADtype("complex64"),
# np.float16: pd.Float16Dtype(),
}
dtypeunmap = {v: k for k, v in dtypemap.items()}
Expand Down Expand Up @@ -250,7 +262,6 @@ def __init__(self, values, dtype=None, copy=False):
copy = False
elif not isinstance(values, pd.core.arrays.numeric.NumericArray):
values = pd.array(values, copy=copy)
copy = False
if copy:
values = values.copy()
self._data = values
Expand Down Expand Up @@ -311,10 +322,14 @@ def __setitem__(self, key, value):

if isinstance(value, _Quantity):
value = value.to(self.units).magnitude
elif is_list_like(value) and len(value) > 0 and isinstance(value[0], _Quantity):
value = [item.to(self.units).magnitude for item in value]
elif is_list_like(value) and len(value) > 0:
if isinstance(value[0], _Quantity):
value = [item.to(self.units).magnitude for item in value]
if len(value) == 1:
value = value[0]

key = check_array_indexer(self, key)
# Filter out invalid values for our array type(s)
try:
self._data[key] = value
except IndexError as e:
Expand Down Expand Up @@ -458,7 +473,8 @@ def take(self, indices, allow_fill=False, fill_value=None):
Examples
--------
"""
from pandas.core.algorithms import take, is_scalar
from pandas.core.algorithms import take
from pandas.api.types import is_scalar

data = self._data
if allow_fill and fill_value is None:
Expand All @@ -470,7 +486,10 @@ def take(self, indices, allow_fill=False, fill_value=None):
# magnitude is in fact an array scalar, which will get rejected by pandas.
fill_value = fill_value[()]

result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# Turn off warning that PandasArray is deprecated for ``take``
result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)

return PintArray(result, dtype=self.dtype)

Expand Down Expand Up @@ -512,18 +531,12 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
raise ValueError(
"Cannot infer dtype. No dtype specified and empty array"
)
if dtype is None and not isinstance(master_scalar, _Quantity):
raise ValueError("No dtype specified and not a sequence of quantities")
if dtype is None and isinstance(master_scalar, _Quantity):
if dtype is None:
if not isinstance(master_scalar, _Quantity):
raise ValueError("No dtype specified and not a sequence of quantities")
dtype = PintType(master_scalar.units)

def quantify_nan(item):
if type(item) is float:
return item * dtype.units
return item

if isinstance(master_scalar, _Quantity):
scalars = [quantify_nan(item) for item in scalars]
scalars = [
(item.to(dtype.units).magnitude if hasattr(item, "to") else item)
for item in scalars
Expand All @@ -538,10 +551,21 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False):

@classmethod
def _from_factorized(cls, values, original):
    """
    Rebuild an array of this class from factorized ``values``.

    Parameters
    ----------
    values : array-like
        The values to wrap. Unless pandas infers them as ``"object"``, they
        are first converted with ``pd.array`` (without copying).
    original : array with a ``dtype`` attribute
        The array that was factorized; its dtype is reused for the result.

    Returns
    -------
    An instance of ``cls`` holding ``values`` with ``original.dtype``.
    """
    from pandas.api.types import infer_dtype

    holds_objects = infer_dtype(values) == "object"
    if not holds_objects:
        # Non-object data is handed to pandas to choose an array type.
        values = pd.array(values, copy=False)
    return cls(values, dtype=original.dtype)

def _values_for_factorize(self):
return self._data._values_for_factorize()
# factorize can now handle differentiating various types of null values.
# These can only occur when the array has object dtype.
# However, for backwards compatibility we only use the null for the
# provided dtype. This may be revisited in the future, see GH#48476.
arr = self._data
if arr.dtype.kind == "O":
return np.array(arr, copy=False), self.dtype.na_value
return arr._values_for_factorize()

def value_counts(self, dropna=True):
"""
Expand All @@ -567,18 +591,19 @@ def value_counts(self, dropna=True):

# compute counts on the data with no nans
data = self._data
nafilt = np.isnan(data)
nafilt = pd.isna(data)
na_value = pd.NA # NA value for index, not data, so not quantified
data = data[~nafilt]
index = list(set(data))

data_list = data.tolist()
index = list(set(data))
array = [data_list.count(item) for item in index]

if not dropna:
index.append(np.nan)
index.append(na_value)
array.append(nafilt.sum())

return Series(array, index=index)
return Series(np.asarray(array), index=index)

def unique(self):
"""Compute the PintArray of unique values.
Expand All @@ -589,7 +614,8 @@ def unique(self):
"""
from pandas import unique

return self._from_sequence(unique(self._data), dtype=self.dtype)
data = self._data
return self._from_sequence(unique(data), dtype=self.dtype)

def __contains__(self, item) -> bool:
if not isinstance(item, _Quantity):
Expand Down Expand Up @@ -691,7 +717,7 @@ def convert_values(param):
else:
return param

if isinstance(other, (Series, DataFrame)):
if isinstance(other, (Series, DataFrame, Index)):
return NotImplemented
lvalues = self.quantity
validate_length(lvalues, other)
Expand Down Expand Up @@ -740,7 +766,9 @@ def __array__(self, dtype=None, copy=False):

def _to_array_of_quantity(self, copy=False):
qtys = [
self._Q(item, self._dtype.units) if not pd.isna(item) else item
self._Q(item, self._dtype.units)
if not pd.isna(item)
else self.dtype.na_value
for item in self._data
]
with warnings.catch_warnings(record=True):
Expand Down Expand Up @@ -798,7 +826,42 @@ def searchsorted(self, value, side="left", sorter=None):
value = [item.to(self.units).magnitude for item in value]
return arr.searchsorted(value, side=side, sorter=sorter)

def _reduce(self, name, **kwds):
def map(self, mapper, na_action=None):
    """
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        Passed through unchanged to the pandas mapping routine
        (``Series.map`` before pandas 2.1, ``map_array`` from 2.1 on).

    Returns
    -------
    PintArray or array-like
        If at least one mapped value has a ``units`` attribute, a PintArray
        built from the mapped values, whose dtype uses the units of the
        first such value. Otherwise the mapped values are returned exactly
        as pandas produced them.
    """
    if pandas_version_info < (2, 1):
        # Older pandas: map over a Series of quantities.
        quantities = pd.Series(self._to_array_of_quantity())
        mapped = quantities.map(mapper, na_action).values
    else:
        from pandas.core.algorithms import map_array

        mapped = map_array(self, mapper, na_action)

    # The first result carrying units decides the dtype of the output.
    first_with_units = next(
        (item for item in mapped if hasattr(item, "units")), None
    )
    if first_with_units is None:
        # JSON mapper formatting Qs as str don't create PintArrays
        # ...and that's OK. Caller will get array of values
        return mapped
    return PintArray._from_sequence(mapped, PintType(first_with_units.units))

def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds):
"""
Return a scalar result of performing the reduction operation.
Expand Down Expand Up @@ -842,14 +905,20 @@ def _reduce(self, name, **kwds):

if isinstance(self._data, ExtensionArray):
try:
result = self._data._reduce(name, **kwds)
result = self._data._reduce(
name, skipna=skipna, keepdims=keepdims, **kwds
)
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)

if name in {"all", "any", "kurt", "skew"}:
return result
if name == "var":
if keepdims:
return PintArray(result, f"pint[({self.units})**2]")
return self._Q(result, self.units**2)
if keepdims:
return PintArray(result, self.dtype)
return self._Q(result, self.units)

def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
Expand All @@ -866,7 +935,6 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
result = self._data._accumulate(name, **kwds)
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)
print(result)

return self._from_sequence(result, self.units)

Expand Down
24 changes: 21 additions & 3 deletions pint_pandas/testsuite/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@

import numpy as np
import pandas as pd
import pandas._testing as tm
import pytest
import pint
from pandas.tests.extension.base.base import BaseExtensionTests
from pint.testsuite import helpers

from pint_pandas import PintArray, PintType
from pint_pandas.pint_array import pandas_version_info

ureg = PintType.ureg

Expand Down Expand Up @@ -41,7 +43,7 @@ def test_force_ndarray_like(self):
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
)
self.assert_equal(result, expected)
tm.assert_equal(result, expected)

finally:
# restore registry
Expand All @@ -64,7 +66,7 @@ def test_offset_concat(self):
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
)
self.assert_equal(result, expected)
tm.assert_equal(result, expected)

# issue #141
print(PintArray(q_a))
Expand All @@ -80,7 +82,7 @@ def test_assignment_add_empty(self):
result = pd.Series(data)
result[[]] += data[0]
expected = pd.Series(data)
self.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)


class TestIssue80:
Expand Down Expand Up @@ -167,3 +169,19 @@ def test_issue_127():
a = PintType.construct_from_string("pint[dimensionless]")
b = PintType.construct_from_string("pint[]")
assert a == b


class TestIssue174(BaseExtensionTests):
    """Checks that ``DataFrame.sum`` on ``pint[m]`` columns keeps the pint dtype."""

    def test_sum(self):
        """Sum a 2x3 ``pint[m]`` frame along each axis and compare dtypes and values."""
        if pandas_version_info < (2, 1):
            pytest.skip("Pandas reduce functions strip units prior to version 2.1.0")

        frame = pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]")

        # axis=0 collapses the rows, leaving one total per column.
        tm.assert_series_equal(
            frame.sum(axis=0),
            pd.Series([3, 5, 7], dtype="pint[m]"),
        )

        # axis=1 collapses the columns, leaving one total per row.
        tm.assert_series_equal(
            frame.sum(axis=1),
            pd.Series([3, 12], dtype="pint[m]"),
        )
Loading

0 comments on commit 19322e5

Please sign in to comment.