Skip to content

Commit

Permalink
Merge #196
Browse files Browse the repository at this point in the history
196: Pandas21 compat r=andrewgsavage a=MichaelTiemannOSC

- [ ] Closes # (insert issue number)
- [x] Executed `pre-commit run --all-files` with no errors
- [x] The change is fully covered by automated unit tests
- [ ] Documented in docs/ as appropriate
- [x] Added an entry to the CHANGES file

CI/CD doesn't quite work yet because there is no Pandas 2.1 release candidate to point to, but comments are welcome!

Co-authored-by: Michael Tiemann <[email protected]>
  • Loading branch information
bors[bot] and MichaelTiemannOSC authored Aug 14, 2023
2 parents 31a3055 + f5947ea commit c43c18b
Show file tree
Hide file tree
Showing 5 changed files with 287 additions and 60 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
matrix:
python-version: [3.9, "3.10", "3.11"]
numpy: ["numpy>=1.20.3,<2.0.0"]
pandas: ["pandas==2.0.2", ]
pandas: ["pandas==2.0.2", "pandas==2.1.0rc0" ]
pint: ["pint>=0.21.1", "pint==0.22"]

runs-on: ubuntu-latest
Expand Down
2 changes: 2 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ pint-pandas Changelog
----------------

- Support for Pandas version 2.1.0. #196
- Support for dtype-preserving `PintArray.map` for both Pandas 2.0.2 and Pandas 2.1. #196
- Support for <NA> values in columns with integer magnitudes
- Support for magnitudes of any type, such as complex128 or tuples #146
- Support for pandas 2.0, allowing `.cumsum, .cummax, .cummin` methods for `Series` and `DataFrame`. #186
Expand Down
107 changes: 81 additions & 26 deletions pint_pandas/pint_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import pandas as pd
import pint
from pandas import DataFrame, Series
from pandas import DataFrame, Series, Index
from pandas.api.extensions import (
ExtensionArray,
ExtensionDtype,
Expand Down Expand Up @@ -71,7 +71,7 @@ def __new__(cls, units=None):
if not isinstance(units, _Unit):
units = cls._parse_dtype_strict(units)
# ureg.unit returns a quantity with a magnitude of 1
# eg 1 mm. Initialising a quantity and taking it's unit
# eg 1 mm. Initialising a quantity and taking its unit
# TODO: Separate units from quantities in pint
# to simplify this bit
units = cls.ureg.Quantity(1, units).units
Expand Down Expand Up @@ -262,7 +262,6 @@ def __init__(self, values, dtype=None, copy=False):
copy = False
elif not isinstance(values, pd.core.arrays.numeric.NumericArray):
values = pd.array(values, copy=copy)
copy = False
if copy:
values = values.copy()
self._data = values
Expand Down Expand Up @@ -323,10 +322,14 @@ def __setitem__(self, key, value):

if isinstance(value, _Quantity):
value = value.to(self.units).magnitude
elif is_list_like(value) and len(value) > 0 and isinstance(value[0], _Quantity):
value = [item.to(self.units).magnitude for item in value]
elif is_list_like(value) and len(value) > 0:
if isinstance(value[0], _Quantity):
value = [item.to(self.units).magnitude for item in value]
if len(value) == 1:
value = value[0]

key = check_array_indexer(self, key)
# Filter out invalid values for our array type(s)
try:
self._data[key] = value
except IndexError as e:
Expand Down Expand Up @@ -483,7 +486,10 @@ def take(self, indices, allow_fill=False, fill_value=None):
# magnitude is in fact an array scalar, which will get rejected by pandas.
fill_value = fill_value[()]

result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# Turn off warning that PandasArray is deprecated for ``take``
result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)

return PintArray(result, dtype=self.dtype)

Expand Down Expand Up @@ -525,18 +531,12 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
raise ValueError(
"Cannot infer dtype. No dtype specified and empty array"
)
if dtype is None and not isinstance(master_scalar, _Quantity):
raise ValueError("No dtype specified and not a sequence of quantities")
if dtype is None and isinstance(master_scalar, _Quantity):
if dtype is None:
if not isinstance(master_scalar, _Quantity):
raise ValueError("No dtype specified and not a sequence of quantities")
dtype = PintType(master_scalar.units)

def quantify_nan(item):
if type(item) is float:
return item * dtype.units
return item

if isinstance(master_scalar, _Quantity):
scalars = [quantify_nan(item) for item in scalars]
scalars = [
(item.to(dtype.units).magnitude if hasattr(item, "to") else item)
for item in scalars
Expand All @@ -551,10 +551,21 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False):

@classmethod
def _from_factorized(cls, values, original):
    """Rebuild an array from factorized ``values`` using ``original``'s dtype.

    Parameters
    ----------
    values : array-like
        The unique values produced by factorization.
    original : object with a ``dtype`` attribute
        The array that was factorized; only its ``dtype`` is read here.

    Returns
    -------
    An instance of ``cls`` constructed from ``values`` with
    ``original.dtype``.
    """
    from pandas.api.types import infer_dtype

    # Values inferred as "object" are passed through untouched; anything
    # else is wrapped as a pandas array (without copying) first.
    holds_objects = infer_dtype(values) == "object"
    magnitudes = values if holds_objects else pd.array(values, copy=False)
    return cls(magnitudes, dtype=original.dtype)

def _values_for_factorize(self):
return self._data._values_for_factorize()
# factorize can now handle differentiating various types of null values.
# These can only occur when the array has object dtype.
# However, for backwards compatibility we only use the null for the
# provided dtype. This may be revisited in the future, see GH#48476.
arr = self._data
if arr.dtype.kind == "O":
return np.array(arr, copy=False), self.dtype.na_value
return arr._values_for_factorize()

def value_counts(self, dropna=True):
"""
Expand All @@ -580,18 +591,19 @@ def value_counts(self, dropna=True):

# compute counts on the data with no nans
data = self._data
nafilt = np.isnan(data)
nafilt = pd.isna(data)
na_value = pd.NA # NA value for index, not data, so not quantified
data = data[~nafilt]
index = list(set(data))

data_list = data.tolist()
index = list(set(data))
array = [data_list.count(item) for item in index]

if not dropna:
index.append(np.nan)
index.append(na_value)
array.append(nafilt.sum())

return Series(array, index=index)
return Series(np.asarray(array), index=index)

def unique(self):
"""Compute the PintArray of unique values.
Expand All @@ -602,7 +614,8 @@ def unique(self):
"""
from pandas import unique

return self._from_sequence(unique(self._data), dtype=self.dtype)
data = self._data
return self._from_sequence(unique(data), dtype=self.dtype)

def __contains__(self, item) -> bool:
if not isinstance(item, _Quantity):
Expand Down Expand Up @@ -704,7 +717,7 @@ def convert_values(param):
else:
return param

if isinstance(other, (Series, DataFrame)):
if isinstance(other, (Series, DataFrame, Index)):
return NotImplemented
lvalues = self.quantity
validate_length(lvalues, other)
Expand Down Expand Up @@ -753,7 +766,9 @@ def __array__(self, dtype=None, copy=False):

def _to_array_of_quantity(self, copy=False):
qtys = [
self._Q(item, self._dtype.units) if not pd.isna(item) else item
self._Q(item, self._dtype.units)
if not pd.isna(item)
else self.dtype.na_value
for item in self._data
]
with warnings.catch_warnings(record=True):
Expand Down Expand Up @@ -811,7 +826,42 @@ def searchsorted(self, value, side="left", sorter=None):
value = [item.to(self.units).magnitude for item in value]
return arr.searchsorted(value, side=side, sorter=sorter)

def _reduce(self, name, **kwds):
def map(self, mapper, na_action=None):
    """
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence. If 'ignore' is not supported, a
        ``NotImplementedError`` should be raised.

    Returns
    -------
    PintArray or array-like
        If at least one mapped value has a ``units`` attribute, a
        ``PintArray`` built from the mapped values, using the units of the
        first such value. Otherwise the mapped values are returned as-is
        (for example, when ``mapper`` formats each quantity as a string).
    """
    if pandas_version_info < (2, 1):
        # Before pandas 2.1: route the mapping through a Series of quantities.
        ser = pd.Series(self._to_array_of_quantity())
        arr = ser.map(mapper, na_action).values
    else:
        # pandas 2.1 and later: delegate to pandas' own map_array helper.
        from pandas.core.algorithms import map_array

        arr = map_array(self, mapper, na_action)

    # The first mapped value that carries units decides the result dtype.
    # Compare against None explicitly rather than relying on truthiness,
    # which is not a safe test for a quantity-like object.
    master_scalar = next((i for i in arr if hasattr(i, "units")), None)
    if master_scalar is None:
        # JSON mapper formatting Qs as str don't create PintArrays
        # ...and that's OK. Caller will get array of values
        return arr
    return PintArray._from_sequence(arr, PintType(master_scalar.units))

def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds):
"""
Return a scalar result of performing the reduction operation.
Expand Down Expand Up @@ -855,14 +905,20 @@ def _reduce(self, name, **kwds):

if isinstance(self._data, ExtensionArray):
try:
result = self._data._reduce(name, **kwds)
result = self._data._reduce(
name, skipna=skipna, keepdims=keepdims, **kwds
)
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)

if name in {"all", "any", "kurt", "skew"}:
return result
if name == "var":
if keepdims:
return PintArray(result, f"pint[({self.units})**2]")
return self._Q(result, self.units**2)
if keepdims:
return PintArray(result, self.dtype)
return self._Q(result, self.units)

def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
Expand All @@ -879,7 +935,6 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
result = self._data._accumulate(name, **kwds)
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)
print(result)

return self._from_sequence(result, self.units)

Expand Down
24 changes: 21 additions & 3 deletions pint_pandas/testsuite/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@

import numpy as np
import pandas as pd
import pandas._testing as tm
import pytest
import pint
from pandas.tests.extension.base.base import BaseExtensionTests
from pint.testsuite import helpers

from pint_pandas import PintArray, PintType
from pint_pandas.pint_array import pandas_version_info

ureg = PintType.ureg

Expand Down Expand Up @@ -41,7 +43,7 @@ def test_force_ndarray_like(self):
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
)
self.assert_equal(result, expected)
tm.assert_equal(result, expected)

finally:
# restore registry
Expand All @@ -64,7 +66,7 @@ def test_offset_concat(self):
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
)
self.assert_equal(result, expected)
tm.assert_equal(result, expected)

# issue #141
print(PintArray(q_a))
Expand All @@ -80,7 +82,7 @@ def test_assignment_add_empty(self):
result = pd.Series(data)
result[[]] += data[0]
expected = pd.Series(data)
self.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)


class TestIssue80:
Expand Down Expand Up @@ -167,3 +169,19 @@ def test_issue_127():
a = PintType.construct_from_string("pint[dimensionless]")
b = PintType.construct_from_string("pint[]")
assert a == b


class TestIssue174(BaseExtensionTests):
    """Checks that summing a pint-typed DataFrame keeps its units."""

    def test_sum(self):
        """Sum a 2x3 ``pint[m]`` frame along each axis and compare dtypes and values."""
        if pandas_version_info < (2, 1):
            pytest.skip("Pandas reduce functions strip units prior to version 2.1.0")

        frame = pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]")

        # axis=0 collapses the rows, leaving one total per column.
        per_column = frame.sum(axis=0)
        want_per_column = pd.Series([3, 5, 7], dtype="pint[m]")
        tm.assert_series_equal(per_column, want_per_column)

        # axis=1 collapses the columns, leaving one total per row.
        per_row = frame.sum(axis=1)
        want_per_row = pd.Series([3, 12], dtype="pint[m]")
        tm.assert_series_equal(per_row, want_per_row)
Loading

0 comments on commit c43c18b

Please sign in to comment.