Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pandas21 compat #196

Merged
merged 14 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
matrix:
python-version: [3.9, "3.10", "3.11"]
numpy: ["numpy>=1.20.3,<2.0.0"]
pandas: ["pandas==2.0.2", ]
pandas: ["pandas==2.0.2", "pandas==2.1.0rc0" ]
pint: ["pint>=0.21.1", "pint==0.22"]

runs-on: ubuntu-latest
Expand Down
2 changes: 2 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ pint-pandas Changelog
----------------

<<<<<<< HEAD
- Support for Pandas version 2.1.0. #196
- Support for dtype-preserving `PintArray.map` for both Pandas 2.0.2 and Pandas 2.1. #196
- Support for <NA> values in columns with integer magnitudes
- Support for magnitudes of any type, such as complex128 or tuples #146
- Support for pandas 2.0, allowing `.cumsum, .cummax, .cummin` methods for `Series` and `DataFrame`. #186
Expand Down
107 changes: 81 additions & 26 deletions pint_pandas/pint_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import pandas as pd
import pint
from pandas import DataFrame, Series
from pandas import DataFrame, Series, Index
from pandas.api.extensions import (
ExtensionArray,
ExtensionDtype,
Expand Down Expand Up @@ -71,7 +71,7 @@ def __new__(cls, units=None):
if not isinstance(units, _Unit):
units = cls._parse_dtype_strict(units)
# ureg.unit returns a quantity with a magnitude of 1
# eg 1 mm. Initialising a quantity and taking it's unit
# eg 1 mm. Initialising a quantity and taking its unit
# TODO: Separate units from quantities in pint
# to simplify this bit
units = cls.ureg.Quantity(1, units).units
Expand Down Expand Up @@ -262,7 +262,6 @@ def __init__(self, values, dtype=None, copy=False):
copy = False
elif not isinstance(values, pd.core.arrays.numeric.NumericArray):
values = pd.array(values, copy=copy)
copy = False
if copy:
values = values.copy()
self._data = values
Expand Down Expand Up @@ -323,10 +322,14 @@ def __setitem__(self, key, value):

if isinstance(value, _Quantity):
value = value.to(self.units).magnitude
elif is_list_like(value) and len(value) > 0 and isinstance(value[0], _Quantity):
value = [item.to(self.units).magnitude for item in value]
elif is_list_like(value) and len(value) > 0:
if isinstance(value[0], _Quantity):
value = [item.to(self.units).magnitude for item in value]
if len(value) == 1:
value = value[0]

key = check_array_indexer(self, key)
# Filter out invalid values for our array type(s)
try:
self._data[key] = value
except IndexError as e:
Expand Down Expand Up @@ -483,7 +486,10 @@ def take(self, indices, allow_fill=False, fill_value=None):
# magnitude is in fact an array scalar, which will get rejected by pandas.
fill_value = fill_value[()]

result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# Turn off warning that PandasArray is deprecated for ``take``
result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)

return PintArray(result, dtype=self.dtype)

Expand Down Expand Up @@ -525,18 +531,12 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
raise ValueError(
"Cannot infer dtype. No dtype specified and empty array"
)
if dtype is None and not isinstance(master_scalar, _Quantity):
raise ValueError("No dtype specified and not a sequence of quantities")
if dtype is None and isinstance(master_scalar, _Quantity):
if dtype is None:
if not isinstance(master_scalar, _Quantity):
raise ValueError("No dtype specified and not a sequence of quantities")
dtype = PintType(master_scalar.units)

def quantify_nan(item):
andrewgsavage marked this conversation as resolved.
Show resolved Hide resolved
if type(item) is float:
return item * dtype.units
return item

if isinstance(master_scalar, _Quantity):
scalars = [quantify_nan(item) for item in scalars]
scalars = [
(item.to(dtype.units).magnitude if hasattr(item, "to") else item)
for item in scalars
Expand All @@ -551,10 +551,21 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False):

@classmethod
def _from_factorized(cls, values, original):
from pandas.api.types import infer_dtype

if infer_dtype(values) != "object":
values = pd.array(values, copy=False)
return cls(values, dtype=original.dtype)

def _values_for_factorize(self):
return self._data._values_for_factorize()
# factorize can now handle differentiating various types of null values.
# These can only occur when the array has object dtype.
# However, for backwards compatibility we only use the null for the
# provided dtype. This may be revisited in the future, see GH#48476.
arr = self._data
if arr.dtype.kind == "O":
return np.array(arr, copy=False), self.dtype.na_value
return arr._values_for_factorize()

def value_counts(self, dropna=True):
"""
Expand All @@ -580,18 +591,19 @@ def value_counts(self, dropna=True):

# compute counts on the data with no nans
data = self._data
nafilt = np.isnan(data)
nafilt = pd.isna(data)
na_value = pd.NA # NA value for index, not data, so not quantified
data = data[~nafilt]
index = list(set(data))

data_list = data.tolist()
index = list(set(data))
array = [data_list.count(item) for item in index]

if not dropna:
index.append(np.nan)
index.append(na_value)
array.append(nafilt.sum())

return Series(array, index=index)
return Series(np.asarray(array), index=index)

def unique(self):
"""Compute the PintArray of unique values.
Expand All @@ -602,7 +614,8 @@ def unique(self):
"""
from pandas import unique

return self._from_sequence(unique(self._data), dtype=self.dtype)
data = self._data
return self._from_sequence(unique(data), dtype=self.dtype)

def __contains__(self, item) -> bool:
if not isinstance(item, _Quantity):
Expand Down Expand Up @@ -704,7 +717,7 @@ def convert_values(param):
else:
return param

if isinstance(other, (Series, DataFrame)):
if isinstance(other, (Series, DataFrame, Index)):
return NotImplemented
lvalues = self.quantity
validate_length(lvalues, other)
Expand Down Expand Up @@ -753,7 +766,9 @@ def __array__(self, dtype=None, copy=False):

def _to_array_of_quantity(self, copy=False):
qtys = [
self._Q(item, self._dtype.units) if not pd.isna(item) else item
self._Q(item, self._dtype.units)
if not pd.isna(item)
else self.dtype.na_value
for item in self._data
]
with warnings.catch_warnings(record=True):
Expand Down Expand Up @@ -811,7 +826,42 @@ def searchsorted(self, value, side="left", sorter=None):
value = [item.to(self.units).magnitude for item in value]
return arr.searchsorted(value, side=side, sorter=sorter)

def _reduce(self, name, **kwds):
def map(self, mapper, na_action=None):
"""
Map values using an input mapping or function.

Parameters
----------
mapper : function, dict, or Series
Mapping correspondence.
na_action : {None, 'ignore'}, default None
If 'ignore', propagate NA values, without passing them to the
mapping correspondence. If 'ignore' is not supported, a
``NotImplementedError`` should be raised.

Returns
-------
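If mapper is a function, operate on the values of the array and return a PintArray when any mapped value has units (using the units of the first such value); otherwise return the array of mapped values as-is.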

"""
if pandas_version_info < (2, 1):
ser = pd.Series(self._to_array_of_quantity())
arr = ser.map(mapper, na_action).values
else:
from pandas.core.algorithms import map_array

arr = map_array(self, mapper, na_action)

master_scalar = None
try:
master_scalar = next(i for i in arr if hasattr(i, "units"))
except StopIteration:
# A mapper that formats quantities as str (e.g. for JSON) doesn't create a PintArray
# ...and that's OK. The caller will get the plain array of mapped values
return arr
return PintArray._from_sequence(arr, PintType(master_scalar.units))

def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds):
"""
Return a scalar result of performing the reduction operation.

Expand Down Expand Up @@ -855,14 +905,20 @@ def _reduce(self, name, **kwds):

if isinstance(self._data, ExtensionArray):
try:
result = self._data._reduce(name, **kwds)
result = self._data._reduce(
name, skipna=skipna, keepdims=keepdims, **kwds
)
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)

if name in {"all", "any", "kurt", "skew"}:
return result
if name == "var":
if keepdims:
return PintArray(result, f"pint[({self.units})**2]")
return self._Q(result, self.units**2)
if keepdims:
return PintArray(result, self.dtype)
return self._Q(result, self.units)

def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
Expand All @@ -879,7 +935,6 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
result = self._data._accumulate(name, **kwds)
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)
print(result)

return self._from_sequence(result, self.units)

Expand Down
24 changes: 21 additions & 3 deletions pint_pandas/testsuite/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@

import numpy as np
import pandas as pd
import pandas._testing as tm
import pytest
import pint
from pandas.tests.extension.base.base import BaseExtensionTests
from pint.testsuite import helpers

from pint_pandas import PintArray, PintType
from pint_pandas.pint_array import pandas_version_info

ureg = PintType.ureg

Expand Down Expand Up @@ -41,7 +43,7 @@ def test_force_ndarray_like(self):
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
)
self.assert_equal(result, expected)
tm.assert_equal(result, expected)
andrewgsavage marked this conversation as resolved.
Show resolved Hide resolved

finally:
# restore registry
Expand All @@ -64,7 +66,7 @@ def test_offset_concat(self):
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
)
self.assert_equal(result, expected)
tm.assert_equal(result, expected)

# issue #141
print(PintArray(q_a))
Expand All @@ -80,7 +82,7 @@ def test_assignment_add_empty(self):
result = pd.Series(data)
result[[]] += data[0]
expected = pd.Series(data)
self.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)


class TestIssue80:
Expand Down Expand Up @@ -167,3 +169,19 @@ def test_issue_127():
a = PintType.construct_from_string("pint[dimensionless]")
b = PintType.construct_from_string("pint[]")
assert a == b


class TestIssue174(BaseExtensionTests):
def test_sum(self):
if pandas_version_info < (2, 1):
pytest.skip("Pandas reduce functions strip units prior to version 2.1.0")
a = pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]")
row_sum = a.sum(axis=0)
expected_1 = pd.Series([3, 5, 7], dtype="pint[m]")

tm.assert_series_equal(row_sum, expected_1)

col_sum = a.sum(axis=1)
expected_2 = pd.Series([3, 12], dtype="pint[m]")

tm.assert_series_equal(col_sum, expected_2)
Loading
Loading