From 9c7971baa8725c23fa415a69d03f76a164da0131 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 7 Aug 2023 09:12:11 -0400 Subject: [PATCH 01/13] Update to Pandas 2.1.0.dev0+1415.g9cfedf48fa Additional code synchronizations (and the addition of a dtype-preserving map method). These changes were initially developed to support uncertainties, but the uncertainty changes have all been stripped out to simplify merging of underlying code. Once these changes are fully synced with a release version of Pandas 2.1, we can look at adding back uncertainties. These changes also tolerate complex128 as a base type for magnitudes, with one exception (under discussion as https://github.com/pandas-dev/pandas/issues/54445). Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 123 ++++++++++++++++++++------- pint_pandas/testsuite/test_issues.py | 21 ++++- 2 files changed, 112 insertions(+), 32 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 2b5c40f4..b06e9071 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd import pint -from pandas import DataFrame, Series +from pandas import DataFrame, Series, Index from pandas.api.extensions import ( ExtensionArray, ExtensionDtype, @@ -27,6 +27,8 @@ # quantify/dequantify NO_UNIT = "No Unit" +# from pint.facets.plain.quantity import PlainQuantity as _Quantity +# from pint.facets.plain.unit import PlainUnit as _Unit class PintType(ExtensionDtype): """ @@ -65,7 +67,7 @@ def __new__(cls, units=None): if not isinstance(units, _Unit): units = cls._parse_dtype_strict(units) # ureg.unit returns a quantity with a magnitude of 1 - # eg 1 mm. 
Initialising a quantity and taking its unit # TODO: Seperate units from quantities in pint # to simplify this bit units = cls.ureg.Quantity(1, units).units @@ -195,8 +197,8 @@ def __repr__(self): float: pd.Float64Dtype(), np.float64: pd.Float64Dtype(), np.float32: pd.Float32Dtype(), - np.complex128: pd.core.dtypes.dtypes.PandasDtype("complex128"), - np.complex64: pd.core.dtypes.dtypes.PandasDtype("complex64"), + np.complex128: pd.core.dtypes.dtypes.NumpyEADtype("complex128"), + np.complex64: pd.core.dtypes.dtypes.NumpyEADtype("complex64"), # np.float16: pd.Float16Dtype(), } dtypeunmap = {v: k for k, v in dtypemap.items()} @@ -250,7 +252,6 @@ def __init__(self, values, dtype=None, copy=False): copy = False elif not isinstance(values, pd.core.arrays.numeric.NumericArray): values = pd.array(values, copy=copy) - copy = False if copy: values = values.copy() self._data = values @@ -309,12 +310,22 @@ def __setitem__(self, key, value): # doing nothing here seems to be ok return + master_scalar = None + try: + master_scalar = next(i for i in self._data if pd.notna(i)) + except StopIteration: + pass + if isinstance(value, _Quantity): value = value.to(self.units).magnitude - elif is_list_like(value) and len(value) > 0 and isinstance(value[0], _Quantity): - value = [item.to(self.units).magnitude for item in value] + elif is_list_like(value) and len(value) > 0: + if isinstance(value[0], _Quantity): + value = [item.to(self.units).magnitude for item in value] + if len(value) == 1: + value = value[0] key = check_array_indexer(self, key) + # Filter out invalid values for our array type(s) try: self._data[key] = value except IndexError as e: @@ -458,7 +469,8 @@ def take(self, indices, allow_fill=False, fill_value=None): Examples -------- """ - from pandas.core.algorithms import take, is_scalar + from pandas.core.algorithms import take + from pandas.core.dtypes.common import is_scalar data = self._data if allow_fill and fill_value is None: @@ -470,7 +482,10 @@ def take(self, indices, 
allow_fill=False, fill_value=None): # magnitude is in fact an array scalar, which will get rejected by pandas. fill_value = fill_value[()] - result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # Turn off warning that PandasArray is deprecated for ``take`` + result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) return PintArray(result, dtype=self.dtype) @@ -512,22 +527,17 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): raise ValueError( "Cannot infer dtype. No dtype specified and empty array" ) - if dtype is None and not isinstance(master_scalar, _Quantity): - raise ValueError("No dtype specified and not a sequence of quantities") - if dtype is None and isinstance(master_scalar, _Quantity): + if dtype is None: + if not isinstance(master_scalar, _Quantity): + raise ValueError("No dtype specified and not a sequence of quantities") dtype = PintType(master_scalar.units) - def quantify_nan(item): - if type(item) is float: - return item * dtype.units - return item - if isinstance(master_scalar, _Quantity): - scalars = [quantify_nan(item) for item in scalars] scalars = [ (item.to(dtype.units).magnitude if hasattr(item, "to") else item) for item in scalars ] + # When creating empty arrays, make them large enoguh to hold UFloats in case we need to do so later return cls(scalars, dtype=dtype, copy=copy) @classmethod @@ -538,10 +548,21 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False): @classmethod def _from_factorized(cls, values, original): + from pandas._libs.lib import infer_dtype + + if infer_dtype(values) != "object": + values = pd.array(values, copy=False) return cls(values, dtype=original.dtype) def _values_for_factorize(self): - return self._data._values_for_factorize() + # factorize can now handle differentiating various types of null values. + # These can only occur when the array has object dtype. 
+ # However, for backwards compatibility we only use the null for the + # provided dtype. This may be revisited in the future, see GH#48476. + arr = self._data + if arr.dtype.kind == "O": + return np.array(arr, copy=False), self.dtype.na_value.m + return arr._values_for_factorize() def value_counts(self, dropna=True): """ @@ -567,16 +588,17 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data - nafilt = np.isnan(data) + nafilt = pd.isna(data) + na_value = self.dtype.na_value.m data = data[~nafilt] + index = list(set(data)) data_list = data.tolist() - index = list(set(data)) array = [data_list.count(item) for item in index] if not dropna: - index.append(np.nan) - array.append(nafilt.sum()) + index.append(na_value) + array.append(len(nafilt)) return Series(array, index=index) @@ -589,7 +611,8 @@ def unique(self): """ from pandas import unique - return self._from_sequence(unique(self._data), dtype=self.dtype) + data = self._data + return self._from_sequence(unique(data), dtype=self.dtype) def __contains__(self, item) -> bool: if not isinstance(item, _Quantity): @@ -691,7 +714,7 @@ def convert_values(param): else: return param - if isinstance(other, (Series, DataFrame)): + if isinstance(other, (Series, DataFrame, Index)): return NotImplemented lvalues = self.quantity validate_length(lvalues, other) @@ -740,7 +763,9 @@ def __array__(self, dtype=None, copy=False): def _to_array_of_quantity(self, copy=False): qtys = [ - self._Q(item, self._dtype.units) if not pd.isna(item) else item + self._Q(item, self._dtype.units) + if item is not self.dtype.na_value.m + else self.dtype.na_value for item in self._data ] with warnings.catch_warnings(record=True): @@ -798,7 +823,42 @@ def searchsorted(self, value, side="left", sorter=None): value = [item.to(self.units).magnitude for item in value] return arr.searchsorted(value, side=side, sorter=sorter) - def _reduce(self, name, **kwds): + def map(self, mapper, na_action=None): + """ + Map 
values using an input mapping or function. + + Parameters + ---------- + mapper : function, dict, or Series + Mapping correspondence. + na_action : {None, 'ignore'}, default None + If 'ignore', propagate NA values, without passing them to the + mapping correspondence. If 'ignore' is not supported, a + ``NotImplementedError`` should be raised. + + Returns + ------- + If mapper is a function, operate on the magnitudes of the array and + + """ + if callable(mapper) and len(self): + from pandas._libs import lib + + # This converts PintArray into array of Quantities + values = self.astype(object, copy=False) + # Using _from_sequence allows for possibility that mapper changes units + if na_action is None: + arr = lib.map_infer(values, mapper, convert=True) + else: + arr = lib.map_infer_mask( + values, mapper, mask=pd.isna(values).view(np.uint8), convert=True + ) + # If mapper doesn't return a Quantity, this will raise a ValueError + return PintArray._from_sequence(arr) + else: + return super().map(mapper, na_action=na_action) + + def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds): """ Return a scalar result of performing the reduction operation. 
@@ -842,14 +902,20 @@ def _reduce(self, name, **kwds): if isinstance(self._data, ExtensionArray): try: - result = self._data._reduce(name, **kwds) + result = self._data._reduce( + name, skipna=skipna, keepdims=keepdims, **kwds + ) except NotImplementedError: result = functions[name](self.numpy_data, **kwds) if name in {"all", "any", "kurt", "skew"}: return result if name == "var": + if keepdims: + return PintArray(result, f"pint[({self.units})**2]") return self._Q(result, self.units**2) + if keepdims: + return PintArray(result, self.dtype) return self._Q(result, self.units) def _accumulate(self, name: str, *, skipna: bool = True, **kwds): @@ -866,7 +932,6 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds): result = self._data._accumulate(name, **kwds) except NotImplementedError: result = functions[name](self.numpy_data, **kwds) - print(result) return self._from_sequence(result, self.units) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 316efce3..4184f909 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +import pandas._testing as tm import pytest import pint from pandas.tests.extension.base.base import BaseExtensionTests @@ -41,7 +42,7 @@ def test_force_ndarray_like(self): expected = pd.DataFrame( {0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]" ) - self.assert_equal(result, expected) + tm.assert_equal(result, expected) finally: # restore registry @@ -64,7 +65,7 @@ def test_offset_concat(self): expected = pd.DataFrame( {0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]" ) - self.assert_equal(result, expected) + tm.assert_equal(result, expected) # issue #141 print(PintArray(q_a)) @@ -80,7 +81,7 @@ def test_assignment_add_empty(self): result = pd.Series(data) result[[]] += data[0] expected = pd.Series(data) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, 
expected) class TestIssue80: @@ -167,3 +168,17 @@ def test_issue_127(): a = PintType.construct_from_string("pint[dimensionless]") b = PintType.construct_from_string("pint[]") assert a == b + + +class TestIssue174(BaseExtensionTests): + def test_sum(self): + a = pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]") + row_sum = a.sum(axis=0) + expected_1 = pd.Series([3, 5, 7], dtype="pint[m]") + + tm.assert_series_equal(row_sum, expected_1) + + col_sum = a.sum(axis=1) + expected_2 = pd.Series([3, 12], dtype="pint[m]") + + tm.assert_series_equal(col_sum, expected_2) From 53b74a41145f7ef75f8bb35878020136f6305071 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 7 Aug 2023 09:17:09 -0400 Subject: [PATCH 02/13] Initial commit missing file and pre-commit changes Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 8 +- .../testsuite/test_pandas_extensiontests.py | 188 +++++++++++++----- 2 files changed, 140 insertions(+), 56 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index b06e9071..35dd567c 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -30,6 +30,7 @@ # from pint.facets.plain.quantity import PlainQuantity as _Quantity # from pint.facets.plain.unit import PlainUnit as _Unit + class PintType(ExtensionDtype): """ A Pint duck-typed class, suitable for holding a quantity (with unit specified) dtype. 
@@ -310,9 +311,8 @@ def __setitem__(self, key, value): # doing nothing here seems to be ok return - master_scalar = None try: - master_scalar = next(i for i in self._data if pd.notna(i)) + next(i for i in self._data if pd.notna(i)) except StopIteration: pass @@ -838,8 +838,8 @@ def map(self, mapper, na_action=None): Returns ------- - If mapper is a function, operate on the magnitudes of the array and - + If mapper is a function, operate on the magnitudes of the array and + """ if callable(mapper) and len(self): from pandas._libs import lib diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 698cbb56..87851e4e 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -11,7 +11,7 @@ from pandas.tests.extension import base from pandas.tests.extension.conftest import ( as_frame, # noqa: F401 - as_array, # noqa: F401, + as_array, # noqa: F401 as_series, # noqa: F401 fillna_method, # noqa: F401 groupby_apply_op, # noqa: F401 @@ -133,7 +133,6 @@ def data_for_grouping(numeric_dtype): a = 1.0 b = 2.0**32 + 1 c = 2.0**32 + 10 - numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) return PintArray.from_1darray_quantity( ureg.Quantity( @@ -185,7 +184,7 @@ def all_compare_operators(request): return request.param -# commented functions aren't implemented +# commented functions aren't implemented in numpy/pandas _all_numeric_reductions = [ "sum", "max", @@ -275,7 +274,7 @@ def test_groupby_apply_identity(self, data_for_grouping): index=pd.Index([1, 2, 3, 4], name="A"), name="B", ) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.xfail(run=True, reason="assert_frame_equal issue") @pytest.mark.parametrize("as_index", [True, False]) @@ -287,10 +286,10 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping): if as_index: index = pd.Index._with_infer(uniques, name="B") 
expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) else: expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0, 4.0]}) - self.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_in_numeric_groupby(self, data_for_grouping): df = pd.DataFrame( @@ -314,74 +313,134 @@ def test_groupby_extension_no_sort(self, data_for_grouping): index = pd.Index._with_infer(index, name="B") expected = pd.Series([1.0, 3.0, 4.0], index=index, name="A") - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) -class TestInterface(base.BaseInterfaceTests): - pass +class TestMethods(base.BaseMethodsTests): + def test_apply_simple_series(self, data): + result = pd.Series(data).apply(lambda x: x * 2 + ureg.Quantity(1, x.u)) + assert isinstance(result, pd.Series) + @pytest.mark.parametrize("na_action", [None, "ignore"]) + def test_map(self, data_missing, na_action): + s = pd.Series(data_missing) + result = s.map(lambda x: x, na_action=na_action) + expected = s + tm.assert_series_equal(result, expected) -class TestMethods(base.BaseMethodsTests): @pytest.mark.skip("All values are valid as magnitudes") def test_insert_invalid(self): pass class TestArithmeticOps(base.BaseArithmeticOpsTests): - def _check_divmod_op(self, s, op, other, exc=None): - # divmod has multiple return values, so check separately - if exc is None: - result_div, result_mod = op(s, other) - if op is divmod: - expected_div, expected_mod = s // other, s % other - else: - expected_div, expected_mod = other // s, other % s - self.assert_series_equal(result_div, expected_div) - self.assert_series_equal(result_mod, expected_mod) - else: - with pytest.raises(exc): - divmod(s, other) - - def _get_exception(self, data, op_name): - if data.data.dtype == pd.core.dtypes.dtypes.PandasDtype("complex128"): - if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: - 
return op_name, TypeError + # With Pint 0.21, series and scalar need to have compatible units for + # the arithmetic to work + # series & scalar + + divmod_exc = None + series_scalar_exc = None + frame_scalar_exc = None + series_array_exc = None + + def _get_expected_exception( + self, op_name: str, obj, other + ) -> type[Exception] | None: if op_name in ["__pow__", "__rpow__"]: - return op_name, DimensionalityError - - return op_name, None - - @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) - def test_divmod_series_array(self, data, data_for_twos): - base.BaseArithmeticOpsTests.test_divmod_series_array(self, data, data_for_twos) + return DimensionalityError + complex128_dtype = pd.core.dtypes.dtypes.NumpyEADtype("complex128") + if ( + (isinstance(obj, pd.Series) and obj.dtype == complex128_dtype) + or ( + isinstance(obj, pd.DataFrame) + and any([dtype == complex128_dtype for dtype in obj.dtypes]) + ) + or (isinstance(other, pd.Series) and other.dtype == complex128_dtype) + or ( + isinstance(other, pd.DataFrame) + and any([dtype == complex128_dtype for dtype in other.dtypes]) + ) + ): + if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: + breakpoint() + return TypeError + return super()._get_expected_exception(op_name, obj, other) def test_arith_series_with_scalar(self, data, all_arithmetic_operators): - # With Pint 0.21, series and scalar need to have compatible units for - # the arithmetic to work # series & scalar - op_name, exc = self._get_exception(data, all_arithmetic_operators) - s = pd.Series(data) - self.check_opname(s, op_name, s.iloc[0], exc=exc) - - def test_arith_series_with_array(self, data, all_arithmetic_operators): - # ndarray & other series - op_name, exc = self._get_exception(data, all_arithmetic_operators) + op_name = all_arithmetic_operators + if data[0].dtype.kind == "c" and op_name in [ + "floor_divide", + "remainder", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + ]: + 
pytest.skip( + f"{data.dtype.name} PintArray does not support {op_name} operator" + ) ser = pd.Series(data) - self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), exc) + self.check_opname(ser, op_name, ser.iloc[0]) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar - op_name, exc = self._get_exception(data, all_arithmetic_operators) + op_name = all_arithmetic_operators + if data[0].dtype.kind == "c" and op_name in [ + "floor_divide", + "remainder", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + ]: + pytest.skip( + f"{data.dtype.name} PintArray does not support {op_name} operator" + ) df = pd.DataFrame({"A": data}) - self.check_opname(df, op_name, data[0], exc=exc) + self.check_opname(df, op_name, data[0]) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + # ndarray & other series + op_name = all_arithmetic_operators + if data[0].dtype.kind == "c" and op_name in [ + "floor_divide", + "remainder", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + ]: + pytest.skip( + f"{data.dtype.name} PintArray does not support {op_name} operator" + ) + ser = pd.Series(data) + self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) - # parameterise this to try divisor not equal to 1 + # parameterise this to try divisor not equal to 1 Mm @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) def test_divmod(self, data): + if data[0].dtype.kind == "c": + pytest.skip(f"{data.dtype.name} PintArray does not support divmod") s = pd.Series(data) self._check_divmod_op(s, divmod, 1 * ureg.Mm) self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, s) + def test_divmod_series_array(self, data, data_for_twos): + if data[0].dtype.kind == "c": + pytest.skip(f"{data.dtype.name} dtype does not support divmod") + if data_for_twos[0].dtype.kind == "c": + pytest.skip(f"{data_for_twos.dtype.name} dtype does not support divmod") + + ser = pd.Series(data) + 
self._check_divmod_op(ser, divmod, data) + + other = data_for_twos + self._check_divmod_op(other, ops.rdivmod, ser) + + other = pd.Series(other) + self._check_divmod_op(other, ops.rdivmod, ser) + class TestComparisonOps(base.BaseComparisonOpsTests): def _compare_other(self, s, data, op_name, other): @@ -441,6 +500,10 @@ def check_reduce(self, s, op_name, skipna): expected = expected_m assert result == expected + @pytest.mark.skip("tests not written yet") + def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): + pass + @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_scaling(self, data, all_numeric_reductions, skipna): """Make sure that the reductions give the same physical result independent of the unit representation. @@ -456,7 +519,10 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna): # min/max with empty produce numpy warnings with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) + # try: r_nm = getattr(s_nm, op_name)(skipna=skipna) + # except AttributeError: + # pytest.skip("bye!") r_mm = getattr(s_mm, op_name)(skipna=skipna) if isinstance(r_nm, ureg.Quantity): # convert both results to the same units, then take the magnitude @@ -467,6 +533,16 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna): v_mm = r_mm assert np.isclose(v_nm, v_mm, rtol=1e-3), f"{r_nm} == {r_mm}" + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_xx(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + s = pd.Series(data) + + # min/max with empty produce numpy warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_reduce(s, op_name, skipna) + class TestBooleanReduce(base.BaseBooleanReduceTests): def check_reduce(self, s, op_name, skipna): @@ -507,13 +583,21 @@ def test_unstack(self, data, index, obj): class TestSetitem(base.BaseSetitemTests): @pytest.mark.parametrize("numeric_dtype", 
_base_numeric_dtypes, indirect=True) def test_setitem_scalar_key_sequence_raise(self, data): + # This can be removed when https://github.com/pandas-dev/pandas/pull/54441 is accepted base.BaseSetitemTests.test_setitem_scalar_key_sequence_raise(self, data) + def test_setitem_2d_values(self, data): + # GH50085 + original = data.copy() + df = pd.DataFrame({"a": data, "b": data}) + df.loc[[0, 1], :] = df.loc[[1, 0], :].values + assert (df.loc[0, :] == original[1]).all() + assert (df.loc[1, :] == original[0]).all() + class TestAccumulate(base.BaseAccumulateTests): - @pytest.mark.parametrize("skipna", [True, False]) - def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna): - pass + def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool: + return True def check_accumulate(self, s, op_name, skipna): if op_name == "cumprod": @@ -524,4 +608,4 @@ def check_accumulate(self, s, op_name, skipna): s_unitless = pd.Series(s.values.data) expected = getattr(s_unitless, op_name)(skipna=skipna) expected = pd.Series(expected, dtype=s.dtype) - self.assert_series_equal(result, expected, check_dtype=False) + tm.assert_series_equal(result, expected, check_dtype=False) From 366f5f32dd31f3d2992a65e1243915954405b523 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 7 Aug 2023 22:05:17 -0400 Subject: [PATCH 03/13] Fixes to map and _get_expected_exception The PintArray map function should return a PintArray if the mapper returns PintQuantities, but otherwise it should just return whatever was the result of the mapper. We follow the examples from pandas in the definition and use of _get_expected_exception in the test suite. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 10 +- .../testsuite/test_pandas_extensiontests.py | 126 ++++++++++-------- 2 files changed, 78 insertions(+), 58 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 35dd567c..e0da93ea 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -853,8 +853,14 @@ def map(self, mapper, na_action=None): arr = lib.map_infer_mask( values, mapper, mask=pd.isna(values).view(np.uint8), convert=True ) - # If mapper doesn't return a Quantity, this will raise a ValueError - return PintArray._from_sequence(arr) + master_scalar = None + try: + master_scalar = next(i for i in arr if hasattr(i, "units")) + except StopIteration: + # JSON mapper formatting Qs as str don't create PintArrays + # ...and that's OK. Caller will get array of values + return arr + return PintArray._from_sequence(arr, PintType(master_scalar.units)) else: return super().map(mapper, na_action=na_action) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 87851e4e..aefcee45 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -348,90 +348,104 @@ def _get_expected_exception( ) -> type[Exception] | None: if op_name in ["__pow__", "__rpow__"]: return DimensionalityError - complex128_dtype = pd.core.dtypes.dtypes.NumpyEADtype("complex128") - if ( - (isinstance(obj, pd.Series) and obj.dtype == complex128_dtype) - or ( - isinstance(obj, pd.DataFrame) - and any([dtype == complex128_dtype for dtype in obj.dtypes]) - ) - or (isinstance(other, pd.Series) and other.dtype == complex128_dtype) - or ( - isinstance(other, pd.DataFrame) - and any([dtype == complex128_dtype for dtype in other.dtypes]) - ) - ): - if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: - breakpoint() + if op_name in [ + 
"__divmod__", + "__rdivmod__", + "floor_divide", + "remainder", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__" + ]: + exc = None + if isinstance(obj, complex): + pytest.skip(f"{type(obj).__name__} does not support {op_name}") + return TypeError + if isinstance(other, complex): + pytest.skip(f"{type(other).__name__} does not support {op_name}") return TypeError - return super()._get_expected_exception(op_name, obj, other) + if isinstance(obj, ureg.Quantity): + pytest.skip(f"{type(obj.m).__name__} Quantity does not support {op_name}") + return TypeError + if isinstance(other, ureg.Quantity): + pytest.skip(f"{type(other.m).__name__} Quantity does not support {op_name}") + return TypeError + if isinstance(obj, pd.Series): + try: + if obj.pint.m.dtype.kind == "c": + pytest.skip( + f"{obj.pint.m.dtype.name} {obj.dtype} does not support {op_name}" + ) + return TypeError + except AttributeError: + exc = super()._get_expected_exception(op_name, obj, other) + if exc: + return exc + if isinstance(other, pd.Series): + try: + if other.pint.m.dtype.kind == "c": + pytest.skip( + f"{other.pint.m.dtype.name} {other.dtype} does not support {op_name}" + ) + return TypeError + except AttributeError: + exc = super()._get_expected_exception(op_name, obj, other) + if exc: + return exc + if isinstance(obj, pd.DataFrame): + try: + df = obj.pint.dequantify() + for i, col in enumerate(df.columns): + if df.iloc[:, i].dtype.kind == "c": + pytest.skip( + f"{df.iloc[:, i].dtype.name} {df.dtypes[i]} does not support {op_name}" + ) + return TypeError + except AttributeError: + exc = super()._get_expected_exception(op_name, obj, other) + if exc: + return exc + if isinstance(other, pd.DataFrame): + try: + df = other.pint.dequantify() + for i, col in enumerate(df.columns): + if df.iloc[:, i].dtype.kind == "c": + pytest.skip( + f"{df.iloc[:, i].dtype.name} {df.dtypes[i]} does not support {op_name}" + ) + return TypeError + except AttributeError: + exc = 
super()._get_expected_exception(op_name, obj, other) + # Fall through... + return exc def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # series & scalar op_name = all_arithmetic_operators - if data[0].dtype.kind == "c" and op_name in [ - "floor_divide", - "remainder", - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", - ]: - pytest.skip( - f"{data.dtype.name} PintArray does not support {op_name} operator" - ) ser = pd.Series(data) self.check_opname(ser, op_name, ser.iloc[0]) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar op_name = all_arithmetic_operators - if data[0].dtype.kind == "c" and op_name in [ - "floor_divide", - "remainder", - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", - ]: - pytest.skip( - f"{data.dtype.name} PintArray does not support {op_name} operator" - ) df = pd.DataFrame({"A": data}) self.check_opname(df, op_name, data[0]) def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators - if data[0].dtype.kind == "c" and op_name in [ - "floor_divide", - "remainder", - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", - ]: - pytest.skip( - f"{data.dtype.name} PintArray does not support {op_name} operator" - ) ser = pd.Series(data) self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) # parameterise this to try divisor not equal to 1 Mm @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) def test_divmod(self, data): - if data[0].dtype.kind == "c": - pytest.skip(f"{data.dtype.name} PintArray does not support divmod") s = pd.Series(data) self._check_divmod_op(s, divmod, 1 * ureg.Mm) self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, s) def test_divmod_series_array(self, data, data_for_twos): - if data[0].dtype.kind == "c": - pytest.skip(f"{data.dtype.name} dtype does not support divmod") - if data_for_twos[0].dtype.kind == "c": - 
pytest.skip(f"{data_for_twos.dtype.name} dtype does not support divmod") - ser = pd.Series(data) self._check_divmod_op(ser, divmod, data) From d883583c8f1ca54eeab124c0ae2bb8d3ef28696e Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 11 Aug 2023 23:24:30 -0400 Subject: [PATCH 04/13] Update pint_array.py Fix canonicalization of NaNs to use na_value (with units) rather than pure np.nan. While np.nan is correctly handled as a value in PintArrays, and works as expected with addition (adding a quantity to a NaN produces a NaN, which remains a value), it doesn't work for multiplication (where a quantity times a NaN produces not a NaN but a new quantity of NaN magnitude with the units of the non-NaN value). Canonicalizing to na_value provides consistent unit handling with NaNs through all arithmetic operations. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index e0da93ea..62b70cfb 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -198,8 +198,8 @@ def __repr__(self): float: pd.Float64Dtype(), np.float64: pd.Float64Dtype(), np.float32: pd.Float32Dtype(), - np.complex128: pd.core.dtypes.dtypes.NumpyEADtype("complex128"), - np.complex64: pd.core.dtypes.dtypes.NumpyEADtype("complex64"), + np.complex128: pd.core.dtypes.dtypes.PandasDtype("complex128"), + np.complex64: pd.core.dtypes.dtypes.PandasDtype("complex64"), # np.float16: pd.Float16Dtype(), } dtypeunmap = {v: k for k, v in dtypemap.items()} @@ -561,7 +561,7 @@ def _values_for_factorize(self): # provided dtype. This may be revisited in the future, see GH#48476. 
arr = self._data if arr.dtype.kind == "O": - return np.array(arr, copy=False), self.dtype.na_value.m + return np.array(arr, copy=False), self.dtype.na_value return arr._values_for_factorize() def value_counts(self, dropna=True): @@ -589,7 +589,7 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data nafilt = pd.isna(data) - na_value = self.dtype.na_value.m + na_value = self.dtype.na_value data = data[~nafilt] index = list(set(data)) @@ -764,7 +764,7 @@ def __array__(self, dtype=None, copy=False): def _to_array_of_quantity(self, copy=False): qtys = [ self._Q(item, self._dtype.units) - if item is not self.dtype.na_value.m + if item is not self.dtype.na_value else self.dtype.na_value for item in self._data ] From 34554a6c49441517ff315426a39a614432b6926f Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 11 Aug 2023 23:32:21 -0400 Subject: [PATCH 05/13] Update test_pandas_extensiontests.py Make black happy. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .../testsuite/test_pandas_extensiontests.py | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index aefcee45..487eef04 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -349,14 +349,14 @@ def _get_expected_exception( if op_name in ["__pow__", "__rpow__"]: return DimensionalityError if op_name in [ - "__divmod__", - "__rdivmod__", - "floor_divide", - "remainder", - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__" + "__divmod__", + "__rdivmod__", + "floor_divide", + "remainder", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", ]: exc = None if isinstance(obj, complex): @@ -366,11 +366,15 @@ def _get_expected_exception( pytest.skip(f"{type(other).__name__} does not support {op_name}") return TypeError if isinstance(obj, ureg.Quantity): - pytest.skip(f"{type(obj.m).__name__} Quantity does not support {op_name}") - return TypeError + pytest.skip( + f"{type(obj.m).__name__} Quantity does not support {op_name}" + ) + return TypeError if isinstance(other, ureg.Quantity): - pytest.skip(f"{type(other.m).__name__} Quantity does not support {op_name}") - return TypeError + pytest.skip( + f"{type(other.m).__name__} Quantity does not support {op_name}" + ) + return TypeError if isinstance(obj, pd.Series): try: if obj.pint.m.dtype.kind == "c": From 4ff2ec4ba0e3d3326c283c218099a1d68ffd21cb Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 12 Aug 2023 22:15:14 -0400 Subject: [PATCH 06/13] Fix value_counts and other feedback Fix errors that crept into value_counts concerning na_type. And canonicalize na_value in quantities a bit more generally. 
As stated in a previous commit comment, if we leave naked NA/NaNs in our Quantity array, the code gracefully handles unit addition/subtraction, but unit multiplication/division loses. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 8 ++++---- pint_pandas/testsuite/test_pandas_extensiontests.py | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 36289c4b..566caad7 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -598,7 +598,7 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data nafilt = pd.isna(data) - na_value = self.dtype.na_value + na_value = pd.NA # NA value for index, not data, so not quantified data = data[~nafilt] index = list(set(data)) @@ -607,9 +607,9 @@ def value_counts(self, dropna=True): if not dropna: index.append(na_value) - array.append(len(nafilt)) + array.append(nafilt.sum()) - return Series(array, index=index) + return Series(np.asarray(array), index=index) def unique(self): """Compute the PintArray of unique values. 
@@ -773,7 +773,7 @@ def __array__(self, dtype=None, copy=False): def _to_array_of_quantity(self, copy=False): qtys = [ self._Q(item, self._dtype.units) - if item is not self.dtype.na_value + if not pd.isna(item) else self.dtype.na_value for item in self._data ] diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 487eef04..4a9ea683 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -316,6 +316,10 @@ def test_groupby_extension_no_sort(self, data_for_grouping): tm.assert_series_equal(result, expected) +class TestInterface(base.BaseInterfaceTests): + pass + + class TestMethods(base.BaseMethodsTests): def test_apply_simple_series(self, data): result = pd.Series(data).apply(lambda x: x * 2 + ureg.Quantity(1, x.u)) @@ -537,10 +541,7 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna): # min/max with empty produce numpy warnings with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - # try: r_nm = getattr(s_nm, op_name)(skipna=skipna) - # except AttributeError: - # pytest.skip("bye!") r_mm = getattr(s_mm, op_name)(skipna=skipna) if isinstance(r_nm, ureg.Quantity): # convert both results to the same units, then take the magnitude From 053f90357949cda3a2bb3ed930864bf387e55ae0 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 12 Aug 2023 22:35:12 -0400 Subject: [PATCH 07/13] Update test_pandas_extensiontests.py Simplify type annotations for _get_expected_exception to make Python 3.9 happy. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/testsuite/test_pandas_extensiontests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 4a9ea683..590b89bd 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -349,7 +349,7 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests): def _get_expected_exception( self, op_name: str, obj, other - ) -> type[Exception] | None: + ): # -> type[Exception] | None, but Union types not understood by Python 3.9 if op_name in ["__pow__", "__rpow__"]: return DimensionalityError if op_name in [ From 7a97fb75e5f40863fbf4898dfcae65dbde812222 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 12 Aug 2023 22:52:54 -0400 Subject: [PATCH 08/13] Update pint_array.py Delete the last vestiges of some old code from a different PR. Also use Pandas public APIs for `infer_dtype` (used in `_from_factorized`). 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 566caad7..d071bd2a 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -320,11 +320,6 @@ def __setitem__(self, key, value): # doing nothing here seems to be ok return - try: - next(i for i in self._data if pd.notna(i)) - except StopIteration: - pass - if isinstance(value, _Quantity): value = value.to(self.units).magnitude elif is_list_like(value) and len(value) > 0: @@ -557,7 +552,7 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False): @classmethod def _from_factorized(cls, values, original): - from pandas._libs.lib import infer_dtype + from pandas.api.types import infer_dtype if infer_dtype(values) != "object": values = pd.array(values, copy=False) From 630c2aec242d32ce14bd70441591858494d8bc02 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 12 Aug 2023 22:55:30 -0400 Subject: [PATCH 09/13] Update pint_array.py Remove unrelated comment about UFloats. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index d071bd2a..1f1c1dc1 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -541,7 +541,6 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): (item.to(dtype.units).magnitude if hasattr(item, "to") else item) for item in scalars ] - # When creating empty arrays, make them large enoguh to hold UFloats in case we need to do so later return cls(scalars, dtype=dtype, copy=copy) @classmethod From e1e4585a0af02d3bdbafb64614dc873857114e27 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 13 Aug 2023 08:52:04 -0400 Subject: [PATCH 10/13] Testsuite now passes with both Pandas 2.0.2 and 2.1.0rc0 Restore some Pandas 2.0.x interfaces and use pandas_version_info from pint_array to condition logic. Also change some more `self.assert_series_equal` to `tm.assert_series_equal` in the restored code to accommodate Pandas 2.1 BaseExtensionTests behavior. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/testsuite/test_issues.py | 3 + .../testsuite/test_pandas_extensiontests.py | 88 ++++++++++++++----- 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 4184f909..95d85b2a 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -10,6 +10,7 @@ from pint.testsuite import helpers from pint_pandas import PintArray, PintType +from pint_pandas.pint_array import pandas_version_info ureg = PintType.ureg @@ -172,6 +173,8 @@ def test_issue_127(): class TestIssue174(BaseExtensionTests): def test_sum(self): + if pandas_version_info < (2, 1): + pytest.skip("Pandas reduce functions strip units prior to version 2.1.0") a = pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]") row_sum = a.sum(axis=0) expected_1 = pd.Series([3, 5, 7], dtype="pint[m]") diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 590b89bd..126bc32e 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -22,7 +22,7 @@ from pint.errors import DimensionalityError from pint_pandas import PintArray, PintType -from pint_pandas.pint_array import dtypemap +from pint_pandas.pint_array import dtypemap, pandas_version_info ureg = PintType.ureg @@ -328,6 +328,10 @@ def test_apply_simple_series(self, data): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): s = pd.Series(data_missing) + if pandas_version_info < (2, 1) and na_action is not None: + pytest.skip( + "Pandas EA map function only accepts None as na_action parameter" + ) result = s.map(lambda x: x, na_action=na_action) expected = s tm.assert_series_equal(result, expected) @@ -338,10 +342,6 @@ def test_insert_invalid(self): class 
TestArithmeticOps(base.BaseArithmeticOpsTests): - # With Pint 0.21, series and scalar need to have compatible units for - # the arithmetic to work - # series & scalar - divmod_exc = None series_scalar_exc = None frame_scalar_exc = None @@ -428,31 +428,73 @@ def _get_expected_exception( # Fall through... return exc + # The following methods are needed to work with Pandas < 2.1 + def _check_divmod_op(self, s, op, other, exc=None): + # divmod has multiple return values, so check separately + if exc is None: + result_div, result_mod = op(s, other) + if op is divmod: + expected_div, expected_mod = s // other, s % other + else: + expected_div, expected_mod = other // s, other % s + tm.assert_series_equal(result_div, expected_div) + tm.assert_series_equal(result_mod, expected_mod) + else: + with pytest.raises(exc): + divmod(s, other) + + def _get_exception(self, data, op_name): + if data.data.dtype == pd.core.dtypes.dtypes.PandasDtype("complex128"): + if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: + return op_name, TypeError + if op_name in ["__pow__", "__rpow__"]: + return op_name, DimensionalityError + + return op_name, None + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # With Pint 0.21, series and scalar need to have compatible units for + # the arithmetic to work # series & scalar - op_name = all_arithmetic_operators - ser = pd.Series(data) - self.check_opname(ser, op_name, ser.iloc[0]) - - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): - # frame & scalar - op_name = all_arithmetic_operators - df = pd.DataFrame({"A": data}) - self.check_opname(df, op_name, data[0]) + if pandas_version_info < (2, 1): + op_name, exc = self._get_exception(data, all_arithmetic_operators) + s = pd.Series(data) + self.check_opname(s, op_name, s.iloc[0], exc=exc) + else: + op_name = all_arithmetic_operators + ser = pd.Series(data) + self.check_opname(ser, op_name, ser.iloc[0]) def 
test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series - op_name = all_arithmetic_operators - ser = pd.Series(data) - self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) + if pandas_version_info < (2, 1): + op_name, exc = self._get_exception(data, all_arithmetic_operators) + ser = pd.Series(data) + self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), exc) + else: + op_name = all_arithmetic_operators + ser = pd.Series(data) + self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + if pandas_version_info < (2, 1): + op_name, exc = self._get_exception(data, all_arithmetic_operators) + df = pd.DataFrame({"A": data}) + self.check_opname(df, op_name, data[0], exc=exc) + else: + op_name = all_arithmetic_operators + df = pd.DataFrame({"A": data}) + self.check_opname(df, op_name, data[0]) # parameterise this to try divisor not equal to 1 Mm @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) def test_divmod(self, data): - s = pd.Series(data) - self._check_divmod_op(s, divmod, 1 * ureg.Mm) - self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, s) + ser = pd.Series(data) + self._check_divmod_op(ser, divmod, 1 * ureg.Mm) + self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, ser) + @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) def test_divmod_series_array(self, data, data_for_twos): ser = pd.Series(data) self._check_divmod_op(ser, divmod, data) @@ -615,6 +657,12 @@ def test_setitem_2d_values(self, data): class TestAccumulate(base.BaseAccumulateTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna): + if pandas_version_info < (2, 1): + # Should this be skip? Historic code simply used pass. 
+ pass + def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool: return True From 9ff71791e90f8f47e471beee46a0cbe2d245d86c Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 13 Aug 2023 19:07:53 -0400 Subject: [PATCH 11/13] Simplify PintArray.map; CI and CHANGES updates Simplify PintArray.map to let `pandas.core.algorithms.map_array` do the mapping and we just clean up the PintArray details if necessary. Pandas 2.1.0rc0 added to CI Updated CHANGES Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- CHANGES | 1 + pint_pandas/pint_array.py | 33 +++++++++++---------------------- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dcbd816f..251070a6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ jobs: matrix: python-version: [3.9, "3.10", "3.11"] numpy: ["numpy>=1.20.3,<2.0.0"] - pandas: ["pandas==2.0.2", ] + pandas: ["pandas==2.0.2", "pandas==2.1.0rc0" ] pint: ["pint>=0.21.1", "pint==0.22"] runs-on: ubuntu-latest diff --git a/CHANGES b/CHANGES index 962c5e10..b003ad3a 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,7 @@ pint-pandas Changelog ---------------- <<<<<<< HEAD +- Support for Pandas version 2.1.0 (including dtype-preserving `PintArray.map`) #196 - Support for values in columns with integer magnitudes - Support for magnitudes of any type, such as complex128 or tuples #146 - Support for pandas 2.0, allowing `.cumsum, .cummax, .cummin` methods for `Series` and `DataFrame`. 
#186 diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 1f1c1dc1..96b608c2 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -844,28 +844,17 @@ def map(self, mapper, na_action=None): If mapper is a function, operate on the magnitudes of the array and """ - if callable(mapper) and len(self): - from pandas._libs import lib - - # This converts PintArray into array of Quantities - values = self.astype(object, copy=False) - # Using _from_sequence allows for possibility that mapper changes units - if na_action is None: - arr = lib.map_infer(values, mapper, convert=True) - else: - arr = lib.map_infer_mask( - values, mapper, mask=pd.isna(values).view(np.uint8), convert=True - ) - master_scalar = None - try: - master_scalar = next(i for i in arr if hasattr(i, "units")) - except StopIteration: - # JSON mapper formatting Qs as str don't create PintArrays - # ...and that's OK. Caller will get array of values - return arr - return PintArray._from_sequence(arr, PintType(master_scalar.units)) - else: - return super().map(mapper, na_action=na_action) + from pandas.core.algorithms import map_array + + arr = map_array(self, mapper, na_action) + master_scalar = None + try: + master_scalar = next(i for i in arr if hasattr(i, "units")) + except StopIteration: + # JSON mapper formatting Qs as str don't create PintArrays + # ...and that's OK. Caller will get array of values + return arr + return PintArray._from_sequence(arr, PintType(master_scalar.units)) def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds): """ From 2c6de3cf05477a5d5fb882e07939503808d2490b Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 13 Aug 2023 21:17:28 -0400 Subject: [PATCH 12/13] Implement `map` also for Pandas 2.0.2 Implement `map` for PintArrays also only with Pandas 2.0.2 interfaces. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- CHANGES | 3 ++- pint_pandas/pint_array.py | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index b003ad3a..f3c12ba4 100644 --- a/CHANGES +++ b/CHANGES @@ -5,7 +5,8 @@ pint-pandas Changelog ---------------- <<<<<<< HEAD -- Support for Pandas version 2.1.0 (including dtype-preserving `PintArray.map`) #196 +- Support for Pandas version 2.1.0. #196 +- Support for dtype-preserving `PintArray.map` for both Pandas 2.0.2 and Pandas 2.1. #196 - Support for values in columns with integer magnitudes - Support for magnitudes of any type, such as complex128 or tuples #146 - Support for pandas 2.0, allowing `.cumsum, .cummax, .cummin` methods for `Series` and `DataFrame`. #186 diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 96b608c2..7154be5a 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -844,9 +844,14 @@ def map(self, mapper, na_action=None): If mapper is a function, operate on the magnitudes of the array and """ - from pandas.core.algorithms import map_array + if pandas_version_info < (2, 1): + ser = pd.Series(self._to_array_of_quantity()) + arr = ser.map(mapper, na_action).values + else: + from pandas.core.algorithms import map_array + + arr = map_array(self, mapper, na_action) - arr = map_array(self, mapper, na_action) master_scalar = None try: master_scalar = next(i for i in arr if hasattr(i, "units")) From f5947ea78bdb9942816c15c0be9d427ab12789ff Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 13 Aug 2023 21:42:12 -0400 Subject: [PATCH 13/13] Constrain test_setitem_2d_values to _base_numeric_dtypes Until Pandas resolves https://github.com/pandas-dev/pandas/issues/54445 we cannot feed complex128 types to the test_setitem_2d_values test case. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/testsuite/test_pandas_extensiontests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 126bc32e..1427baa8 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -647,6 +647,7 @@ def test_setitem_scalar_key_sequence_raise(self, data): # This can be removed when https://github.com/pandas-dev/pandas/pull/54441 is accepted base.BaseSetitemTests.test_setitem_scalar_key_sequence_raise(self, data) + @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) def test_setitem_2d_values(self, data): # GH50085 original = data.copy()