diff --git a/CHANGES b/CHANGES index e978290..0f65aad 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,7 @@ pint-pandas Changelog ---------------- - Support for uncertainties as magnitudes in PintArrays. #140 +- Fix dequantify duplicate column failure #202 - Fix astype issue #196 0.5 (2023-09-07) diff --git a/docs/getting/index.rst b/docs/getting/index.rst index da8a44a..6e000fc 100644 --- a/docs/getting/index.rst +++ b/docs/getting/index.rst @@ -7,7 +7,7 @@ The getting started guide aims to get you using pint-pandas productively as quic What is Pint-pandas? -------------------- -The Pandas package provides powerful DataFrame and Series abstractions for dealing with numerical, temporal, categorical, string-based, and even user-defined data (using its ExtensionArray feature). The Pint package provides a rich and extensible vocabulary of units for constructing Quantities and an equally rich and extensible range of unit conversions to make it easy to perform unit-safe calculations using Quantities. Pint-pandas provides PintArray, aPandas ExtensionArray that efficiently implements Pandas DataFrame and Series functionality as unit-aware operations where appropriate. +The Pandas package provides powerful DataFrame and Series abstractions for dealing with numerical, temporal, categorical, string-based, and even user-defined data (using its ExtensionArray feature). The Pint package provides a rich and extensible vocabulary of units for constructing Quantities and an equally rich and extensible range of unit conversions to make it easy to perform unit-safe calculations using Quantities. Pint-pandas provides PintArray, a Pandas ExtensionArray that efficiently implements Pandas DataFrame and Series functionality as unit-aware operations where appropriate. Those who have used Pint know well that good units discipline often catches not only simple mistakes, but sometimes more fundamental errors as well. Pint-pandas can reveal similar errors when it comes to slicing and dicing Pandas data. diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index c5b927f..138bb7f 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -1,7 +1,6 @@ import copy import re import warnings -from collections import OrderedDict from importlib.metadata import version import numpy as np @@ -1082,23 +1081,35 @@ def formatter_func(dtype): df_columns = df.columns.to_frame() df_columns["units"] = [ - formatter_func(df[col].dtype) - if isinstance(df[col].dtype, PintType) + formatter_func(df.dtypes.iloc[i]) + if isinstance(df.dtypes.iloc[i], PintType) else NO_UNIT - for col in df.columns + for i, col in enumerate(df.columns) ] - data_for_df = OrderedDict() + data_for_df = [] for i, col in enumerate(df.columns): - if isinstance(df[col].dtype, PintType): - data_for_df[tuple(df_columns.iloc[i])] = df[col].values.data + if isinstance(df.dtypes.iloc[i], PintType): + data_for_df.append( + pd.Series( + data=df.iloc[:, i].values.data, + name=tuple(df_columns.iloc[i]), + index=df.index, + copy=False, + ) + ) else: - data_for_df[tuple(df_columns.iloc[i])] = df[col].values - - df_new = DataFrame(data_for_df, columns=data_for_df.keys()) + data_for_df.append( + pd.Series( + data=df.iloc[:, i].values, + name=tuple(df_columns.iloc[i]), + index=df.index, + copy=False, + ) + ) + df_new = pd.concat(data_for_df, axis=1, copy=False) df_new.columns.names = df.columns.names + ["unit"] - df_new.index = df.index return df_new diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 0e606d2..1bd9229 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -245,3 +245,38 @@ def test_issue_194(dtype): s2 = s1.astype(dtype) tm.assert_series_equal(s0, s2) + + +class TestIssue202(BaseExtensionTests): + def test_dequantify(self): + df = pd.DataFrame() + df["test"] = pd.Series([1, 2, 3], dtype="pint[kN]") + df.insert(0, "test", df["test"], allow_duplicates=True) + + expected = pd.DataFrame.from_dict( + data={ + "index": [0, 1, 2], + "columns": [("test", "kilonewton")], + "data": [[1], [2], [3]], + "index_names": [None], + "column_names": [None, "unit"], + }, + orient="tight", + dtype="Int64", + ) + result = df.iloc[:, 1:].pint.dequantify() + tm.assert_frame_equal(expected, result) + + expected = pd.DataFrame.from_dict( + data={ + "index": [0, 1, 2], + "columns": [("test", "kilonewton"), ("test", "kilonewton")], + "data": [[1, 1], [2, 2], [3, 3]], + "index_names": [None], + "column_names": [None, "unit"], + }, + orient="tight", + dtype="Int64", + ) + result = df.pint.dequantify() + tm.assert_frame_equal(expected, result)