diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 572d26716d373..afef4d327c703 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -1159,8 +1159,10 @@ def _reconstruct_columns_from_metadata(columns, column_indexes): elif pandas_dtype == "decimal": level = _pandas_api.pd.Index([decimal.Decimal(i) for i in level]) elif ( - level.dtype == "str" and "mixed" in pandas_dtype and numpy_dtype == "object" + level.dtype == "str" and numpy_dtype == "object" + and ("mixed" in pandas_dtype or pandas_dtype in ["unicode", "string"]) ): + # in this case don't convert to object dtype, but keep using the str dtype new_levels.append(level) continue elif level.dtype != dtype: diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 388b941114278..e213c2f982110 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -4553,7 +4553,7 @@ def test_metadata_compat_range_index_pre_0_12(): e1 = pd.DataFrame( {'a': a_values}, index=pd.RangeIndex(0, 8, step=2, name='qux'), - columns=pd.Index(['a'], dtype=object) + columns=pd.Index(['a']) ) t1 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['a', 'qux']) @@ -4584,7 +4584,7 @@ def test_metadata_compat_range_index_pre_0_12(): e2 = pd.DataFrame( {'qux': a_values}, index=pd.RangeIndex(0, 8, step=2, name='qux'), - columns=pd.Index(['qux'], dtype=object) + columns=pd.Index(['qux']) ) t2 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['qux', gen_name_0]) @@ -4615,7 +4615,7 @@ def test_metadata_compat_range_index_pre_0_12(): e3 = pd.DataFrame( {'a': a_values}, index=pd.RangeIndex(0, 8, step=2, name=None), - columns=pd.Index(['a'], dtype=object) + columns=pd.Index(['a']) ) t3 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['a', gen_name_0]) @@ -4646,7 +4646,7 @@ def test_metadata_compat_range_index_pre_0_12(): e4 = pd.DataFrame( {'a': a_values}, index=[pd.RangeIndex(0, 8, step=2, name='qux'), b_values], - columns=pd.Index(['a'], dtype=object) + columns=pd.Index(['a']) ) t4 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow], names=['a', 'qux', gen_name_1]) @@ -4682,7 +4682,7 @@ def test_metadata_compat_range_index_pre_0_12(): e5 = pd.DataFrame( {'a': a_values}, index=[pd.RangeIndex(0, 8, step=2, name=None), b_values], - columns=pd.Index(['a'], dtype=object) + columns=pd.Index(['a']) ) t5 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow], names=['a', gen_name_0, gen_name_1])