Merge branch 'main' into bug-update-60228

pandas-dev · Nov 22, 2024 · d4ea527 · d4ea527
2 parents 8db4edc + e62fcb1
commit d4ea527
Show file tree

Hide file tree

Showing 13 changed files with 146 additions and 15 deletions.
diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml
@@ -11,7 +11,7 @@ permissions:
 jobs:
   issue_assign:
     runs-on: ubuntu-22.04
-    if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
+    if: (!github.event.issue.pull_request) && trim(github.event.comment.body) == 'take'
     concurrency:
       group: ${{ github.actor }}-issue-assign
     steps:

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -109,7 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.resample.Resampler.std SA01" \
         -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
         -i "pandas.core.resample.Resampler.var SA01" \
-        -i "pandas.errors.ChainedAssignmentError SA01" \
         -i "pandas.errors.DuplicateLabelError SA01" \
         -i "pandas.errors.IntCastingNaNError SA01" \
         -i "pandas.errors.InvalidIndexError SA01" \

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -54,6 +54,7 @@ Other enhancements
 - :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
+- :func:`read_parquet` accepts ``to_pandas_kwargs``, which are forwarded to :meth:`pyarrow.Table.to_pandas`; this enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as Python dictionaries (:issue:`56842`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
@@ -763,7 +764,7 @@ ExtensionArray
 
 Styler
 ^^^^^^
--
+- Fixed bug in :meth:`Styler.to_latex` when styling column headers in combination with a hidden index or hidden index levels (:issue:`52218`)
 
 Other
 ^^^^^

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4742,7 +4742,8 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
         3  4   4    7   8  0
         4  5   2    6   7  3
 
-        For columns with spaces in their name, you can use backtick quoting.
+        For columns with spaces or other disallowed characters in their name, you can
+        use backtick quoting.
 
         >>> df.eval("B * `C&C`")
         0    100

diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
@@ -487,6 +487,11 @@ class ChainedAssignmentError(Warning):
     For more information on Copy-on-Write,
     see :ref:`the user guide<copy_on_write>`.
 
+    See Also
+    --------
+    options.mode.copy_on_write : Global setting for enabling or disabling
+        Copy-on-Write behavior.
+
     Examples
     --------
     >>> pd.options.mode.copy_on_write = True

diff --git a/pandas/io/_util.py b/pandas/io/_util.py
@@ -60,9 +60,12 @@ def arrow_table_to_pandas(
     table: pyarrow.Table,
     dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
     null_to_int64: bool = False,
+    to_pandas_kwargs: dict | None = None,
 ) -> pd.DataFrame:
     pa = import_optional_dependency("pyarrow")
 
+    to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
+
     types_mapper: type[pd.ArrowDtype] | None | Callable
     if dtype_backend == "numpy_nullable":
         mapping = _arrow_dtype_mapping()
@@ -80,5 +83,5 @@ def arrow_table_to_pandas(
     else:
         raise NotImplementedError
 
-    df = table.to_pandas(types_mapper=types_mapper)
+    df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
     return df
diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
@@ -868,7 +868,8 @@ def _translate_latex(self, d: dict, clines: str | None) -> None:
             or multirow sparsification (so that \multirow and \multicol work correctly).
         """
         index_levels = self.index.nlevels
-        visible_index_level_n = index_levels - sum(self.hide_index_)
+        # GH 52218
+        visible_index_level_n = max(1, index_levels - sum(self.hide_index_))
         d["head"] = [
             [
                 {**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]}

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
@@ -242,6 +242,7 @@ def read(
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True
@@ -266,7 +267,11 @@ def read(
                     "make_block is deprecated",
                     DeprecationWarning,
                 )
-                result = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
+                result = arrow_table_to_pandas(
+                    pa_table,
+                    dtype_backend=dtype_backend,
+                    to_pandas_kwargs=to_pandas_kwargs,
+                )
 
             if pa_table.schema.metadata:
                 if b"PANDAS_ATTRS" in pa_table.schema.metadata:
@@ -347,6 +352,7 @@ def read(
         filters=None,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict | None = None,
         **kwargs,
     ) -> DataFrame:
         parquet_kwargs: dict[str, Any] = {}
@@ -362,6 +368,10 @@ def read(
             raise NotImplementedError(
                 "filesystem is not implemented for the fastparquet engine."
             )
+        if to_pandas_kwargs is not None:
+            raise NotImplementedError(
+                "to_pandas_kwargs is not implemented for the fastparquet engine."
+            )
         path = stringify_path(path)
         handles = None
         if is_fsspec_url(path):
@@ -452,7 +462,7 @@ def to_parquet(
         .. versionadded:: 2.1.0
 
     kwargs
-        Additional keyword arguments passed to the engine
+        Additional keyword arguments passed to the engine.
 
     Returns
     -------
@@ -491,6 +501,7 @@ def read_parquet(
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
     filters: list[tuple] | list[list[tuple]] | None = None,
+    to_pandas_kwargs: dict | None = None,
     **kwargs,
 ) -> DataFrame:
     """
@@ -564,6 +575,12 @@ def read_parquet(
 
         .. versionadded:: 2.1.0
 
+    to_pandas_kwargs : dict | None, default None
+        Keyword arguments to pass through to :func:`pyarrow.Table.to_pandas`
+        when ``engine="pyarrow"``.
+
+        .. versionadded:: 3.0.0
+
     **kwargs
         Any additional kwargs are passed to the engine.
 
@@ -636,5 +653,6 @@ def read_parquet(
         storage_options=storage_options,
         dtype_backend=dtype_backend,
         filesystem=filesystem,
+        to_pandas_kwargs=to_pandas_kwargs,
         **kwargs,
     )
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
@@ -1405,3 +1405,88 @@ def test_to_latex_multiindex_multirow(self):
             """
         )
         assert result == expected
+
+    def test_to_latex_multiindex_format_single_index_hidden(self):
+        # GH 52218
+        df = DataFrame(
+            {
+                "A": [1, 2],
+                "B": [4, 5],
+            }
+        )
+        result = (
+            df.style.hide(axis="index")
+            .map_index(lambda v: "textbf:--rwrap;", axis="columns")
+            .to_latex()
+        )
+        expected = _dedent(r"""
+            \begin{tabular}{rr}
+            \textbf{A} & \textbf{B} \\
+            1 & 4 \\
+            2 & 5 \\
+            \end{tabular}
+            """)
+        assert result == expected
+
+    def test_to_latex_multiindex_format_triple_index_two_hidden(self):
+        # GH 52218
+        arrays = [
+            ["A", "A", "B", "B"],
+            ["one", "two", "one", "two"],
+            ["x", "x", "y", "y"],
+        ]
+        index = pd.MultiIndex.from_arrays(
+            arrays, names=["Level 0", "Level 1", "Level 2"]
+        )
+        df = DataFrame(
+            [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
+            index=index,
+            columns=["C1", "C2", "C3"],
+        )
+        result = (
+            df.style.hide(axis="index", level=[0, 1])
+            .map_index(lambda v: "textbf:--rwrap;", axis="columns")
+            .to_latex()
+        )
+        expected = _dedent(r"""
+            \begin{tabular}{lrrr}
+             & \textbf{C1} & \textbf{C2} & \textbf{C3} \\
+            Level 2 &  &  &  \\
+            x & 0 & 0 & 0 \\
+            x & 0 & 0 & 0 \\
+            y & 0 & 0 & 0 \\
+            y & 0 & 0 & 0 \\
+            \end{tabular}
+            """)
+        assert result == expected
+
+    def test_to_latex_multiindex_format_triple_index_all_hidden(self):
+        # GH 52218
+        arrays = [
+            ["A", "A", "B", "B"],
+            ["one", "two", "one", "two"],
+            ["x", "x", "y", "y"],
+        ]
+        index = pd.MultiIndex.from_arrays(
+            arrays, names=["Level 0", "Level 1", "Level 2"]
+        )
+        df = DataFrame(
+            [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
+            index=index,
+            columns=["C1", "C2", "C3"],
+        )
+        result = (
+            df.style.hide(axis="index", level=[0, 1, 2])
+            .map_index(lambda v: "textbf:--rwrap;", axis="columns")
+            .to_latex()
+        )
+        expected = _dedent(r"""
+            \begin{tabular}{rrr}
+            \textbf{C1} & \textbf{C2} & \textbf{C3} \\
+            0 & 0 & 0 \\
+            0 & 0 & 0 \\
+            0 & 0 & 0 \\
+            0 & 0 & 0 \\
+            \end{tabular}
+            """)
+        assert result == expected
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -1172,6 +1172,20 @@ def test_non_nanosecond_timestamps(self, temp_file):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_maps_as_pydicts(self, pa):
+        pyarrow = pytest.importorskip("pyarrow", "13.0.0")
+
+        schema = pyarrow.schema(
+            [("foo", pyarrow.map_(pyarrow.string(), pyarrow.int64()))]
+        )
+        df = pd.DataFrame([{"foo": {"A": 1}}, {"foo": {"B": 2}}])
+        check_round_trip(
+            df,
+            pa,
+            write_kwargs={"schema": schema},
+            read_kwargs={"to_pandas_kwargs": {"maps_as_pydicts": "strict"}},
+        )
+
 
 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full, request):

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -2376,17 +2376,21 @@ def test_pivot_table_with_margins_and_numeric_columns(self):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_pivot_ea_dtype_dropna(self, dropna):
+    @pytest.mark.parametrize(
+        "dtype,expected_dtype", [("Int64", "Float64"), ("int64", "float64")]
+    )
+    def test_pivot_ea_dtype_dropna(self, dropna, dtype, expected_dtype):
         # GH#47477
-        df = DataFrame({"x": "a", "y": "b", "age": Series([20, 40], dtype="Int64")})
+        # GH#47971
+        df = DataFrame({"x": "a", "y": "b", "age": Series([20, 40], dtype=dtype)})
         result = df.pivot_table(
             index="x", columns="y", values="age", aggfunc="mean", dropna=dropna
         )
         expected = DataFrame(
             [[30]],
             index=Index(["a"], name="x"),
             columns=Index(["b"], name="y"),
-            dtype="Float64",
+            dtype=expected_dtype,
         )
         tm.assert_frame_equal(result, expected)
 

diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html
@@ -73,12 +73,12 @@
                     </a>
                 </li>
                 <li class="list-inline-item">
-                    <a href="https://x.com/pandas_dev/">
+                    <a href="https://x.com/pandas_dev">
                         <i class="fab bi bi-twitter-x"></i>
                     </a>
                 </li>
                 <li class="list-inline-item">
-                    <a href="https://github.com/pandas-dev/pandas/">
+                    <a href="https://github.com/pandas-dev/pandas">
                         <i class="fab bi bi-github"></i>
                     </a>
                 </li>

diff --git a/web/pandas/index.html b/web/pandas/index.html
@@ -83,8 +83,8 @@ <h4>Follow us</h4>
                             </a>
                         </li>
                         <li class="list-inline-item">
-                            <a href="https://twitter.com/pandas_dev/">
-                                <i class="follow-us-button fab bi bi-twitter"></i>
+                            <a href="https://x.com/pandas_dev">
+                                <i class="follow-us-button fab bi bi-twitter-x"></i>
                             </a>
                         </li>
                     </ul>