pandas-dev · mroeschke · Dec 18, 2023 · Dec 18, 2023 · Dec 18, 2023 · Dec 19, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -18,13 +18,8 @@ ci:
     # manual stage hooks
     skip: [pylint, pyright, mypy]
 repos:
--   repo: https://github.com/hauntsaninja/black-pre-commit-mirror
-    # black compiled with mypyc
-    rev: 23.11.0
-    hooks:
-      - id: black
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.6
+    rev: v0.1.8
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -35,6 +30,7 @@ repos:
         files: ^pandas
         exclude: ^pandas/tests
         args: [--select, "ANN001,ANN2", --fix-only, --exit-non-zero-on-fix]
+    -   id: ruff-format
 -   repo: https://github.com/jendrikseipp/vulture
     rev: 'v2.10'
     hooks:

diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
@@ -84,9 +84,7 @@ def time_loc_slice(self, index, index_structure):
 
 class NumericMaskedIndexing:
     monotonic_list = list(range(10**6))
-    non_monotonic_list = (
-        list(range(50)) + [54, 53, 52, 51] + list(range(55, 10**6 - 1))
-    )
+    non_monotonic_list = list(range(50)) + [54, 53, 52, 51] + list(range(55, 10**6 - 1))
 
     params = [
         ("Int64", "UInt64", "Float64"),

diff --git a/asv_bench/benchmarks/io/style.py b/asv_bench/benchmarks/io/style.py
@@ -76,7 +76,8 @@ def _style_format(self):
         # apply a formatting function
         # subset is flexible but hinders vectorised solutions
         self.st = self.df.style.format(
-            "{:,.3f}", subset=IndexSlice["row_1":f"row_{ir}", "float_1":f"float_{ic}"]
+            "{:,.3f}",
+            subset=IndexSlice["row_1" : f"row_{ir}", "float_1" : f"float_{ic}"],
         )
 
     def _style_apply_format_hide(self):

@@ -63,108 +63,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX02,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Partially validate docstrings (EX03)' ;  echo $MSG
-    $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX03 --ignore_functions \
-        pandas.Series.dt.day_name \
-        pandas.Series.str.len \
-        pandas.Series.cat.set_categories \
-        pandas.Series.plot.bar \
-        pandas.Series.plot.hist \
-        pandas.Series.plot.line \
-        pandas.Series.to_sql \
-        pandas.Series.to_latex \
-        pandas.errors.CategoricalConversionWarning \
-        pandas.errors.ChainedAssignmentError \
-        pandas.errors.ClosedFileError \
-        pandas.errors.DatabaseError \
-        pandas.errors.IndexingError \
-        pandas.errors.InvalidColumnName \
-        pandas.errors.NumExprClobberingError \
-        pandas.errors.PossibleDataLossError \
-        pandas.errors.PossiblePrecisionLoss \
-        pandas.errors.SettingWithCopyError \
-        pandas.errors.SettingWithCopyWarning \
-        pandas.errors.SpecificationError \
-        pandas.errors.UndefinedVariableError \
-        pandas.errors.ValueLabelTypeMismatch \
-        pandas.Timestamp.ceil \
-        pandas.Timestamp.floor \
-        pandas.Timestamp.round \
-        pandas.read_pickle \
-        pandas.ExcelWriter \
-        pandas.read_json \
-        pandas.io.json.build_table_schema \
-        pandas.DataFrame.to_latex \
-        pandas.io.formats.style.Styler.to_latex \
-        pandas.read_parquet \
-        pandas.DataFrame.to_sql \
-        pandas.read_stata \
-        pandas.core.resample.Resampler.pipe \
-        pandas.core.resample.Resampler.fillna \
-        pandas.core.resample.Resampler.interpolate \
-        pandas.plotting.scatter_matrix \
-        pandas.pivot \
-        pandas.merge_asof \
-        pandas.wide_to_long \
-        pandas.Index.rename \
-        pandas.Index.droplevel \
-        pandas.Index.isin \
-        pandas.CategoricalIndex.set_categories \
-        pandas.MultiIndex.names \
-        pandas.MultiIndex.droplevel \
-        pandas.IndexSlice \
-        pandas.DatetimeIndex.month_name \
-        pandas.DatetimeIndex.day_name \
-        pandas.core.window.rolling.Rolling.corr \
-        pandas.Grouper \
-        pandas.core.groupby.SeriesGroupBy.apply \
-        pandas.core.groupby.DataFrameGroupBy.apply \
-        pandas.core.groupby.SeriesGroupBy.transform \
-        pandas.core.groupby.SeriesGroupBy.pipe \
-        pandas.core.groupby.DataFrameGroupBy.pipe \
-        pandas.core.groupby.DataFrameGroupBy.describe \
-        pandas.core.groupby.DataFrameGroupBy.idxmax \
-        pandas.core.groupby.DataFrameGroupBy.idxmin \
-        pandas.core.groupby.DataFrameGroupBy.value_counts \
-        pandas.core.groupby.SeriesGroupBy.describe \
-        pandas.core.groupby.DataFrameGroupBy.boxplot \
-        pandas.core.groupby.DataFrameGroupBy.hist \
-        pandas.io.formats.style.Styler.map \
-        pandas.io.formats.style.Styler.apply_index \
-        pandas.io.formats.style.Styler.map_index \
-        pandas.io.formats.style.Styler.format \
-        pandas.io.formats.style.Styler.format_index \
-        pandas.io.formats.style.Styler.relabel_index \
-        pandas.io.formats.style.Styler.hide \
-        pandas.io.formats.style.Styler.set_td_classes \
-        pandas.io.formats.style.Styler.set_tooltips \
-        pandas.io.formats.style.Styler.set_uuid \
-        pandas.io.formats.style.Styler.pipe \
-        pandas.io.formats.style.Styler.highlight_between \
-        pandas.io.formats.style.Styler.highlight_quantile \
-        pandas.io.formats.style.Styler.background_gradient \
-        pandas.io.formats.style.Styler.text_gradient \
-        pandas.DataFrame.values \
-        pandas.DataFrame.loc \
-        pandas.DataFrame.iloc \
-        pandas.DataFrame.groupby \
-        pandas.DataFrame.describe \
-        pandas.DataFrame.skew \
-        pandas.DataFrame.var \
-        pandas.DataFrame.idxmax \
-        pandas.DataFrame.idxmin \
-        pandas.DataFrame.last \
-        pandas.DataFrame.pivot \
-        pandas.DataFrame.sort_values \
-        pandas.DataFrame.tz_convert \
-        pandas.DataFrame.tz_localize \
-        pandas.DataFrame.plot.bar \
-        pandas.DataFrame.plot.hexbin \
-        pandas.DataFrame.plot.hist \
-        pandas.DataFrame.plot.line \
-        pandas.DataFrame.hist \
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
 fi
 
 ### DOCUMENTATION NOTEBOOKS ###

diff --git a/doc/make.py b/doc/make.py
@@ -113,7 +113,7 @@ def _run_os(*args) -> None:
 
         Examples
         --------
-        >>> DocBuilder()._run_os('python', '--version')
+        >>> DocBuilder()._run_os("python", "--version")
         """
         subprocess.check_call(args, stdout=sys.stdout, stderr=sys.stderr)
 
@@ -129,7 +129,7 @@ def _sphinx_build(self, kind: str):
 
         Examples
         --------
-        >>> DocBuilder(num_jobs=4)._sphinx_build('html')
+        >>> DocBuilder(num_jobs=4)._sphinx_build("html")
         """
         if kind not in ("html", "latex", "linkcheck"):
             raise ValueError(f"kind must be html, latex or linkcheck, not {kind}")

@@ -38,7 +38,7 @@ Pre-commit
 ----------
 
 Additionally, :ref:`Continuous Integration <contributing.ci>` will run code formatting checks
-like ``black``, ``ruff``,
+like ``ruff``,
 ``isort``, and ``clang-format`` and more using `pre-commit hooks <https://pre-commit.com/>`_.
 Any warnings from these checks will cause the :ref:`Continuous Integration <contributing.ci>` to fail; therefore,
 it is helpful to run the check yourself before submitting code. This

diff --git a/environment.yml b/environment.yml
@@ -75,7 +75,6 @@ dependencies:
   - cxx-compiler
 
   # code checks
-  - flake8=6.1.0  # run in subprocess over docstring examples
   - mypy=1.8.0  # pre-commit uses locally installed mypy
   - tokenize-rt  # scripts/check_for_inconsistent_pandas_namespace.py
   - pre-commit>=3.6.0

diff --git a/pandas/_config/config.py b/pandas/_config/config.py
@@ -464,7 +464,7 @@ class option_context(ContextDecorator):
     Examples
     --------
     >>> from pandas import option_context
-    >>> with option_context('display.max_rows', 10, 'display.max_columns', 5):
+    >>> with option_context("display.max_rows", 10, "display.max_columns", 5):
     ...     pass
     """
 

@@ -179,7 +179,8 @@ def indices_fast(
     sorted_labels: list[npt.NDArray[np.int64]],
 ) -> dict[Hashable, npt.NDArray[np.intp]]: ...
 def generate_slices(
-    labels: np.ndarray, ngroups: int  # const intp_t[:]
+    labels: np.ndarray,  # const intp_t[:]
+    ngroups: int,
 ) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
 def count_level_2d(
     mask: np.ndarray,  # ndarray[uint8_t, ndim=2, cast=True],
@@ -209,5 +210,6 @@ def get_reverse_indexer(
 def is_bool_list(obj: list) -> bool: ...
 def dtypes_all_equal(types: list[DtypeObj]) -> bool: ...
 def is_range_indexer(
-    left: np.ndarray, n: int  # np.ndarray[np.int64, ndim=1]
+    left: np.ndarray,  # np.ndarray[np.int64, ndim=1]
+    n: int,
 ) -> bool: ...
diff --git a/pandas/_testing/_hypothesis.py b/pandas/_testing/_hypothesis.py
@@ -54,12 +54,8 @@
     DATETIME_NO_TZ = st.datetimes()
 
 DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
-    min_value=pd.Timestamp(
-        1900, 1, 1
-    ).to_pydatetime(),  # pyright: ignore[reportGeneralTypeIssues]
-    max_value=pd.Timestamp(
-        1900, 1, 1
-    ).to_pydatetime(),  # pyright: ignore[reportGeneralTypeIssues]
+    min_value=pd.Timestamp(1900, 1, 1).to_pydatetime(),  # pyright: ignore[reportGeneralTypeIssues]
+    max_value=pd.Timestamp(1900, 1, 1).to_pydatetime(),  # pyright: ignore[reportGeneralTypeIssues]
     timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
 )
 

diff --git a/pandas/_testing/_warnings.py b/pandas/_testing/_warnings.py
@@ -75,10 +75,8 @@ class for all warnings. To raise multiple types of exceptions,
     >>> import warnings
     >>> with assert_produces_warning():
     ...     warnings.warn(UserWarning())
-    ...
     >>> with assert_produces_warning(False):
     ...     warnings.warn(RuntimeWarning())
-    ...
     Traceback (most recent call last):
         ...
     AssertionError: Caused unexpected warning(s): ['RuntimeWarning'].

diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
@@ -1129,8 +1129,8 @@ def assert_frame_equal(
     but with columns of differing dtypes.
 
     >>> from pandas.testing import assert_frame_equal
-    >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
-    >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
+    >>> df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+    >>> df2 = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
 
     df1 equals itself.
 

diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py
@@ -70,9 +70,8 @@ def set_timezone(tz: str) -> Generator[None, None, None]:
     >>> tzlocal().tzname(datetime(2021, 1, 1))  # doctest: +SKIP
     'IST'
 
-    >>> with set_timezone('US/Eastern'):
+    >>> with set_timezone("US/Eastern"):
     ...     tzlocal().tzname(datetime(2021, 1, 1))
-    ...
     'EST'
     """
     import time

diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
@@ -265,7 +265,7 @@ def __init__(self, pandas_object):  # noqa: E999
     For consistency with pandas methods, you should raise an ``AttributeError``
     if the data passed to your accessor has an incorrect dtype.
 
-    >>> pd.Series(['a', 'b']).dt
+    >>> pd.Series(["a", "b"]).dt
     Traceback (most recent call last):
     ...
     AttributeError: Can only use .dt accessor with datetimelike values
@@ -274,8 +274,6 @@ def __init__(self, pandas_object):  # noqa: E999
     --------
     In your library code::
 
-        import pandas as pd
-
         @pd.api.extensions.register_dataframe_accessor("geo")
         class GeoAccessor:
             def __init__(self, pandas_obj):

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -387,15 +387,21 @@ def unique(values):
 
     >>> pd.unique(
     ...     pd.Series(
-    ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
+    ...         pd.Categorical(
+    ...             list("baabc"),
+    ...             categories=list("abc"),
+    ...             ordered=True,
+    ...         )
     ...     )
     ... )
     ['b', 'a', 'c']
     Categories (3, object): ['a' < 'b' < 'c']
 
     An array of tuples
 
-    >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)
+    >>> pd.unique(
+    ...     pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values
+    ... )
     array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
     """
     return unique_with_mask(values)
@@ -1209,8 +1215,12 @@ def take(
     >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True)
     array([10., 10., nan])
 
-    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True,
-    ...      fill_value=-10)
+    >>> pd.api.extensions.take(
+    ...     np.array([10, 20, 30]),
+    ...     [0, 0, -1],
+    ...     allow_fill=True,
+    ...     fill_value=-10,
+    ... )
     array([ 10,  10, -10])
     """
     if not isinstance(arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries)):

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -1010,7 +1010,8 @@ def wrapper(*args, **kwargs):
             # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
             # list[Callable[..., Any] | str]]"; expected "Hashable"
             nb_looper = generate_apply_looper(
-                self.func, **engine_kwargs  # type: ignore[arg-type]
+                self.func,  # type: ignore[arg-type]
+                **engine_kwargs,
             )
             result = nb_looper(self.values, self.axis)
             # If we made the result 2-D, squeeze it back to 1-D
@@ -1797,14 +1798,18 @@ def normalize_keyword_aggregation(
 
 
 def _make_unique_kwarg_list(
-    seq: Sequence[tuple[Any, Any]]
+    seq: Sequence[tuple[Any, Any]],
 ) -> Sequence[tuple[Any, Any]]:
     """
     Uniquify aggfunc name of the pairs in the order list
 
     Examples:
     --------
-    >>> kwarg_list = [('a', '<lambda>'), ('a', '<lambda>'), ('b', '<lambda>')]
+    >>> kwarg_list = [
+    ...     ("a", "<lambda>"),
+    ...     ("a", "<lambda>"),
+    ...     ("b", "<lambda>"),
+    ... ]
     >>> _make_unique_kwarg_list(kwarg_list)
     [('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')]
     """
@@ -1835,8 +1840,12 @@ def relabel_result(
     --------
     >>> from pandas.core.apply import relabel_result
     >>> result = pd.DataFrame(
-    ...     {"A": [np.nan, 2, np.nan], "C": [6, np.nan, np.nan], "B": [np.nan, 4, 2.5]},
-    ...     index=["max", "mean", "min"]
+    ...     {
+    ...         "A": [np.nan, 2, np.nan],
+    ...         "C": [6, np.nan, np.nan],
+    ...         "B": [np.nan, 4, 2.5],
+    ...     },
+    ...     index=["max", "mean", "min"],
     ... )
     >>> funcs = {"A": ["max"], "C": ["max"], "B": ["mean", "min"]}
     >>> columns = ("foo", "aab", "bar", "dat")
@@ -1975,7 +1984,7 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any:
 
     Examples
     --------
-    >>> maybe_mangle_lambdas('sum')
+    >>> maybe_mangle_lambdas("sum")
     'sum'
     >>> maybe_mangle_lambdas([lambda: 1, lambda: 2])  # doctest: +SKIP
     [<function __main__.<lambda_0>,
@@ -2020,7 +2029,7 @@ def validate_func_kwargs(
 
     Examples
     --------
-    >>> validate_func_kwargs({'one': 'min', 'two': 'max'})
+    >>> validate_func_kwargs({"one": "min", "two": "max"})
     (['one', 'two'], ['min', 'max'])
     """
     tuple_given_message = "func is expected but received {} in **kwargs."