Commit e64b79b

Merge branch 'main' into ref-dtype-explicit

jbrockmendel committed Sep 20, 2024
2 parents 71c147c + 2419343
Showing 50 changed files with 592 additions and 308 deletions.
14 changes: 12 additions & 2 deletions .circleci/config.yml
@@ -92,7 +92,13 @@ jobs:
no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
command: |
pip3 install cibuildwheel==2.20.0
cibuildwheel --output-dir wheelhouse
if [[ $CIBW_BUILD == cp313t* ]]; then
# TODO: temporarily run 3.13 free threaded builds without build isolation
# since we need pre-release cython
CIBW_BUILD_FRONTEND="pip; args: --no-build-isolation" cibuildwheel --output-dir wheelhouse
else
cibuildwheel --output-dir wheelhouse
fi
environment:
CIBW_BUILD: << parameters.cibw-build >>
@@ -141,6 +147,10 @@ workflows:
cibw-build: ["cp310-manylinux_aarch64",
"cp311-manylinux_aarch64",
"cp312-manylinux_aarch64",
"cp313-manylinux_aarch64",
"cp313t-manylinux_aarch64",
"cp310-musllinux_aarch64",
"cp311-musllinux_aarch64",
"cp312-musllinux_aarch64",]
"cp312-musllinux_aarch64",
"cp313-musllinux_aarch64",
"cp313t-musllinux_aarch64"]
6 changes: 1 addition & 5 deletions .github/workflows/wheels.yml
@@ -102,9 +102,7 @@ jobs:
python: [["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"], ["cp313", "3.13"], ["cp313t", "3.13"]]
include:
# TODO: Remove this plus installing build deps in cibw_before_build.sh
# and test deps in cibw_before_test.sh after pandas can be built with a released NumPy/Cython
- python: ["cp313", "3.13"]
cibw_build_frontend: 'pip; args: --no-build-isolation'
# after pandas can be built with a released NumPy/Cython
- python: ["cp313t", "3.13"]
cibw_build_frontend: 'pip; args: --no-build-isolation'
# Build Pyodide wheels and upload them to Anaconda.org
@@ -187,11 +185,9 @@ jobs:
- name: Test Windows Wheels
if: ${{ matrix.buildplat[1] == 'win_amd64' }}
shell: pwsh
# TODO: Remove NumPy nightly install when there's a 3.13 wheel on PyPI
run: |
$TST_CMD = @"
python -m pip install hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0;
${{ matrix.python[1] == '3.13' && 'python -m pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy;' }}
python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`';
"@
1 change: 0 additions & 1 deletion MANIFEST.in
@@ -65,4 +65,3 @@ graft pandas/_libs/include

# Include cibw script in sdist since it's needed for building wheels
include scripts/cibw_before_build.sh
include scripts/cibw_before_test.sh
15 changes: 0 additions & 15 deletions ci/code_checks.sh
@@ -70,10 +70,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
--format=actions \
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
-i "pandas.NA SA01" \
-i "pandas.Period.freq GL08" \
-i "pandas.Period.ordinal GL08" \
-i "pandas.PeriodDtype.freq SA01" \
-i "pandas.RangeIndex.from_range PR01,SA01" \
-i "pandas.RangeIndex.step SA01" \
-i "pandas.Series.cat.add_categories PR01,PR02" \
@@ -103,12 +101,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.sparse.from_coo PR07,SA01" \
-i "pandas.Series.sparse.npoints SA01" \
-i "pandas.Series.sparse.sp_values SA01" \
-i "pandas.Timedelta.components SA01" \
-i "pandas.Timedelta.max PR02" \
-i "pandas.Timedelta.min PR02" \
-i "pandas.Timedelta.resolution PR02" \
-i "pandas.Timedelta.to_timedelta64 SA01" \
-i "pandas.Timedelta.total_seconds SA01" \
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
-i "pandas.Timestamp.max PR02" \
-i "pandas.Timestamp.min PR02" \
@@ -117,14 +113,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timestamp.tzinfo GL08" \
-i "pandas.Timestamp.year GL08" \
-i "pandas.api.types.is_dict_like PR07,SA01" \
-i "pandas.api.types.is_extension_array_dtype SA01" \
-i "pandas.api.types.is_file_like PR07,SA01" \
-i "pandas.api.types.is_float PR01,SA01" \
-i "pandas.api.types.is_float_dtype SA01" \
-i "pandas.api.types.is_hashable PR01,RT03,SA01" \
-i "pandas.api.types.is_int64_dtype SA01" \
-i "pandas.api.types.is_integer PR01,SA01" \
-i "pandas.api.types.is_integer_dtype SA01" \
-i "pandas.api.types.is_interval_dtype SA01" \
-i "pandas.api.types.is_iterator PR07,SA01" \
-i "pandas.api.types.is_list_like SA01" \
@@ -136,7 +130,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
-i "pandas.arrays.BooleanArray SA01" \
-i "pandas.arrays.DatetimeArray SA01" \
-i "pandas.arrays.FloatingArray SA01" \
-i "pandas.arrays.IntegerArray SA01" \
-i "pandas.arrays.IntervalArray.left SA01" \
-i "pandas.arrays.IntervalArray.length SA01" \
@@ -153,14 +146,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
@@ -169,13 +159,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
-i "pandas.core.resample.Resampler.ffill RT03" \
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
@@ -204,7 +191,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.errors.IntCastingNaNError SA01" \
-i "pandas.errors.InvalidIndexError SA01" \
-i "pandas.errors.InvalidVersion SA01" \
-i "pandas.errors.MergeError SA01" \
-i "pandas.errors.NullFrequencyError SA01" \
-i "pandas.errors.NumExprClobberingError SA01" \
-i "pandas.errors.NumbaUtilError SA01" \
@@ -397,7 +383,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.Week.n GL08" \
-i "pandas.tseries.offsets.Week.normalize GL08" \
-i "pandas.tseries.offsets.Week.weekday GL08" \
-i "pandas.tseries.offsets.WeekOfMonth SA01" \
-i "pandas.tseries.offsets.WeekOfMonth.is_on_offset GL08" \
-i "pandas.tseries.offsets.WeekOfMonth.n GL08" \
-i "pandas.tseries.offsets.WeekOfMonth.normalize GL08" \
4 changes: 3 additions & 1 deletion doc/source/conf.py
@@ -254,7 +254,9 @@
"json_url": "https://pandas.pydata.org/versions.json",
"version_match": switcher_version,
},
"show_version_warning_banner": True,
# This shows a warning for patch releases since the
# patch version doesn't compare as equal (e.g. 2.2.1 != 2.2.0 but it should be)
"show_version_warning_banner": False,
"icon_links": [
{
"name": "Mastodon",
1 change: 1 addition & 0 deletions doc/source/whatsnew/index.rst
@@ -32,6 +32,7 @@ Version 2.2
.. toctree::
:maxdepth: 2

v2.2.3
v2.2.2
v2.2.1
v2.2.0
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.2.rst
@@ -56,4 +56,4 @@ Other
Contributors
~~~~~~~~~~~~

.. contributors:: v2.2.1..v2.2.2|HEAD
.. contributors:: v2.2.1..v2.2.2
45 changes: 45 additions & 0 deletions doc/source/whatsnew/v2.2.3.rst
@@ -0,0 +1,45 @@
.. _whatsnew_223:

What's new in 2.2.3 (September 20, 2024)
----------------------------------------

These are the changes in pandas 2.2.3. See :ref:`release` for a full changelog
including other versions of pandas.

{{ header }}

.. ---------------------------------------------------------------------------
.. _whatsnew_220.py13_compat:

Pandas 2.2.3 is now compatible with Python 3.13
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Pandas 2.2.3 is the first version of pandas that is generally compatible with the upcoming
Python 3.13, and wheels for both the free-threaded and standard builds of Python 3.13 will
be uploaded for this release.

As usual, please report any bugs discovered to our `issue tracker <https://github.com/pandas-dev/pandas/issues/new/choose>`_.

.. ---------------------------------------------------------------------------
.. _whatsnew_223.bug_fixes:

Bug fixes
~~~~~~~~~
- Bug in :func:`eval` where operations on :class:`complex` values, including division ``/``, discarded the imaginary part (:issue:`21374`); see the sketch after this list.
- Minor fixes for numpy 2.1 compatibility. (:issue:`59444`)
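
A minimal sketch of the :func:`eval` fix described above, assuming the default engine
handles complex operands after this change (the variable names are made up for the example):

import pandas as pd

# Hypothetical check of GH 21374: complex division evaluated through pd.eval
# should now match plain Python instead of dropping the imaginary part.
a, b = 1 + 2j, 1 - 1j
result = pd.eval("a / b", local_dict={"a": a, "b": b})
print(result)  # expected (-0.5+1.5j)
print(a / b)   # (-0.5+1.5j), plain Python for comparison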

.. ---------------------------------------------------------------------------
.. _whatsnew_223.other:

Other
~~~~~
- Missing licenses for 3rd party dependencies were added back into the wheels. (:issue:`58632`)

.. ---------------------------------------------------------------------------
.. _whatsnew_223.contributors:

Contributors
~~~~~~~~~~~~

.. contributors:: v2.2.2..v2.2.3|HEAD
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.3.0.rst
@@ -102,6 +102,7 @@ Conversion

Strings
^^^^^^^
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results with ``method="average"`` and raising an error when it would truncate results (:issue:`59768`); see the sketch below.
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
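
A rough sketch of the :meth:`Series.rank` behaviour noted above (assumes the
``string[pyarrow]`` dtype is available; the expected values reflect ties being averaged):

import pandas as pd

# Ranking a pyarrow-backed string Series with method="average": tied values
# should get float ranks (1.5, 1.5) rather than truncated integer results.
ser = pd.Series(["b", "a", "a"], dtype="string[pyarrow]")
print(ser.rank(method="average"))  # expected 3.0, 1.5, 1.5
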
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -627,6 +627,7 @@ I/O
- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
- Bug in :meth:`read_csv` where the order of ``na_values`` caused inconsistent results when ``na_values`` is a list of non-string values. (:issue:`59303`)
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
@@ -692,7 +693,6 @@ Other
^^^^^
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
- Bug in :func:`eval` where operations on :class:`ExtensionArray`, including division ``/``, failed with a ``TypeError``. (:issue:`58748`)
- Bug in :func:`eval` on :class:`complex` including division ``/`` discards imaginary part. (:issue:`21374`)
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
2 changes: 2 additions & 0 deletions pandas/_libs/lib.pyx
@@ -600,6 +600,8 @@ def array_equivalent_object(ndarray left, ndarray right) -> bool:
if not array_equivalent(x, y):
return False

elif PyArray_Check(x) or PyArray_Check(y):
return False
elif (x is C_NA) ^ (y is C_NA):
return False
elif not (
8 changes: 8 additions & 0 deletions pandas/_libs/missing.pyx
@@ -347,6 +347,14 @@ class NAType(C_NAType):
The NA singleton is a missing value indicator defined by pandas. It is
used in certain new extension dtypes (currently the "string" dtype).
See Also
--------
numpy.nan : Floating point representation of Not a Number (NaN) for numerical data.
isna : Detect missing values for an array-like object.
notna : Detect non-missing values for an array-like object.
DataFrame.fillna : Fill missing values in a DataFrame.
Series.fillna : Fill missing values in a Series.
Examples
--------
>>> pd.NA
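
A short sketch of the objects named in the new ``See Also`` entries above (the nullable
``Int64`` Series here is just an assumed example):

import pandas as pd

# pd.NA is the missing-value marker used by the nullable extension dtypes;
# isna()/fillna() detect and replace it.
ser = pd.Series([1, pd.NA, 3], dtype="Int64")
print(ser.isna())     # False, True, False
print(ser.fillna(0))  # 1, 0, 3
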
8 changes: 8 additions & 0 deletions pandas/_libs/tslibs/nattype.pyx
@@ -493,6 +493,14 @@ class NaTType(_NaT):
"""
Total seconds in the duration.
This method calculates the total duration in seconds by combining
the days, seconds, and microseconds of the `Timedelta` object.
See Also
--------
to_timedelta : Convert argument to timedelta.
Timedelta : Represents a duration, the difference between two dates or times.
Examples
--------
>>> td = pd.Timedelta('1min')
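
For context, ``NaT`` mirrors the ``Timedelta`` interface documented above; a one-line
sketch, assuming ``NaT.total_seconds`` continues to return ``nan``:

import pandas as pd

# total_seconds on the missing-value sentinel NaT propagates missingness as nan.
print(pd.NaT.total_seconds())  # nan
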
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/np_datetime.pxd
@@ -89,7 +89,7 @@ cdef int string_to_dts(
int* out_local,
int* out_tzoffset,
bint want_exc,
format: str | None = *,
str format = *,
bint exact = *
) except? -1

2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/np_datetime.pyx
@@ -331,7 +331,7 @@ cdef int string_to_dts(
int* out_local,
int* out_tzoffset,
bint want_exc,
format: str | None=None,
str format=None,
bint exact=True,
) except? -1:
cdef:
11 changes: 11 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
@@ -3582,6 +3582,11 @@ cdef class WeekOfMonth(WeekOfMonthMixin):
"""
Describes monthly dates like "the Tuesday of the 2nd week of each month".
This offset allows for generating or adjusting dates by specifying
a particular week and weekday within a month. The week is zero-indexed,
where 0 corresponds to the first week of the month, and weekday follows
a Monday=0 convention.
Attributes
----------
n : int, default 1
@@ -3602,6 +3607,12 @@ cdef class WeekOfMonth(WeekOfMonthMixin):
- 5 is Saturday
- 6 is Sunday.
See Also
--------
offsets.Week : Describes weekly frequency adjustments.
offsets.MonthEnd : Describes month-end frequency adjustments.
date_range : Generates a range of dates based on a specific frequency.
Examples
--------
>>> ts = pd.Timestamp(2022, 1, 1)
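
A small sketch of the convention spelled out in the docstring above (week is
zero-indexed and weekday uses Monday=0; the dates are an assumed illustration):

import pandas as pd

# week=1, weekday=1 -> "the Tuesday of the 2nd week of each month"
offset = pd.offsets.WeekOfMonth(week=1, weekday=1)
ts = pd.Timestamp(2022, 1, 1)
print(ts + offset)  # expected 2022-01-11, the second Tuesday of January 2022
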
19 changes: 19 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pyx
@@ -1189,6 +1189,14 @@ cdef class _Timedelta(timedelta):
"""
Total seconds in the duration.

This method calculates the total duration in seconds by combining
the days, seconds, and microseconds of the `Timedelta` object.

See Also
--------
to_timedelta : Convert argument to timedelta.
Timedelta : Represents a duration, the difference between two dates or times.

Examples
--------
>>> td = pd.Timedelta('1min')
@@ -1493,6 +1501,17 @@ cdef class _Timedelta(timedelta):
"""
Return a components namedtuple-like.
Each component represents a different time unit, allowing you to access the
breakdown of the total duration in terms of days, hours, minutes, seconds,
milliseconds, microseconds, and nanoseconds.
See Also
--------
Timedelta.total_seconds : Returns the total duration of the Timedelta in
seconds.
to_timedelta : Convert argument to Timedelta.
Timedelta : Represents a duration, the difference between two dates or times.
Examples
--------
>>> td = pd.Timedelta('2 day 4 min 3 us 42 ns')
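
A brief sketch of the two methods whose docstrings are extended above,
``Timedelta.total_seconds`` and ``Timedelta.components``:

import pandas as pd

# total_seconds combines days, seconds and microseconds into one float;
# components breaks the same duration into a namedtuple-like object.
td = pd.Timedelta(days=1, minutes=4, microseconds=3)
print(td.total_seconds())  # 86640.000003
print(td.components)       # Components(days=1, hours=0, minutes=4, seconds=0, ...)
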
9 changes: 8 additions & 1 deletion pandas/conftest.py
@@ -1338,7 +1338,13 @@ def string_storage(request):
pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
("python", np.nan),
]
],
ids=[
"string=string[python]",
"string=string[pyarrow]",
"string=str[pyarrow]",
"string=str[python]",
],
)
def string_dtype_arguments(request):
"""
@@ -1369,6 +1375,7 @@ def dtype_backend(request):

# Alias so we can test with cartesian product of string_storage
string_storage2 = string_storage
string_dtype_arguments2 = string_dtype_arguments


@pytest.fixture(params=tm.BYTES_DTYPES)