Skip to content

Commit

Permalink
Assorted backports for 2.2.x (pandas-dev#59785)
Browse files Browse the repository at this point in the history
* Backport PR pandas-dev#59065: ENH: Fix Python 3.13 test failures & enable CI

* Remove deprecated plot_date calls (pandas-dev#58484)

* Remove deprecated plot_date calls

These were deprecated in Matplotlib 3.9.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
(cherry picked from commit c9bc480)

* Pick out fastparquet xfails for green CI

* pin pytz to fix test_arrays.py

* more workflow tweaks for pytz and Python 3.13

* fix typing and tune tests for copy on write

* remove WASM stuff

* more arm skips

* go for green

---------

Co-authored-by: Lysandros Nikolaou <[email protected]>
Co-authored-by: Elliott Sales de Andrade <[email protected]>
  • Loading branch information
3 people authored Sep 18, 2024
1 parent 2127b42 commit f7b6378
Show file tree
Hide file tree
Showing 28 changed files with 83 additions and 69 deletions.
8 changes: 4 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ jobs:
steps:
- checkout
- run: .circleci/setup_env.sh
- run: >
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD
- run: |
sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH \
LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD \
ci/run_tests.sh
linux-musl:
docker:
Expand All @@ -35,7 +35,7 @@ jobs:
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil "pytz<2024.2" pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
python -m pip list --no-cache-dir
- run: |
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ jobs:
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true"
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil "pytz<2024.2" pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
python -m pip list --no-cache-dir
export PANDAS_CI=1
Expand Down Expand Up @@ -295,7 +295,7 @@ jobs:
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil "pytz<2024.2" pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
python -m pip list --no-cache-dir
Expand Down Expand Up @@ -329,7 +329,7 @@ jobs:
# To freeze this file, uncomment the ``if: false`` condition, and migrate the jobs
# to the corresponding posix/windows-macos/sdist etc. workflows.
# Feel free to modify this comment as necessary.
if: false # Uncomment this to freeze the workflow, comment it to unfreeze
# if: false # Uncomment this to freeze the workflow, comment it to unfreeze
defaults:
run:
shell: bash -eou pipefail {0}
Expand Down Expand Up @@ -361,15 +361,15 @@ jobs:
- name: Set up Python Dev Version
uses: actions/setup-python@v5
with:
python-version: '3.12-dev'
python-version: '3.13-dev'

- name: Build Environment
run: |
python --version
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
python -m pip install versioneer[toml]
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov
python -m pip install python-dateutil "pytz<2024.2" tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov
python -m pip install -ve . --no-build-isolation --no-index --no-deps --config-settings=setup-args="--werror"
python -m pip list
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,13 @@ jobs:
shell: pwsh
run: |
$TST_CMD = @"
python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0;
python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytz<2024.2;
python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`';
"@
# add rc to the end of the image name if the Python version is unreleased
docker pull python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
docker pull python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
- uses: actions/upload-artifact@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ dependencies:
# required dependencies
- python-dateutil
- numpy
- pytz
# pytz 2024.2 timezones cause wrong results
- pytz<2024.2

# optional dependencies
- beautifulsoup4>=4.11.2
Expand Down
3 changes: 2 additions & 1 deletion ci/deps/actions-311-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ dependencies:
# required dependencies
- python-dateutil
- numpy
- pytz
# pytz 2024.2 timezones cause wrong results
- pytz<2024.2

# optional dependencies
- beautifulsoup4>=4.11.2
Expand Down
3 changes: 2 additions & 1 deletion ci/deps/actions-311-numpydev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ dependencies:

# pandas dependencies
- python-dateutil
- pytz
# pytz 2024.2 timezones cause wrong results
- pytz<2024.2
- pip

- pip:
Expand Down
3 changes: 2 additions & 1 deletion ci/deps/actions-311-pyarrownightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ dependencies:
# required dependencies
- python-dateutil
- numpy<2
- pytz
# pytz 2024.2 timezones cause wrong results
- pytz<2024.2
- pip

- pip:
Expand Down
3 changes: 2 additions & 1 deletion ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ dependencies:
# required dependencies
- python-dateutil
- numpy
- pytz
# pytz 2024.2 timezones cause wrong results
- pytz<2024.2

# optional dependencies
- beautifulsoup4>=4.11.2
Expand Down
3 changes: 2 additions & 1 deletion ci/deps/actions-312.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ dependencies:
# required dependencies
- python-dateutil
- numpy
- pytz
# pytz 2024.2 timezones cause wrong results
- pytz<2024.2

# optional dependencies
- beautifulsoup4>=4.11.2
Expand Down
3 changes: 2 additions & 1 deletion ci/deps/actions-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ dependencies:
# required dependencies
- python-dateutil
- numpy
- pytz
# pytz 2024.2 timezones cause wrong results
- pytz<2024.2

# optional dependencies
- beautifulsoup4>=4.11.2
Expand Down
1 change: 1 addition & 0 deletions ci/deps/actions-pypy-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies:
# required
- numpy
- python-dateutil
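# pytz 2024.2 timezones cause wrong results
# NOTE(review): pytz is left unpinned here, unlike the other CI envs that use pytz<2024.2 - confirm this is intended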
- pytz
- pip:
- tzdata>=2022.7
3 changes: 2 additions & 1 deletion ci/deps/circle-310-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ dependencies:
# required dependencies
- python-dateutil
- numpy
- pytz
# pytz 2024.2 timezones cause wrong results
- pytz < 2024.2

# optional dependencies
- beautifulsoup4>=4.11.2
Expand Down
12 changes: 6 additions & 6 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -410,8 +410,8 @@ static void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
npyarr->type_num = PyArray_DESCR(obj)->type_num;

if (GET_TC(tc)->transpose) {
npyarr->dim = PyArray_DIM(obj, npyarr->ndim);
npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim);
npyarr->dim = PyArray_DIM(obj, (int)npyarr->ndim);
npyarr->stride = PyArray_STRIDE(obj, (int)npyarr->ndim);
npyarr->stridedim = npyarr->ndim;
npyarr->index[npyarr->ndim] = 0;
npyarr->inc = -1;
Expand Down Expand Up @@ -452,8 +452,8 @@ static void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) {
return;
}
const PyArrayObject *arrayobj = (const PyArrayObject *)npyarr->array;
npyarr->dim = PyArray_DIM(arrayobj, npyarr->stridedim);
npyarr->stride = PyArray_STRIDE(arrayobj, npyarr->stridedim);
npyarr->dim = PyArray_DIM(arrayobj, (int)npyarr->stridedim);
npyarr->stride = PyArray_STRIDE(arrayobj, (int)npyarr->stridedim);
npyarr->dataptr += npyarr->stride;

NpyArr_freeItemValue(obj, tc);
Expand Down Expand Up @@ -524,8 +524,8 @@ static int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) {
}
const PyArrayObject *arrayobj = (const PyArrayObject *)npyarr->array;

npyarr->dim = PyArray_DIM(arrayobj, npyarr->stridedim);
npyarr->stride = PyArray_STRIDE(arrayobj, npyarr->stridedim);
npyarr->dim = PyArray_DIM(arrayobj, (int)npyarr->stridedim);
npyarr->stride = PyArray_STRIDE(arrayobj, (int)npyarr->stridedim);
npyarr->index[npyarr->stridedim] = 0;

((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4960,7 +4960,12 @@ cpdef to_offset(freq, bint is_period=False):
if result is None:
raise ValueError(INVALID_FREQ_ERR_MSG.format(freq))

if is_period and not hasattr(result, "_period_dtype_code"):
try:
has_period_dtype_code = hasattr(result, "_period_dtype_code")
except ValueError:
has_period_dtype_code = False

if is_period and not has_period_dtype_code:
if isinstance(freq, str):
raise ValueError(f"{result.name} is not supported as period frequency")
else:
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas.util._exceptions import find_stack_level

if TYPE_CHECKING:
import google.auth
from google.auth.credentials import Credentials

from pandas import DataFrame

Expand All @@ -37,7 +37,7 @@ def read_gbq(
dialect: str | None = None,
location: str | None = None,
configuration: dict[str, Any] | None = None,
credentials: google.auth.credentials.Credentials | None = None,
credentials: Credentials | None = None,
use_bqstorage_api: bool | None = None,
max_results: int | None = None,
progress_bar_type: str | None = None,
Expand Down Expand Up @@ -230,7 +230,7 @@ def to_gbq(
table_schema: list[dict[str, str]] | None = None,
location: str | None = None,
progress_bar: bool = True,
credentials: google.auth.credentials.Credentials | None = None,
credentials: Credentials | None = None,
) -> None:
warnings.warn(
"to_gbq is deprecated and will be removed in a future version. "
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2816,7 +2816,9 @@ def test_rolling_wrong_param_min_period():
test_df = DataFrame([name_l, val_l]).T
test_df.columns = ["name", "val"]

result_error_msg = r"__init__\(\) got an unexpected keyword argument 'min_period'"
result_error_msg = (
r"^[a-zA-Z._]*\(\) got an unexpected keyword argument 'min_period'"
)
with pytest.raises(TypeError, match=result_error_msg):
test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum()

Expand Down
1 change: 0 additions & 1 deletion pandas/tests/indexes/interval/test_interval_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ def test_construction_overflow(self):
expected = (50 + np.iinfo(np.int64).max) / 2
assert result == expected

@pytest.mark.xfail(not IS64, reason="GH 23440")
@pytest.mark.parametrize(
"left, right, expected",
[
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/indexes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ def test_sort_values_invalid_na_position(index_with_missing, na_position):
index_with_missing.sort_values(na_position=na_position)


@pytest.mark.fails_arm_wheels
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize("na_position", ["first", "last"])
def test_sort_values_with_missing(index_with_missing, na_position, request):
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/indexing/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest

from pandas._libs import index as libindex
from pandas.compat import IS64

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -210,7 +209,6 @@ def test_mi_intervalindex_slicing_with_scalar(self):
expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(not IS64, reason="GH 23440")
@pytest.mark.parametrize(
"base",
[101, 1010],
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/indexing/interval/test_interval_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas.compat import IS64

from pandas import (
Index,
Interval,
Expand Down Expand Up @@ -211,7 +209,6 @@ def test_loc_getitem_missing_key_error_message(
obj.loc[[4, 5, 6]]


@pytest.mark.xfail(not IS64, reason="GH 23440")
@pytest.mark.parametrize(
"intervals",
[
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/test_dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def custom_dialect():
"escapechar": "~",
"delimiter": ":",
"skipinitialspace": False,
"quotechar": "~",
"quotechar": "`",
"quoting": 3,
}
return dialect_name, dialect_kwargs
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,10 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
df.to_csv(path, compression=compression_, encoding=encoding)

# reading should fail (otherwise we wouldn't need the warning)
msg = r"UTF-\d+ stream does not start with BOM"
msg = (
r"UTF-\d+ stream does not start with BOM|"
r"'utf-\d+' codec can't decode byte"
)
with pytest.raises(UnicodeError, match=msg):
pd.read_csv(path, compression=compression_, encoding=encoding)

Expand Down
15 changes: 5 additions & 10 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
pa_version_under11p0,
pa_version_under13p0,
pa_version_under15p0,
pa_version_under17p0,
)

import pandas as pd
Expand Down Expand Up @@ -449,12 +448,8 @@ def test_read_filters(self, engine, tmp_path):
repeat=1,
)

def test_write_index(self, engine, using_copy_on_write, request):
def test_write_index(self, engine):
check_names = engine != "fastparquet"
if using_copy_on_write and engine == "fastparquet":
request.applymarker(
pytest.mark.xfail(reason="fastparquet write into index")
)

df = pd.DataFrame({"A": [1, 2, 3]})
check_round_trip(df, engine)
Expand Down Expand Up @@ -1064,9 +1059,6 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa):
expected=expected,
)

@pytest.mark.xfail(
pa_version_under17p0, reason="pa.pandas_compat passes 'datetime64' to .astype"
)
def test_columns_dtypes_not_invalid(self, pa):
df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})

Expand Down Expand Up @@ -1314,7 +1306,10 @@ def test_empty_dataframe(self, fp):
expected = df.copy()
check_round_trip(df, fp, expected=expected)

@pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
@pytest.mark.xfail(
_HAVE_FASTPARQUET and Version(fastparquet.__version__) > Version("2022.12"),
reason="fastparquet bug, see https://github.com/dask/fastparquet/issues/929",
)
def test_timezone_aware_index(self, fp, timezone_aware_date_list):
idx = 5 * [timezone_aware_date_list]

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/xml/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,7 +1044,7 @@ def test_utf16_encoding(xml_baby_names, parser):
UnicodeError,
match=(
"UTF-16 stream does not start with BOM|"
"'utf-16-le' codec can't decode byte"
"'utf-16(-le)?' codec can't decode byte"
),
):
read_xml(xml_baby_names, encoding="UTF-16", parser=parser)
Expand Down
Loading

0 comments on commit f7b6378

Please sign in to comment.