From b0dc6546a0b720f86ddbaa87e4d055557160af54 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 25 Oct 2024 06:59:37 -0700 Subject: [PATCH] [SPARK-50106][PYTHON] Upgrade `protobuf` Python package to 5.28.3 ### What changes were proposed in this pull request? This PR aims to upgrade `protobuf` Python package from `4.25.1` to `5.28.3` for Apache Spark 4.0.0 in February 2025. ### Why are the changes needed? To bring the latest bug fixes and support `Python 3.13` which requires `grpcio*>=1.66.2` (`grpcio-status>=1.66.2` requires `protobuf>=5.26.1`). - https://github.com/protocolbuffers/protobuf/releases/tag/v28.3 (2024-10-22) - https://pypi.org/project/protobuf/5.28.3/ (2024-10-22) ### Does this PR introduce _any_ user-facing change? This is a dependency change with no behavior change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48644 from dongjoon-hyun/SPARK-50106. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 4 ++-- .github/workflows/maven_test.yml | 2 +- dev/create-release/spark-rm/Dockerfile | 2 +- dev/infra/Dockerfile | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c43c7df22039b..553a961109ab0 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -276,7 +276,7 @@ jobs: - name: Install Python packages (Python 3.11) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') run: | - python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1' + python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==5.28.3' python3.11 -m pip list # Run 
the tests. - name: Run tests @@ -614,7 +614,7 @@ jobs: python-version: '3.11' - name: Install dependencies for Python CodeGen check run: | - python3.11 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0' + python3.11 -m pip install 'black==23.9.1' 'protobuf==5.28.3' 'mypy==1.8.0' 'mypy-protobuf==3.3.0' python3.11 -m pip list - name: Python CodeGen check run: ./dev/connect-check-protos.py diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index dd089d665d6e3..22153fe2f980c 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -178,7 +178,7 @@ jobs: - name: Install Python packages (Python 3.11) if: (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect') run: | - python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1' + python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==5.28.3' python3.11 -m pip list # Run the tests. 
- name: Run tests diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 80a66ea71778b..f70a1dec6e468 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -102,7 +102,7 @@ RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.2' scipy coverage matp ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.2 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1" # Python deps for Spark Connect -ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==4.25.1 googleapis-common-protos==1.65.0" +ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==5.28.3 googleapis-common-protos==1.65.0" # Install Python 3.10 packages RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 83f6b222b54a2..70efeecfac581 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -96,7 +96,7 @@ RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.3' scipy coverage matp ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect -ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==4.25.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" +ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==5.28.3 googleapis-common-protos==1.65.0 graphviz==0.20.3" # Install Python 3.10 packages RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10