From 28f9013d0c590babf9ffc95449c9232ccac44b4f Mon Sep 17 00:00:00 2001 From: "Reza (Shahin) Khanipour" Date: Wed, 11 Sep 2024 14:25:04 +0200 Subject: [PATCH] fix(spark): lock package version, to ensure they match plugins. --- poetry.lock | 25 ++++++++++++------------- pyproject.toml | 5 ++++- src/sparkle/application/spark.py | 1 + 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index 3d6d036..84467d6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -510,13 +510,13 @@ wcwidth = "*" [[package]] name = "py4j" -version = "0.10.9.7" +version = "0.10.9.5" description = "Enables Python programs to dynamically access arbitrary Java objects" optional = false python-versions = "*" files = [ - {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"}, - {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"}, + {file = "py4j-0.10.9.5-py2.py3-none-any.whl", hash = "sha256:52d171a6a2b031d8a5d1de6efe451cf4f5baff1a2819aabc3741c8406539ba04"}, + {file = "py4j-0.10.9.5.tar.gz", hash = "sha256:276a4a3c5a2154df1860ef3303a927460e02e97b047dc0a47c1c3fb8cce34db6"}, ] [[package]] @@ -555,23 +555,22 @@ files = [ [[package]] name = "pyspark" -version = "3.5.2" +version = "3.3.2" description = "Apache Spark Python API" optional = false -python-versions = ">=3.8" +python-versions = ">=3.7" files = [ - {file = "pyspark-3.5.2.tar.gz", hash = "sha256:bbb36eba09fa24e86e0923d7e7a986041b90c714e11c6aa976f9791fe9edde5e"}, + {file = "pyspark-3.3.2.tar.gz", hash = "sha256:0dfd5db4300c1f6cc9c16d8dbdfb82d881b4b172984da71344ede1a9d4893da8"}, ] [package.dependencies] -py4j = "0.10.9.7" +py4j = "0.10.9.5" [package.extras] -connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] -ml = ["numpy (>=1.15,<2)"] -mllib = ["numpy (>=1.15,<2)"] -pandas-on-spark = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] -sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] +ml = ["numpy (>=1.15)"] +mllib = ["numpy (>=1.15)"] +pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] +sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] [[package]] name = "pytest" @@ -829,4 +828,4 @@ test = ["pytest"] [metadata] lock-version = "2.0" python-versions = ">=3.10.14" -content-hash = "1c3d77deda9c1912f8b6518ade11e7a1e5ba436e0561ff5e8579c5a7bb25aeec" +content-hash = "0f190c56d5a7e5cd472b25fd8626ddb2c28e70256d653e0a3a8e4e2fc77f3f5c" diff --git a/pyproject.toml b/pyproject.toml index a46cc79..977f613 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,10 @@ packages = [ [tool.poetry.dependencies] python = ">=3.10.14" -pyspark = ">=3.3.2" +# FIXME we need to lock this for now, given the plugins we install are +# tightly coupled with the versions. Unless we require users to add +# these plugins on runtime, we can't change this. +pyspark = "3.3.2" [tool.poetry.group.dev.dependencies] commitizen = "^3.29.0" diff --git a/src/sparkle/application/spark.py b/src/sparkle/application/spark.py index 4456111..e7cf273 100644 --- a/src/sparkle/application/spark.py +++ b/src/sparkle/application/spark.py @@ -12,6 +12,7 @@ _SPARK_EXTENSIONS = [ "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", ] + _SPARK_PACKAGES = [ "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.3.1", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.0",