[SPARK-44964][ML][CONNECT][TESTS] Clean up pyspark.ml.connect.functions doctest

### What changes were proposed in this pull request?

This PR proposes to clean up the `pyspark.ml.connect.functions` doctest. All of the tests in that module are currently being skipped, so the doctest run is dead code.

### Why are the changes needed?

To remove unused test code.

### Does this PR introduce _any_ user-facing change?

No, test-only.

### How was this patch tested?

Manually ran the tests via:

```bash
./python/run-tests --python-executables=python3 --modules=pyspark-ml-connect
```

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #42679 from HyukjinKwon/SPARK-44964.

Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
HyukjinKwon authored and dongjoon-hyun committed Aug 27, 2023
1 parent 04339e3 commit 23ce9c4
Showing 3 changed files with 3 additions and 45 deletions.
2 changes: 0 additions & 2 deletions dev/sparktestsupport/modules.py

```diff
@@ -886,8 +886,6 @@ def __hash__(self):
         "python/pyspark/ml/connect",
     ],
     python_test_goals=[
-        # ml doctests
-        "pyspark.ml.connect.functions",
         # ml unittests
         "pyspark.ml.tests.connect.test_connect_function",
         "pyspark.ml.tests.connect.test_parity_torch_distributor",
```
3 changes: 3 additions & 0 deletions python/pyspark/ml/connect/__init__.py

```diff
@@ -16,6 +16,9 @@
 #

 """Spark Connect Python Client - ML module"""
+from pyspark.sql.connect.utils import check_dependencies
+
+check_dependencies(__name__)

 from pyspark.ml.connect.base import (
     Estimator,
```
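The `check_dependencies` guard that moves into `__init__.py` fails fast, with a clear error, when the optional Connect dependencies are not installed. A minimal sketch of that pattern follows; the `required` default and the error message are assumptions for illustration, not Spark's actual implementation:

```python
import importlib.util


def check_dependencies(mod_name, required=("grpc", "google.protobuf")):
    """Raise ImportError naming mod_name if any required package is absent.

    Hypothetical sketch of a dependency guard; the default `required`
    tuple here is an assumption, not Spark's real dependency list.
    """

    def _importable(pkg):
        # find_spec raises ModuleNotFoundError when a dotted name's
        # parent package is itself missing, so treat that as "absent".
        try:
            return importlib.util.find_spec(pkg) is not None
        except ModuleNotFoundError:
            return False

    missing = [pkg for pkg in required if not _importable(pkg)]
    if missing:
        raise ImportError(
            f"{mod_name} requires {', '.join(missing)}; "
            "install them with: pip install pyspark[connect]"
        )
```

Because the guard now runs on package import, every submodule under `pyspark.ml.connect` is covered without repeating the call in each file.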
43 changes: 0 additions & 43 deletions python/pyspark/ml/connect/functions.py

```diff
@@ -14,12 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect.utils import check_dependencies
-
-check_dependencies(__name__)
-
 from pyspark.ml import functions as PyMLFunctions
-
 from pyspark.sql.connect.column import Column
 from pyspark.sql.connect.functions import _invoke_function, _to_col, lit

@@ -36,41 +31,3 @@ def array_to_vector(col: Column) -> Column:


 array_to_vector.__doc__ = PyMLFunctions.array_to_vector.__doc__
-
-
-def _test() -> None:
-    import sys
-    import doctest
-    from pyspark.sql import SparkSession as PySparkSession
-    import pyspark.ml.connect.functions
-
-    globs = pyspark.ml.connect.functions.__dict__.copy()
-
-    # TODO: split vector_to_array doctest since it includes .mllib vectors
-    del pyspark.ml.connect.functions.vector_to_array.__doc__
-
-    # TODO: spark.createDataFrame should support UDT
-    del pyspark.ml.connect.functions.array_to_vector.__doc__
-
-    globs["spark"] = (
-        PySparkSession.builder.appName("ml.connect.functions tests")
-        .remote("local[4]")
-        .getOrCreate()
-    )
-
-    (failure_count, test_count) = doctest.testmod(
-        pyspark.ml.connect.functions,
-        globs=globs,
-        optionflags=doctest.ELLIPSIS
-        | doctest.NORMALIZE_WHITESPACE
-        | doctest.IGNORE_EXCEPTION_DETAIL,
-    )
-
-    globs["spark"].stop()
-
-    if failure_count:
-        sys.exit(-1)
-
-
-if __name__ == "__main__":
-    _test()
```
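For context on why this doctest run was dead code: the removed `_test()` helper deleted the `__doc__` of both `vector_to_array` and `array_to_vector` before calling `doctest.testmod`, and doctest collects examples only from docstrings, so every run reported zero tests. A small illustration of that mechanism (throwaway `add` function, not from the Spark codebase):

```python
import doctest


def add(a, b):
    """
    >>> add(1, 2)
    3
    """
    return a + b


finder = doctest.DocTestFinder()

# With the docstring in place, the finder collects one example.
with_doc = sum(len(t.examples) for t in finder.find(add))

# Deleting the docstring, as the removed _test() helper did for both
# functions, leaves doctest with nothing to collect or run.
add.__doc__ = None
without_doc = sum(len(t.examples) for t in finder.find(add))

print(with_doc, without_doc)  # 1 0
```

With both docstrings deleted, `doctest.testmod` had no examples left in the module, which is why dropping the goal from `python_test_goals` loses no coverage.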
