diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 203e53474f74a..9357b4842bbdb 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -1388,10 +1388,12 @@ def when(self, condition: "Column", value: Any) -> "Column": Examples -------- + Example 1: Using :func:`when` with conditions and values to create a new Column + >>> from pyspark.sql import functions as sf - >>> df = spark.createDataFrame( - ... [(2, "Alice"), (5, "Bob")], ["age", "name"]) - >>> df.select(df.name, sf.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)).show() + >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"]) + >>> result = df.select(df.name, sf.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)) + >>> result.show() +-----+------------------------------------------------------------+ | name|CASE WHEN (age > 4) THEN 1 WHEN (age < 3) THEN -1 ELSE 0 END| +-----+------------------------------------------------------------+ @@ -1399,6 +1401,38 @@ def when(self, condition: "Column", value: Any) -> "Column": | Bob| 1| +-----+------------------------------------------------------------+ + Example 2: Chaining multiple :func:`when` conditions + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(1, "Alice"), (4, "Bob"), (6, "Charlie")], ["age", "name"]) + >>> result = df.select( + ... df.name, + ... sf.when(df.age < 3, "Young").when(df.age < 5, "Middle-aged").otherwise("Old") + ... 
) + >>> result.show() + +-------+---------------------------------------------------------------------------+ + | name|CASE WHEN (age < 3) THEN Young WHEN (age < 5) THEN Middle-aged ELSE Old END| + +-------+---------------------------------------------------------------------------+ + | Alice| Young| + | Bob| Middle-aged| + |Charlie| Old| + +-------+---------------------------------------------------------------------------+ + + Example 3: Using a literal condition, so the ``otherwise`` branch is never reached + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"]) + >>> result = df.select( + ... df.name, sf.when(sf.lit(True), 1).otherwise( + ... sf.raise_error("unreachable")).alias("when")) + >>> result.show() + +-----+----+ + | name|when| + +-----+----+ + |Alice| 1| + | Bob| 1| + +-----+----+ + See Also -------- pyspark.sql.functions.when