[SPARK-50044][PYTHON] Refine the docstring of multiple math functions
### What changes were proposed in this pull request?
Refine the docstring of multiple math functions

### Why are the changes needed?
1. Make them copy-pasteable;
2. Show the projection: input -> output (see the sketch below).
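For instance, in the refined style each example carries its own import and selects the input column next to the computed column, so the whole block runs as-is in a PySpark shell. An illustrative sketch in the new style (output abbreviated; the actual refined examples are in the diff below):

    >>> from pyspark.sql import functions as sf
    >>> spark.range(1, 4).select("*", sf.log2("id")).show()
    +---+-----------------+
    | id|         LOG2(id)|
    +---+-----------------+
    |  1|              0.0|
    |  2|              1.0|
    |  3|1.584962500721...|
    +---+-----------------+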

### Does this PR introduce _any_ user-facing change?
doc changes

### How was this patch tested?
updated doctests
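The refined examples double as doctests. As a quick local sanity check (a minimal sketch using the standard-library `doctest` module, not Spark's full test harness), they can be replayed against a docstring directly; `ELLIPSIS` is needed because the expected outputs abbreviate long floats with `...`:

    import doctest

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    # The examples assume a `spark` session in scope; bind one into the globals.
    spark = SparkSession.builder.master("local[1]").getOrCreate()
    # Replay the examples embedded in log's docstring; ELLIPSIS lets expected
    # values like 0.6309297535714... match the full floats that show() prints.
    doctest.run_docstring_examples(
        F.log, {"spark": spark}, verbose=True, optionflags=doctest.ELLIPSIS
    )
    spark.stop()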

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #48567 from zhengruifeng/doc_refine_ln.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
zhengruifeng authored and MaxGekk committed Oct 20, 2024
1 parent 4508911 commit 32cc2dd
Showing 1 changed file with 94 additions and 37 deletions.
131 changes: 94 additions & 37 deletions python/pyspark/sql/functions/builtin.py
@@ -7157,27 +7157,46 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = None) -> Column:
 
     Examples
     --------
+    Example 1: Specify both base number and the input value
+
     >>> from pyspark.sql import functions as sf
     >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (4) AS t(value)")
-    >>> df.select(sf.log(2.0, df.value).alias('log2_value')).show()
-    +----------+
-    |log2_value|
-    +----------+
-    |       0.0|
-    |       1.0|
-    |       2.0|
-    +----------+
+    >>> df.select("*", sf.log(2.0, df.value)).show()
+    +-----+---------------+
+    |value|LOG(2.0, value)|
+    +-----+---------------+
+    |    1|            0.0|
+    |    2|            1.0|
+    |    4|            2.0|
+    +-----+---------------+
 
-    And Natural logarithm
+    Example 2: Return NULL for invalid input values
 
-    >>> df.select(sf.log(df.value).alias('ln_value')).show()
-    +------------------+
-    |          ln_value|
-    +------------------+
-    |               0.0|
-    |0.6931471805599453|
-    |1.3862943611198906|
-    +------------------+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (0), (-1), (NULL) AS t(value)")
+    >>> df.select("*", sf.log(3.0, df.value)).show()
+    +-----+------------------+
+    |value|   LOG(3.0, value)|
+    +-----+------------------+
+    |    1|               0.0|
+    |    2|0.6309297535714...|
+    |    0|              NULL|
+    |   -1|              NULL|
+    | NULL|              NULL|
+    +-----+------------------+
+
+    Example 3: Specify only the input value (Natural logarithm)
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (4) AS t(value)")
+    >>> df.select("*", sf.log(df.value)).show()
+    +-----+------------------+
+    |value|         ln(value)|
+    +-----+------------------+
+    |    1|               0.0|
+    |    2|0.6931471805599...|
+    |    4|1.3862943611198...|
+    +-----+------------------+
     """
     from pyspark.sql.classic.column import _to_java_column
 
@@ -7205,13 +7224,22 @@ def ln(col: "ColumnOrName") -> Column:
 
     Examples
     --------
-    >>> df = spark.createDataFrame([(4,)], ['a'])
-    >>> df.select(ln('a')).show()
-    +------------------+
-    |             ln(a)|
-    +------------------+
-    |1.3862943611198906|
-    +------------------+
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(10).select("*", sf.ln('id')).show()
+    +---+------------------+
+    | id|            ln(id)|
+    +---+------------------+
+    |  0|              NULL|
+    |  1|               0.0|
+    |  2|0.6931471805599...|
+    |  3|1.0986122886681...|
+    |  4|1.3862943611198...|
+    |  5|1.6094379124341...|
+    |  6| 1.791759469228...|
+    |  7|1.9459101490553...|
+    |  8|2.0794415416798...|
+    |  9|2.1972245773362...|
+    +---+------------------+
     """
     return _invoke_function_over_columns("ln", col)
 
@@ -7237,13 +7265,22 @@ def log2(col: "ColumnOrName") -> Column:
 
     Examples
     --------
-    >>> df = spark.createDataFrame([(4,)], ['a'])
-    >>> df.select(log2('a').alias('log2')).show()
-    +----+
-    |log2|
-    +----+
-    | 2.0|
-    +----+
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(10).select("*", sf.log2('id')).show()
+    +---+------------------+
+    | id|          LOG2(id)|
+    +---+------------------+
+    |  0|              NULL|
+    |  1|               0.0|
+    |  2|               1.0|
+    |  3| 1.584962500721...|
+    |  4|               2.0|
+    |  5| 2.321928094887...|
+    |  6| 2.584962500721...|
+    |  7| 2.807354922057...|
+    |  8|               3.0|
+    |  9|3.1699250014423...|
+    +---+------------------+
     """
     return _invoke_function_over_columns("log2", col)
 
@@ -7274,9 +7311,16 @@ def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column:
 
     Examples
     --------
-    >>> df = spark.createDataFrame([("010101",)], ['n'])
-    >>> df.select(conv(df.n, 2, 16).alias('hex')).collect()
-    [Row(hex='15')]
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([("010101",), ("101",), ("001",)], ['n'])
+    >>> df.select("*", sf.conv(df.n, 2, 16)).show()
+    +------+--------------+
+    |     n|conv(n, 2, 16)|
+    +------+--------------+
+    |010101|            15|
+    |   101|             5|
+    |   001|             1|
+    +------+--------------+
     """
     from pyspark.sql.classic.column import _to_java_column
 
@@ -7307,9 +7351,22 @@ def factorial(col: "ColumnOrName") -> Column:
 
     Examples
     --------
-    >>> df = spark.createDataFrame([(5,)], ['n'])
-    >>> df.select(factorial(df.n).alias('f')).collect()
-    [Row(f=120)]
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(10).select("*", sf.factorial('id')).show()
+    +---+-------------+
+    | id|factorial(id)|
+    +---+-------------+
+    |  0|            1|
+    |  1|            1|
+    |  2|            2|
+    |  3|            6|
+    |  4|           24|
+    |  5|          120|
+    |  6|          720|
+    |  7|         5040|
+    |  8|        40320|
+    |  9|       362880|
+    +---+-------------+
     """
     return _invoke_function_over_columns("factorial", col)
 
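Taken together, the refined functions compose in one short session (a sketch assuming a PySpark shell where `spark` is already defined; outputs abbreviated as in the doctests above):

    >>> from pyspark.sql import functions as sf
    >>> df = spark.range(1, 5)  # ids 1..4
    >>> df.select("*", sf.ln("id"), sf.log2("id"), sf.factorial("id")).show()
    +---+------------------+-----------------+-------------+
    | id|            ln(id)|         LOG2(id)|factorial(id)|
    +---+------------------+-----------------+-------------+
    |  1|               0.0|              0.0|            1|
    |  2|0.6931471805599...|              1.0|            2|
    |  3|1.0986122886681...|1.584962500721...|            6|
    |  4|1.3862943611198...|              2.0|           24|
    +---+------------------+-----------------+-------------+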