diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml
index d0a50b2b4aa74..542fa567dea69 100644
--- a/.github/workflows/update_build_status.yml
+++ b/.github/workflows/update_build_status.yml
@@ -72,7 +72,7 @@ jobs:
} catch (error) {
console.error(error)
// Run not found. This can happen when the PR author removes GitHub Actions runs or
- // disalbes GitHub Actions.
+ // disables GitHub Actions.
continue
}
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 9c825a99be180..e320981783ecc 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -2922,7 +2922,7 @@ setClassUnion("characterOrstructTypeOrColumn", c("character", "structType", "Col
#' @details
#' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType}
#' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set
-#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
+#' to \code{TRUE}. If the string is unparsable, the Column will contain the value NA.
#'
#' @rdname column_collection_functions
#' @param as.json.array indicating if input string is JSON array of objects or a single object.
@@ -3004,7 +3004,7 @@ setMethod("schema_of_json", signature(x = "characterOrColumn"),
#' @details
#' \code{from_csv}: Parses a column containing a CSV string into a Column of \code{structType}
#' with the specified \code{schema}.
-#' If the string is unparseable, the Column will contain the value NA.
+#' If the string is unparsable, the Column will contain the value NA.
#'
#' @rdname column_collection_functions
#' @aliases from_csv from_csv,Column,characterOrstructTypeOrColumn-method
diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R
index 61e174de9ac56..4ccec991bb07b 100644
--- a/R/pkg/R/serialize.R
+++ b/R/pkg/R/serialize.R
@@ -60,7 +60,7 @@ writeObject <- function(con, object, writeType = TRUE) {
if (type %in% c("integer", "character", "logical", "double", "numeric")) {
if (is.na(object[[1]])) {
# Uses the first element for now to keep the behavior same as R before
- # 4.2.0. This is wrong because we should differenciate c(NA) from a
+ # 4.2.0. This is wrong because we should differentiate c(NA) from a
# single NA as the former means array(null) and the latter means null
# in Spark SQL. However, it requires non-trivial comparison to distinguish
# both in R. We should ideally fix this.
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java
index 02a38eac5b409..6e9bd548f5327 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java
@@ -251,17 +251,17 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo(
// Higher shuffleMergeId seen for the shuffle ID meaning new stage attempt is being
// run for the shuffle ID. Close and clean up old shuffleMergeId files,
// happens in the indeterminate stage retries
- AppAttemptShuffleMergeId currrentAppAttemptShuffleMergeId =
+ AppAttemptShuffleMergeId currentAppAttemptShuffleMergeId =
new AppAttemptShuffleMergeId(appShuffleInfo.appId, appShuffleInfo.attemptId,
shuffleId, latestShuffleMergeId);
logger.info("{}: creating a new shuffle merge metadata since received " +
"shuffleMergeId {} is higher than latest shuffleMergeId {}",
MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$,
- currrentAppAttemptShuffleMergeId),
+ currentAppAttemptShuffleMergeId),
MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId),
MDC.of(LogKeys.LATEST_SHUFFLE_MERGE_ID$.MODULE$, latestShuffleMergeId));
submitCleanupTask(() ->
- closeAndDeleteOutdatedPartitions(currrentAppAttemptShuffleMergeId,
+ closeAndDeleteOutdatedPartitions(currentAppAttemptShuffleMergeId,
mergePartitionsInfo.shuffleMergePartitions));
return new AppShuffleMergePartitionsInfo(shuffleMergeId, false);
} else {
diff --git a/connector/connect/docs/client-connection-string.md b/connector/connect/docs/client-connection-string.md
index 37b2956a5c44a..df371c5beaaac 100644
--- a/connector/connect/docs/client-connection-string.md
+++ b/connector/connect/docs/client-connection-string.md
@@ -2,7 +2,7 @@
From the client perspective, Spark Connect mostly behaves as any other GRPC
client and can be configured as such. However, to make it easy to use from
-different programming languages and to have a homogenous connection surface
+different programming languages and to have a homogeneous connection surface
this document proposes what the user surface is for connecting to a
Spark Connect endpoint.
@@ -136,7 +136,7 @@ server_url = "sc://myhost.com:443/;use_ssl=true;token=ABCDEFG"
As mentioned above, Spark Connect uses a regular GRPC client and the server path
cannot be configured to remain compatible with the GRPC standard and HTTP. For
-example the following examles are invalid.
+example the following examples are invalid.
```python
server_url = "sc://myhost.com:443/mypathprefix/;token=AAAAAAA"
diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb
index 7d0e78738095e..6fd14ce31a68c 100644
--- a/docs/_plugins/include_example.rb
+++ b/docs/_plugins/include_example.rb
@@ -114,8 +114,8 @@ def select_lines(code)
range = Range.new(start + 1, endline - 1)
trimmed = trim_codeblock(lines[range])
# Filter out possible example tags of overlapped labels.
- taggs_filtered = trimmed.select { |l| !l.include? '$example ' }
- result += taggs_filtered.join
+ tags_filtered = trimmed.select { |l| !l.include? '$example ' }
+ result += tags_filtered.join
result += "\n"
end
result
diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index 88bad6c5d1b9f..958e442545dcd 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -62,7 +62,7 @@ license: |
## Upgrading from Core 3.3 to 3.4
-- Since Spark 3.4, Spark driver will own `PersistentVolumnClaim`s and try to reuse if they are not assigned to live executors. To restore the behavior before Spark 3.4, you can set `spark.kubernetes.driver.ownPersistentVolumeClaim` to `false` and `spark.kubernetes.driver.reusePersistentVolumeClaim` to `false`.
+- Since Spark 3.4, Spark driver will own `PersistentVolumeClaim`s and try to reuse them if they are not assigned to live executors. To restore the behavior before Spark 3.4, you can set `spark.kubernetes.driver.ownPersistentVolumeClaim` to `false` and `spark.kubernetes.driver.reusePersistentVolumeClaim` to `false`.
- Since Spark 3.4, Spark driver will track shuffle data when dynamic allocation is enabled without shuffle service. To restore the behavior before Spark 3.4, you can set `spark.dynamicAllocation.shuffleTracking.enabled` to `false`.
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index aefa979946a6c..b6f847ff533f5 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -673,7 +673,7 @@ To use a custom metrics.properties for the application master and executors, upd
false |
Set to true for applications that have higher security requirements and prefer that their
- secret is not saved in the db. The shuffle data of such applications wll not be recovered after
+ secret is not saved in the db. The shuffle data of such applications will not be recovered after
the External Shuffle Service restarts.
|
3.5.0 |
diff --git a/docs/security.md b/docs/security.md
index c7d3fd5f8c36f..81173d5f01ce7 100644
--- a/docs/security.md
+++ b/docs/security.md
@@ -72,7 +72,7 @@ secrets to be secure.
false |
Set to true for applications that have higher security requirements and prefer that their
- secret is not saved in the db. The shuffle data of such applications wll not be recovered after
+ secret is not saved in the db. The shuffle data of such applications will not be recovered after
the External Shuffle Service restarts.
|
3.5.0 |
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index d828436e77340..4f8e0dc1a3917 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -372,7 +372,7 @@ SPARK_MASTER_OPTS supports the following system properties:
The pattern for app ID generation based on Java `String.format` method.
The default value is `app-%s-%04d` which represents the existing app id string, e.g.,
- `app-20231031224509-0008`. Plesae be careful to generate unique IDs.
+ `app-20231031224509-0008`. Please be careful to generate unique IDs.
|
4.0.0 |
diff --git a/docs/sql-ref-syntax-ddl-declare-variable.md b/docs/sql-ref-syntax-ddl-declare-variable.md
index ba9857bf1917a..41ecba1364361 100644
--- a/docs/sql-ref-syntax-ddl-declare-variable.md
+++ b/docs/sql-ref-syntax-ddl-declare-variable.md
@@ -83,7 +83,7 @@ DECLARE OR REPLACE five = 55;
-- Explicitly declare the default value of a variable using the keyword `DEFAULT`
DECLARE VARIABLE size DEFAULT 6;
--- STRING variable initialialized to `NULL`
+-- STRING variable initialized to `NULL`
DECLARE some_var STRING;
```
diff --git a/python/docs/source/reference/pyspark.ss/index.rst b/python/docs/source/reference/pyspark.ss/index.rst
index 2cb0b1216eff9..440228134fac9 100644
--- a/python/docs/source/reference/pyspark.ss/index.rst
+++ b/python/docs/source/reference/pyspark.ss/index.rst
@@ -20,7 +20,7 @@
Structured Streaming
====================
-This page gives an overview of all public Structed Streaming API.
+This page gives an overview of all public Structured Streaming APIs.
.. toctree::
:maxdepth: 2
diff --git a/python/pyspark/ml/connect/io_utils.py b/python/pyspark/ml/connect/io_utils.py
index c401e3e76676a..8d93426915d42 100644
--- a/python/pyspark/ml/connect/io_utils.py
+++ b/python/pyspark/ml/connect/io_utils.py
@@ -74,7 +74,7 @@ class ParamsReadWrite(Params):
def _get_extra_metadata(self) -> Any:
"""
- Returns exta metadata of the instance
+ Returns extra metadata of the instance
"""
return None
diff --git a/python/pyspark/ml/connect/tuning.py b/python/pyspark/ml/connect/tuning.py
index cdb606048a59a..190fc683acf7d 100644
--- a/python/pyspark/ml/connect/tuning.py
+++ b/python/pyspark/ml/connect/tuning.py
@@ -170,7 +170,7 @@ def _parallelFitTasks(
if active_session is None:
raise RuntimeError(
- "An active SparkSession is required for running cross valiator fit tasks."
+ "An active SparkSession is required for running cross validator fit tasks."
)
def get_single_task(index: int, param_map: Any) -> Callable[[], Tuple[int, float]]:
diff --git a/python/pyspark/ml/deepspeed/deepspeed_distributor.py b/python/pyspark/ml/deepspeed/deepspeed_distributor.py
index 4ac5ff2fb4207..3fd1d3bb32463 100644
--- a/python/pyspark/ml/deepspeed/deepspeed_distributor.py
+++ b/python/pyspark/ml/deepspeed/deepspeed_distributor.py
@@ -49,7 +49,7 @@ def __init__(
Parameters
----------
numGpus: int
- The number of GPUs to use per node (analagous to num_gpus in deepspeed command).
+ The number of GPUs to use per node (analogous to num_gpus in deepspeed command).
nnodes: int
The number of nodes that should be used for the run.
localMode: bool
diff --git a/python/pyspark/ml/dl_util.py b/python/pyspark/ml/dl_util.py
index 8ead529d7b729..3b87049ef2777 100644
--- a/python/pyspark/ml/dl_util.py
+++ b/python/pyspark/ml/dl_util.py
@@ -27,7 +27,7 @@ class FunctionPickler:
This class provides a way to pickle a function and its arguments.
It also provides a way to create a script that can run a
function with arguments if they have them pickled to a file.
- It also provides a way of extracting the conents of a pickle file.
+ It also provides a way of extracting the contents of a pickle file.
"""
@staticmethod
diff --git a/python/pyspark/ml/tests/connect/test_connect_function.py b/python/pyspark/ml/tests/connect/test_connect_function.py
index 393d38fdc426a..7d3a115ab0619 100644
--- a/python/pyspark/ml/tests/connect/test_connect_function.py
+++ b/python/pyspark/ml/tests/connect/test_connect_function.py
@@ -43,7 +43,7 @@ def setUpClass(cls):
# Disable the shared namespace so pyspark.sql.functions, etc point the regular
# PySpark libraries.
os.environ["PYSPARK_NO_NAMESPACE_SHARE"] = "1"
- cls.connect = cls.spark # Switch Spark Connect session and regular PySpark sesion.
+ cls.connect = cls.spark # Switch Spark Connect session and regular PySpark session.
cls.spark = PySparkSession._instantiatedSession
assert cls.spark is not None
diff --git a/python/pyspark/ml/tests/test_dl_util.py b/python/pyspark/ml/tests/test_dl_util.py
index e5e2c6bc191d8..c130cf1ff6b9d 100644
--- a/python/pyspark/ml/tests/test_dl_util.py
+++ b/python/pyspark/ml/tests/test_dl_util.py
@@ -137,7 +137,7 @@ def _are_two_files_identical(self, fpath1: str, fpath2: str) -> bool:
"",
),
(
- "Check if it creates the correct file with only suffix + boddy",
+ "Check if it creates the correct file with only suffix + body",
"",
"print('goodbye')",
),
diff --git a/python/pyspark/ml/tests/test_functions.py b/python/pyspark/ml/tests/test_functions.py
index e67e46ded67bd..7719b2b27e0ab 100644
--- a/python/pyspark/ml/tests/test_functions.py
+++ b/python/pyspark/ml/tests/test_functions.py
@@ -265,14 +265,14 @@ def predict(a, b, c):
with self.assertRaisesRegex(Exception, "Model expected 3 inputs, but received 4 columns"):
preds = self.df.withColumn("preds", sum_cols(*columns)).toPandas()
- # muliple scalar columns with one tensor_input_shape => single numpy array
+ # multiple scalar columns with one tensor_input_shape => single numpy array
sum_cols = predict_batch_udf(
array_sum_fn, return_type=DoubleType(), batch_size=5, input_tensor_shapes=[[4]]
)
preds = self.df.withColumn("preds", sum_cols(struct(*columns))).toPandas()
self.assertTrue(np.array_equal(np.sum(self.data, axis=1), preds["preds"].to_numpy()))
- # muliple scalar columns with wrong tensor_input_shape => ERROR
+ # multiple scalar columns with wrong tensor_input_shape => ERROR
sum_cols = predict_batch_udf(
array_sum_fn, return_type=DoubleType(), batch_size=5, input_tensor_shapes=[[3]]
)
diff --git a/python/pyspark/ml/tests/test_param.py b/python/pyspark/ml/tests/test_param.py
index 8df50a5963e6b..0aa9827124954 100644
--- a/python/pyspark/ml/tests/test_param.py
+++ b/python/pyspark/ml/tests/test_param.py
@@ -368,12 +368,12 @@ def test_default_params_transferred(self):
self.assertFalse(binarizer.isSet(binarizer.outputCol))
self.assertEqual(result[0][0], 1.0)
- def test_lr_evaluate_invaild_type(self):
+ def test_lr_evaluate_invalid_type(self):
lr = LinearRegressionModel()
invalid_type = ""
self.assertRaises(TypeError, lr.evaluate, invalid_type)
- def test_glr_evaluate_invaild_type(self):
+ def test_glr_evaluate_invalid_type(self):
glr = GeneralizedLinearRegressionModel()
invalid_type = ""
self.assertRaises(TypeError, glr.evaluate, invalid_type)
diff --git a/python/pyspark/ml/torch/distributor.py b/python/pyspark/ml/torch/distributor.py
index 62a71c5a96af4..ef86f38b716b7 100644
--- a/python/pyspark/ml/torch/distributor.py
+++ b/python/pyspark/ml/torch/distributor.py
@@ -232,10 +232,10 @@ def _get_num_tasks(self) -> int:
def _validate_input_params(self) -> None:
if self.num_processes <= 0:
- raise ValueError("num_proccesses has to be a positive integer")
+ raise ValueError("num_processes has to be a positive integer")
def _check_encryption(self) -> None:
- """Checks to see if the user requires encrpytion of data.
+ """Checks to see if the user requires encryption of data.
If required, throw an exception since we don't support that.
Raises
diff --git a/python/pyspark/pandas/accessors.py b/python/pyspark/pandas/accessors.py
index 4c36f7976af83..77757e4b60873 100644
--- a/python/pyspark/pandas/accessors.py
+++ b/python/pyspark/pandas/accessors.py
@@ -936,7 +936,7 @@ def _transform_batch(
def pandas_concat(*series: pd.Series) -> pd.DataFrame:
# The input can only be a DataFrame for struct from Spark 3.0.
- # This works around makeing the input as a frame. See SPARK-27240
+ # This works around it by making the input a frame. See SPARK-27240
pdf = pd.concat(series, axis=1)
pdf.columns = columns
return pdf
diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
index bc54d8b9b17cb..01e23214d662d 100644
--- a/python/pyspark/pandas/base.py
+++ b/python/pyspark/pandas/base.py
@@ -1123,7 +1123,7 @@ def shift(
Shift Series/Index by desired number of periods.
.. note:: the current implementation of shift uses Spark's Window without
- specifying partition specification. This leads to moveing all data into
+ specifying partition specification. This leads to moving all data into
a single partition in a single machine and could cause serious
performance degradation. Avoid this method with very large datasets.
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 49aa49f65e35b..f315d59a4fe94 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -7686,7 +7686,7 @@ def _sort(
if na_position not in ("first", "last"):
raise ValueError("invalid na_position: '{}'".format(na_position))
- # Mapper: Get a spark colum
- # n function for (ascending, na_position) combination
+ # Mapper: Get a spark column function
+ # for (ascending, na_position) combination
mapper = {
(True, "first"): PySparkColumn.asc_nulls_first,
@@ -9808,7 +9808,7 @@ def describe(self, percentiles: Optional[List[float]] = None) -> "DataFrame":
if is_all_string_type:
# Handling string type columns
- # We will retrive the `count`, `unique`, `top` and `freq`.
+ # We will retrieve the `count`, `unique`, `top` and `freq`.
internal = self._internal.resolved_copy
exprs_string = [
internal.spark_column_for(psser._column_label) for psser in psser_string
diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index b387ca1d4e508..34f11768bcbc0 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -205,7 +205,7 @@ def _parameters_to_print(self, parameters: Mapping[str, Any]) -> Mapping[str, An
try:
params[name] = getattr(self, "_" + name)
except AttributeError:
- pass # Simpy ignore
+ pass # Simply ignore
return params
def print(self, indent: int = 0) -> str:
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 085a1a629634a..0ea0eef50c0f3 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -2549,7 +2549,7 @@ def join(
pyspark.errors.exceptions.captured.AnalysisException: Column name#0 are ambiguous...
A better approach is to assign aliases to the dataframes, and then reference
- the ouptut columns from the join operation using these aliases:
+ the output columns from the join operation using these aliases:
>>> df.alias("a").join(
... df.alias("b"), sf.col("a.name") == sf.col("b.name"), "outer"
@@ -3907,7 +3907,7 @@ def groupingSets(
groupingSets : sequence of sequence of columns or str
Individual set of columns to group on.
cols : :class:`Column` or str
- Addional grouping columns specified by users.
+ Additional grouping columns specified by users.
Those columns are shown as the output columns after aggregation.
Returns
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 391bc3db7a86f..4b4c164055eaf 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -15832,7 +15832,7 @@ def split_part(src: "ColumnOrName", delimiter: "ColumnOrName", partNum: "ColumnO
Parameters
----------
src : :class:`~pyspark.sql.Column` or column name
- A column of string to be splited.
+ A column of string to be split.
delimiter : :class:`~pyspark.sql.Column` or column name
A column of string, the delimiter used for split.
partNum : :class:`~pyspark.sql.Column` or column name
@@ -19618,7 +19618,7 @@ def from_json(
"""
Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType`
as keys type, :class:`StructType` or :class:`ArrayType` with
- the specified schema. Returns `null`, in the case of an unparseable string.
+ the specified schema. Returns `null`, in the case of an unparsable string.
.. versionadded:: 2.1.0
@@ -20230,7 +20230,7 @@ def from_xml(
) -> Column:
"""
Parses a column containing a XML string to a row with
- the specified schema. Returns `null`, in the case of an unparseable string.
+ the specified schema. Returns `null`, in the case of an unparsable string.
.. versionadded:: 4.0.0
@@ -22624,7 +22624,7 @@ def transform_keys(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -
Returns
-------
:class:`~pyspark.sql.Column`
- a new map of enties where new keys were calculated by applying given function to
+ a new map of entries where new keys were calculated by applying the given function to
each key value argument.
Examples
@@ -22664,7 +22664,7 @@ def transform_values(col: "ColumnOrName", f: Callable[[Column, Column], Column])
Returns
-------
:class:`~pyspark.sql.Column`
- a new map of enties where new values were calculated by applying given function to
+ a new map of entries where new values were calculated by applying the given function to
each key value argument.
Examples
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 4744bdf861d37..2113f0707f910 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -215,7 +215,7 @@ def options(self, **options: "OptionalPrimitiveType") -> "DataFrameReader":
Parameters
----------
**options : dict
- The dictionary of string keys and prmitive-type values.
+ The dictionary of string keys and primitive-type values.
Examples
--------
diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py
index e29873173cc3a..b7a02efcd5e2b 100644
--- a/python/pyspark/sql/tests/connect/test_connect_function.py
+++ b/python/pyspark/sql/tests/connect/test_connect_function.py
@@ -54,7 +54,7 @@ def setUpClass(cls):
# Disable the shared namespace so pyspark.sql.functions, etc point the regular
# PySpark libraries.
os.environ["PYSPARK_NO_NAMESPACE_SHARE"] = "1"
- cls.connect = cls.spark # Switch Spark Connect session and regular PySpark sesion.
+ cls.connect = cls.spark # Switch Spark Connect session and regular PySpark session.
cls.spark = PySparkSession._instantiatedSession
assert cls.spark is not None
diff --git a/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py b/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py
index de8f30baebca5..9db66aa252ee6 100644
--- a/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py
+++ b/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py
@@ -146,7 +146,7 @@ def func(df: DataFrame, batch_id: int):
def my_test_function_2():
return 2
- def test_streaming_foreach_batch_fuction_calling(self):
+ def test_streaming_foreach_batch_function_calling(self):
def my_test_function_3():
return 3
diff --git a/python/pyspark/sql/tests/test_udtf.py b/python/pyspark/sql/tests/test_udtf.py
index 206cfd7dc4885..8447edfbbb15d 100644
--- a/python/pyspark/sql/tests/test_udtf.py
+++ b/python/pyspark/sql/tests/test_udtf.py
@@ -1345,7 +1345,7 @@ def eval(self, a, b):
assertSchemaEqual(df.schema, expected_schema)
assertDataFrameEqual(df, expected_results)
- def test_udtf_with_analyze_arbitary_number_arguments(self):
+ def test_udtf_with_analyze_arbitrary_number_arguments(self):
class TestUDTF:
@staticmethod
def analyze(*args: AnalyzeArgument) -> AnalyzeResult:
diff --git a/python/pyspark/sql/udtf.py b/python/pyspark/sql/udtf.py
index 5ce3e2dfd2a9e..cf4f976fd93b3 100644
--- a/python/pyspark/sql/udtf.py
+++ b/python/pyspark/sql/udtf.py
@@ -148,7 +148,7 @@ class AnalyzeResult:
The schema that the Python UDTF will return.
withSinglePartition: bool
If true, the UDTF is specifying for Catalyst to repartition all rows of the input TABLE
- argument to one collection for consumption by exactly one instance of the correpsonding
+ argument to one collection for consumption by exactly one instance of the corresponding
UDTF class.
partitionBy: sequence of :class:`PartitioningColumn`
If non-empty, this is a sequence of expressions that the UDTF is specifying for Catalyst to
diff --git a/python/pyspark/streaming/tests/test_dstream.py b/python/pyspark/streaming/tests/test_dstream.py
index 046247763c0b3..4c9633db311a6 100644
--- a/python/pyspark/streaming/tests/test_dstream.py
+++ b/python/pyspark/streaming/tests/test_dstream.py
@@ -403,7 +403,7 @@ def failed_func(rdd1, rdd2):
self.fail("a failed func should throw an error")
- def test_failed_func_with_reseting_failure(self):
+ def test_failed_func_with_resetting_failure(self):
input = [self.sc.parallelize([d], 1) for d in range(4)]
input_stream = self.ssc.queueStream(input)
diff --git a/python/pyspark/worker_util.py b/python/pyspark/worker_util.py
index 81c05ce94eb65..5c758d3f83fe6 100644
--- a/python/pyspark/worker_util.py
+++ b/python/pyspark/worker_util.py
@@ -107,8 +107,8 @@ def setup_memory_limits(memory_limit_mb: int) -> None:
except (resource.error, OSError, ValueError) as e:
# not all systems support resource limits, so warn instead of failing
- curent = currentframe()
- lineno = getframeinfo(curent).lineno + 1 if curent is not None else 0
+ current = currentframe()
+ lineno = getframeinfo(current).lineno + 1 if current is not None else 0
if "__file__" in globals():
print(
warnings.formatwarning(
diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala
index 051093fcad277..44b634af95ca9 100644
--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala
+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala
@@ -291,7 +291,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message](
assert(finished == false)
} else {
// If it wasn't sent, time deadline must have been reached before stream became available,
- // or it was intterupted. Will exit in the next loop iterattion.
+ // or it was interrupted. Will exit in the next loop iteration.
assert(deadlineLimitReached || interrupted)
}
} else if (streamFinished) {