diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2273a22cb..71897e8ec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## 0.10.13dev
 
+* [Feature] Disable full stack trace when using spark connect ([#1011](https://github.com/ploomber/jupysql/issues/1011)) (by [@b1ackout](https://github.com/b1ackout))
+
 ## 0.10.12 (2024-07-12)
 
 * [Feature] Remove sqlalchemy upper bound ([#1020](https://github.com/ploomber/jupysql/pull/1020))
diff --git a/src/sql/run/sparkdataframe.py b/src/sql/run/sparkdataframe.py
index 81644b1e2..995193776 100644
--- a/src/sql/run/sparkdataframe.py
+++ b/src/sql/run/sparkdataframe.py
@@ -9,9 +9,9 @@
 
 
 def handle_spark_dataframe(dataframe, should_cache=False):
-    """Execute a ResultSet sqlaproxy using pysark module."""
+    """Execute a ResultSet sqlaproxy using pyspark module."""
     if not DataFrame and not CDataFrame:
-        raise exceptions.MissingPackageError("pysark not installed")
+        raise exceptions.MissingPackageError("pyspark not installed")
 
     return SparkResultProxy(dataframe, dataframe.columns, should_cache)
 
diff --git a/src/sql/util.py b/src/sql/util.py
index 74af08c32..4eadcf1f8 100644
--- a/src/sql/util.py
+++ b/src/sql/util.py
@@ -7,6 +7,12 @@
 from sqlglot.errors import ParseError
 from sqlalchemy.exc import SQLAlchemyError
 from ploomber_core.dependencies import requires
+
+try:
+    from pyspark.sql.utils import AnalysisException
+except ModuleNotFoundError:
+    AnalysisException = None
+
 import ast
 from os.path import isfile
 import re
@@ -556,11 +562,14 @@ def is_non_sqlalchemy_error(error):
         "pyodbc.ProgrammingError",
         # Clickhouse errors
         "DB::Exception:",
-        # Pyspark
-        "UNRESOLVED_ROUTINE",
-        "PARSE_SYNTAX_ERROR",
     ]
-    return any(msg in str(error) for msg in specific_db_errors)
+    is_pyspark_analysis_exception = (
+        isinstance(error, AnalysisException) if AnalysisException else False
+    )
+    return (
+        any(msg in str(error) for msg in specific_db_errors)
+        or is_pyspark_analysis_exception
+    )
 
 
 def if_substring_exists(string, substrings):
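
Note on the `src/sql/util.py` change: the `try`/`except` import guard means the new `isinstance` check only runs when pyspark is importable; when it is not, `AnalysisException` is `None` and the function falls back to plain string matching, so non-Spark users see no behavior change. A minimal, self-contained sketch of that pattern (the abridged error list and the sample errors below are illustrative, not part of the diff):

```python
# Guarded optional import, as in src/sql/util.py: if pyspark is missing,
# AnalysisException becomes None instead of raising at import time.
try:
    from pyspark.sql.utils import AnalysisException
except ModuleNotFoundError:
    AnalysisException = None


def is_non_sqlalchemy_error(error):
    # Abridged version of the specific_db_errors list from the diff.
    specific_db_errors = [
        "pyodbc.ProgrammingError",
        "DB::Exception:",
    ]
    # Guarding on AnalysisException matters here: isinstance(error, None)
    # would raise TypeError when pyspark is not installed.
    is_pyspark_analysis_exception = (
        isinstance(error, AnalysisException) if AnalysisException else False
    )
    return (
        any(msg in str(error) for msg in specific_db_errors)
        or is_pyspark_analysis_exception
    )


# Illustrative checks (runnable with or without pyspark installed):
assert is_non_sqlalchemy_error(ValueError("DB::Exception: bad query"))
assert not is_non_sqlalchemy_error(ValueError("unrelated failure"))
```

Detecting Spark errors by type rather than by matching message fragments such as `UNRESOLVED_ROUTINE` or `PARSE_SYNTAX_ERROR` covers the whole `AnalysisException` hierarchy (which, in recent pyspark versions, includes the Spark Connect variants), supporting the stack-trace suppression described in the changelog entry ([#1011](https://github.com/ploomber/jupysql/issues/1011)).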