From f51773fa5d20b88a33df790ce8687b88b58554aa Mon Sep 17 00:00:00 2001 From: Bing Li <63471091+sfc-gh-bli@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:51:36 -0700 Subject: [PATCH] SNOW-1693064 Compile with Scala 2.13 (#167) * change doc style * fix all scala 2.13 changes * support scala 2.13 * add test * update test * fix test * fix udf test * fix test --- .github/workflows/precommit-multiple-sdk.yml | 24 + .scalafmt.conf | 1 + build.sbt | 3 +- .../com/snowflake/snowpark/AsyncJob.scala | 260 +- .../scala/com/snowflake/snowpark/Column.scala | 1021 +-- .../snowpark/CopyableDataFrame.scala | 532 +- .../com/snowflake/snowpark/DataFrame.scala | 4286 ++++++----- .../snowpark/DataFrameNaFunctions.scala | 130 +- .../snowflake/snowpark/DataFrameReader.scala | 788 +- .../snowpark/DataFrameStatFunctions.scala | 431 +- .../snowflake/snowpark/DataFrameWriter.scala | 715 +- .../snowflake/snowpark/FileOperation.scala | 237 +- .../com/snowflake/snowpark/GroupingSets.scala | 41 +- .../com/snowflake/snowpark/MergeBuilder.scala | 266 +- .../com/snowflake/snowpark/MergeClause.scala | 258 +- .../snowpark/RelationalGroupedDataFrame.scala | 358 +- .../scala/com/snowflake/snowpark/Row.scala | 402 +- .../snowpark/SProcRegistration.scala | 1289 ++-- .../com/snowflake/snowpark/SaveMode.scala | 50 +- .../com/snowflake/snowpark/Session.scala | 1655 ++-- .../snowpark/SnowparkClientException.scala | 9 +- .../snowflake/snowpark/StoredProcedure.scala | 31 +- .../snowflake/snowpark/TableFunction.scala | 85 +- .../snowflake/snowpark/UDFRegistration.scala | 1552 ++-- .../snowflake/snowpark/UDTFRegistration.scala | 388 +- .../com/snowflake/snowpark/Updatable.scala | 595 +- .../snowpark/UserDefinedFunction.scala | 34 +- .../scala/com/snowflake/snowpark/Window.scala | 58 +- .../com/snowflake/snowpark/WindowSpec.scala | 35 +- .../com/snowflake/snowpark/functions.scala | 6645 +++++++++-------- .../snowpark/internal/ClosureCleaner.scala | 67 +- .../snowpark/internal/ErrorMessage.scala | 19 +- .../snowpark/internal/FatJarBuilder.scala | 91 +- .../snowpark/internal/Implicits.scala | 7 +- .../snowpark/internal/JavaCodeCompiler.scala | 60 +- .../snowpark/internal/JavaUtils.scala | 18 +- .../snowpark/internal/ScalaFunctions.scala | 405 +- .../snowpark/internal/SchemaUtils.scala | 5 +- .../snowpark/internal/ServerConnection.scala | 7 +- .../internal/UDXRegistrationHandler.scala | 37 +- .../snowflake/snowpark/internal/Utils.scala | 18 +- .../internal/analyzer/Expression.scala | 4 +- .../internal/analyzer/SnowflakePlan.scala | 19 +- .../internal/analyzer/SnowflakePlanNode.scala | 21 +- .../internal/analyzer/SqlGenerator.scala | 2 +- .../snowpark/internal/analyzer/package.scala | 25 +- .../snowflake/snowpark/tableFunctions.scala | 375 +- .../snowflake/snowpark/types/ArrayType.scala | 7 +- .../snowflake/snowpark/types/BinaryType.scala | 7 +- .../snowpark/types/BooleanType.scala | 7 +- .../snowflake/snowpark/types/DataType.scala | 21 +- .../snowflake/snowpark/types/DateType.scala | 7 +- .../snowflake/snowpark/types/Geography.scala | 96 +- .../snowpark/types/GeographyType.scala | 7 +- .../snowflake/snowpark/types/Geometry.scala | 72 +- .../snowpark/types/GeometryType.scala | 7 +- .../snowflake/snowpark/types/MapType.scala | 7 +- .../snowpark/types/NumericType.scala | 77 +- .../snowflake/snowpark/types/StringType.scala | 7 +- .../snowflake/snowpark/types/StructType.scala | 263 +- .../snowflake/snowpark/types/TimeType.scala | 9 +- .../snowpark/types/TimestampType.scala | 7 +- .../snowflake/snowpark/types/Variant.scala | 378 +- 
.../snowpark/types/VariantType.scala | 7 +- .../snowflake/snowpark/types/package.scala | 7 +- .../com/snowflake/snowpark/udtf/UDTFs.scala | 595 +- .../code_verification/ClassUtils.scala | 7 +- .../code_verification/JavaScalaAPISuite.scala | 4 +- .../code_verification/PomSuite.scala | 4 +- .../snowpark/ErrorMessageSuite.scala | 4 +- .../snowpark/ExpressionAndPlanNodeSuite.scala | 14 +- .../snowpark/FatJarBuilderSuite.scala | 5 +- .../com/snowflake/snowpark/JavaAPISuite.scala | 5 +- .../snowpark/JavaCodeCompilerSuite.scala | 4 +- .../com/snowflake/snowpark/LoggingSuite.scala | 4 +- .../snowpark/NewColumnReferenceSuite.scala | 2 +- .../com/snowflake/snowpark/SNTestBase.scala | 5 +- .../SnowparkSFConnectionHandlerSuite.scala | 4 +- .../com/snowflake/snowpark/TestUtils.scala | 3 + .../DataFrameAggregateSuite.scala | 5 +- .../snowpark_test/IndependentClassSuite.scala | 4 +- .../snowpark_test/JavaUtilsSuite.scala | 5 +- .../snowpark_test/ScalaVariantSuite.scala | 4 +- .../snowpark_test/WindowSpecSuite.scala | 28 +- 84 files changed, 13152 insertions(+), 11906 deletions(-) create mode 100644 .github/workflows/precommit-multiple-sdk.yml diff --git a/.github/workflows/precommit-multiple-sdk.yml b/.github/workflows/precommit-multiple-sdk.yml new file mode 100644 index 00000000..46e48538 --- /dev/null +++ b/.github/workflows/precommit-multiple-sdk.yml @@ -0,0 +1,24 @@ +name: precommit test - Compile with Multiple SDK +on: + push: + branches: [ main ] + pull_request: + branches: '**' + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup JDK + uses: actions/setup-java@v3 + with: + distribution: temurin + java-version: 8 + - name: Decrypt profile.properties + run: .github/scripts/decrypt_profile.sh + env: + PROFILE_PASSWORD: ${{ secrets.PROFILE_PASSWORD }} + - name: Run test + run: sbt clean +compile \ No newline at end of file diff --git a/.scalafmt.conf b/.scalafmt.conf index e89b4fa8..f1fb8694 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -7,6 +7,7 @@ align = none align.openParenDefnSite = false align.openParenCallSite = false align.tokens = [] +docstrings.style = Asterisk optIn = { configStyleArguments = false } diff --git a/build.sbt b/build.sbt index 5054ceab..dae45688 100644 --- a/build.sbt +++ b/build.sbt @@ -25,6 +25,7 @@ lazy val root = (project in file(".")) name := snowparkName, version := "1.15.0-SNAPSHOT", scalaVersion := sys.props.getOrElse("SCALA_VERSION", default = "2.12.18"), + crossScalaVersions := Seq("2.12.18", "2.13.15"), organization := "com.snowflake", javaOptions ++= Seq("-source", "1.8", "-target", "1.8"), licenses := Seq("The Apache Software License, Version 2.0" -> @@ -58,7 +59,7 @@ lazy val root = (project in file(".")) // "junit" % "juint" % "4.13.1" % Test, "com.github.sbt" % "junit-interface" % "0.13.3" % Test, "org.mockito" % "mockito-core" % "2.23.0" % Test, - "org.scalatest" %% "scalatest" % "3.0.5" % Test, + "org.scalatest" %% "scalatest" % "3.2.19" % Test, ), scalafmtOnCompile := true, javafmtOnCompile := true, diff --git a/src/main/scala/com/snowflake/snowpark/AsyncJob.scala b/src/main/scala/com/snowflake/snowpark/AsyncJob.scala index de2ddd96..788d1254 100644 --- a/src/main/scala/com/snowflake/snowpark/AsyncJob.scala +++ b/src/main/scala/com/snowflake/snowpark/AsyncJob.scala @@ -4,138 +4,146 @@ import com.snowflake.snowpark.internal.{CloseableIterator, ErrorMessage} import com.snowflake.snowpark.internal.analyzer.SnowflakePlan import scala.reflect.runtime.universe.{TypeTag, typeOf} -/** Provides a 
way to track an asynchronous query in Snowflake. - * - * You can use this object to check the status of an asynchronous query and retrieve the results. - * - * To check the status of an asynchronous query that you submitted earlier, call - * [[Session.createAsyncJob]], and pass in the query ID. This returns an `AsyncJob` object that you - * can use to check the status of the query and retrieve the query results. - * - * Example 1: Create an AsyncJob by specifying a valid ``, check whether the query is - * running or not, and get the result rows. - * {{{ - * val asyncJob = session.createAsyncJob() - * println(s"Is query \${asyncJob.getQueryId()} running? \${asyncJob.isRunning()}") - * val rows = asyncJob.getRows() - * }}} - * - * Example 2: Create an AsyncJob by specifying a valid `` and cancel the query if it is - * still running. - * {{{ - * session.createAsyncJob().cancel() - * }}} - * - * @since 0.11.0 - */ +/** + * Provides a way to track an asynchronous query in Snowflake. + * + * You can use this object to check the status of an asynchronous query and retrieve the results. + * + * To check the status of an asynchronous query that you submitted earlier, call + * [[Session.createAsyncJob]], and pass in the query ID. This returns an `AsyncJob` object that you + * can use to check the status of the query and retrieve the query results. + * + * Example 1: Create an AsyncJob by specifying a valid ``, check whether the query is + * running or not, and get the result rows. + * {{{ + * val asyncJob = session.createAsyncJob() + * println(s"Is query \${asyncJob.getQueryId()} running? \${asyncJob.isRunning()}") + * val rows = asyncJob.getRows() + * }}} + * + * Example 2: Create an AsyncJob by specifying a valid `` and cancel the query if it is + * still running. + * {{{ + * session.createAsyncJob().cancel() + * }}} + * + * @since 0.11.0 + */ class AsyncJob private[snowpark] (queryID: String, session: Session, plan: Option[SnowflakePlan]) { - /** Get the query ID for the underlying query. - * - * @since 0.11.0 - * @return - * a query ID - */ + /** + * Get the query ID for the underlying query. + * + * @since 0.11.0 + * @return + * a query ID + */ def getQueryId(): String = queryID - /** Returns an iterator of [[Row]] objects that you can use to retrieve the results for the - * underlying query. - * - * Unlike the [[getRows]] method, this method does not load all data into memory at once. - * - * @since 0.11.0 - * @param maxWaitTimeInSeconds - * The maximum number of seconds to wait for the query to complete before attempting to - * retrieve the results. The default value is the value of the - * `snowpark_request_timeout_in_seconds` configuration property. - * @return - * An Iterator of [[Row]] objects - */ + /** + * Returns an iterator of [[Row]] objects that you can use to retrieve the results for the + * underlying query. + * + * Unlike the [[getRows]] method, this method does not load all data into memory at once. + * + * @since 0.11.0 + * @param maxWaitTimeInSeconds + * The maximum number of seconds to wait for the query to complete before attempting to retrieve + * the results. The default value is the value of the `snowpark_request_timeout_in_seconds` + * configuration property. + * @return + * An Iterator of [[Row]] objects + */ def getIterator(maxWaitTimeInSeconds: Int = session.requestTimeoutInSeconds): Iterator[Row] = session.conn.getAsyncResult(queryID, maxWaitTimeInSeconds, plan)._1 - /** Returns an Array of [[Row]] objects that represent the results of the underlying query. 
- * - * @since 0.11.0 - * @param maxWaitTimeInSeconds - * The maximum number of seconds to wait for the query to complete before attempting to - * retrieve the results. The default value is the value of the - * `snowpark_request_timeout_in_seconds` configuration property. - * @return - * An Array of [[Row]] objects - */ + /** + * Returns an Array of [[Row]] objects that represent the results of the underlying query. + * + * @since 0.11.0 + * @param maxWaitTimeInSeconds + * The maximum number of seconds to wait for the query to complete before attempting to retrieve + * the results. The default value is the value of the `snowpark_request_timeout_in_seconds` + * configuration property. + * @return + * An Array of [[Row]] objects + */ def getRows(maxWaitTimeInSeconds: Int = session.requestTimeoutInSeconds): Array[Row] = getIterator(maxWaitTimeInSeconds).toArray - /** Returns true if the underlying query completed. - * - * Completion may be due to query success, cancellation or failure, in all of these cases, this - * method will return true. - * - * @since 0.11.0 - * @return - * true if this query completed. - */ + /** + * Returns true if the underlying query completed. + * + * Completion may be due to query success, cancellation or failure, in all of these cases, this + * method will return true. + * + * @since 0.11.0 + * @return + * true if this query completed. + */ def isDone(): Boolean = session.conn.isDone(queryID) - /** Cancel the underlying query if it is running. - * - * @since 0.11.0 - */ + /** + * Cancel the underlying query if it is running. + * + * @since 0.11.0 + */ def cancel(): Unit = session.conn.runQuery(s"SELECT SYSTEM$$CANCEL_QUERY('$queryID')") } -/** Provides a way to track an asynchronously executed action in a DataFrame. - * - * To get the result of the action (e.g. the number of results from a `count()` action or an Array - * of [[Row]] objects from the `collect()` action), call the [[getResult]] method. - * - * To perform an action on a DataFrame asynchronously, call an action method on the - * [[DataFrameAsyncActor]] object returned by [[DataFrame.async]]. For example: - * {{{ - * val asyncJob1 = df.async.collect() - * val asyncJob2 = df.async.toLocalIterator() - * val asyncJob3 = df.async.count() - * }}} - * Each of these methods returns a TypedAsyncJob object that you can use to get the results of the - * action. - * - * @since 0.11.0 - */ +/** + * Provides a way to track an asynchronously executed action in a DataFrame. + * + * To get the result of the action (e.g. the number of results from a `count()` action or an Array + * of [[Row]] objects from the `collect()` action), call the [[getResult]] method. + * + * To perform an action on a DataFrame asynchronously, call an action method on the + * [[DataFrameAsyncActor]] object returned by [[DataFrame.async]]. For example: + * {{{ + * val asyncJob1 = df.async.collect() + * val asyncJob2 = df.async.toLocalIterator() + * val asyncJob3 = df.async.count() + * }}} + * Each of these methods returns a TypedAsyncJob object that you can use to get the results of the + * action. + * + * @since 0.11.0 + */ class TypedAsyncJob[T: TypeTag] private[snowpark] ( queryID: String, session: Session, plan: Option[SnowflakePlan]) extends AsyncJob(queryID, session, plan) { - /** Returns the result for the specific DataFrame action. - * - * Example 1: Create a TypedAsyncJob by asynchronously executing a DataFrame action `collect()`, - * check whether the job is running or not, and get the action result with [[getResult]]. 
NOTE: - * The returned type for [[getResult]] in this example is `Array[Row]`. - * {{{ - * val df = session.table("t1") - * val asyncJob = df.async.collect() - * println(s"Is query \${asyncJob.getQueryId()} running? \${asyncJob.isRunning()}") - * val rowResult = asyncJob.getResult() - * }}} - * - * Example 2: Create a TypedAsyncJob by asynchronously executing a DataFrame action count() and - * get the action result with [[getResult]]. NOTE: The returned type for [[getResult]] in this - * example is `Long`. - * {{{ - * val asyncJob = df.async.count() - * val longResult = asyncJob.getResult() - * }}} - * - * @since 0.11.0 - * @param maxWaitTimeInSeconds - * The maximum number of seconds to wait for the query to complete before attempting to - * retrieve the results. The default value is the value of the - * `snowpark_request_timeout_in_seconds` configuration property. - * @return - * The result for the specific action - */ + /** + * Returns the result for the specific DataFrame action. + * + * Example 1: Create a TypedAsyncJob by asynchronously executing a DataFrame action `collect()`, + * check whether the job is running or not, and get the action result with [[getResult]]. NOTE: + * The returned type for [[getResult]] in this example is `Array[Row]`. + * {{{ + * val df = session.table("t1") + * val asyncJob = df.async.collect() + * println(s"Is query \${asyncJob.getQueryId()} running? \${asyncJob.isRunning()}") + * val rowResult = asyncJob.getResult() + * }}} + * + * Example 2: Create a TypedAsyncJob by asynchronously executing a DataFrame action count() and + * get the action result with [[getResult]]. NOTE: The returned type for [[getResult]] in this + * example is `Long`. + * {{{ + * val asyncJob = df.async.count() + * val longResult = asyncJob.getResult() + * }}} + * + * @since 0.11.0 + * @param maxWaitTimeInSeconds + * The maximum number of seconds to wait for the query to complete before attempting to retrieve + * the results. The default value is the value of the `snowpark_request_timeout_in_seconds` + * configuration property. + * @return + * The result for the specific action + */ def getResult(maxWaitTimeInSeconds: Int = session.requestTimeoutInSeconds): T = { val tpe = typeOf[T] tpe match { @@ -181,10 +189,11 @@ class TypedAsyncJob[T: TypeTag] private[snowpark] ( } } -/** Provides a way to track an asynchronously executed action in a MergeBuilder. - * - * @since 1.3.0 - */ +/** + * Provides a way to track an asynchronously executed action in a MergeBuilder. + * + * @since 1.3.0 + */ class MergeTypedAsyncJob private[snowpark] ( queryID: String, session: Session, @@ -192,16 +201,17 @@ class MergeTypedAsyncJob private[snowpark] ( mergeBuilder: MergeBuilder) extends TypedAsyncJob[MergeResult](queryID, session, plan) { - /** Returns the MergeResult for the MergeBuilder's action - * - * @since 1.3.0 - * @param maxWaitTimeInSeconds - * The maximum number of seconds to wait for the query to complete before attempting to - * retrieve the results. The default value is the value of the - * `snowpark_request_timeout_in_seconds` configuration property. - * @return - * The [[MergeResult]] - */ + /** + * Returns the MergeResult for the MergeBuilder's action + * + * @since 1.3.0 + * @param maxWaitTimeInSeconds + * The maximum number of seconds to wait for the query to complete before attempting to retrieve + * the results. The default value is the value of the `snowpark_request_timeout_in_seconds` + * configuration property. 
+ * @return + * The [[MergeResult]] + */ override def getResult(maxWaitTimeInSeconds: Int = session.requestTimeoutInSeconds): MergeResult = MergeBuilder.getMergeResult(getRows(maxWaitTimeInSeconds), mergeBuilder) } diff --git a/src/main/scala/com/snowflake/snowpark/Column.scala b/src/main/scala/com/snowflake/snowpark/Column.scala index c102fd62..d02460c6 100644 --- a/src/main/scala/com/snowflake/snowpark/Column.scala +++ b/src/main/scala/com/snowflake/snowpark/Column.scala @@ -6,39 +6,39 @@ import com.snowflake.snowpark.types.DataType import com.snowflake.snowpark.functions.lit // scalastyle:off -/** Represents a column or an expression in a DataFrame. - * - * To create a Column object to refer to a column in a DataFrame, you can: - * - * - Use the [[com.snowflake.snowpark.functions.col(colName* functions.col]] function. - * - Use the [[com.snowflake.snowpark.DataFrame.col DataFrame.col]] method. - * - Use the shorthand for the - * [[com.snowflake.snowpark.DataFrame.apply(colName* DataFrame.apply]] method - * (``("")`). - * - * For example: - * - * {{{ - * import com.snowflake.snowpark.functions.col - * df.select(col("name")) - * df.select(df.col("name")) - * dfLeft.select(dfRight, dfLeft("name") === dfRight("name")) - * }}} - * - * This class also defines utility functions for constructing expressions with Columns. - * - * The following examples demonstrate how to use Column objects in expressions: - * {{{ - * df - * .filter(col("id") === 20) - * .filter((col("a") + col("b")) < 10) - * .select((col("b") * 10) as "c") - * }}} - * - * @groupname utl Utility Functions - * @groupname op Expression Operation Functions - * @since 0.1.0 - */ +/** + * Represents a column or an expression in a DataFrame. + * + * To create a Column object to refer to a column in a DataFrame, you can: + * + * - Use the [[com.snowflake.snowpark.functions.col(colName* functions.col]] function. + * - Use the [[com.snowflake.snowpark.DataFrame.col DataFrame.col]] method. + * - Use the shorthand for the [[com.snowflake.snowpark.DataFrame.apply(colName* DataFrame.apply]] + * method (``("")`). + * + * For example: + * + * {{{ + * import com.snowflake.snowpark.functions.col + * df.select(col("name")) + * df.select(df.col("name")) + * dfLeft.select(dfRight, dfLeft("name") === dfRight("name")) + * }}} + * + * This class also defines utility functions for constructing expressions with Columns. + * + * The following examples demonstrate how to use Column objects in expressions: + * {{{ + * df + * .filter(col("id") === 20) + * .filter((col("a") + col("b")) < 10) + * .select((col("b") * 10) as "c") + * }}} + * + * @groupname utl Utility Functions + * @groupname op Expression Operation Functions + * @since 0.1.0 + */ // scalastyle:on case class Column private[snowpark] (private[snowpark] val expr: Expression) extends Logging { private[snowpark] def named: NamedExpression = expr match { @@ -47,21 +47,21 @@ case class Column private[snowpark] (private[snowpark] val expr: Expression) ext case _ => UnresolvedAlias(expr) } - /** Returns a conditional expression that you can pass to the filter or where method to perform - * the equivalent of a WHERE ... IN query with a specified list of values. - * - * The expression evaluates to true if the value in the column is one of the values in a - * specified sequence. - * - * For example, the following code returns a DataFrame that contains the rows where the column - * "a" contains the value 1, 2, or 3. This is equivalent to SELECT * FROM table WHERE a IN (1, 2, - * 3). 
- * {{{ - * df.filter(df("a").in(Seq(1, 2, 3))) - * }}} - * @group op - * @since 0.10.0 - */ + /** + * Returns a conditional expression that you can pass to the filter or where method to perform the + * equivalent of a WHERE ... IN query with a specified list of values. + * + * The expression evaluates to true if the value in the column is one of the values in a specified + * sequence. + * + * For example, the following code returns a DataFrame that contains the rows where the column "a" + * contains the value 1, 2, or 3. This is equivalent to SELECT * FROM table WHERE a IN (1, 2, 3). + * {{{ + * df.filter(df("a").in(Seq(1, 2, 3))) + * }}} + * @group op + * @since 0.10.0 + */ def in(values: Seq[Any]): Column = { val columnCount = expr match { case me: MultipleExpression => me.expressions.size @@ -99,161 +99,172 @@ case class Column private[snowpark] (private[snowpark] val expr: Expression) ext withExpr(InExpression(expr, valueExpressions)) } - /** Returns a conditional expression that you can pass to the filter or where method to perform a - * WHERE ... IN query with a specified subquery. - * - * The expression evaluates to true if the value in the column is one of the values in the column - * of the same name in a specified DataFrame. - * - * For example, the following code returns a DataFrame that contains the rows where the column - * "a" of `df2` contains one of the values from column "a" in `df1`. This is equivalent to SELECT - * * FROM table2 WHERE a IN (SELECT a FROM table1). - * {{{ - * val df1 = session.table(table1) - * val df2 = session.table(table2) - * df2.filter(col("a").in(df1)) - * }}} - * - * @group op - * @since 0.10.0 - */ + /** + * Returns a conditional expression that you can pass to the filter or where method to perform a + * WHERE ... IN query with a specified subquery. + * + * The expression evaluates to true if the value in the column is one of the values in the column + * of the same name in a specified DataFrame. + * + * For example, the following code returns a DataFrame that contains the rows where the column "a" + * of `df2` contains one of the values from column "a" in `df1`. This is equivalent to SELECT * + * FROM table2 WHERE a IN (SELECT a FROM table1). + * {{{ + * val df1 = session.table(table1) + * val df2 = session.table(table2) + * df2.filter(col("a").in(df1)) + * }}} + * + * @group op + * @since 0.10.0 + */ def in(df: DataFrame): Column = in(Seq(df)) // scalastyle:off - /** Returns the specified element (field) in a column that contains - * [[https://docs.snowflake.com/en/user-guide/semistructured-concepts.html semi-structured data]]. - * - * The method applies case-sensitive matching to the names of the specified elements. - * - * This is equivalent to using - * [[https://docs.snowflake.com/en/user-guide/querying-semistructured.html#bracket-notation bracket notation in SQL]] - * (`column['element']`). - * - * - If the column is an OBJECT value, this function extracts the VARIANT value of the element - * with the specified name from the OBJECT value. - * - * - If the element is not found, the method returns NULL. - * - * - You must not specify an empty string for the element name. - * - * - If the column is a VARIANT value, this function first checks if the VARIANT value contains - * an OBJECT value. - * - * - If the VARIANT value does not contain an OBJECT value, the method returns NULL. - * - * - Otherwise, the method works as described above. 
- * - * For example: - * {{{ - * import com.snowflake.snowpark.functions.col - * df.select(col("src")("salesperson")("emails")(0)) - * }}} - * - * @param field - * field name of the subfield to be extracted. You cannot specify a path. - * @group op - * @since 0.2.0 - */ + /** + * Returns the specified element (field) in a column that contains + * [[https://docs.snowflake.com/en/user-guide/semistructured-concepts.html semi-structured data]]. + * + * The method applies case-sensitive matching to the names of the specified elements. + * + * This is equivalent to using + * [[https://docs.snowflake.com/en/user-guide/querying-semistructured.html#bracket-notation bracket notation in SQL]] + * (`column['element']`). + * + * - If the column is an OBJECT value, this function extracts the VARIANT value of the element + * with the specified name from the OBJECT value. + * + * - If the element is not found, the method returns NULL. + * + * - You must not specify an empty string for the element name. + * + * - If the column is a VARIANT value, this function first checks if the VARIANT value contains + * an OBJECT value. + * + * - If the VARIANT value does not contain an OBJECT value, the method returns NULL. + * + * - Otherwise, the method works as described above. + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions.col + * df.select(col("src")("salesperson")("emails")(0)) + * }}} + * + * @param field + * field name of the subfield to be extracted. You cannot specify a path. + * @group op + * @since 0.2.0 + */ // scalastyle:on def apply(field: String): Column = withExpr(SubfieldString(expr, field)) // scalastyle:off - /** Returns the element (field) at the specified index in a column that contains - * [[https://docs.snowflake.com/en/user-guide/semistructured-concepts.html semi-structured data]]. - * - * The method applies case-sensitive matching to the names of the specified elements. - * - * This is equivalent to using - * [[https://docs.snowflake.com/en/user-guide/querying-semistructured.html#bracket-notation bracket notation in SQL]] - * (`column[index]`). - * - * - If the column is an ARRAY value, this function extracts the VARIANT value of the array - * element at the specified index. - * - * - If the index points outside of the array boundaries or if an element does not exist at the - * specified index (e.g. if the array is sparsely populated), the method returns NULL. - * - * - If the column is a VARIANT value, this function first checks if the VARIANT value contains - * an ARRAY value. - * - * - If the VARIANT value does not contain an ARRAY value, the method returns NULL. - * - * - Otherwise, the method works as described above. - * - * For example: - * {{{ - * import com.snowflake.snowpark.functions.col - * df.select(col("src")(1)(0)("name")(0)) - * }}} - * - * @param idx - * index of the subfield to be extracted - * @group op - * @since 0.2.0 - */ + /** + * Returns the element (field) at the specified index in a column that contains + * [[https://docs.snowflake.com/en/user-guide/semistructured-concepts.html semi-structured data]]. + * + * The method applies case-sensitive matching to the names of the specified elements. + * + * This is equivalent to using + * [[https://docs.snowflake.com/en/user-guide/querying-semistructured.html#bracket-notation bracket notation in SQL]] + * (`column[index]`). + * + * - If the column is an ARRAY value, this function extracts the VARIANT value of the array + * element at the specified index. 
+ * + * - If the index points outside of the array boundaries or if an element does not exist at the + * specified index (e.g. if the array is sparsely populated), the method returns NULL. + * + * - If the column is a VARIANT value, this function first checks if the VARIANT value contains + * an ARRAY value. + * + * - If the VARIANT value does not contain an ARRAY value, the method returns NULL. + * + * - Otherwise, the method works as described above. + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions.col + * df.select(col("src")(1)(0)("name")(0)) + * }}} + * + * @param idx + * index of the subfield to be extracted + * @group op + * @since 0.2.0 + */ // scalastyle:on def apply(idx: Int): Column = withExpr(SubfieldInt(expr, idx)) - /** Returns the column name (if the column has a name). - * @group utl - * @since 0.2.0 - */ + /** + * Returns the column name (if the column has a name). + * @group utl + * @since 0.2.0 + */ def getName: Option[String] = expr match { case namedExpr: NamedExpression => Option(namedExpr.name) case _ => None } - /** Returns a string representation of the expression corresponding to this Column instance. - * @since 0.1.0 - * @group utl - */ + /** + * Returns a string representation of the expression corresponding to this Column instance. + * @since 0.1.0 + * @group utl + */ override def toString: String = s"Column[${expr.toString()}]" - /** Returns a new renamed Column. Alias for [[name]]. - * @group op - * @since 0.1.0 - */ + /** + * Returns a new renamed Column. Alias for [[name]]. + * @group op + * @since 0.1.0 + */ def as(alias: String): Column = name(alias) - /** Returns a new renamed Column. Alias for [[name]]. - * @group op - * @since 0.1.0 - */ + /** + * Returns a new renamed Column. Alias for [[name]]. + * @group op + * @since 0.1.0 + */ def alias(alias: String): Column = name(alias) // used by join when column name conflict private[snowpark] def internalAlias(alias: String): Column = withExpr(Alias(expr, quoteName(alias), isInternal = true)) - /** Returns a new renamed Column. - * @group op - * @since 0.1.0 - */ + /** + * Returns a new renamed Column. + * @group op + * @since 0.1.0 + */ def name(alias: String): Column = withExpr(Alias(expr, quoteName(alias))) - /** Unary minus. - * - * @group op - * @since 0.1.0 - */ + /** + * Unary minus. + * + * @group op + * @since 0.1.0 + */ def unary_- : Column = withExpr(UnaryMinus(expr)) - /** Unary not. - * @group op - * @since 0.1.0 - */ + /** + * Unary not. + * @group op + * @since 0.1.0 + */ def unary_! : Column = withExpr(Not(expr)) - /** Equal to. Alias for [[equal_to]]. Use this instead of `==` to perform an equality check in an - * expression. For example: - * {{{ - * lhs.filter(col("a") === 10).join(rhs, rhs("id") === lhs("id")) - * }}} - * - * @group op - * @since 0.1.0 - */ + /** + * Equal to. Alias for [[equal_to]]. Use this instead of `==` to perform an equality check in an + * expression. For example: + * {{{ + * lhs.filter(col("a") === 10).join(rhs, rhs("id") === lhs("id")) + * }}} + * + * @group op + * @since 0.1.0 + */ def ===(other: Any): Column = withExpr { val right = toExpr(other) if (this.expr == right) { @@ -264,78 +275,90 @@ case class Column private[snowpark] (private[snowpark] val expr: Expression) ext EqualTo(expr, right) } - /** Equal to. Same as `===`. - * @group op - * @since 0.1.0 - */ + /** + * Equal to. Same as `===`. + * @group op + * @since 0.1.0 + */ def equal_to(other: Column): Column = this === other - /** Not equal to. Alias for [[not_equal]]. 
- * - * @group op - * @since 0.1.0 - */ + /** + * Not equal to. Alias for [[not_equal]]. + * + * @group op + * @since 0.1.0 + */ def =!=(other: Any): Column = withExpr(NotEqualTo(expr, toExpr(other))) - /** Not equal to. - * @group op - * @since 0.1.0 - */ + /** + * Not equal to. + * @group op + * @since 0.1.0 + */ def not_equal(other: Column): Column = this =!= other - /** Greater than. Alias for [[gt]]. - * @group op - * @since 0.1.0 - */ + /** + * Greater than. Alias for [[gt]]. + * @group op + * @since 0.1.0 + */ def >(other: Any): Column = withExpr(GreaterThan(expr, toExpr(other))) - /** Greater than. - * @group op - * @since 0.1.0 - */ + /** + * Greater than. + * @group op + * @since 0.1.0 + */ def gt(other: Column): Column = this > other - /** Less than. Alias for [[lt]]. - * @group op - * @since 0.1.0 - */ + /** + * Less than. Alias for [[lt]]. + * @group op + * @since 0.1.0 + */ def <(other: Any): Column = withExpr(LessThan(expr, toExpr(other))) - /** Less than. - * @group op - * @since 0.1.0 - */ + /** + * Less than. + * @group op + * @since 0.1.0 + */ def lt(other: Column): Column = this < other - /** Less than or equal to. Alias for [[leq]]. - * @group op - * @since 0.1.0 - */ + /** + * Less than or equal to. Alias for [[leq]]. + * @group op + * @since 0.1.0 + */ def <=(other: Any): Column = withExpr(LessThanOrEqual(expr, toExpr(other))) - /** Less than or equal to. - * @group op - * @since 0.1.0 - */ + /** + * Less than or equal to. + * @group op + * @since 0.1.0 + */ def leq(other: Column): Column = this <= other - /** Greater than or equal to. Alias for [[geq]]. - * @group op - * @since 0.1.0 - */ + /** + * Greater than or equal to. Alias for [[geq]]. + * @group op + * @since 0.1.0 + */ def >=(other: Any): Column = withExpr(GreaterThanOrEqual(expr, toExpr(other))) - /** Greater than or equal to. - * @group op - * @since 0.1.0 - */ + /** + * Greater than or equal to. + * @group op + * @since 0.1.0 + */ def geq(other: Column): Column = this >= other - /** Equal to. You can use this for comparisons against a null value. Alias for [[equal_null]]. - * - * @group op - * @since 0.1.0 - */ + /** + * Equal to. You can use this for comparisons against a null value. Alias for [[equal_null]]. + * + * @group op + * @since 0.1.0 + */ def <=>(other: Any): Column = withExpr { val right = toExpr(other) if (this.expr == right) { @@ -346,313 +369,350 @@ case class Column private[snowpark] (private[snowpark] val expr: Expression) ext EqualNullSafe(expr, right) } - /** Equal to. You can use this for comparisons against a null value. - * @group op - * @since 0.1.0 - */ + /** + * Equal to. You can use this for comparisons against a null value. + * @group op + * @since 0.1.0 + */ def equal_null(other: Column): Column = this <=> other - /** Is NaN. - * @group op - * @since 0.1.0 - */ + /** + * Is NaN. + * @group op + * @since 0.1.0 + */ def equal_nan: Column = withExpr(IsNaN(expr)) - /** Is null. - * @group op - * @since 0.1.0 - */ + /** + * Is null. + * @group op + * @since 0.1.0 + */ def is_null: Column = withExpr(IsNull(expr)) - /** Wrapper for is_null function. - * - * @group op - * @since 1.10.0 - */ + /** + * Wrapper for is_null function. + * + * @group op + * @since 1.10.0 + */ def isNull: Column = is_null - /** Is not null. - * @group op - * @since 0.1.0 - */ + /** + * Is not null. + * @group op + * @since 0.1.0 + */ def is_not_null: Column = withExpr(IsNotNull(expr)) - /** Or. Alias for [[or]]. - * @group op - * @since 0.1.0 - */ + /** + * Or. Alias for [[or]]. 
+ * @group op + * @since 0.1.0 + */ def ||(other: Any): Column = withExpr(Or(expr, toExpr(other))) - /** Or. - * @group op - * @since 0.1.0 - */ + /** + * Or. + * @group op + * @since 0.1.0 + */ def or(other: Column): Column = this || other - /** And. Alias for [[and]]. - * @group op - * @since 0.1.0 - */ + /** + * And. Alias for [[and]]. + * @group op + * @since 0.1.0 + */ def &&(other: Any): Column = withExpr(And(expr, toExpr(other))) - /** And. - * @group op - * @since 0.1.0 - */ + /** + * And. + * @group op + * @since 0.1.0 + */ def and(other: Column): Column = this && other - /** Between lower bound and upper bound. - * @group op - * @since 0.1.0 - */ + /** + * Between lower bound and upper bound. + * @group op + * @since 0.1.0 + */ def between(lowerBound: Column, upperBound: Column): Column = { (this >= lowerBound) && (this <= upperBound) } - /** Plus. Alias for [[plus]]. - * @group op - * @since 0.1.0 - */ + /** + * Plus. Alias for [[plus]]. + * @group op + * @since 0.1.0 + */ def +(other: Any): Column = withExpr(Add(expr, toExpr(other))) - /** Plus. - * @group op - * @since 0.1.0 - */ + /** + * Plus. + * @group op + * @since 0.1.0 + */ def plus(other: Column): Column = this + other - /** Minus. Alias for [[minus]]. - * @group op - * @since 0.1.0 - */ + /** + * Minus. Alias for [[minus]]. + * @group op + * @since 0.1.0 + */ def -(other: Any): Column = withExpr(Subtract(expr, toExpr(other))) - /** Minus. - * @group op - * @since 0.1.0 - */ + /** + * Minus. + * @group op + * @since 0.1.0 + */ def minus(other: Column): Column = this - other - /** Multiply. Alias for [[multiply]]. - * @group op - * @since 0.1.0 - */ + /** + * Multiply. Alias for [[multiply]]. + * @group op + * @since 0.1.0 + */ def *(other: Any): Column = withExpr(Multiply(expr, toExpr(other))) - /** Multiply. - * @group op - * @since 0.1.0 - */ + /** + * Multiply. + * @group op + * @since 0.1.0 + */ def multiply(other: Column): Column = this * other - /** Divide. Alias for [[divide]]. - * @group op - * @since 0.1.0 - */ + /** + * Divide. Alias for [[divide]]. + * @group op + * @since 0.1.0 + */ def /(other: Any): Column = withExpr(Divide(expr, toExpr(other))) - /** Divide. - * @group op - * @since 0.1.0 - */ + /** + * Divide. + * @group op + * @since 0.1.0 + */ def divide(other: Column): Column = this / other - /** Remainder. Alias for [[mod]]. - * @group op - * @since 0.1.0 - */ + /** + * Remainder. Alias for [[mod]]. + * @group op + * @since 0.1.0 + */ def %(other: Any): Column = withExpr(Remainder(expr, toExpr(other))) - /** Remainder. - * @group op - * @since 0.1.0 - */ + /** + * Remainder. + * @group op + * @since 0.1.0 + */ def mod(other: Column): Column = this % other - /** Casts the values in the Column to the specified data type. - * @group op - * @since 0.1.0 - */ + /** + * Casts the values in the Column to the specified data type. + * @group op + * @since 0.1.0 + */ def cast(to: DataType): Column = withExpr(Cast(expr, to)) - /** Returns a Column expression with values sorted in descending order. - * @group op - * @since 0.1.0 - */ + /** + * Returns a Column expression with values sorted in descending order. + * @group op + * @since 0.1.0 + */ def desc: Column = withExpr(SortOrder(expr, Descending)) - /** Returns a Column expression with values sorted in descending order (null values sorted before - * non-null values). - * - * @group op - * @since 0.1.0 - */ + /** + * Returns a Column expression with values sorted in descending order (null values sorted before + * non-null values). 
+ * + * @group op + * @since 0.1.0 + */ def desc_nulls_first: Column = withExpr(SortOrder(expr, Descending, NullsFirst, Set.empty)) - /** Returns a Column expression with values sorted in descending order (null values sorted after - * non-null values). - * - * @group op - * @since 0.1.0 - */ + /** + * Returns a Column expression with values sorted in descending order (null values sorted after + * non-null values). + * + * @group op + * @since 0.1.0 + */ def desc_nulls_last: Column = withExpr(SortOrder(expr, Descending, NullsLast, Set.empty)) - /** Returns a Column expression with values sorted in ascending order. - * - * @group op - * @since 0.1.0 - */ + /** + * Returns a Column expression with values sorted in ascending order. + * + * @group op + * @since 0.1.0 + */ def asc: Column = withExpr(SortOrder(expr, Ascending)) - /** Returns a Column expression with values sorted in ascending order (null values sorted before - * non-null values). - * - * @group op - * @since 0.1.0 - */ + /** + * Returns a Column expression with values sorted in ascending order (null values sorted before + * non-null values). + * + * @group op + * @since 0.1.0 + */ def asc_nulls_first: Column = withExpr(SortOrder(expr, Ascending, NullsFirst, Set.empty)) - /** Returns a Column expression with values sorted in ascending order (null values sorted after - * non-null values). - * - * @group op - * @since 0.1.0 - */ + /** + * Returns a Column expression with values sorted in ascending order (null values sorted after + * non-null values). + * + * @group op + * @since 0.1.0 + */ def asc_nulls_last: Column = withExpr(SortOrder(expr, Ascending, NullsLast, Set.empty)) - /** Bitwise or. - * - * @group op - * @since 0.1.0 - */ + /** + * Bitwise or. + * + * @group op + * @since 0.1.0 + */ def bitor(other: Column): Column = withExpr(BitwiseOr(expr, toExpr(other))) - /** Bitwise and. - * - * @group op - * @since 0.1.0 - */ + /** + * Bitwise and. + * + * @group op + * @since 0.1.0 + */ def bitand(other: Column): Column = withExpr(BitwiseAnd(expr, toExpr(other))) - /** Bitwise xor. - * - * @group op - * @since 0.1.0 - */ + /** + * Bitwise xor. + * + * @group op + * @since 0.1.0 + */ def bitxor(other: Column): Column = withExpr(BitwiseXor(expr, toExpr(other))) - /** Returns a windows frame, based on the specified [[WindowSpec]]. - * - * @group op - * @since 0.1.0 - */ + /** + * Returns a windows frame, based on the specified [[WindowSpec]]. + * + * @group op + * @since 0.1.0 + */ def over(window: WindowSpec): Column = window.withAggregate(expr) - /** Returns a windows frame, based on an empty [[WindowSpec]] expression. - * - * @group op - * @since 0.1.0 - */ + /** + * Returns a windows frame, based on an empty [[WindowSpec]] expression. + * + * @group op + * @since 0.1.0 + */ def over(): Column = over(Window.spec) - /** Allows case-sensitive matching of strings based on comparison with a pattern. - * - * For details, see the Snowflake documentation on - * [[https://docs.snowflake.com/en/sql-reference/functions/like.html#usage-notes LIKE]]. - * - * @group op - * @since 0.1.0 - */ + /** + * Allows case-sensitive matching of strings based on comparison with a pattern. + * + * For details, see the Snowflake documentation on + * [[https://docs.snowflake.com/en/sql-reference/functions/like.html#usage-notes LIKE]]. + * + * @group op + * @since 0.1.0 + */ def like(pattern: Column): Column = withExpr(Like(this.expr, pattern.expr)) // scalastyle:off - /** Returns true if this [[Column]] matches the specified regular expression. 
- * - * For details, see the Snowflake documentation on - * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-general-usage-notes regular expressions]]. - * - * @group op - * @since 0.1.0 - */ + /** + * Returns true if this [[Column]] matches the specified regular expression. + * + * For details, see the Snowflake documentation on + * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-general-usage-notes regular expressions]]. + * + * @group op + * @since 0.1.0 + */ // scalastyle:on def regexp(pattern: Column): Column = withExpr(RegExp(this.expr, pattern.expr)) - /** Returns a Column expression that adds a WITHIN GROUP clause to sort the rows by the specified - * columns. - * - * This method is supported on Column expressions returned by some of the aggregate functions, - * including [[functions.array_agg]], LISTAGG(), PERCENTILE_CONT(), and PERCENTILE_DISC(). - * - * For example: - * {{{ - * import com.snowflake.snowpark.functions._ - * import session.implicits._ - * // Create a DataFrame from a sequence. - * val df = Seq((3, "v1"), (1, "v3"), (2, "v2")).toDF("a", "b") - * // Create a DataFrame containing the values in "a" sorted by "b". - * val dfArrayAgg = df.select(array_agg(col("a")).withinGroup(col("b"))) - * // Create a DataFrame containing the values in "a" grouped by "b" - * // and sorted by "a" in descending order. - * var dfArrayAggWindow = df.select( - * array_agg(col("a")) - * .withinGroup(col("a").desc) - * .over(Window.partitionBy(col("b"))) - * ) - * }}} - * - * For details, see the Snowflake documentation for the aggregate function that you are using - * (e.g. [[https://docs.snowflake.com/en/sql-reference/functions/array_agg.html ARRAY_AGG]]). - * - * @group op - * @since 0.6.0 - */ + /** + * Returns a Column expression that adds a WITHIN GROUP clause to sort the rows by the specified + * columns. + * + * This method is supported on Column expressions returned by some of the aggregate functions, + * including [[functions.array_agg]], LISTAGG(), PERCENTILE_CONT(), and PERCENTILE_DISC(). + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions._ + * import session.implicits._ + * // Create a DataFrame from a sequence. + * val df = Seq((3, "v1"), (1, "v3"), (2, "v2")).toDF("a", "b") + * // Create a DataFrame containing the values in "a" sorted by "b". + * val dfArrayAgg = df.select(array_agg(col("a")).withinGroup(col("b"))) + * // Create a DataFrame containing the values in "a" grouped by "b" + * // and sorted by "a" in descending order. + * var dfArrayAggWindow = df.select( + * array_agg(col("a")) + * .withinGroup(col("a").desc) + * .over(Window.partitionBy(col("b"))) + * ) + * }}} + * + * For details, see the Snowflake documentation for the aggregate function that you are using + * (e.g. [[https://docs.snowflake.com/en/sql-reference/functions/array_agg.html ARRAY_AGG]]). + * + * @group op + * @since 0.6.0 + */ def withinGroup(first: Column, remaining: Column*): Column = withinGroup(first +: remaining) - /** Returns a Column expression that adds a WITHIN GROUP clause to sort the rows by the specified - * sequence of columns. - * - * This method is supported on Column expressions returned by some of the aggregate functions, - * including [[functions.array_agg]], LISTAGG(), PERCENTILE_CONT(), and PERCENTILE_DISC(). - * - * For example: - * {{{ - * import com.snowflake.snowpark.functions._ - * import session.implicits._ - * // Create a DataFrame from a sequence. 
- * val df = Seq((3, "v1"), (1, "v3"), (2, "v2")).toDF("a", "b") - * // Create a DataFrame containing the values in "a" sorted by "b". - * df.select(array_agg(col("a")).withinGroup(Seq(col("b")))) - * // Create a DataFrame containing the values in "a" grouped by "b" - * // and sorted by "a" in descending order. - * df.select( - * array_agg(Seq(col("a"))) - * .withinGroup(col("a").desc) - * .over(Window.partitionBy(col("b"))) - * ) - * }}} - * - * For details, see the Snowflake documentation for the aggregate function that you are using - * (e.g. [[https://docs.snowflake.com/en/sql-reference/functions/array_agg.html ARRAY_AGG]]). - * - * @group op - * @since 0.6.0 - */ + /** + * Returns a Column expression that adds a WITHIN GROUP clause to sort the rows by the specified + * sequence of columns. + * + * This method is supported on Column expressions returned by some of the aggregate functions, + * including [[functions.array_agg]], LISTAGG(), PERCENTILE_CONT(), and PERCENTILE_DISC(). + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions._ + * import session.implicits._ + * // Create a DataFrame from a sequence. + * val df = Seq((3, "v1"), (1, "v3"), (2, "v2")).toDF("a", "b") + * // Create a DataFrame containing the values in "a" sorted by "b". + * df.select(array_agg(col("a")).withinGroup(Seq(col("b")))) + * // Create a DataFrame containing the values in "a" grouped by "b" + * // and sorted by "a" in descending order. + * df.select( + * array_agg(Seq(col("a"))) + * .withinGroup(col("a").desc) + * .over(Window.partitionBy(col("b"))) + * ) + * }}} + * + * For details, see the Snowflake documentation for the aggregate function that you are using + * (e.g. [[https://docs.snowflake.com/en/sql-reference/functions/array_agg.html ARRAY_AGG]]). + * + * @group op + * @since 0.6.0 + */ def withinGroup(cols: Seq[Column]): Column = withExpr(WithinGroup(this.expr, cols.map { _.expr })) // scalastyle:off - /** Returns a copy of the original [[Column]] with the specified `collationSpec` property, rather - * than the original collation specification property. - * - * For details, see the Snowflake documentation on - * [[https://docs.snowflake.com/en/sql-reference/collation.html#label-collation-specification collation specifications]]. - * - * @group op - * @since 0.1.0 - */ + /** + * Returns a copy of the original [[Column]] with the specified `collationSpec` property, rather + * than the original collation specification property. + * + * For details, see the Snowflake documentation on + * [[https://docs.snowflake.com/en/sql-reference/collation.html#label-collation-specification collation specifications]]. + * + * @group op + * @since 0.1.0 + */ // scalastyle:on def collate(collateSpec: String): Column = withExpr(Collate(this.expr, collateSpec)) @@ -675,47 +735,50 @@ private[snowpark] object Column { def expr(e: String): Column = new Column(UnresolvedAttribute(e)) } -/** Represents a [[https://docs.snowflake.com/en/sql-reference/functions/case.html CASE]] - * expression. - * - * To construct this object for a CASE expression, call the - * [[com.snowflake.snowpark.functions.when functions.when]]. specifying a condition and the - * corresponding result for that condition. Then, call the [[when]] and [[otherwise]] methods to - * specify additional conditions and results. 
- * - * For example: - * {{{ - * import com.snowflake.snowpark.functions._ - * df.select( - * when(col("col").is_null, lit(1)) - * .when(col("col") === 1, lit(2)) - * .otherwise(lit(3)) - * ) - * }}} - * - * @since 0.2.0 - */ +/** + * Represents a [[https://docs.snowflake.com/en/sql-reference/functions/case.html CASE]] expression. + * + * To construct this object for a CASE expression, call the + * [[com.snowflake.snowpark.functions.when functions.when]]. specifying a condition and the + * corresponding result for that condition. Then, call the [[when]] and [[otherwise]] methods to + * specify additional conditions and results. + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions._ + * df.select( + * when(col("col").is_null, lit(1)) + * .when(col("col") === 1, lit(2)) + * .otherwise(lit(3)) + * ) + * }}} + * + * @since 0.2.0 + */ class CaseExpr private[snowpark] (branches: Seq[(Expression, Expression)]) extends Column(CaseWhen(branches)) { - /** Appends one more WHEN condition to the CASE expression. - * - * @since 0.2.0 - */ + /** + * Appends one more WHEN condition to the CASE expression. + * + * @since 0.2.0 + */ def when(condition: Column, value: Column): CaseExpr = new CaseExpr(branches :+ ((condition.expr, value.expr))) - /** Sets the default result for this CASE expression. - * - * @since 0.2.0 - */ + /** + * Sets the default result for this CASE expression. + * + * @since 0.2.0 + */ def otherwise(value: Column): Column = withExpr { CaseWhen(branches, Option(value.expr)) } - /** Sets the default result for this CASE expression. Alias for [[otherwise]]. - * - * @since 0.2.0 - */ + /** + * Sets the default result for this CASE expression. Alias for [[otherwise]]. + * + * @since 0.2.0 + */ def `else`(value: Column): Column = otherwise(value) } diff --git a/src/main/scala/com/snowflake/snowpark/CopyableDataFrame.scala b/src/main/scala/com/snowflake/snowpark/CopyableDataFrame.scala index e8ebd7f3..f5688482 100644 --- a/src/main/scala/com/snowflake/snowpark/CopyableDataFrame.scala +++ b/src/main/scala/com/snowflake/snowpark/CopyableDataFrame.scala @@ -3,17 +3,18 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal._ import com.snowflake.snowpark.internal.analyzer._ -/** DataFrame for loading data from files in a stage to a table. Objects of this type are returned - * by the [[DataFrameReader]] methods that load data from files (e.g. [[DataFrameReader.csv csv]]). - * - * To save the data from the staged files to a table, call the `copyInto()` methods. This method - * uses the COPY INTO `` command to copy the data to a specified table. - * - * @groupname actions Actions - * @groupname basic Basic DataFrame Functions - * - * @since 0.9.0 - */ +/** + * DataFrame for loading data from files in a stage to a table. Objects of this type are returned by + * the [[DataFrameReader]] methods that load data from files (e.g. [[DataFrameReader.csv csv]]). + * + * To save the data from the staged files to a table, call the `copyInto()` methods. This method + * uses the COPY INTO `` command to copy the data to a specified table. 
+ * + * @groupname actions Actions + * @groupname basic Basic DataFrame Functions + * + * @since 0.9.0 + */ class CopyableDataFrame private[snowpark] ( override private[snowpark] val session: Session, override private[snowpark] val plan: SnowflakePlan, @@ -21,124 +22,127 @@ class CopyableDataFrame private[snowpark] ( private val stagedFileReader: StagedFileReader) extends DataFrame(session, plan, methodChain) { - /** Executes a `COPY INTO ` command to load data from files in a stage into a - * specified table. - * - * copyInto is an action method (like the [[collect]] method), so calling the method executes the - * SQL statement to copy the data. - * - * For example, the following code loads data from the path specified by `myFileStage` to the - * table `T`: - * {{{ - * val df = session.read.schema(userSchema).csv(myFileStage) - * df.copyInto("T") - * }}} - * - * @group actions - * @param tableName - * Name of the table where the data should be saved. - * @since 0.9.0 - */ + /** + * Executes a `COPY INTO ` command to load data from files in a stage into a specified + * table. + * + * copyInto is an action method (like the [[collect]] method), so calling the method executes the + * SQL statement to copy the data. + * + * For example, the following code loads data from the path specified by `myFileStage` to the + * table `T`: + * {{{ + * val df = session.read.schema(userSchema).csv(myFileStage) + * df.copyInto("T") + * }}} + * + * @group actions + * @param tableName + * Name of the table where the data should be saved. + * @since 0.9.0 + */ def copyInto(tableName: String): Unit = action("copyInto") { getCopyDataFrame(tableName, Seq.empty, Seq.empty, Map.empty).collect() } // scalastyle:off line.size.limit - /** Executes a `COPY INTO ` command with the specified transformations to load data - * from files in a stage into a specified table. - * - * copyInto is an action method (like the [[collect]] method), so calling the method executes the - * SQL statement to copy the data. - * - * When copying the data into the table, you can apply transformations to the data from the files - * to: - * - Rename the columns - * - Change the order of the columns - * - Omit or insert columns - * - Cast the value in a column to a specific type - * - * You can use the same techniques described in - * [[https://docs.snowflake.com/en/user-guide/data-load-transform.html Transforming Data During Load]] - * expressed as a {@code Seq} of [[Column]] expressions that correspond to the - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters SELECT statement parameters]] - * in the `COPY INTO ` command. - * - * For example, the following code loads data from the path specified by `myFileStage` to the - * table `T`. The example transforms the data from the file by inserting the value of the first - * column into the first column of table `T` and inserting the length of that value into the - * second column of table `T`. - * {{{ - * import com.snowflake.snowpark.functions._ - * val df = session.read.schema(userSchema).csv(myFileStage) - * val transformations = Seq(col("\$1"), length(col("\$1"))) - * df.copyInto("T", transformations) - * }}} - * - * @group actions - * @param tableName - * Name of the table where the data should be saved. - * @param transformations - * Seq of [[Column]] expressions that specify the transformations to apply (similar to - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). 
- * @since 0.9.0 - */ + /** + * Executes a `COPY INTO ` command with the specified transformations to load data + * from files in a stage into a specified table. + * + * copyInto is an action method (like the [[collect]] method), so calling the method executes the + * SQL statement to copy the data. + * + * When copying the data into the table, you can apply transformations to the data from the files + * to: + * - Rename the columns + * - Change the order of the columns + * - Omit or insert columns + * - Cast the value in a column to a specific type + * + * You can use the same techniques described in + * [[https://docs.snowflake.com/en/user-guide/data-load-transform.html Transforming Data During Load]] + * expressed as a {@code Seq} of [[Column]] expressions that correspond to the + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters SELECT statement parameters]] + * in the `COPY INTO ` command. + * + * For example, the following code loads data from the path specified by `myFileStage` to the + * table `T`. The example transforms the data from the file by inserting the value of the first + * column into the first column of table `T` and inserting the length of that value into the + * second column of table `T`. + * {{{ + * import com.snowflake.snowpark.functions._ + * val df = session.read.schema(userSchema).csv(myFileStage) + * val transformations = Seq(col("\$1"), length(col("\$1"))) + * df.copyInto("T", transformations) + * }}} + * + * @group actions + * @param tableName + * Name of the table where the data should be saved. + * @param transformations + * Seq of [[Column]] expressions that specify the transformations to apply (similar to + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). + * @since 0.9.0 + */ // scalastyle:on line.size.limit def copyInto(tableName: String, transformations: Seq[Column]): Unit = action("copyInto") { getCopyDataFrame(tableName, Seq.empty, transformations, Map.empty).collect() } // scalastyle:off line.size.limit - /** Executes a `COPY INTO ` command with the specified transformations and options to - * load data from files in a stage into a specified table. - * - * copyInto is an action method (like the [[collect]] method), so calling the method executes the - * SQL statement to copy the data. - * - * In addition, you can specify - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#format-type-options-formattypeoptions format type options]] - * or - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#label-copy-into-table-copyoptions copy options]] - * that determine how the copy operation should be performed. - * - * When copying the data into the table, you can apply transformations to the data from the files - * to: - * - Rename the columns - * - Change the order of the columns - * - Omit or insert columns - * - Cast the value in a column to a specific type - * - * You can use the same techniques described in - * [[https://docs.snowflake.com/en/user-guide/data-load-transform.html Transforming Data During Load]] - * expressed as a {@code Seq} of [[Column]] expressions that correspond to the - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters SELECT statement parameters]] - * in the `COPY INTO ` command. - * - * For example, the following code loads data from the path specified by `myFileStage` to the - * table `T`. 
The example transforms the data from the file by inserting the value of the first - * column into the first column of table `T` and inserting the length of that value into the - * second column of table `T`. The example also uses a {@code Map} to set the {@code FORCE} and - * {@code skip_header} options for the copy operation. - * {{{ - * import com.snowflake.snowpark.functions._ - * val df = session.read.schema(userSchema).option("skip_header", 1).csv(myFileStage) - * val transformations = Seq(col("\$1"), length(col("\$1"))) - * val extraOptions = Map("FORCE" -> "true", "skip_header" -> 2) - * df.copyInto("T", transformations, extraOptions) - * }}} - * - * @group actions - * @param tableName - * Name of the table where the data should be saved. - * @param transformations - * Seq of [[Column]] expressions that specify the transformations to apply (similar to - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). - * @param options - * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and - * their corresponding values.NOTE: By default, the {@code CopyableDataFrame} object uses the - * options set in the [[DataFrameReader]] used to create that object. You can use this - * {@code options} parameter to override the default options or set additional options. - * @since 0.9.0 - */ + /** + * Executes a `COPY INTO ` command with the specified transformations and options to + * load data from files in a stage into a specified table. + * + * copyInto is an action method (like the [[collect]] method), so calling the method executes the + * SQL statement to copy the data. + * + * In addition, you can specify + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#format-type-options-formattypeoptions format type options]] + * or + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#label-copy-into-table-copyoptions copy options]] + * that determine how the copy operation should be performed. + * + * When copying the data into the table, you can apply transformations to the data from the files + * to: + * - Rename the columns + * - Change the order of the columns + * - Omit or insert columns + * - Cast the value in a column to a specific type + * + * You can use the same techniques described in + * [[https://docs.snowflake.com/en/user-guide/data-load-transform.html Transforming Data During Load]] + * expressed as a {@code Seq} of [[Column]] expressions that correspond to the + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters SELECT statement parameters]] + * in the `COPY INTO ` command. + * + * For example, the following code loads data from the path specified by `myFileStage` to the + * table `T`. The example transforms the data from the file by inserting the value of the first + * column into the first column of table `T` and inserting the length of that value into the + * second column of table `T`. The example also uses a {@code Map} to set the {@code FORCE} and + * {@code skip_header} options for the copy operation. 
+ * {{{ + * import com.snowflake.snowpark.functions._ + * val df = session.read.schema(userSchema).option("skip_header", 1).csv(myFileStage) + * val transformations = Seq(col("\$1"), length(col("\$1"))) + * val extraOptions = Map("FORCE" -> "true", "skip_header" -> 2) + * df.copyInto("T", transformations, extraOptions) + * }}} + * + * @group actions + * @param tableName + * Name of the table where the data should be saved. + * @param transformations + * Seq of [[Column]] expressions that specify the transformations to apply (similar to + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). + * @param options + * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and their + * corresponding values.NOTE: By default, the {@code CopyableDataFrame} object uses the options + * set in the [[DataFrameReader]] used to create that object. You can use this {@code options} + * parameter to override the default options or set additional options. + * @since 0.9.0 + */ // scalastyle:on line.size.limit def copyInto(tableName: String, transformations: Seq[Column], options: Map[String, Any]): Unit = action("copyInto") { @@ -146,64 +150,65 @@ class CopyableDataFrame private[snowpark] ( } // scalastyle:off line.size.limit - /** Executes a `COPY INTO ` command with the specified transformations and options to - * load data from files in a stage into a specified table. - * - * copyInto is an action method (like the [[collect]] method), so calling the method executes the - * SQL statement to copy the data. - * - * In addition, you can specify - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#format-type-options-formattypeoptions format type options]] - * or - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#label-copy-into-table-copyoptions copy options]] - * that determine how the copy operation should be performed. - * - * When copying the data into the table, you can apply transformations to the data from the files - * to: - * - Rename the columns - * - Change the order of the columns - * - Omit or insert columns - * - Cast the value in a column to a specific type - * - * You can use the same techniques described in - * [[https://docs.snowflake.com/en/user-guide/data-load-transform.html Transforming Data During Load]] - * expressed as a {@code Seq} of [[Column]] expressions that correspond to the - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters SELECT statement parameters]] - * in the `COPY INTO ` command. - * - * You can specify a subset of the table columns to copy into. The number of provided column - * names must match the number of transformations. - * - * For example, suppose the target table `T` has 3 columns: "ID", "A" and "A_LEN". "ID" is an - * `AUTOINCREMENT` column, which should be exceluded from this copy into action. The following - * code loads data from the path specified by `myFileStage` to the table `T`. The example - * transforms the data from the file by inserting the value of the first column into the column - * `A` and inserting the length of that value into the column `A_LEN`. The example also uses a - * {@code Map} to set the {@code FORCE} and {@code skip_header} options for the copy operation. 
- * {{{ - * import com.snowflake.snowpark.functions._ - * val df = session.read.schema(userSchema).option("skip_header", 1).csv(myFileStage) - * val transformations = Seq(col("\$1"), length(col("\$1"))) - * val targetColumnNames = Seq("A", "A_LEN") - * val extraOptions = Map("FORCE" -> "true", "skip_header" -> 2) - * df.copyInto("T", targetColumnNames, transformations, extraOptions) - * }}} - * - * @group actions - * @param tableName - * Name of the table where the data should be saved. - * @param targetColumnNames - * Name of the columns in the table where the data should be saved. - * @param transformations - * Seq of [[Column]] expressions that specify the transformations to apply (similar to - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). - * @param options - * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and - * their corresponding values.NOTE: By default, the {@code CopyableDataFrame} object uses the - * options set in the [[DataFrameReader]] used to create that object. You can use this - * {@code options} parameter to override the default options or set additional options. - * @since 0.11.0 - */ + /** + * Executes a `COPY INTO ` command with the specified transformations and options to + * load data from files in a stage into a specified table. + * + * copyInto is an action method (like the [[collect]] method), so calling the method executes the + * SQL statement to copy the data. + * + * In addition, you can specify + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#format-type-options-formattypeoptions format type options]] + * or + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#label-copy-into-table-copyoptions copy options]] + * that determine how the copy operation should be performed. + * + * When copying the data into the table, you can apply transformations to the data from the files + * to: + * - Rename the columns + * - Change the order of the columns + * - Omit or insert columns + * - Cast the value in a column to a specific type + * + * You can use the same techniques described in + * [[https://docs.snowflake.com/en/user-guide/data-load-transform.html Transforming Data During Load]] + * expressed as a {@code Seq} of [[Column]] expressions that correspond to the + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters SELECT statement parameters]] + * in the `COPY INTO ` command. + * + * You can specify a subset of the table columns to copy into. The number of provided column names + * must match the number of transformations. + * + * For example, suppose the target table `T` has 3 columns: "ID", "A" and "A_LEN". "ID" is an + * `AUTOINCREMENT` column, which should be exceluded from this copy into action. The following + * code loads data from the path specified by `myFileStage` to the table `T`. The example + * transforms the data from the file by inserting the value of the first column into the column + * `A` and inserting the length of that value into the column `A_LEN`. The example also uses a + * {@code Map} to set the {@code FORCE} and {@code skip_header} options for the copy operation. 
+ * {{{ + * import com.snowflake.snowpark.functions._ + * val df = session.read.schema(userSchema).option("skip_header", 1).csv(myFileStage) + * val transformations = Seq(col("\$1"), length(col("\$1"))) + * val targetColumnNames = Seq("A", "A_LEN") + * val extraOptions = Map("FORCE" -> "true", "skip_header" -> 2) + * df.copyInto("T", targetColumnNames, transformations, extraOptions) + * }}} + * + * @group actions + * @param tableName + * Name of the table where the data should be saved. + * @param targetColumnNames + * Name of the columns in the table where the data should be saved. + * @param transformations + * Seq of [[Column]] expressions that specify the transformations to apply (similar to + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). + * @param options + * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and their + * corresponding values.NOTE: By default, the {@code CopyableDataFrame} object uses the options + * set in the [[DataFrameReader]] used to create that object. You can use this {@code options} + * parameter to override the default options or set additional options. + * @since 0.11.0 + */ // scalastyle:on line.size.limit def copyInto( tableName: String, @@ -237,33 +242,35 @@ class CopyableDataFrame private[snowpark] ( new StagedFileReader(stagedFileReader))) } - /** Returns a clone of this CopyableDataFrame. - * - * @return - * A [[CopyableDataFrame]] - * @since 0.10.0 - * @group basic - */ + /** + * Returns a clone of this CopyableDataFrame. + * + * @return + * A [[CopyableDataFrame]] + * @since 0.10.0 + * @group basic + */ override def clone: CopyableDataFrame = action("clone") { new CopyableDataFrame(session, plan, Seq(), stagedFileReader) } - /** Returns a [[CopyableDataFrameAsyncActor]] object that can be used to execute CopyableDataFrame - * actions asynchronously. - * - * Example: - * {{{ - * val asyncJob = session.read.schema(userSchema).csv(testFileOnStage).async.collect() - * // At this point, the thread is not blocked. You can perform additional work before - * // calling asyncJob.getResult() to retrieve the results of the action. - * // NOTE: getResult() is a blocking call. - * asyncJob.getResult() - * }}} - * - * @since 0.11.0 - * @return - * A [[CopyableDataFrameAsyncActor]] object - */ + /** + * Returns a [[CopyableDataFrameAsyncActor]] object that can be used to execute CopyableDataFrame + * actions asynchronously. + * + * Example: + * {{{ + * val asyncJob = session.read.schema(userSchema).csv(testFileOnStage).async.collect() + * // At this point, the thread is not blocked. You can perform additional work before + * // calling asyncJob.getResult() to retrieve the results of the action. + * // NOTE: getResult() is a blocking call. + * asyncJob.getResult() + * }}} + * + * @since 0.11.0 + * @return + * A [[CopyableDataFrameAsyncActor]] object + */ override def async: CopyableDataFrameAsyncActor = new CopyableDataFrameAsyncActor(this) @inline override protected def action[T](funcName: String)(func: => T): T = { @@ -271,40 +278,43 @@ class CopyableDataFrame private[snowpark] ( } } -/** Provides APIs to execute CopyableDataFrame actions asynchronously. - * - * @since 0.11.0 - */ +/** + * Provides APIs to execute CopyableDataFrame actions asynchronously. 
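+ *
+ * For illustration only, a minimal sketch of the intended usage (the stage path `myFileStage`,
+ * the schema `userSchema`, and the table name `"T"` are assumed placeholders):
+ * {{{
+ *   // async returns a CopyableDataFrameAsyncActor; copyInto submits the COPY INTO statement
+ *   // without blocking the current thread.
+ *   val asyncJob = session.read.schema(userSchema).csv(myFileStage).async.copyInto("T")
+ *   // getResult() blocks until the asynchronous copy completes.
+ *   asyncJob.getResult()
+ * }}}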
+ * + * @since 0.11.0 + */ class CopyableDataFrameAsyncActor private[snowpark] (cdf: CopyableDataFrame) extends DataFrameAsyncActor(cdf) { - /** Executes `CopyableDataFrame.copyInto` asynchronously. - * - * @param tableName - * Name of the table where the data should be saved. - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `CopyableDataFrame.copyInto` asynchronously. + * + * @param tableName + * Name of the table where the data should be saved. + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def copyInto(tableName: String): TypedAsyncJob[Unit] = action("copyInto") { val df = cdf.getCopyDataFrame(tableName) cdf.session.conn.executeAsync[Unit](df.snowflakePlan) } // scalastyle:off line.size.limit - /** Executes `CopyableDataFrame.copyInto` asynchronously. - * - * @param tableName - * Name of the table where the data should be saved. - * @param transformations - * Seq of [[Column]] expressions that specify the transformations to apply (similar to - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `CopyableDataFrame.copyInto` asynchronously. + * + * @param tableName + * Name of the table where the data should be saved. + * @param transformations + * Seq of [[Column]] expressions that specify the transformations to apply (similar to + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ // scalastyle:on line.size.limit def copyInto(tableName: String, transformations: Seq[Column]): TypedAsyncJob[Unit] = action("copyInto") { @@ -313,23 +323,24 @@ class CopyableDataFrameAsyncActor private[snowpark] (cdf: CopyableDataFrame) } // scalastyle:off line.size.limit - /** Executes `CopyableDataFrame.copyInto` asynchronously. - * - * @param tableName - * Name of the table where the data should be saved. - * @param transformations - * Seq of [[Column]] expressions that specify the transformations to apply (similar to - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). - * @param options - * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and - * their corresponding values.NOTE: By default, the {@code CopyableDataFrame} object uses the - * options set in the [[DataFrameReader]] used to create that object. You can use this - * {@code options} parameter to override the default options or set additional options. - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `CopyableDataFrame.copyInto` asynchronously. + * + * @param tableName + * Name of the table where the data should be saved. + * @param transformations + * Seq of [[Column]] expressions that specify the transformations to apply (similar to + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). 
+ * @param options + * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and their + * corresponding values.NOTE: By default, the {@code CopyableDataFrame} object uses the options + * set in the [[DataFrameReader]] used to create that object. You can use this {@code options} + * parameter to override the default options or set additional options. + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ // scalastyle:on line.size.limit def copyInto( tableName: String, @@ -340,25 +351,26 @@ class CopyableDataFrameAsyncActor private[snowpark] (cdf: CopyableDataFrame) } // scalastyle:off line.size.limit - /** Executes `CopyableDataFrame.copyInto` asynchronously. - * - * @param tableName - * Name of the table where the data should be saved. - * @param targetColumnNames - * Name of the columns in the table where the data should be saved. - * @param transformations - * Seq of [[Column]] expressions that specify the transformations to apply (similar to - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). - * @param options - * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and - * their corresponding values.NOTE: By default, the {@code CopyableDataFrame} object uses the - * options set in the [[DataFrameReader]] used to create that object. You can use this - * {@code options} parameter to override the default options or set additional options. - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `CopyableDataFrame.copyInto` asynchronously. + * + * @param tableName + * Name of the table where the data should be saved. + * @param targetColumnNames + * Name of the columns in the table where the data should be saved. + * @param transformations + * Seq of [[Column]] expressions that specify the transformations to apply (similar to + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#transformation-parameters transformation parameters]]). + * @param options + * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and their + * corresponding values.NOTE: By default, the {@code CopyableDataFrame} object uses the options + * set in the [[DataFrameReader]] used to create that object. You can use this {@code options} + * parameter to override the default options or set additional options. + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. 
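+   *
+   * A minimal sketch of the intended usage, mirroring the synchronous example above (the names
+   * `userSchema`, `myFileStage`, and the target table `"T"` are assumed placeholders):
+   * {{{
+   *   import com.snowflake.snowpark.functions._
+   *   val df = session.read.schema(userSchema).option("skip_header", 1).csv(myFileStage)
+   *   val job = df.async.copyInto(
+   *     "T",
+   *     Seq("A", "A_LEN"),
+   *     Seq(col("\$1"), length(col("\$1"))),
+   *     Map("FORCE" -> "true"))
+   *   // getResult() blocks until the asynchronous copy completes.
+   *   job.getResult()
+   * }}}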
+ * @since 0.11.0 + */ // scalastyle:on line.size.limit def copyInto( tableName: String, diff --git a/src/main/scala/com/snowflake/snowpark/DataFrame.scala b/src/main/scala/com/snowflake/snowpark/DataFrame.scala index ce68a426..a7beffb9 100644 --- a/src/main/scala/com/snowflake/snowpark/DataFrame.scala +++ b/src/main/scala/com/snowflake/snowpark/DataFrame.scala @@ -2,7 +2,6 @@ package com.snowflake.snowpark import scala.reflect.ClassTag import scala.util.{DynamicVariable, Random} -import com.snowflake.snowpark.internal.analyzer.{TableFunction => TF} import com.snowflake.snowpark.internal.{ErrorMessage, Logging, OpenTelemetry, Utils} import com.snowflake.snowpark.internal.analyzer._ import com.snowflake.snowpark.types._ @@ -50,152 +49,153 @@ private[snowpark] object DataFrame extends Logging { } } -/** Represents a lazily-evaluated relational dataset that contains a collection of [[Row]] objects - * with columns defined by a schema (column name and type). - * - * A DataFrame is considered lazy because it encapsulates the computation or query required to - * produce a relational dataset. The computation is not performed until you call a method that - * performs an action (e.g. [[collect]]). - * - * '''Creating a DataFrame''' - * - * You can create a DataFrame in a number of different ways, as shown in the examples below. - * - * Example 1: Creating a DataFrame by reading a table. - * {{{ - * val dfPrices = session.table("itemsdb.publicschema.prices") - * }}} - * - * Example 2: Creating a DataFrame by reading files from a stage. - * {{{ - * val dfCatalog = session.read.csv("@stage/some_dir") - * }}} - * - * Example 3: Creating a DataFrame by specifying a sequence or a range. - * {{{ - * val df = session.createDataFrame(Seq((1, "one"), (2, "two"))) - * }}} - * {{{ - * val df = session.range(1, 10, 2) - * }}} - * - * Example 4: Create a new DataFrame by applying transformations to other existing DataFrames. - * {{{ - * val dfMergedData = dfCatalog.join(dfPrices, dfCatalog("itemId") === dfPrices("ID")) - * }}} - * - * '''Performing operations on a DataFrame''' - * - * Broadly, the operations on DataFrame can be divided into two types: - * - * - '''Transformations''' produce a new DataFrame from one or more existing DataFrames. Note - * that tranformations are lazy and don't cause the DataFrame to be evaluated. If the API does - * not provide a method to express the SQL that you want to use, you can use - * [[functions.sqlExpr]] as a workaround. - * - * - '''Actions''' cause the DataFrame to be evaluated. When you call a method that performs an - * action, Snowpark sends the SQL query for the DataFrame to the server for evaluation. - * - * '''Transforming a DataFrame''' - * - * The following examples demonstrate how you can transform a DataFrame. - * - * Example 5. Using the [[select(first:com\.snowflake\.snowpark\.Column* select]] method to select - * the columns that should be in the DataFrame (similar to adding a `SELECT` clause). - * - * {{{ - * // Return a new DataFrame containing the ID and amount columns of the prices table. This is - * // equivalent to: - * // SELECT ID, AMOUNT FROM PRICES; - * val dfPriceIdsAndAmounts = dfPrices.select(col("ID"), col("amount")) - * }}} - * - * Example 6. Using the [[Column.as]] method to rename a column in a DataFrame (similar to using - * `SELECT col AS alias`). - * - * {{{ - * // Return a new DataFrame containing the ID column of the prices table as a column named - * // itemId. 
This is equivalent to: - * // SELECT ID AS itemId FROM PRICES; - * val dfPriceItemIds = dfPrices.select(col("ID").as("itemId")) - * }}} - * - * Example 7. Using the [[filter]] method to filter data (similar to adding a `WHERE` clause). - * - * {{{ - * // Return a new DataFrame containing the row from the prices table with the ID 1. This is - * // equivalent to: - * // SELECT * FROM PRICES WHERE ID = 1; - * val dfPrice1 = dfPrices.filter((col("ID") === 1)) - * }}} - * - * Example 8. Using the [[sort(first* sort]] method to specify the sort order of the data (similar - * to adding an `ORDER BY` clause). - * - * {{{ - * // Return a new DataFrame for the prices table with the rows sorted by ID. This is equivalent - * // to: - * // SELECT * FROM PRICES ORDER BY ID; - * val dfSortedPrices = dfPrices.sort(col("ID")) - * }}} - * - * Example 9. Using the [[groupBy(first:com\.snowflake\.snowpark\.Column* groupBy]] method to - * return a [[RelationalGroupedDataFrame]] that you can use to group and aggregate results (similar - * to adding a `GROUP BY` clause). - * - * [[RelationalGroupedDataFrame]] provides methods for aggregating results, including: - * - * - [[RelationalGroupedDataFrame.avg(cols* avg]] (equivalent to AVG(column)) - * - [[RelationalGroupedDataFrame.count count]] (equivalent to COUNT()) - * - [[RelationalGroupedDataFrame.max(cols* max]] (equivalent to MAX(column)) - * - [[RelationalGroupedDataFrame.median(cols* median]] (equivalent to MEDIAN(column)) - * - [[RelationalGroupedDataFrame.min(cols* min]] (equivalent to MIN(column)) - * - [[RelationalGroupedDataFrame.sum(cols* sum]] (equivalent to SUM(column)) - * - * {{{ - * // Return a new DataFrame for the prices table that computes the sum of the prices by - * // category. This is equivalent to: - * // SELECT CATEGORY, SUM(AMOUNT) FROM PRICES GROUP BY CATEGORY; - * val dfTotalPricePerCategory = dfPrices.groupBy(col("category")).sum(col("amount")) - * }}} - * - * Example 10. Using a [[Window]] to build a [[WindowSpec]] object that you can use for - * [[https://docs.snowflake.com/en/user-guide/functions-window-using.html windowing functions]] - * (similar to using ' OVER ... PARTITION BY ... ORDER BY'). - * - * {{{ - * // Define a window that partitions prices by category and sorts the prices by date within the - * // partition. - * val window = Window.partitionBy(col("category")).orderBy(col("price_date")) - * // Calculate the running sum of prices over this window. This is equivalent to: - * // SELECT CATEGORY, PRICE_DATE, SUM(AMOUNT) OVER - * // (PARTITION BY CATEGORY ORDER BY PRICE_DATE) - * // FROM PRICES ORDER BY PRICE_DATE; - * val dfCumulativePrices = dfPrices.select( - * col("category"), col("price_date"), - * sum(col("amount")).over(window)).sort(col("price_date")) - * }}} - * - * '''Performing an action on a DataFrame''' - * - * The following examples demonstrate how you can perform an action on a DataFrame. - * - * Example 11: Performing a query and returning an array of Rows. - * {{{ - * val results = dfPrices.collect() - * }}} - * - * Example 12: Performing a query and print the results. - * {{{ - * dfPrices.show() - * }}} - * - * @groupname basic Basic DataFrame Functions - * @groupname actions Actions - * @groupname transform Transformations - * - * @since 0.1.0 - */ +/** + * Represents a lazily-evaluated relational dataset that contains a collection of [[Row]] objects + * with columns defined by a schema (column name and type). 
+ * + * A DataFrame is considered lazy because it encapsulates the computation or query required to + * produce a relational dataset. The computation is not performed until you call a method that + * performs an action (e.g. [[collect]]). + * + * '''Creating a DataFrame''' + * + * You can create a DataFrame in a number of different ways, as shown in the examples below. + * + * Example 1: Creating a DataFrame by reading a table. + * {{{ + * val dfPrices = session.table("itemsdb.publicschema.prices") + * }}} + * + * Example 2: Creating a DataFrame by reading files from a stage. + * {{{ + * val dfCatalog = session.read.csv("@stage/some_dir") + * }}} + * + * Example 3: Creating a DataFrame by specifying a sequence or a range. + * {{{ + * val df = session.createDataFrame(Seq((1, "one"), (2, "two"))) + * }}} + * {{{ + * val df = session.range(1, 10, 2) + * }}} + * + * Example 4: Create a new DataFrame by applying transformations to other existing DataFrames. + * {{{ + * val dfMergedData = dfCatalog.join(dfPrices, dfCatalog("itemId") === dfPrices("ID")) + * }}} + * + * '''Performing operations on a DataFrame''' + * + * Broadly, the operations on DataFrame can be divided into two types: + * + * - '''Transformations''' produce a new DataFrame from one or more existing DataFrames. Note that + * tranformations are lazy and don't cause the DataFrame to be evaluated. If the API does not + * provide a method to express the SQL that you want to use, you can use [[functions.sqlExpr]] + * as a workaround. + * + * - '''Actions''' cause the DataFrame to be evaluated. When you call a method that performs an + * action, Snowpark sends the SQL query for the DataFrame to the server for evaluation. + * + * '''Transforming a DataFrame''' + * + * The following examples demonstrate how you can transform a DataFrame. + * + * Example 5. Using the [[select(first:com\.snowflake\.snowpark\.Column* select]] method to select + * the columns that should be in the DataFrame (similar to adding a `SELECT` clause). + * + * {{{ + * // Return a new DataFrame containing the ID and amount columns of the prices table. This is + * // equivalent to: + * // SELECT ID, AMOUNT FROM PRICES; + * val dfPriceIdsAndAmounts = dfPrices.select(col("ID"), col("amount")) + * }}} + * + * Example 6. Using the [[Column.as]] method to rename a column in a DataFrame (similar to using + * `SELECT col AS alias`). + * + * {{{ + * // Return a new DataFrame containing the ID column of the prices table as a column named + * // itemId. This is equivalent to: + * // SELECT ID AS itemId FROM PRICES; + * val dfPriceItemIds = dfPrices.select(col("ID").as("itemId")) + * }}} + * + * Example 7. Using the [[filter]] method to filter data (similar to adding a `WHERE` clause). + * + * {{{ + * // Return a new DataFrame containing the row from the prices table with the ID 1. This is + * // equivalent to: + * // SELECT * FROM PRICES WHERE ID = 1; + * val dfPrice1 = dfPrices.filter((col("ID") === 1)) + * }}} + * + * Example 8. Using the [[sort(first* sort]] method to specify the sort order of the data (similar + * to adding an `ORDER BY` clause). + * + * {{{ + * // Return a new DataFrame for the prices table with the rows sorted by ID. This is equivalent + * // to: + * // SELECT * FROM PRICES ORDER BY ID; + * val dfSortedPrices = dfPrices.sort(col("ID")) + * }}} + * + * Example 9. 
Using the [[groupBy(first:com\.snowflake\.snowpark\.Column* groupBy]] method to return + * a [[RelationalGroupedDataFrame]] that you can use to group and aggregate results (similar to + * adding a `GROUP BY` clause). + * + * [[RelationalGroupedDataFrame]] provides methods for aggregating results, including: + * + * - [[RelationalGroupedDataFrame.avg(cols* avg]] (equivalent to AVG(column)) + * - [[RelationalGroupedDataFrame.count count]] (equivalent to COUNT()) + * - [[RelationalGroupedDataFrame.max(cols* max]] (equivalent to MAX(column)) + * - [[RelationalGroupedDataFrame.median(cols* median]] (equivalent to MEDIAN(column)) + * - [[RelationalGroupedDataFrame.min(cols* min]] (equivalent to MIN(column)) + * - [[RelationalGroupedDataFrame.sum(cols* sum]] (equivalent to SUM(column)) + * + * {{{ + * // Return a new DataFrame for the prices table that computes the sum of the prices by + * // category. This is equivalent to: + * // SELECT CATEGORY, SUM(AMOUNT) FROM PRICES GROUP BY CATEGORY; + * val dfTotalPricePerCategory = dfPrices.groupBy(col("category")).sum(col("amount")) + * }}} + * + * Example 10. Using a [[Window]] to build a [[WindowSpec]] object that you can use for + * [[https://docs.snowflake.com/en/user-guide/functions-window-using.html windowing functions]] + * (similar to using ' OVER ... PARTITION BY ... ORDER BY'). + * + * {{{ + * // Define a window that partitions prices by category and sorts the prices by date within the + * // partition. + * val window = Window.partitionBy(col("category")).orderBy(col("price_date")) + * // Calculate the running sum of prices over this window. This is equivalent to: + * // SELECT CATEGORY, PRICE_DATE, SUM(AMOUNT) OVER + * // (PARTITION BY CATEGORY ORDER BY PRICE_DATE) + * // FROM PRICES ORDER BY PRICE_DATE; + * val dfCumulativePrices = dfPrices.select( + * col("category"), col("price_date"), + * sum(col("amount")).over(window)).sort(col("price_date")) + * }}} + * + * '''Performing an action on a DataFrame''' + * + * The following examples demonstrate how you can perform an action on a DataFrame. + * + * Example 11: Performing a query and returning an array of Rows. + * {{{ + * val results = dfPrices.collect() + * }}} + * + * Example 12: Performing a query and print the results. + * {{{ + * dfPrices.show() + * }}} + * + * @groupname basic Basic DataFrame Functions + * @groupname actions Actions + * @groupname transform Transformations + * + * @since 0.1.0 + */ class DataFrame private[snowpark] ( private[snowpark] val session: Session, private[snowpark] val plan: LogicalPlan, @@ -204,27 +204,29 @@ class DataFrame private[snowpark] ( lazy private[snowpark] val snowflakePlan: SnowflakePlan = session.analyzer.resolve(plan) - /** Returns a clone of this DataFrame. - * - * @group basic - * @since 0.4.0 - * @return - * A [[DataFrame]] - */ + /** + * Returns a clone of this DataFrame. + * + * @group basic + * @since 0.4.0 + * @return + * A [[DataFrame]] + */ override def clone: DataFrame = transformation("clone") { DataFrame(session, snowflakePlan.clone) } // the column name of schema may be renamed to its original name. // to access the real column name, use `output` instead. - /** Returns the definition of the columns in this DataFrame (the "relational schema" for the - * DataFrame). - * - * @group basic - * @since 0.1.0 - * @return - * [[com.snowflake.snowpark.types.StructType]] - */ + /** + * Returns the definition of the columns in this DataFrame (the "relational schema" for the + * DataFrame). 
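+   *
+   * For example, a small sketch (assuming `df` is any existing DataFrame):
+   * {{{
+   *   // schema is a com.snowflake.snowpark.types.StructType describing the output columns.
+   *   val schema = df.schema
+   *   println(schema)
+   * }}}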
+ * + * @group basic + * @since 0.1.0 + * @return + * [[com.snowflake.snowpark.types.StructType]] + */ lazy val schema: StructType = { val attrs: Seq[Attribute] = if (session.conn.hideInternalAlias) { Utils.getDisplayColumnNames(snowflakePlan.attributes, plan.internalRenamedColumns) @@ -234,16 +236,17 @@ class DataFrame private[snowpark] ( StructType.fromAttributes(attrs) } - /** Caches the content of this DataFrame to create a new cached DataFrame. - * - * All subsequent operations on the returned cached DataFrame are performed on the cached data - * and have no effect on the original DataFrame. - * - * @since 0.4.0 - * @group actions - * @return - * A [[HasCachedResult]] - */ + /** + * Caches the content of this DataFrame to create a new cached DataFrame. + * + * All subsequent operations on the returned cached DataFrame are performed on the cached data and + * have no effect on the original DataFrame. + * + * @since 0.4.0 + * @group actions + * @return + * A [[HasCachedResult]] + */ def cacheResult(): HasCachedResult = action("cacheResult") { val tempTableName = randomNameForTempObject(TempObjectType.Table) val createTempTable = @@ -254,15 +257,16 @@ class DataFrame private[snowpark] ( new HasCachedResult(session, newPlan, Seq()) } - /** Prints the list of queries that will be executed to evaluate this DataFrame. Prints the query - * execution plan if only one SELECT/DML/DDL statement will be executed. - * - * For more information about the query execution plan, see the - * [[https://docs.snowflake.com/en/sql-reference/sql/explain.html EXPLAIN]] command. - * - * @since 0.1.0 - * @group basic - */ + /** + * Prints the list of queries that will be executed to evaluate this DataFrame. Prints the query + * execution plan if only one SELECT/DML/DDL statement will be executed. + * + * For more information about the query execution plan, see the + * [[https://docs.snowflake.com/en/sql-reference/sql/explain.html EXPLAIN]] command. + * + * @since 0.1.0 + * @group basic + */ def explain(): Unit = { // scalastyle:off println println(explainString) @@ -294,93 +298,95 @@ class DataFrame private[snowpark] ( msg + "\n--------------------------------------------" } - /** Creates a new DataFrame containing the columns with the specified names. - * - * You can use this method to assign column names when constructing a DataFrame. For example: - * - * For example: - * - * {{{ - * var df = session.createDataFrame(Seq((1, "a")).toDF(Seq("a", "b")) - * }}} - * - * This returns a DataFrame containing the following: - * - * {{{ - * ------------- - * |"A" |"B" | - * ------------- - * |1 |2 | - * |3 |4 | - * ------------- - * }}} - * - * if you imported [[Session.implicits .implicits._]], you can use the following - * syntax to create the DataFrame from a `Seq` and call `toDF` to assign column names to the - * returned DataFrame: - * - * {{{ - * import mysession.implicits_ - * var df = Seq((1, 2), (3, 4)).toDF(Seq("a", "b")) - * }}} - * - * The number of column names that you pass in must match the number of columns in the current - * DataFrame. - * - * @group basic - * @since 0.1.0 - * @param first - * The name of the first column. - * @param remaining - * A list of the rest of the column names. - * @return - * A [[DataFrame]] - */ + /** + * Creates a new DataFrame containing the columns with the specified names. + * + * You can use this method to assign column names when constructing a DataFrame. 
For example: + * + * For example: + * + * {{{ + * var df = session.createDataFrame(Seq((1, "a")).toDF(Seq("a", "b")) + * }}} + * + * This returns a DataFrame containing the following: + * + * {{{ + * ------------- + * |"A" |"B" | + * ------------- + * |1 |2 | + * |3 |4 | + * ------------- + * }}} + * + * if you imported [[Session.implicits .implicits._]], you can use the following + * syntax to create the DataFrame from a `Seq` and call `toDF` to assign column names to the + * returned DataFrame: + * + * {{{ + * import mysession.implicits_ + * var df = Seq((1, 2), (3, 4)).toDF(Seq("a", "b")) + * }}} + * + * The number of column names that you pass in must match the number of columns in the current + * DataFrame. + * + * @group basic + * @since 0.1.0 + * @param first + * The name of the first column. + * @param remaining + * A list of the rest of the column names. + * @return + * A [[DataFrame]] + */ def toDF(first: String, remaining: String*): DataFrame = transformation("toDF") { toDF(first +: remaining) } - /** Creates a new DataFrame containing the data in the current DataFrame but in columns with the - * specified names. - * - * You can use this method to assign column names when constructing a DataFrame. For example: - * - * For example: - * - * {{{ - * var df = session.createDataFrame(Seq((1, 2), (3, 4))).toDF(Seq("a", "b")) - * }}} - * - * This returns a DataFrame containing the following: - * - * {{{ - * ------------- - * |"A" |"B" | - * ------------- - * |1 |2 | - * |3 |4 | - * ------------- - * }}} - * - * If you imported [[Session.implicits .implicits._]], you can use the following - * syntax to create the DataFrame from a `Seq` and call `toDF` to assign column names to the - * returned DataFrame: - * - * {{{ - * import mysession.implicits_ - * var df = Seq((1, 2), (3, 4)).toDF(Seq("a", "b")) - * }}} - * - * The number of column names that you pass in must match the number of columns in the current - * DataFrame. - * - * @group basic - * @since 0.2.0 - * @param colNames - * A list of column names. - * @return - * A [[DataFrame]] - */ + /** + * Creates a new DataFrame containing the data in the current DataFrame but in columns with the + * specified names. + * + * You can use this method to assign column names when constructing a DataFrame. For example: + * + * For example: + * + * {{{ + * var df = session.createDataFrame(Seq((1, 2), (3, 4))).toDF(Seq("a", "b")) + * }}} + * + * This returns a DataFrame containing the following: + * + * {{{ + * ------------- + * |"A" |"B" | + * ------------- + * |1 |2 | + * |3 |4 | + * ------------- + * }}} + * + * If you imported [[Session.implicits .implicits._]], you can use the following + * syntax to create the DataFrame from a `Seq` and call `toDF` to assign column names to the + * returned DataFrame: + * + * {{{ + * import mysession.implicits_ + * var df = Seq((1, 2), (3, 4)).toDF(Seq("a", "b")) + * }}} + * + * The number of column names that you pass in must match the number of columns in the current + * DataFrame. + * + * @group basic + * @since 0.2.0 + * @param colNames + * A list of column names. + * @return + * A [[DataFrame]] + */ def toDF(colNames: Seq[String]): DataFrame = transformation("toDF") { require( output.length == colNames.length, @@ -402,86 +408,89 @@ class DataFrame private[snowpark] ( } } - /** Creates a new DataFrame containing the data in the current DataFrame but in columns with the - * specified names. - * - * You can use this method to assign column names when constructing a DataFrame. 
For example: - * - * For example: - * - * {{{ - * val df = session.createDataFrame(Seq((1, "a"))).toDF(Array("a", "b")) - * }}} - * - * This returns a DataFrame containing the following: - * - * {{{ - * ------------- - * |"A" |"B" | - * ------------- - * |1 |2 | - * |3 |4 | - * ------------- - * }}} - * - * If you imported [[Session.implicits .implicits._]], you can use the following - * syntax to create the DataFrame from a `Seq` and call `toDF` to assign column names to the - * returned DataFrame: - * - * {{{ - * import mysession.implicits_ - * var df = Seq((1, 2), (3, 4)).toDF(Array("a", "b")) - * }}} - * - * The number of column names that you pass in must match the number of columns in the current - * DataFrame. - * - * @group basic - * @since 0.7.0 - * @param colNames - * An array of column names. - * @return - * A [[DataFrame]] - */ + /** + * Creates a new DataFrame containing the data in the current DataFrame but in columns with the + * specified names. + * + * You can use this method to assign column names when constructing a DataFrame. For example: + * + * For example: + * + * {{{ + * val df = session.createDataFrame(Seq((1, "a"))).toDF(Array("a", "b")) + * }}} + * + * This returns a DataFrame containing the following: + * + * {{{ + * ------------- + * |"A" |"B" | + * ------------- + * |1 |2 | + * |3 |4 | + * ------------- + * }}} + * + * If you imported [[Session.implicits .implicits._]], you can use the following + * syntax to create the DataFrame from a `Seq` and call `toDF` to assign column names to the + * returned DataFrame: + * + * {{{ + * import mysession.implicits_ + * var df = Seq((1, 2), (3, 4)).toDF(Array("a", "b")) + * }}} + * + * The number of column names that you pass in must match the number of columns in the current + * DataFrame. + * + * @group basic + * @since 0.7.0 + * @param colNames + * An array of column names. + * @return + * A [[DataFrame]] + */ def toDF(colNames: Array[String]): DataFrame = transformation("toDF") { toDF(colNames.toSeq) } - /** Sorts a DataFrame by the specified expressions (similar to ORDER BY in SQL). - * - * For example: - * - * {{{ - * val dfSorted = df.sort($"colA", $"colB".asc) - * }}} - * - * @group transform - * @since 0.1.0 - * @param first - * The first Column expression for sorting the DataFrame. - * @param remaining - * Additional Column expressions for sorting the DataFrame. - * @return - * A [[DataFrame]] - */ + /** + * Sorts a DataFrame by the specified expressions (similar to ORDER BY in SQL). + * + * For example: + * + * {{{ + * val dfSorted = df.sort($"colA", $"colB".asc) + * }}} + * + * @group transform + * @since 0.1.0 + * @param first + * The first Column expression for sorting the DataFrame. + * @param remaining + * Additional Column expressions for sorting the DataFrame. + * @return + * A [[DataFrame]] + */ def sort(first: Column, remaining: Column*): DataFrame = transformation("sort") { sort(first +: remaining) } - /** Sorts a DataFrame by the specified expressions (similar to ORDER BY in SQL). - * - * For example: - * {{{ - * val dfSorted = df.sort(Seq($"colA", $"colB".desc)) - * }}} - * - * @group transform - * @since 0.2.0 - * @param sortExprs - * A list of Column expressions for sorting the DataFrame. - * @return - * A [[DataFrame]] - */ + /** + * Sorts a DataFrame by the specified expressions (similar to ORDER BY in SQL). 
+ * + * For example: + * {{{ + * val dfSorted = df.sort(Seq($"colA", $"colB".desc)) + * }}} + * + * @group transform + * @since 0.2.0 + * @param sortExprs + * A list of Column expressions for sorting the DataFrame. + * @return + * A [[DataFrame]] + */ def sort(sortExprs: Seq[Column]): DataFrame = transformation("sort") { if (sortExprs.nonEmpty) { withPlan( @@ -498,110 +507,116 @@ class DataFrame private[snowpark] ( } } - /** Sorts a DataFrame by the specified expressions (similar to ORDER BY in SQL). - * - * For example: - * - * {{{ - * val dfSorted = df.sort(Array(col("col1").asc, col("col2").desc, col("col3"))) - * }}} - * - * @group transform - * @since 0.7.0 - * @param sortExprs - * An array of Column expressions for sorting the DataFrame. - * @return - * A [[DataFrame]] - */ + /** + * Sorts a DataFrame by the specified expressions (similar to ORDER BY in SQL). + * + * For example: + * + * {{{ + * val dfSorted = df.sort(Array(col("col1").asc, col("col2").desc, col("col3"))) + * }}} + * + * @group transform + * @since 0.7.0 + * @param sortExprs + * An array of Column expressions for sorting the DataFrame. + * @return + * A [[DataFrame]] + */ def sort(sortExprs: Array[Column]): DataFrame = sort(sortExprs.toSeq) - /** Returns a reference to a column in the DataFrame. This method is identical to - * [[col DataFrame.col]]. - * - * @group transform - * @since 0.1.0 - * @param colName - * The name of the column. - * @return - * A [[Column]] - */ + /** + * Returns a reference to a column in the DataFrame. This method is identical to + * [[col DataFrame.col]]. + * + * @group transform + * @since 0.1.0 + * @param colName + * The name of the column. + * @return + * A [[Column]] + */ def apply(colName: String): Column = col(colName) - /** Returns a reference to a column in the DataFrame. - * - * @group transform - * @since 0.1.0 - * @param colName - * The name of the column. - * @return - * A [[Column]] - */ + /** + * Returns a reference to a column in the DataFrame. + * + * @group transform + * @since 0.1.0 + * @param colName + * The name of the column. + * @return + * A [[Column]] + */ def col(colName: String): Column = colName match { case "*" => Column(Star(snowflakePlan.output)) case _ => Column(resolve(colName)) } - /** Returns the current DataFrame aliased as the input alias name. - * - * For example: - * - * {{{ - * val df2 = df.alias("A") - * df2.select(df2.col("A.num")) - * }}} - * - * @group basic - * @since 1.10.0 - * @param alias - * The alias name of the dataframe - * @return - * a [[DataFrame]] - */ + /** + * Returns the current DataFrame aliased as the input alias name. + * + * For example: + * + * {{{ + * val df2 = df.alias("A") + * df2.select(df2.col("A.num")) + * }}} + * + * @group basic + * @since 1.10.0 + * @param alias + * The alias name of the dataframe + * @return + * a [[DataFrame]] + */ def alias(alias: String): DataFrame = transformation("alias") { withPlan(DataframeAlias(alias, plan, output)) } - /** Returns a new DataFrame with the specified Column expressions as output (similar to SELECT in - * SQL). Only the Columns specified as arguments will be present in the resulting DataFrame. - * - * You can use any Column expression. - * - * For example: - * - * {{{ - * val dfSelected = df.select($"col1", substring($"col2", 0, 10), df("col3") + df("col4")) - * }}} - * - * @group transform - * @since 0.1.0 - * @param first - * The expression for the first column to return. - * @param remaining - * A list of expressions for the additional columns to return. 
- * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame with the specified Column expressions as output (similar to SELECT in + * SQL). Only the Columns specified as arguments will be present in the resulting DataFrame. + * + * You can use any Column expression. + * + * For example: + * + * {{{ + * val dfSelected = df.select($"col1", substring($"col2", 0, 10), df("col3") + df("col4")) + * }}} + * + * @group transform + * @since 0.1.0 + * @param first + * The expression for the first column to return. + * @param remaining + * A list of expressions for the additional columns to return. + * @return + * A [[DataFrame]] + */ def select(first: Column, remaining: Column*): DataFrame = transformation("select") { select(first +: remaining) } - /** Returns a new DataFrame with the specified Column expressions as output (similar to SELECT in - * SQL). Only the Columns specified as arguments will be present in the resulting DataFrame. - * - * You can use any Column expression. - * - * For example: - * {{{ - * val dfSelected = df.select(Seq($"col1", substring($"col2", 0, 10), df("col3") + df("col4"))) - * }}} - * - * @group transform - * @since 0.2.0 - * @param columns - * A list of expressions for the columns to return. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame with the specified Column expressions as output (similar to SELECT in + * SQL). Only the Columns specified as arguments will be present in the resulting DataFrame. + * + * You can use any Column expression. + * + * For example: + * {{{ + * val dfSelected = df.select(Seq($"col1", substring($"col2", 0, 10), df("col3") + df("col4"))) + * }}} + * + * @group transform + * @since 0.2.0 + * @param columns + * A list of expressions for the columns to return. + * @return + * A [[DataFrame]] + */ def select[T: ClassTag](columns: Seq[Column]): DataFrame = transformation("select") { require( columns.nonEmpty, @@ -690,182 +705,190 @@ class DataFrame private[snowpark] ( } } - /** Returns a new DataFrame with the specified Column expressions as output (similar to SELECT in - * SQL). Only the Columns specified as arguments will be present in the resulting DataFrame. - * - * You can use any Column expression. - * - * For example: - * - * {{{ - * val dfSelected = - * df.select(Array(df.col("col1"), lit("abc"), df.col("col1") + df.col("col2"))) - * }}} - * - * @group transform - * @since 0.7.0 - * @param columns - * An array of expressions for the columns to return. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame with the specified Column expressions as output (similar to SELECT in + * SQL). Only the Columns specified as arguments will be present in the resulting DataFrame. + * + * You can use any Column expression. + * + * For example: + * + * {{{ + * val dfSelected = + * df.select(Array(df.col("col1"), lit("abc"), df.col("col1") + df.col("col2"))) + * }}} + * + * @group transform + * @since 0.7.0 + * @param columns + * An array of expressions for the columns to return. + * @return + * A [[DataFrame]] + */ def select(columns: Array[Column]): DataFrame = transformation("select") { select(columns.toSeq) } - /** Returns a new DataFrame with a subset of named columns (similar to SELECT in SQL). - * - * For example: - * - * {{{ - * val dfSelected = df.select("col1", "col2", "col3") - * }}} - * - * @group transform - * @since 0.1.0 - * @param first - * The name of the first column to return. - * @param remaining - * A list of the names of the additional columns to return. 
- * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame with a subset of named columns (similar to SELECT in SQL). + * + * For example: + * + * {{{ + * val dfSelected = df.select("col1", "col2", "col3") + * }}} + * + * @group transform + * @since 0.1.0 + * @param first + * The name of the first column to return. + * @param remaining + * A list of the names of the additional columns to return. + * @return + * A [[DataFrame]] + */ def select(first: String, remaining: String*): DataFrame = transformation("select") { select(first +: remaining) } - /** Returns a new DataFrame with a subset of named columns (similar to SELECT in SQL). - * - * For example: - * {{{ - * val dfSelected = df.select(Seq("col1", "col2", "col3")) - * }}} - * - * @group transform - * @since 0.2.0 - * @param columns - * A list of the names of columns to return. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame with a subset of named columns (similar to SELECT in SQL). + * + * For example: + * {{{ + * val dfSelected = df.select(Seq("col1", "col2", "col3")) + * }}} + * + * @group transform + * @since 0.2.0 + * @param columns + * A list of the names of columns to return. + * @return + * A [[DataFrame]] + */ def select(columns: Seq[String]): DataFrame = transformation("select") { select(columns.map(Column(_))) } - /** Returns a new DataFrame with a subset of named columns (similar to SELECT in SQL). - * - * For example: - * - * {{{ - * val dfSelected = df.select(Array("col1", "col2")) - * }}} - * - * @group transform - * @since 0.7.0 - * @param columns - * An array of the names of columns to return. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame with a subset of named columns (similar to SELECT in SQL). + * + * For example: + * + * {{{ + * val dfSelected = df.select(Array("col1", "col2")) + * }}} + * + * @group transform + * @since 0.7.0 + * @param columns + * An array of the names of columns to return. + * @return + * A [[DataFrame]] + */ def select(columns: Array[String]): DataFrame = transformation("select") { select(columns.toSeq) } - /** Returns a new DataFrame that excludes the columns with the specified names from the output. - * - * This is functionally equivalent to calling [[select(first:String* select]] and passing in all - * columns except the ones to exclude. - * - * Throws [[SnowparkClientException]] if the resulting DataFrame contains no output columns. - * @group transform - * @since 0.1.0 - * @param first - * The name of the first column to exclude. - * @param remaining - * A list of the names of additional columns to exclude. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that excludes the columns with the specified names from the output. + * + * This is functionally equivalent to calling [[select(first:String* select]] and passing in all + * columns except the ones to exclude. + * + * Throws [[SnowparkClientException]] if the resulting DataFrame contains no output columns. + * @group transform + * @since 0.1.0 + * @param first + * The name of the first column to exclude. + * @param remaining + * A list of the names of additional columns to exclude. + * @return + * A [[DataFrame]] + */ def drop(first: String, remaining: String*): DataFrame = transformation("drop") { drop(first +: remaining) } - /** Returns a new DataFrame that excludes the columns with the specified names from the output. 
- * - * This is functionally equivalent to calling [[select(columns:Seq* select]] and passing in all - * columns except the ones to exclude. - * - * Throws [[SnowparkClientException]] if the resulting DataFrame contains no output columns. - * - * @group transform - * @since 0.2.0 - * @param colNames - * A list of the names of columns to exclude. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that excludes the columns with the specified names from the output. + * + * This is functionally equivalent to calling [[select(columns:Seq* select]] and passing in all + * columns except the ones to exclude. + * + * Throws [[SnowparkClientException]] if the resulting DataFrame contains no output columns. + * + * @group transform + * @since 0.2.0 + * @param colNames + * A list of the names of columns to exclude. + * @return + * A [[DataFrame]] + */ def drop(colNames: Seq[String]): DataFrame = transformation("drop") { val dropColumns: Seq[Column] = colNames.map(name => functions.col(name)) drop(dropColumns) } - /** Returns a new DataFrame that excludes the columns with the specified names from the output. - * - * This is functionally equivalent to calling [[select(columns:Array[String* select]] and passing - * in all columns except the ones to exclude. - * - * Throws [[SnowparkClientException]] if the resulting DataFrame contains no output columns. - * - * @group transform - * @since 0.7.0 - * @param colNames - * An array of the names of columns to exclude. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that excludes the columns with the specified names from the output. + * + * This is functionally equivalent to calling [[select(columns:Array[String* select]] and passing + * in all columns except the ones to exclude. + * + * Throws [[SnowparkClientException]] if the resulting DataFrame contains no output columns. + * + * @group transform + * @since 0.7.0 + * @param colNames + * An array of the names of columns to exclude. + * @return + * A [[DataFrame]] + */ def drop(colNames: Array[String]): DataFrame = transformation("drop") { drop(colNames.toSeq) } - /** Returns a new DataFrame that excludes the columns specified by the expressions from the - * output. - * - * This is functionally equivalent to calling [[select(first:String* select]] and passing in all - * columns except the ones to exclude. - * - * This method throws a [[SnowparkClientException]] if: - * - A specified column does not have a name, or - * - The resulting DataFrame has no output columns. - * - * @group transform - * @since 0.1.0 - * @param first - * The expression for the first column to exclude. - * @param remaining - * A list of expressions for additional columns to exclude. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that excludes the columns specified by the expressions from the output. + * + * This is functionally equivalent to calling [[select(first:String* select]] and passing in all + * columns except the ones to exclude. + * + * This method throws a [[SnowparkClientException]] if: + * - A specified column does not have a name, or + * - The resulting DataFrame has no output columns. + * + * @group transform + * @since 0.1.0 + * @param first + * The expression for the first column to exclude. + * @param remaining + * A list of expressions for additional columns to exclude. 
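+   *
+   * For example, a minimal sketch (assuming `df` has columns named `colA` and `colB`):
+   * {{{
+   *   // Keep every column except colB.
+   *   val dfDropped = df.drop(df("colB"))
+   * }}}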
+ * @return + * A [[DataFrame]] + */ def drop(first: Column, remaining: Column*): DataFrame = transformation("drop") { drop(first +: remaining) } - /** Returns a new DataFrame that excludes the specified column expressions from the output. - * - * This is functionally equivalent to calling [[select(columns:Seq* select]] and passing in all - * columns except the ones to exclude. - * - * This method throws a [[SnowparkClientException]] if: - * - A specified column does not have a name, or - * - The resulting DataFrame has no output columns. - * - * @group transform - * @since 0.2.0 - * @param cols - * A list of the names of the columns to exclude. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that excludes the specified column expressions from the output. + * + * This is functionally equivalent to calling [[select(columns:Seq* select]] and passing in all + * columns except the ones to exclude. + * + * This method throws a [[SnowparkClientException]] if: + * - A specified column does not have a name, or + * - The resulting DataFrame has no output columns. + * + * @group transform + * @since 0.2.0 + * @param cols + * A list of the names of the columns to exclude. + * @return + * A [[DataFrame]] + */ def drop[T: ClassTag](cols: Seq[Column]): DataFrame = transformation("drop") { val dropColumns: Seq[NamedExpression] = cols.map { case Column(expr: NamedExpression) => expr @@ -876,420 +899,457 @@ class DataFrame private[snowpark] ( renameBackIfDeduped(resultDF) } - /** Returns a new DataFrame that excludes the specified column expressions from the output. - * - * This is functionally equivalent to calling [[select(columns:Array[String* select]] and passing - * in all columns except the ones to exclude. - * - * This method throws a [[SnowparkClientException]] if: - * - A specified column does not have a name, or - * - The resulting DataFrame has no output columns. - * - * @group transform - * @since 0.7.0 - * @param cols - * An array of the names of the columns to exclude. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that excludes the specified column expressions from the output. + * + * This is functionally equivalent to calling [[select(columns:Array[String* select]] and passing + * in all columns except the ones to exclude. + * + * This method throws a [[SnowparkClientException]] if: + * - A specified column does not have a name, or + * - The resulting DataFrame has no output columns. + * + * @group transform + * @since 0.7.0 + * @param cols + * An array of the names of the columns to exclude. + * @return + * A [[DataFrame]] + */ def drop(cols: Array[Column]): DataFrame = transformation("drop") { drop(cols.toSeq) } - /** Filters rows based on the specified conditional expression (similar to WHERE in SQL). - * - * For example: - * - * {{{ - * val dfFiltered = df.filter($"colA" > 1 && $"colB" < 100) - * }}} - * - * @group transform - * @since 0.1.0 - * @param condition - * Filter condition defined as an expression on columns. - * @return - * A filtered [[DataFrame]] - */ + /** + * Filters rows based on the specified conditional expression (similar to WHERE in SQL). + * + * For example: + * + * {{{ + * val dfFiltered = df.filter($"colA" > 1 && $"colB" < 100) + * }}} + * + * @group transform + * @since 0.1.0 + * @param condition + * Filter condition defined as an expression on columns. 
+ * @return + * A filtered [[DataFrame]] + */ def filter(condition: Column): DataFrame = transformation("filter") { withPlan(Filter(condition.expr, plan)) } - /** Filters rows based on the specified conditional expression (similar to WHERE in SQL). This is - * equivalent to calling [[filter]]. - * - * For example: - * - * {{{ - * // The following two result in the same SQL query: - * pricesDF.filter($"price" > 100) - * pricesDF.where($"price" > 100) - * }}} - * - * @group transform - * @since 0.1.0 - * @param condition - * Filter condition defined as an expression on columns. - * @return - * A filtered [[DataFrame]] - */ + /** + * Filters rows based on the specified conditional expression (similar to WHERE in SQL). This is + * equivalent to calling [[filter]]. + * + * For example: + * + * {{{ + * // The following two result in the same SQL query: + * pricesDF.filter($"price" > 100) + * pricesDF.where($"price" > 100) + * }}} + * + * @group transform + * @since 0.1.0 + * @param condition + * Filter condition defined as an expression on columns. + * @return + * A filtered [[DataFrame]] + */ def where(condition: Column): DataFrame = transformation("where") { filter(condition) } - /** Aggregate the data in the DataFrame. Use this method if you don't need to group the data - * (`groupBy`). - * - * For the input, pass in a Map that specifies the column names and aggregation functions. For - * each pair in the Map: - * - Set the key to the name of the column to aggregate. - * - Set the value to the name of the aggregation function to use on that column. - * - * The following example calculates the maximum value of the `num_sales` column and the average - * value of the `price` column: - * {{{ - * val dfAgg = df.agg("num_sales" -> "max", "price" -> "mean") - * }}} - * - * This is equivalent to calling `agg` after calling `groupBy` without a column name: - * {{{ - * val dfAgg = df.groupBy().agg(df("num_sales") -> "max", df("price") -> "mean") - * }}} - * - * @group transform - * @since 0.1.0 - * @param expr - * A map of column names and aggregate functions. - * @return - * A [[DataFrame]] - */ + /** + * Aggregate the data in the DataFrame. Use this method if you don't need to group the data + * (`groupBy`). + * + * For the input, pass in a Map that specifies the column names and aggregation functions. For + * each pair in the Map: + * - Set the key to the name of the column to aggregate. + * - Set the value to the name of the aggregation function to use on that column. + * + * The following example calculates the maximum value of the `num_sales` column and the average + * value of the `price` column: + * {{{ + * val dfAgg = df.agg("num_sales" -> "max", "price" -> "mean") + * }}} + * + * This is equivalent to calling `agg` after calling `groupBy` without a column name: + * {{{ + * val dfAgg = df.groupBy().agg(df("num_sales") -> "max", df("price") -> "mean") + * }}} + * + * @group transform + * @since 0.1.0 + * @param expr + * A map of column names and aggregate functions. + * @return + * A [[DataFrame]] + */ def agg(expr: (String, String), exprs: (String, String)*): DataFrame = transformation("agg") { agg(expr +: exprs) } - /** Aggregate the data in the DataFrame. Use this method if you don't need to group the data - * (`groupBy`). - * - * For the input, pass in a Map that specifies the column names and aggregation functions. For - * each pair in the Map: - * - Set the key to the name of the column to aggregate. - * - Set the value to the name of the aggregation function to use on that column. 
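`where` is documented above as a plain alias for `filter`, and the name-based `agg` delegates to `groupBy()` with no grouping columns. A short sketch, reusing the hypothetical `session` from the earlier example and a hypothetical `prices` table with `price` and `num_sales` columns:

{{{
import com.snowflake.snowpark.functions._

val prices = session.table("prices")

// filter and where generate the same WHERE clause.
val expensive1 = prices.filter(col("price") > 100)
val expensive2 = prices.where(col("price") > 100)

// Column-name / function-name pairs, aggregated over the whole table.
val stats = prices.agg("num_sales" -> "max", "price" -> "mean")
}}}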
- * - * The following example calculates the maximum value of the `num_sales` column and the average - * value of the `price` column: - * {{{ - * val dfAgg = df.agg(Seq("num_sales" -> "max", "price" -> "mean")) - * }}} - * - * This is equivalent to calling `agg` after calling `groupBy` without a column name: - * {{{ - * val dfAgg = df.groupBy().agg(Seq(df("num_sales") -> "max", df("price") -> "mean")) - * }}} - * - * @group transform - * @since 0.2.0 - * @param exprs - * A map of column names and aggregate functions. - * @return - * A [[DataFrame]] - */ + /** + * Aggregate the data in the DataFrame. Use this method if you don't need to group the data + * (`groupBy`). + * + * For the input, pass in a Map that specifies the column names and aggregation functions. For + * each pair in the Map: + * - Set the key to the name of the column to aggregate. + * - Set the value to the name of the aggregation function to use on that column. + * + * The following example calculates the maximum value of the `num_sales` column and the average + * value of the `price` column: + * {{{ + * val dfAgg = df.agg(Seq("num_sales" -> "max", "price" -> "mean")) + * }}} + * + * This is equivalent to calling `agg` after calling `groupBy` without a column name: + * {{{ + * val dfAgg = df.groupBy().agg(Seq(df("num_sales") -> "max", df("price") -> "mean")) + * }}} + * + * @group transform + * @since 0.2.0 + * @param exprs + * A map of column names and aggregate functions. + * @return + * A [[DataFrame]] + */ def agg(exprs: Seq[(String, String)]): DataFrame = transformation("agg") { - groupBy().agg(exprs.map({ case (c, a) => (col(c), a) })) - } - - /** Aggregate the data in the DataFrame. Use this method if you don't need to group the data - * (`groupBy`). - * - * For the input value, pass in expressions that apply aggregation functions to columns - * (functions that are defined in the [[functions]] object). - * - * The following example calculates the maximum value of the `num_sales` column and the mean - * value of the `price` column: - * - * For example: - * - * {{{ - * import com.snowflake.snowpark.functions._ - * - * val dfAgg = df.agg(max($"num_sales"), mean($"price")) - * }}} - * - * @group transform - * @since 0.1.0 - * @param expr - * A list of expressions on columns. - * @return - * A [[DataFrame]] - */ + groupBy().agg(exprs.map({ case (c, a) => (col(c), a) }).toSeq) + } + + /** + * Aggregate the data in the DataFrame. Use this method if you don't need to group the data + * (`groupBy`). + * + * For the input value, pass in expressions that apply aggregation functions to columns (functions + * that are defined in the [[functions]] object). + * + * The following example calculates the maximum value of the `num_sales` column and the mean value + * of the `price` column: + * + * For example: + * + * {{{ + * import com.snowflake.snowpark.functions._ + * + * val dfAgg = df.agg(max($"num_sales"), mean($"price")) + * }}} + * + * @group transform + * @since 0.1.0 + * @param expr + * A list of expressions on columns. + * @return + * A [[DataFrame]] + */ def agg(expr: Column, exprs: Column*): DataFrame = transformation("agg") { agg(expr +: exprs) } - /** Aggregate the data in the DataFrame. Use this method if you don't need to group the data - * (`groupBy`). - * - * For the input value, pass in expressions that apply aggregation functions to columns - * (functions that are defined in the [[functions]] object). 
- * - * The following example calculates the maximum value of the `num_sales` column and the mean - * value of the `price` column: - * {{{ - * import com.snowflake.snowpark.functions._ - * - * val dfAgg = df.agg(Seq(max($"num_sales"), mean($"price"))) - * }}} - * - * @group transform - * @since 0.2.0 - * @param exprs - * A list of expressions on columns. - * @return - * A [[DataFrame]] - */ + /** + * Aggregate the data in the DataFrame. Use this method if you don't need to group the data + * (`groupBy`). + * + * For the input value, pass in expressions that apply aggregation functions to columns (functions + * that are defined in the [[functions]] object). + * + * The following example calculates the maximum value of the `num_sales` column and the mean value + * of the `price` column: + * {{{ + * import com.snowflake.snowpark.functions._ + * + * val dfAgg = df.agg(Seq(max($"num_sales"), mean($"price"))) + * }}} + * + * @group transform + * @since 0.2.0 + * @param exprs + * A list of expressions on columns. + * @return + * A [[DataFrame]] + */ def agg[T: ClassTag](exprs: Seq[Column]): DataFrame = transformation("agg") { - groupBy().agg(exprs) - } - - /** Aggregate the data in the DataFrame. Use this method if you don't need to group the data - * (`groupBy`). - * - * For the input value, pass in expressions that apply aggregation functions to columns - * (functions that are defined in the [[functions]] object). - * - * The following example calculates the maximum value of the `num_sales` column and the mean - * value of the `price` column: - * - * For example: - * - * {{{ - * import com.snowflake.snowpark.functions._ - * - * val dfAgg = df.agg(Array(max($"num_sales"), mean($"price"))) - * }}} - * - * @group transform - * @since 0.7.0 - * @param exprs - * An array of expressions on columns. - * @return - * A [[DataFrame]] - */ + groupBy().agg(exprs.toSeq) + } + + /** + * Aggregate the data in the DataFrame. Use this method if you don't need to group the data + * (`groupBy`). + * + * For the input value, pass in expressions that apply aggregation functions to columns (functions + * that are defined in the [[functions]] object). + * + * The following example calculates the maximum value of the `num_sales` column and the mean value + * of the `price` column: + * + * For example: + * + * {{{ + * import com.snowflake.snowpark.functions._ + * + * val dfAgg = df.agg(Array(max($"num_sales"), mean($"price"))) + * }}} + * + * @group transform + * @since 0.7.0 + * @param exprs + * An array of expressions on columns. + * @return + * A [[DataFrame]] + */ def agg(exprs: Array[Column]): DataFrame = transformation("agg") { agg(exprs.toSeq) } - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] - * on the DataFrame. - * - * @group transform - * @since 0.1.0 - * @param first - * The expression for the first column. - * @param remaining - * A list of expressions for additional columns. - * @return - * A [[RelationalGroupedDataFrame]] - */ + // scalastyle:off line.size.limit + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] + * on the DataFrame. + * + * @group transform + * @since 0.1.0 + * @param first + * The expression for the first column. + * @param remaining + * A list of expressions for additional columns. 
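The expression-based `agg` overloads take aggregate functions from the [[functions]] object instead of name/function pairs; a one-line sketch against the same hypothetical `prices` table:

{{{
import com.snowflake.snowpark.functions._

// max and mean are imported from com.snowflake.snowpark.functions.
val dfAgg = session.table("prices").agg(max(col("num_sales")), mean(col("price")))
}}}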
+ * @return + * A [[RelationalGroupedDataFrame]] + */ + // scalastyle:on line.size.limit def rollup(first: Column, remaining: Column*): RelationalGroupedDataFrame = rollup(first +: remaining) - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] - * on the DataFrame. - * - * @group transform - * @since 0.2.0 - * @param cols - * A list of expressions on columns. - * @return - * A [[RelationalGroupedDataFrame]] - */ + // scalastyle:off line.size.limit + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] + * on the DataFrame. + * + * @group transform + * @since 0.2.0 + * @param cols + * A list of expressions on columns. + * @return + * A [[RelationalGroupedDataFrame]] + */ + // scalastyle:on line.size.limit def rollup[T: ClassTag](cols: Seq[Column]): RelationalGroupedDataFrame = - RelationalGroupedDataFrame(this, cols.map(_.expr), RelationalGroupedDataFrame.RollupType) - - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] - * on the DataFrame. - * - * @group transform - * @since 0.7.0 - * @param cols - * An array of expressions on columns. - * @return - * A [[RelationalGroupedDataFrame]] - */ + RelationalGroupedDataFrame(this, cols.map(_.expr).toSeq, RelationalGroupedDataFrame.RollupType) + + // scalastyle:off line.size.limit + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] + * on the DataFrame. + * + * @group transform + * @since 0.7.0 + * @param cols + * An array of expressions on columns. + * @return + * A [[RelationalGroupedDataFrame]] + */ + // scalastyle:on line.size.limit def rollup(cols: Array[Column]): RelationalGroupedDataFrame = rollup(cols.toSeq) - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] - * on the DataFrame. - * - * @group transform - * @since 0.1.0 - * @param first - * The name of the first column. - * @param remaining - * A list of the names of additional columns. - * @return - * A [[RelationalGroupedDataFrame]] - */ + // scalastyle:off line.size.limit + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] + * on the DataFrame. + * + * @group transform + * @since 0.1.0 + * @param first + * The name of the first column. + * @param remaining + * A list of the names of additional columns. + * @return + * A [[RelationalGroupedDataFrame]] + */ + // scalastyle:on line.size.limit def rollup(first: String, remaining: String*): RelationalGroupedDataFrame = rollup(first +: remaining) - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] - * on the DataFrame. - * - * @group transform - * @since 0.2.0 - * @param cols - * A list of column names. - * @return - * A [[RelationalGroupedDataFrame]] - */ + // scalastyle:off line.size.limit + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] + * on the DataFrame. + * + * @group transform + * @since 0.2.0 + * @param cols + * A list of column names. 
+ * @return + * A [[RelationalGroupedDataFrame]] + */ + // scalastyle:on line.size.limit def rollup(cols: Seq[String]): RelationalGroupedDataFrame = - RelationalGroupedDataFrame(this, cols.map(resolve), RelationalGroupedDataFrame.RollupType) - - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] - * on the DataFrame. - * - * @group transform - * @since 0.7.0 - * @param cols - * An array of column names. - * @return - * A [[RelationalGroupedDataFrame]] - */ + RelationalGroupedDataFrame(this, cols.map(resolve).toSeq, RelationalGroupedDataFrame.RollupType) + + // scalastyle:off line.size.limit + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY ROLLUP]] + * on the DataFrame. + * + * @group transform + * @since 0.7.0 + * @param cols + * An array of column names. + * @return + * A [[RelationalGroupedDataFrame]] + */ + // scalastyle:on line.size.limit def rollup(cols: Array[String]): RelationalGroupedDataFrame = rollup(cols.toSeq) - /** Groups rows by the columns specified by expressions (similar to GROUP BY in SQL). - * - * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations - * on each group of data. - * - * @group transform - * @since 0.1.0 - * @param first - * The expression for the first column to group by. - * @param remaining - * A list of expressions for additional columns to group by. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Groups rows by the columns specified by expressions (similar to GROUP BY in SQL). + * + * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations + * on each group of data. + * + * @group transform + * @since 0.1.0 + * @param first + * The expression for the first column to group by. + * @param remaining + * A list of expressions for additional columns to group by. + * @return + * A [[RelationalGroupedDataFrame]] + */ def groupBy(first: Column, remaining: Column*): RelationalGroupedDataFrame = groupBy(first +: remaining) - /** Returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations on the - * underlying DataFrame. - * - * @group transform - * @since 0.1.0 - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations on the + * underlying DataFrame. + * + * @group transform + * @since 0.1.0 + * @return + * A [[RelationalGroupedDataFrame]] + */ def groupBy(): RelationalGroupedDataFrame = groupBy(Seq.empty[Column]) - /** Groups rows by the columns specified by expressions (similar to GROUP BY in SQL). - * - * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations - * on each group of data. - * - * @group transform - * @since 0.2.0 - * @param cols - * A list of expressions on columns. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Groups rows by the columns specified by expressions (similar to GROUP BY in SQL). + * + * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations + * on each group of data. + * + * @group transform + * @since 0.2.0 + * @param cols + * A list of expressions on columns. 
+ * @return + * A [[RelationalGroupedDataFrame]] + */ def groupBy[T: ClassTag](cols: Seq[Column]): RelationalGroupedDataFrame = - RelationalGroupedDataFrame(this, cols.map(_.expr), RelationalGroupedDataFrame.GroupByType) - - /** Groups rows by the columns specified by expressions (similar to GROUP BY in SQL). - * - * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations - * on each group of data. - * - * @group transform - * @since 0.7.0 - * @param cols - * An array of expressions on columns. - * @return - * A [[RelationalGroupedDataFrame]] - */ + RelationalGroupedDataFrame(this, cols.map(_.expr).toSeq, RelationalGroupedDataFrame.GroupByType) + + /** + * Groups rows by the columns specified by expressions (similar to GROUP BY in SQL). + * + * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations + * on each group of data. + * + * @group transform + * @since 0.7.0 + * @param cols + * An array of expressions on columns. + * @return + * A [[RelationalGroupedDataFrame]] + */ def groupBy(cols: Array[Column]): RelationalGroupedDataFrame = groupBy(cols.toSeq) - /** Groups rows by the columns specified by name (similar to GROUP BY in SQL). - * - * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations - * on each group of data. - * - * @group transform - * @since 0.1.0 - * @param first - * The name of the first column to group by. - * @param remaining - * A list of the names of additional columns to group by. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Groups rows by the columns specified by name (similar to GROUP BY in SQL). + * + * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations + * on each group of data. + * + * @group transform + * @since 0.1.0 + * @param first + * The name of the first column to group by. + * @param remaining + * A list of the names of additional columns to group by. + * @return + * A [[RelationalGroupedDataFrame]] + */ def groupBy(first: String, remaining: String*): RelationalGroupedDataFrame = groupBy(first +: remaining) - /** Groups rows by the columns specified by name (similar to GROUP BY in SQL). - * - * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations - * on each group of data. - * - * @group transform - * @since 0.2.0 - * @param cols - * A list of the names of columns to group by. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Groups rows by the columns specified by name (similar to GROUP BY in SQL). + * + * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations + * on each group of data. + * + * @group transform + * @since 0.2.0 + * @param cols + * A list of the names of columns to group by. + * @return + * A [[RelationalGroupedDataFrame]] + */ def groupBy(cols: Seq[String]): RelationalGroupedDataFrame = - RelationalGroupedDataFrame(this, cols.map(resolve), RelationalGroupedDataFrame.GroupByType) - - /** Groups rows by the columns specified by name (similar to GROUP BY in SQL). - * - * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations - * on each group of data. - * - * @group transform - * @since 0.7.0 - * @param cols - * An array of the names of columns to group by. 
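Every `groupBy` overload returns a [[RelationalGroupedDataFrame]], so an aggregate call always follows; a sketch, assuming a hypothetical `sales` table with `region`, `product`, and `amount` columns:

{{{
import com.snowflake.snowpark.functions._

val sales = session.table("sales")

// Group by name or by column expression; both end in the same aggregate call.
val byName = sales.groupBy("region").agg(sum(col("amount")))
val byExpr = sales.groupBy(col("region"), col("product")).agg(max(col("amount")))
}}}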
- * @return - * A [[RelationalGroupedDataFrame]] - */ + RelationalGroupedDataFrame( + this, + cols.map(resolve).toSeq, + RelationalGroupedDataFrame.GroupByType) + + /** + * Groups rows by the columns specified by name (similar to GROUP BY in SQL). + * + * This method returns a [[RelationalGroupedDataFrame]] that you can use to perform aggregations + * on each group of data. + * + * @group transform + * @since 0.7.0 + * @param cols + * An array of the names of columns to group by. + * @return + * A [[RelationalGroupedDataFrame]] + */ def groupBy(cols: Array[String]): RelationalGroupedDataFrame = groupBy(cols.toSeq) // scalastyle:off line.size.limit - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY GROUPING SETS]] - * on the DataFrame. - * - * GROUP BY GROUPING SETS is an extension of the GROUP BY clause that allows computing multiple - * GROUP BY clauses in a single statement. The group set is a set of dimension columns. - * - * GROUP BY GROUPING SETS is equivalent to the UNION of two or more GROUP BY operations in the - * same result set: - * - * `df.groupByGroupingSets(GroupingSets(Set(col("a"))))` is equivalent to `df.groupBy("a")` - * - * and - * - * `df.groupByGroupingSets(GroupingSets(Set(col("a")), Set(col("b"))))` is equivalent to - * `df.groupBy("a")` union `df.groupBy("b")` - * - * @param first - * A [[GroupingSets]] object. - * @param remaining - * A list of additional [[GroupingSets]] objects. - * @since 0.4.0 - */ + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY GROUPING SETS]] + * on the DataFrame. + * + * GROUP BY GROUPING SETS is an extension of the GROUP BY clause that allows computing multiple + * GROUP BY clauses in a single statement. The group set is a set of dimension columns. + * + * GROUP BY GROUPING SETS is equivalent to the UNION of two or more GROUP BY operations in the + * same result set: + * + * `df.groupByGroupingSets(GroupingSets(Set(col("a"))))` is equivalent to `df.groupBy("a")` + * + * and + * + * `df.groupByGroupingSets(GroupingSets(Set(col("a")), Set(col("b"))))` is equivalent to + * `df.groupBy("a")` union `df.groupBy("b")` + * + * @param first + * A [[GroupingSets]] object. + * @param remaining + * A list of additional [[GroupingSets]] objects. + * @since 0.4.0 + */ // scalastyle:on line.size.limit def groupByGroupingSets( first: GroupingSets, @@ -1297,132 +1357,140 @@ class DataFrame private[snowpark] ( groupByGroupingSets(first +: remaining) // scalastyle:off line.size.limit - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY GROUPING SETS]] - * on the DataFrame. - * - * GROUP BY GROUPING SETS is an extension of the GROUP BY clause that allows computing multiple - * group-by clauses in a single statement. The group set is a set of dimension columns. - * - * GROUP BY GROUPING SETS is equivalent to the UNION of two or more GROUP BY operations in the - * same result set: - * - * `df.groupByGroupingSets(GroupingSets(Set(col("a"))))` is equivalent to `df.groupBy("a")` - * - * and - * - * `df.groupByGroupingSets(GroupingSets(Set(col("a")), Set(col("b"))))` is equivalent to - * `df.groupBy("a")` union `df.groupBy("b")` - * - * @param groupingSets - * A list of [[GroupingSets]] objects. - * @since 0.4.0 - */ + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY GROUPING SETS]] + * on the DataFrame. 
+ * + * GROUP BY GROUPING SETS is an extension of the GROUP BY clause that allows computing multiple + * group-by clauses in a single statement. The group set is a set of dimension columns. + * + * GROUP BY GROUPING SETS is equivalent to the UNION of two or more GROUP BY operations in the + * same result set: + * + * `df.groupByGroupingSets(GroupingSets(Set(col("a"))))` is equivalent to `df.groupBy("a")` + * + * and + * + * `df.groupByGroupingSets(GroupingSets(Set(col("a")), Set(col("b"))))` is equivalent to + * `df.groupBy("a")` union `df.groupBy("b")` + * + * @param groupingSets + * A list of [[GroupingSets]] objects. + * @since 0.4.0 + */ // scalastyle:on line.size.limit def groupByGroupingSets(groupingSets: Seq[GroupingSets]): RelationalGroupedDataFrame = RelationalGroupedDataFrame( this, - groupingSets.map(_.toExpression), + groupingSets.map(_.toExpression).toSeq, RelationalGroupedDataFrame.GroupByGroupingSetsType) - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] - * on the DataFrame. - * - * @group transform - * @since 0.1.0 - * @param first - * The expression for the first column to use. - * @param remaining - * A list of expressions for additional columns to use. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] + * on the DataFrame. + * + * @group transform + * @since 0.1.0 + * @param first + * The expression for the first column to use. + * @param remaining + * A list of expressions for additional columns to use. + * @return + * A [[RelationalGroupedDataFrame]] + */ def cube(first: Column, remaining: Column*): RelationalGroupedDataFrame = cube(first +: remaining) - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] - * on the DataFrame. - * - * @group transform - * @since 0.2.0 - * @param cols - * A list of expressions for columns to use. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] + * on the DataFrame. + * + * @group transform + * @since 0.2.0 + * @param cols + * A list of expressions for columns to use. + * @return + * A [[RelationalGroupedDataFrame]] + */ def cube[T: ClassTag](cols: Seq[Column]): RelationalGroupedDataFrame = - RelationalGroupedDataFrame(this, cols.map(_.expr), RelationalGroupedDataFrame.CubeType) - - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] - * on the DataFrame. - * - * @group transform - * @since 0.9.0 - * @param cols - * A list of expressions for columns to use. - * @return - * A [[RelationalGroupedDataFrame]] - */ + RelationalGroupedDataFrame(this, cols.map(_.expr).toSeq, RelationalGroupedDataFrame.CubeType) + + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] + * on the DataFrame. + * + * @group transform + * @since 0.9.0 + * @param cols + * A list of expressions for columns to use. + * @return + * A [[RelationalGroupedDataFrame]] + */ def cube(cols: Array[Column]): RelationalGroupedDataFrame = cube(cols.toSeq) - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] - * on the DataFrame. 
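`rollup`, `cube`, and `groupByGroupingSets` all produce the same kind of grouped DataFrame, differing only in which grouping combinations the server computes; a sketch against the hypothetical `sales` table, where the grouping-sets call is equivalent to the union of the two single-column group-bys:

{{{
import com.snowflake.snowpark.GroupingSets
import com.snowflake.snowpark.functions._

val sales = session.table("sales")

val rolledUp = sales.rollup("region", "product").agg(sum(col("amount")))
val cubed    = sales.cube("region", "product").agg(sum(col("amount")))

// Equivalent to sales.groupBy("region") union sales.groupBy("product").
val bySets = sales
  .groupByGroupingSets(GroupingSets(Set(col("region")), Set(col("product"))))
  .agg(sum(col("amount")))
}}}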
- * - * @group transform - * @since 0.1.0 - * @param first - * The name of the first column to use. - * @param remaining - * A list of the names of additional columns to use. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] + * on the DataFrame. + * + * @group transform + * @since 0.1.0 + * @param first + * The name of the first column to use. + * @param remaining + * A list of the names of additional columns to use. + * @return + * A [[RelationalGroupedDataFrame]] + */ def cube(first: String, remaining: String*): RelationalGroupedDataFrame = cube(first +: remaining) - /** Performs an SQL - * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] - * - * @group transform - * @since 0.2.0 - * @param cols - * A list of the names of columns to use. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Performs an SQL + * [[https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html GROUP BY CUBE]] + * + * @group transform + * @since 0.2.0 + * @param cols + * A list of the names of columns to use. + * @return + * A [[RelationalGroupedDataFrame]] + */ def cube(cols: Seq[String]): RelationalGroupedDataFrame = - RelationalGroupedDataFrame(this, cols.map(resolve), RelationalGroupedDataFrame.CubeType) - - /** Returns a new DataFrame that contains only the rows with distinct values from the current - * DataFrame. - * - * This is equivalent to performing a SELECT DISTINCT in SQL. - * - * @group transform - * @since 0.1.0 - * @return - * A [[DataFrame]] - */ + RelationalGroupedDataFrame(this, cols.map(resolve).toSeq, RelationalGroupedDataFrame.CubeType) + + /** + * Returns a new DataFrame that contains only the rows with distinct values from the current + * DataFrame. + * + * This is equivalent to performing a SELECT DISTINCT in SQL. + * + * @group transform + * @since 0.1.0 + * @return + * A [[DataFrame]] + */ def distinct(): DataFrame = transformation("distinct") { groupBy(output.map(att => quoteName(att.name)).map(this.col)).agg(Map.empty[Column, String]) } - /** Creates a new DataFrame by removing duplicated rows on given subset of columns. If no subset - * of columns specified, this function is same as [[distinct()]] function. The result is - * non-deterministic when removing duplicated rows from the subset of columns but not all - * columns. For example: Supposes we have a DataFrame `df`, which contains three rows (a, b, c): - * (1, 1, 1), (1, 1, 2), (1, 2, 3) The result of df.dropDuplicates("a", "b") can be either (1, 1, - * 1), (1, 2, 3) or (1, 1, 2), (1, 2, 3) - * - * @group transform - * @since 0.10.0 - * @return - * A [[DataFrame]] - */ + /** + * Creates a new DataFrame by removing duplicated rows on given subset of columns. If no subset of + * columns specified, this function is same as [[distinct()]] function. The result is + * non-deterministic when removing duplicated rows from the subset of columns but not all columns. 
+ * For example: Supposes we have a DataFrame `df`, which contains three rows (a, b, c): (1, 1, 1), + * (1, 1, 2), (1, 2, 3) The result of df.dropDuplicates("a", "b") can be either (1, 1, 1), (1, 2, + * 3) or (1, 1, 2), (1, 2, 3) + * + * @group transform + * @since 0.10.0 + * @return + * A [[DataFrame]] + */ def dropDuplicates(colNames: String*): DataFrame = transformation("dropDuplicates") { if (colNames.isEmpty) { this.distinct() @@ -1440,160 +1508,167 @@ class DataFrame private[snowpark] ( } } - /** Rotates this DataFrame by turning the unique values from one column in the input expression - * into multiple columns and aggregating results where required on any remaining column values. - * - * Only one aggregate is supported with pivot. - * - * For example: - * {{{ - * val dfPivoted = df.pivot("col_1", Seq(1,2,3)).agg(sum(col("col_2"))) - * }}} - * - * @group transform - * @since 0.1.0 - * @param pivotColumn - * The name of the column to use. - * @param values - * A list of values in the column. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Rotates this DataFrame by turning the unique values from one column in the input expression + * into multiple columns and aggregating results where required on any remaining column values. + * + * Only one aggregate is supported with pivot. + * + * For example: + * {{{ + * val dfPivoted = df.pivot("col_1", Seq(1,2,3)).agg(sum(col("col_2"))) + * }}} + * + * @group transform + * @since 0.1.0 + * @param pivotColumn + * The name of the column to use. + * @param values + * A list of values in the column. + * @return + * A [[RelationalGroupedDataFrame]] + */ def pivot(pivotColumn: String, values: Seq[Any]): RelationalGroupedDataFrame = pivot(Column(pivotColumn), values) - /** Rotates this DataFrame by turning the unique values from one column in the input expression - * into multiple columns and aggregating results where required on any remaining column values. - * - * Only one aggregate is supported with pivot. - * - * For example: - * {{{ - * val dfPivoted = df.pivot(col("col_1"), Seq(1,2,3)).agg(sum(col("col_2"))) - * }}} - * - * @group transform - * @since 0.1.0 - * @param pivotColumn - * Expression for the column that you want to use. - * @param values - * A list of values in the column. - * @return - * A [[RelationalGroupedDataFrame]] - */ + /** + * Rotates this DataFrame by turning the unique values from one column in the input expression + * into multiple columns and aggregating results where required on any remaining column values. + * + * Only one aggregate is supported with pivot. + * + * For example: + * {{{ + * val dfPivoted = df.pivot(col("col_1"), Seq(1,2,3)).agg(sum(col("col_2"))) + * }}} + * + * @group transform + * @since 0.1.0 + * @param pivotColumn + * Expression for the column that you want to use. + * @param values + * A list of values in the column. + * @return + * A [[RelationalGroupedDataFrame]] + */ def pivot(pivotColumn: Column, values: Seq[Any]): RelationalGroupedDataFrame = { val valueExprs = values.map { case c: Column => c.expr case v => Literal(v) - } + }.toSeq RelationalGroupedDataFrame( this, - Seq.empty, + Seq.empty.toSeq, RelationalGroupedDataFrame.PivotType(pivotColumn.expr, valueExprs)) } - /** Returns a new DataFrame that contains at most ''n'' rows from the current DataFrame (similar - * to LIMIT in SQL). - * - * Note that this is a transformation method and not an action method. - * - * @group transform - * @since 0.1.0 - * @param n - * Number of rows to return. 
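`dropDuplicates` with a column subset and `pivot` followed by a single aggregate are the two shapes documented above; a sketch, assuming a hypothetical `monthly_sales` table with `empid`, `month`, and `amount` columns:

{{{
import com.snowflake.snowpark.functions._

val monthly = session.table("monthly_sales")

// Keeps one arbitrary row per (empid, month); non-deterministic if other columns differ.
val deduped = monthly.dropDuplicates("empid", "month")

// Produces one output column per listed month, holding the summed amount.
val pivoted = monthly.pivot("month", Seq("JAN", "FEB", "MAR")).agg(sum(col("amount")))
}}}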
- * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that contains at most ''n'' rows from the current DataFrame (similar to + * LIMIT in SQL). + * + * Note that this is a transformation method and not an action method. + * + * @group transform + * @since 0.1.0 + * @param n + * Number of rows to return. + * @return + * A [[DataFrame]] + */ def limit(n: Int): DataFrame = transformation("limit") { withPlan(Limit(Literal(n), plan)) } - /** Returns a new DataFrame that contains all the rows in the current DataFrame and another - * DataFrame (`other`), excluding any duplicate rows. Both input DataFrames must contain the same - * number of columns. - * - * For example: - * - * {{{ - * val df1and2 = df1.union(df2) - * }}} - * - * @group transform - * @since 0.1.0 - * @param other - * The other [[DataFrame]] that contains the rows to include. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that contains all the rows in the current DataFrame and another + * DataFrame (`other`), excluding any duplicate rows. Both input DataFrames must contain the same + * number of columns. + * + * For example: + * + * {{{ + * val df1and2 = df1.union(df2) + * }}} + * + * @group transform + * @since 0.1.0 + * @param other + * The other [[DataFrame]] that contains the rows to include. + * @return + * A [[DataFrame]] + */ def union(other: DataFrame): DataFrame = transformation("union") { withPlan(Union(plan, other.plan)) } - /** Returns a new DataFrame that contains all the rows in the current DataFrame and another - * DataFrame (`other`), including any duplicate rows. Both input DataFrames must contain the same - * number of columns. - * - * For example: - * - * {{{ - * val df1and2 = df1.unionAll(df2) - * }}} - * - * @group transform - * @since 0.1.0 - * @param other - * The other [[DataFrame]] that contains the rows to include. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that contains all the rows in the current DataFrame and another + * DataFrame (`other`), including any duplicate rows. Both input DataFrames must contain the same + * number of columns. + * + * For example: + * + * {{{ + * val df1and2 = df1.unionAll(df2) + * }}} + * + * @group transform + * @since 0.1.0 + * @param other + * The other [[DataFrame]] that contains the rows to include. + * @return + * A [[DataFrame]] + */ def unionAll(other: DataFrame): DataFrame = transformation("unionAll") { withPlan(UnionAll(plan, other.plan)) } - /** Returns a new DataFrame that contains all the rows in the current DataFrame and another - * DataFrame (`other`), excluding any duplicate rows. - * - * This method matches the columns in the two DataFrames by their names, not by their positions. - * The columns in the other DataFrame are rearranged to match the order of columns in the current - * DataFrame. - * - * For example: - * - * {{{ - * val df1and2 = df1.unionByName(df2) - * }}} - * - * @group transform - * @since 0.1.0 - * @param other - * The other [[DataFrame]] that contains the rows to include. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that contains all the rows in the current DataFrame and another + * DataFrame (`other`), excluding any duplicate rows. + * + * This method matches the columns in the two DataFrames by their names, not by their positions. + * The columns in the other DataFrame are rearranged to match the order of columns in the current + * DataFrame. 
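The union family differs only in whether duplicate rows are kept and whether columns are matched by position or by name; a compact sketch with two hypothetical DataFrames `df1` and `df2` that share a schema:

{{{
// df1 and df2 are assumed to have been built earlier with identical columns.
val distinctByPosition = df1.union(df2)          // match by position, drop duplicates
val allByPosition      = df1.unionAll(df2)       // match by position, keep duplicates
val distinctByName     = df1.unionByName(df2)    // match by name, drop duplicates
val allByName          = df1.unionAllByName(df2) // match by name, keep duplicates
}}}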
+ * + * For example: + * + * {{{ + * val df1and2 = df1.unionByName(df2) + * }}} + * + * @group transform + * @since 0.1.0 + * @param other + * The other [[DataFrame]] that contains the rows to include. + * @return + * A [[DataFrame]] + */ def unionByName(other: DataFrame): DataFrame = transformation("unionByName") { internalUnionByName(other, isAll = false) } - /** Returns a new DataFrame that contains all the rows in the current DataFrame and another - * DataFrame (`other`), including any duplicate rows. - * - * This method matches the columns in the two DataFrames by their names, not by their positions. - * The columns in the other DataFrame are rearranged to match the order of columns in the current - * DataFrame. - * - * For example: - * - * {{{ - * val df1and2 = df1.unionAllByName(df2) - * }}} - * - * @group transform - * @since 0.9.0 - * @param other - * The other [[DataFrame]] that contains the rows to include. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that contains all the rows in the current DataFrame and another + * DataFrame (`other`), including any duplicate rows. + * + * This method matches the columns in the two DataFrames by their names, not by their positions. + * The columns in the other DataFrame are rearranged to match the order of columns in the current + * DataFrame. + * + * For example: + * + * {{{ + * val df1and2 = df1.unionAllByName(df2) + * }}} + * + * @group transform + * @since 0.9.0 + * @param other + * The other [[DataFrame]] that contains the rows to include. + * @return + * A [[DataFrame]] + */ def unionAllByName(other: DataFrame): DataFrame = transformation("unionAllByName") { internalUnionByName(other, isAll = true) } @@ -1632,148 +1707,154 @@ class DataFrame private[snowpark] ( } } - /** Returns a new DataFrame that contains the intersection of rows from the current DataFrame and - * another DataFrame (`other`). Duplicate rows are eliminated. - * - * For example: - * - * {{{ - * val dfIntersectionOf1and2 = df1.intersect(df2) - * }}} - * - * @group transform - * @since 0.1.0 - * @param other - * The other [[DataFrame]] that contains the rows to use for the intersection. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that contains the intersection of rows from the current DataFrame and + * another DataFrame (`other`). Duplicate rows are eliminated. + * + * For example: + * + * {{{ + * val dfIntersectionOf1and2 = df1.intersect(df2) + * }}} + * + * @group transform + * @since 0.1.0 + * @param other + * The other [[DataFrame]] that contains the rows to use for the intersection. + * @return + * A [[DataFrame]] + */ def intersect(other: DataFrame): DataFrame = transformation("intersect") { withPlan(Intersect(plan, other.plan)) } - /** Returns a new DataFrame that contains all the rows from the current DataFrame except for the - * rows that also appear in another DataFrame (`other`). Duplicate rows are eliminated. - * - * For example: - * - * {{{ - * val df1except2 = df1.except(df2) - * }}} - * - * @group transform - * @since 0.1.0 - * @param other - * The [[DataFrame]] that contains the rows to exclude. - * @return - * A [[DataFrame]] - */ + /** + * Returns a new DataFrame that contains all the rows from the current DataFrame except for the + * rows that also appear in another DataFrame (`other`). Duplicate rows are eliminated. 
+ * + * For example: + * + * {{{ + * val df1except2 = df1.except(df2) + * }}} + * + * @group transform + * @since 0.1.0 + * @param other + * The [[DataFrame]] that contains the rows to exclude. + * @return + * A [[DataFrame]] + */ def except(other: DataFrame): DataFrame = transformation("except") { withPlan(Except(plan, other.plan)) } - /** Performs a default inner join of the current DataFrame and another DataFrame (`right`). - * - * Because this method does not specify a join condition, the returned DataFrame is a cartesian - * product of the two DataFrames. - * - * If the current and `right` DataFrames have columns with the same name, and you need to refer - * to one of these columns in the returned DataFrame, use the [[apply]] or [[col]] function on - * the current or `right` DataFrame to disambiguate references to these columns. - * - * For example: - * - * {{{ - * val result = left.join(right) - * val project = result.select(left("common_col") + right("common_col")) - * }}} - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @return - * A [[DataFrame]] - */ + /** + * Performs a default inner join of the current DataFrame and another DataFrame (`right`). + * + * Because this method does not specify a join condition, the returned DataFrame is a cartesian + * product of the two DataFrames. + * + * If the current and `right` DataFrames have columns with the same name, and you need to refer to + * one of these columns in the returned DataFrame, use the [[apply]] or [[col]] function on the + * current or `right` DataFrame to disambiguate references to these columns. + * + * For example: + * + * {{{ + * val result = left.join(right) + * val project = result.select(left("common_col") + right("common_col")) + * }}} + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. + * @return + * A [[DataFrame]] + */ def join(right: DataFrame): DataFrame = transformation("join") { join(right, Seq.empty) } - /** Performs a default inner join of the current DataFrame and another DataFrame (`right`) on a - * column (`usingColumn`). - * - * The method assumes that the `usingColumn` column has the same meaning in the left and right - * DataFrames. - * - * For example: - * - * {{{ - * val result = left.join(right, "a") - * }}} - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @param usingColumn - * The name of the column to use for the join. - * @return - * A [[DataFrame]] - */ + /** + * Performs a default inner join of the current DataFrame and another DataFrame (`right`) on a + * column (`usingColumn`). + * + * The method assumes that the `usingColumn` column has the same meaning in the left and right + * DataFrames. + * + * For example: + * + * {{{ + * val result = left.join(right, "a") + * }}} + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. + * @param usingColumn + * The name of the column to use for the join. + * @return + * A [[DataFrame]] + */ def join(right: DataFrame, usingColumn: String): DataFrame = transformation("join") { join(right, Seq(usingColumn)) } - /** Performs a default inner join of the current DataFrame and another DataFrame (`right`) on a - * list of columns (`usingColumns`). - * - * The method assumes that the columns in `usingColumns` have the same meaning in the left and - * right DataFrames. 
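`intersect` and `except` are set operations over whole rows, while the condition-less `join` above is a cartesian product and the single-column form is an inner join; a brief sketch reusing the hypothetical `df1` and `df2`:

{{{
val common   = df1.intersect(df2) // rows present in both inputs, de-duplicated
val leftOnly = df1.except(df2)    // rows of df1 that never appear in df2

// Cartesian product, then an inner join on a shared (hypothetical) column "id".
val crossed = df1.join(df2)
val onId    = df1.join(df2, "id")
}}}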
- * - * For example: - * - * {{{ - * val dfJoinOnColA = df.join(df2, Seq("a")) - * val dfJoinOnColAAndColB = df.join(df2, Seq("a", "b")) - * }}} - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @param usingColumns - * A list of the names of the columns to use for the join. - * @return - * A [[DataFrame]] - */ + /** + * Performs a default inner join of the current DataFrame and another DataFrame (`right`) on a + * list of columns (`usingColumns`). + * + * The method assumes that the columns in `usingColumns` have the same meaning in the left and + * right DataFrames. + * + * For example: + * + * {{{ + * val dfJoinOnColA = df.join(df2, Seq("a")) + * val dfJoinOnColAAndColB = df.join(df2, Seq("a", "b")) + * }}} + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. + * @param usingColumns + * A list of the names of the columns to use for the join. + * @return + * A [[DataFrame]] + */ def join(right: DataFrame, usingColumns: Seq[String]): DataFrame = transformation("join") { join(right, usingColumns, "inner") } - /** Performs a join of the specified type (`joinType`) with the current DataFrame and another - * DataFrame (`right`) on a list of columns (`usingColumns`). - * - * The method assumes that the columns in `usingColumns` have the same meaning in the left and - * right DataFrames. - * - * For example: - * - * {{{ - * val dfLeftJoin = df1.join(df2, Seq("a"), "left") - * val dfOuterJoin = df1.join(df2, Seq("a", "b"), "outer") - * }}} - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @param usingColumns - * A list of the names of the columns to use for the join. - * @param joinType - * The type of join (e.g. {@code "right"} , {@code "outer"} , etc.). - * @return - * A [[DataFrame]] - */ + /** + * Performs a join of the specified type (`joinType`) with the current DataFrame and another + * DataFrame (`right`) on a list of columns (`usingColumns`). + * + * The method assumes that the columns in `usingColumns` have the same meaning in the left and + * right DataFrames. + * + * For example: + * + * {{{ + * val dfLeftJoin = df1.join(df2, Seq("a"), "left") + * val dfOuterJoin = df1.join(df2, Seq("a", "b"), "outer") + * }}} + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. + * @param usingColumns + * A list of the names of the columns to use for the join. + * @param joinType + * The type of join (e.g. {@code "right"} , {@code "outer"} , etc.). + * @return + * A [[DataFrame]] + */ def join(right: DataFrame, usingColumns: Seq[String], joinType: String): DataFrame = transformation("join") { val jType = JoinType(joinType) @@ -1786,98 +1867,100 @@ class DataFrame private[snowpark] ( } else { val (lhs, rhs) = disambiguate(this, right, jType, usingColumns) withPlan { - Join(lhs.plan, rhs.plan, UsingJoin(jType, usingColumns), None) + Join(lhs.plan, rhs.plan, UsingJoin(jType, usingColumns.toSeq), None) } } } // scalastyle:off line.size.limit - /** Performs a default inner join of the current DataFrame and another DataFrame (`right`) using - * the join condition specified in an expression (`joinExpr`). - * - * To disambiguate columns with the same name in the left DataFrame and right DataFrame, use the - * [[apply]] or [[col]] method of each DataFrame (`df("col")` or `df.col("col")`). You can use - * this approach to disambiguate columns in the `joinExprs` parameter and to refer to columns in - * the returned DataFrame. 
- * - * For example: - * - * {{{ - * val dfJoin = df1.join(df2, df1("a") === df2("b")) - * val dfJoin2 = df1.join(df2, df1("a") === df2("b") && df1("c" === df2("d")) - * val dfJoin3 = df1.join(df2, df1("a") === df2("a") && df1("b" === df2("b")) - * // If both df1 and df2 contain column 'c' - * val project = dfJoin3.select(df1("c") + df2("c")) - * }}} - * - * If you need to join a DataFrame with itself, keep in mind that there is no way to distinguish - * between columns on the left and right sides in a join expression. For example: - * {{{ - * val dfJoined = df.join(df, df("a") === df("b")) // Column references are ambiguous - * }}} - * As a workaround, you can either construct the left and right DataFrames separately, or you can - * call a - * [[join(right:com\.snowflake\.snowpark\.DataFrame,usingColumns:Seq[String]):com\.snowflake\.snowpark\.DataFrame* join]] - * method that allows you to pass in 'usingColumns' parameter. - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @param joinExprs - * Expression that specifies the join condition. - * @return - * A [[DataFrame]] - */ + /** + * Performs a default inner join of the current DataFrame and another DataFrame (`right`) using + * the join condition specified in an expression (`joinExpr`). + * + * To disambiguate columns with the same name in the left DataFrame and right DataFrame, use the + * [[apply]] or [[col]] method of each DataFrame (`df("col")` or `df.col("col")`). You can use + * this approach to disambiguate columns in the `joinExprs` parameter and to refer to columns in + * the returned DataFrame. + * + * For example: + * + * {{{ + * val dfJoin = df1.join(df2, df1("a") === df2("b")) + * val dfJoin2 = df1.join(df2, df1("a") === df2("b") && df1("c" === df2("d")) + * val dfJoin3 = df1.join(df2, df1("a") === df2("a") && df1("b" === df2("b")) + * // If both df1 and df2 contain column 'c' + * val project = dfJoin3.select(df1("c") + df2("c")) + * }}} + * + * If you need to join a DataFrame with itself, keep in mind that there is no way to distinguish + * between columns on the left and right sides in a join expression. For example: + * {{{ + * val dfJoined = df.join(df, df("a") === df("b")) // Column references are ambiguous + * }}} + * As a workaround, you can either construct the left and right DataFrames separately, or you can + * call a + * [[join(right:com\.snowflake\.snowpark\.DataFrame,usingColumns:Seq[String]):com\.snowflake\.snowpark\.DataFrame* join]] + * method that allows you to pass in 'usingColumns' parameter. + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. + * @param joinExprs + * Expression that specifies the join condition. + * @return + * A [[DataFrame]] + */ // scalastyle:on line.size.limit def join(right: DataFrame, joinExprs: Column): DataFrame = transformation("join") { join(right, joinExprs, "inner") } // scalastyle:off line.size.limit - /** Performs a join of the specified type (`joinType`) with the current DataFrame and another - * DataFrame (`right`) using the join condition specified in an expression (`joinExpr`). - * - * To disambiguate columns with the same name in the left DataFrame and right DataFrame, use the - * [[apply]] or [[col]] method of each DataFrame (`df("col")` or `df.col("col")`). You can use - * this approach to disambiguate columns in the `joinExprs` parameter and to refer to columns in - * the returned DataFrame. 
- * - * For example: - * - * {{{ - * val dfJoin = df1.join(df2, df1("a") === df2("b"), "left") - * val dfJoin2 = df1.join(df2, df1("a") === df2("b") && df1("c" === df2("d"), "outer") - * val dfJoin3 = df1.join(df2, df1("a") === df2("a") && df1("b" === df2("b"), "outer") - * // If both df1 and df2 contain column 'c' - * val project = dfJoin3.select(df1("c") + df2("c")) - * }}} - * - * If you need to join a DataFrame with itself, keep in mind that there is no way to distinguish - * between columns on the left and right sides in a join expression. For example: - * {{{ - * val dfJoined = df.join(df, df("a") === df("b"), joinType) // Column references are ambiguous - * }}} - * To do a self-join, you can you either clone([[clone]]) the DataFrame as follows, - * {{{ - * val clonedDf = df.clone - * val dfJoined = df.join(clonedDf, df("a") === clonedDf("b"), joinType) - * }}} - * or you can call a - * [[join(right:com\.snowflake\.snowpark\.DataFrame,usingColumns:Seq[String],joinType:String):com\.snowflake\.snowpark\.DataFrame* join]] - * method that allows you to pass in 'usingColumns' parameter. - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @param joinExprs - * Expression that specifies the join condition. - * @param joinType - * The type of join (e.g. {@code "right"} , {@code "outer"} , etc.). - * @return - * A [[DataFrame]] - */ + /** + * Performs a join of the specified type (`joinType`) with the current DataFrame and another + * DataFrame (`right`) using the join condition specified in an expression (`joinExpr`). + * + * To disambiguate columns with the same name in the left DataFrame and right DataFrame, use the + * [[apply]] or [[col]] method of each DataFrame (`df("col")` or `df.col("col")`). You can use + * this approach to disambiguate columns in the `joinExprs` parameter and to refer to columns in + * the returned DataFrame. + * + * For example: + * + * {{{ + * val dfJoin = df1.join(df2, df1("a") === df2("b"), "left") + * val dfJoin2 = df1.join(df2, df1("a") === df2("b") && df1("c" === df2("d"), "outer") + * val dfJoin3 = df1.join(df2, df1("a") === df2("a") && df1("b" === df2("b"), "outer") + * // If both df1 and df2 contain column 'c' + * val project = dfJoin3.select(df1("c") + df2("c")) + * }}} + * + * If you need to join a DataFrame with itself, keep in mind that there is no way to distinguish + * between columns on the left and right sides in a join expression. For example: + * {{{ + * val dfJoined = df.join(df, df("a") === df("b"), joinType) // Column references are ambiguous + * }}} + * To do a self-join, you can you either clone([[clone]]) the DataFrame as follows, + * {{{ + * val clonedDf = df.clone + * val dfJoined = df.join(clonedDf, df("a") === clonedDf("b"), joinType) + * }}} + * or you can call a + * [[join(right:com\.snowflake\.snowpark\.DataFrame,usingColumns:Seq[String],joinType:String):com\.snowflake\.snowpark\.DataFrame* join]] + * method that allows you to pass in 'usingColumns' parameter. + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. + * @param joinExprs + * Expression that specifies the join condition. + * @param joinType + * The type of join (e.g. {@code "right"} , {@code "outer"} , etc.). 
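Expression joins are where same-named columns become ambiguous, which is why the documentation above routes disambiguation through `df("col")` and recommends `clone` for self-joins; a sketch, assuming hypothetical DataFrames `orders` (with an `id` column) and `customers` (with `id` and `name` columns):

{{{
// Qualify same-named columns through the DataFrame that owns them.
val joined = orders.join(customers, orders("id") === customers("id"), "left")
val picked = joined.select(customers("name"), orders("id"))

// Self-join via clone, keeping the two sides distinguishable.
val ordersCopy = orders.clone
val selfJoined = orders.join(ordersCopy, orders("id") === ordersCopy("id"), "inner")
}}}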
+ * @return + * A [[DataFrame]] + */ // scalastyle:on line.size.limit def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = transformation("join") { @@ -1887,180 +1970,189 @@ class DataFrame private[snowpark] ( join(right, JoinType(joinType), Some(joinExprs)) } - /** Joins the current DataFrame with the output of the specified table function `func`. - * - * To pass arguments to the table function, use the `firstArg` and `remaining` arguments of this - * method. In the table function arguments, you can include references to columns in this - * DataFrame. - * - * For example: - * {{{ - * // The following example uses the split_to_table function to split - * // column 'a' in this DataFrame on the character ','. - * // Each row in the current DataFrame will produce N rows in the resulting DataFrame, - * // where N is the number of tokens in the column 'a'. - * - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join(split_to_table, df("a"), lit(",")) - * }}} - * - * @group transform - * @since 0.4.0 - * @param func - * [[TableFunction]] object, which can be one of the values in the [[tableFunctions]] object or - * an object that you create from the [[TableFunction]] class. - * @param firstArg - * The first argument to pass to the specified table function. - * @param remaining - * A list of any additional arguments for the specified table function. - */ + /** + * Joins the current DataFrame with the output of the specified table function `func`. + * + * To pass arguments to the table function, use the `firstArg` and `remaining` arguments of this + * method. In the table function arguments, you can include references to columns in this + * DataFrame. + * + * For example: + * {{{ + * // The following example uses the split_to_table function to split + * // column 'a' in this DataFrame on the character ','. + * // Each row in the current DataFrame will produce N rows in the resulting DataFrame, + * // where N is the number of tokens in the column 'a'. + * + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join(split_to_table, df("a"), lit(",")) + * }}} + * + * @group transform + * @since 0.4.0 + * @param func + * [[TableFunction]] object, which can be one of the values in the [[tableFunctions]] object or + * an object that you create from the [[TableFunction]] class. + * @param firstArg + * The first argument to pass to the specified table function. + * @param remaining + * A list of any additional arguments for the specified table function. + */ def join(func: TableFunction, firstArg: Column, remaining: Column*): DataFrame = transformation("join") { join(func, firstArg +: remaining) } - /** Joins the current DataFrame with the output of the specified table function `func`. - * - * To pass arguments to the table function, use the `args` argument of this method. In the table - * function arguments, you can include references to columns in this DataFrame. - * - * For example: - * {{{ - * // The following example uses the split_to_table function to split - * // column 'a' in this DataFrame on the character ','. - * // Each row in this DataFrame will produce N rows in the resulting DataFrame, - * // where N is the number of tokens in the column 'a'. 
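// Editor's illustration (not part of this patch): selecting the join type by name, as in
// the overload above. `df1`, `df2`, and the column "id" are hypothetical.
val leftJoined  = df1.join(df2, df1("id") === df2("id"), "left")
val outerJoined = df1.join(df2, df1("id") === df2("id"), "outer")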
- * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join(split_to_table, Seq(df("a"), lit(","))) - * }}} - * - * @group transform - * @since 0.4.0 - * @param func - * [[TableFunction]] object, which can be one of the values in the [[tableFunctions]] object or - * an object that you create from the [[TableFunction]] class. - * @param args - * A list of arguments to pass to the specified table function. - */ + /** + * Joins the current DataFrame with the output of the specified table function `func`. + * + * To pass arguments to the table function, use the `args` argument of this method. In the table + * function arguments, you can include references to columns in this DataFrame. + * + * For example: + * {{{ + * // The following example uses the split_to_table function to split + * // column 'a' in this DataFrame on the character ','. + * // Each row in this DataFrame will produce N rows in the resulting DataFrame, + * // where N is the number of tokens in the column 'a'. + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join(split_to_table, Seq(df("a"), lit(","))) + * }}} + * + * @group transform + * @since 0.4.0 + * @param func + * [[TableFunction]] object, which can be one of the values in the [[tableFunctions]] object or + * an object that you create from the [[TableFunction]] class. + * @param args + * A list of arguments to pass to the specified table function. + */ def join(func: TableFunction, args: Seq[Column]): DataFrame = transformation("join") { - joinTableFunction(func.call(args: _*), None) - } - - /** Joins the current DataFrame with the output of the specified user-defined table function - * (UDTF) `func`. - * - * To pass arguments to the table function, use the `args` argument of this method. In the table - * function arguments, you can include references to columns in this DataFrame. - * - * To specify a PARTITION BY or ORDER BY clause, use the `partitionBy` and `orderBy` arguments. - * - * For example: - * {{{ - * // The following example passes the values in the column `col1` to the - * // user-defined tabular function (UDTF) `udtf`, partitioning the - * // data by `col2` and sorting the data by `col1`. The example returns - * // a new DataFrame that joins the contents of the current DataFrame with - * // the output of the UDTF. - * df.join(TableFunction("udtf"), Seq(df("col1")), Seq(df("col2")), Seq(df("col1"))) - * }}} - * - * @group transform - * @since 1.7.0 - * @param func - * [[TableFunction]] object that represents a user-defined table function (UDTF). - * @param args - * A list of arguments to pass to the specified table function. - * @param partitionBy - * A list of columns partitioned by. - * @param orderBy - * A list of columns ordered by. - */ + joinTableFunction(func.call(args.toSeq: _*), None) + } + + /** + * Joins the current DataFrame with the output of the specified user-defined table function (UDTF) + * `func`. + * + * To pass arguments to the table function, use the `args` argument of this method. In the table + * function arguments, you can include references to columns in this DataFrame. + * + * To specify a PARTITION BY or ORDER BY clause, use the `partitionBy` and `orderBy` arguments. + * + * For example: + * {{{ + * // The following example passes the values in the column `col1` to the + * // user-defined tabular function (UDTF) `udtf`, partitioning the + * // data by `col2` and sorting the data by `col1`. 
The example returns + * // a new DataFrame that joins the contents of the current DataFrame with + * // the output of the UDTF. + * df.join(TableFunction("udtf"), Seq(df("col1")), Seq(df("col2")), Seq(df("col1"))) + * }}} + * + * @group transform + * @since 1.7.0 + * @param func + * [[TableFunction]] object that represents a user-defined table function (UDTF). + * @param args + * A list of arguments to pass to the specified table function. + * @param partitionBy + * A list of columns partitioned by. + * @param orderBy + * A list of columns ordered by. + */ def join( func: TableFunction, args: Seq[Column], partitionBy: Seq[Column], orderBy: Seq[Column]): DataFrame = transformation("join") { joinTableFunction( - func.call(args: _*), - Some(Window.partitionBy(partitionBy: _*).orderBy(orderBy: _*).getWindowSpecDefinition)) - } - - /** Joins the current DataFrame with the output of the specified table function `func` that takes - * named parameters (e.g. `flatten`). - * - * To pass arguments to the table function, use the `args` argument of this method. Pass in a - * `Map` of parameter names and values. In these values, you can include references to columns in - * this DataFrame. - * - * For example: - * {{{ - * // The following example uses the flatten function to explode compound values from - * // column 'a' in this DataFrame into multiple columns. - * - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join( - * tableFunction("flatten"), - * Map("input" -> parse_json(df("a"))) - * ) - * }}} - * - * @group transform - * @since 0.4.0 - * @param func - * [[TableFunction]] object, which can be one of the values in the [[tableFunctions]] object or - * an object that you create from the [[TableFunction]] class. - * @param args - * Map of arguments to pass to the specified table function. Some functions, like `flatten`, - * have named parameters. Use this map to specify the parameter names and their corresponding - * values. - */ + func.call(args.toSeq: _*), + Some( + Window + .partitionBy(partitionBy.toSeq: _*) + .orderBy(orderBy.toSeq: _*) + .getWindowSpecDefinition)) + } + + /** + * Joins the current DataFrame with the output of the specified table function `func` that takes + * named parameters (e.g. `flatten`). + * + * To pass arguments to the table function, use the `args` argument of this method. Pass in a + * `Map` of parameter names and values. In these values, you can include references to columns in + * this DataFrame. + * + * For example: + * {{{ + * // The following example uses the flatten function to explode compound values from + * // column 'a' in this DataFrame into multiple columns. + * + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join( + * tableFunction("flatten"), + * Map("input" -> parse_json(df("a"))) + * ) + * }}} + * + * @group transform + * @since 0.4.0 + * @param func + * [[TableFunction]] object, which can be one of the values in the [[tableFunctions]] object or + * an object that you create from the [[TableFunction]] class. + * @param args + * Map of arguments to pass to the specified table function. Some functions, like `flatten`, + * have named parameters. Use this map to specify the parameter names and their corresponding + * values. 
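// Editor's note on the explicit `.toSeq` conversions added throughout this diff
// (illustrative sketch, not part of the patch): on Scala 2.13, `scala.Seq` is
// `scala.collection.immutable.Seq`, so values backed by mutable collections (an
// ArrayBuffer, or the Buffer returned by `.asScala`) no longer satisfy a `Seq`
// parameter or a varargs expansion without conversion; on 2.12 the extra call is
// effectively a no-op.
import scala.collection.mutable.ArrayBuffer

def takesVarargs(xs: String*): Int = xs.length

val buf = ArrayBuffer("a", "b", "c")
takesVarargs(buf.toSeq: _*) // compiles on both 2.12 and 2.13
// takesVarargs(buf: _*)    // fails on 2.13: ArrayBuffer is not an immutable.Seq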
+ */ def join(func: TableFunction, args: Map[String, Column]): DataFrame = transformation("join") { joinTableFunction(func.call(args), None) } - /** Joins the current DataFrame with the output of the specified user-defined table function - * (UDTF) `func`. - * - * To pass arguments to the table function, use the `args` argument of this method. Pass in a - * `Map` of parameter names and values. In these values, you can include references to columns in - * this DataFrame. - * - * To specify a PARTITION BY or ORDER BY clause, use the `partitionBy` and `orderBy` arguments. - * - * For example: - * {{{ - * // The following example passes the values in the column `col1` to the - * // user-defined tabular function (UDTF) `udtf`, partitioning the - * // data by `col2` and sorting the data by `col1`. The example returns - * // a new DataFrame that joins the contents of the current DataFrame with - * // the output of the UDTF. - * df.join( - * tableFunction("udtf"), - * Map("arg1" -> df("col1"), - * Seq(df("col2")), Seq(df("col1"))) - * ) - * }}} - * - * @group transform - * @since 1.7.0 - * @param func - * [[TableFunction]] object that represents a user-defined table function (UDTF). - * @param args - * Map of arguments to pass to the specified table function. Some functions, like `flatten`, - * have named parameters. Use this map to specify the parameter names and their corresponding - * values. - * @param partitionBy - * A list of columns partitioned by. - * @param orderBy - * A list of columns ordered by. - */ + /** + * Joins the current DataFrame with the output of the specified user-defined table function (UDTF) + * `func`. + * + * To pass arguments to the table function, use the `args` argument of this method. Pass in a + * `Map` of parameter names and values. In these values, you can include references to columns in + * this DataFrame. + * + * To specify a PARTITION BY or ORDER BY clause, use the `partitionBy` and `orderBy` arguments. + * + * For example: + * {{{ + * // The following example passes the values in the column `col1` to the + * // user-defined tabular function (UDTF) `udtf`, partitioning the + * // data by `col2` and sorting the data by `col1`. The example returns + * // a new DataFrame that joins the contents of the current DataFrame with + * // the output of the UDTF. + * df.join( + * tableFunction("udtf"), + * Map("arg1" -> df("col1"), + * Seq(df("col2")), Seq(df("col1"))) + * ) + * }}} + * + * @group transform + * @since 1.7.0 + * @param func + * [[TableFunction]] object that represents a user-defined table function (UDTF). + * @param args + * Map of arguments to pass to the specified table function. Some functions, like `flatten`, + * have named parameters. Use this map to specify the parameter names and their corresponding + * values. + * @param partitionBy + * A list of columns partitioned by. + * @param orderBy + * A list of columns ordered by. + */ def join( func: TableFunction, args: Map[String, Column], @@ -2068,59 +2160,69 @@ class DataFrame private[snowpark] ( orderBy: Seq[Column]): DataFrame = transformation("join") { joinTableFunction( func.call(args), - Some(Window.partitionBy(partitionBy: _*).orderBy(orderBy: _*).getWindowSpecDefinition)) - } - - /** Joins the current DataFrame with the output of the specified table function `func`. - * - * For example: - * {{{ - * // The following example uses the flatten function to explode compound values from - * // column 'a' in this DataFrame into multiple columns. 
- * - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join( - * tableFunctions.flatten(parse_json(df("a"))) - * ) - * }}} - * - * @group transform - * @since 1.10.0 - * @param func - * [[TableFunction]] object, which can be one of the values in the [[tableFunctions]] object or - * an object that you create from the [[TableFunction.apply()]]. - */ + Some( + Window + .partitionBy(partitionBy.toSeq: _*) + .orderBy(orderBy.toSeq: _*) + .getWindowSpecDefinition)) + } + + /** + * Joins the current DataFrame with the output of the specified table function `func`. + * + * For example: + * {{{ + * // The following example uses the flatten function to explode compound values from + * // column 'a' in this DataFrame into multiple columns. + * + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join( + * tableFunctions.flatten(parse_json(df("a"))) + * ) + * }}} + * + * @group transform + * @since 1.10.0 + * @param func + * [[TableFunctionEx]] object, which can be one of the values in the [[tableFunctions]] object + * or an object that you create from the [[TableFunctionEx.apply()]]. + */ def join(func: Column): DataFrame = transformation("join") { joinTableFunction(getTableFunctionExpression(func), None) } - /** Joins the current DataFrame with the output of the specified user-defined table function - * (UDTF) `func`. - * - * To specify a PARTITION BY or ORDER BY clause, use the `partitionBy` and `orderBy` arguments. - * - * For example: - * {{{ - * val tf = session.udtf.registerTemporary(TableFunc1) - * df.join(tf(Map("arg1" -> df("col1")),Seq(df("col2")), Seq(df("col1")))) - * }}} - * - * @group transform - * @since 1.10.0 - * @param func - * [[TableFunction]] object that represents a user-defined table function. - * @param partitionBy - * A list of columns partitioned by. - * @param orderBy - * A list of columns ordered by. - */ + /** + * Joins the current DataFrame with the output of the specified user-defined table function (UDTF) + * `func`. + * + * To specify a PARTITION BY or ORDER BY clause, use the `partitionBy` and `orderBy` arguments. + * + * For example: + * {{{ + * val tf = session.udtf.registerTemporary(TableFunc1) + * df.join(tf(Map("arg1" -> df("col1")),Seq(df("col2")), Seq(df("col1")))) + * }}} + * + * @group transform + * @since 1.10.0 + * @param func + * [[TableFunction]] object that represents a user-defined table function. + * @param partitionBy + * A list of columns partitioned by. + * @param orderBy + * A list of columns ordered by. 
+ */ def join(func: Column, partitionBy: Seq[Column], orderBy: Seq[Column]): DataFrame = transformation("join") { joinTableFunction( getTableFunctionExpression(func), - Some(Window.partitionBy(partitionBy: _*).orderBy(orderBy: _*).getWindowSpecDefinition)) + Some( + Window + .partitionBy(partitionBy.toSeq: _*) + .orderBy(orderBy.toSeq: _*) + .getWindowSpecDefinition)) } private def joinTableFunction( @@ -2128,7 +2230,7 @@ class DataFrame private[snowpark] ( partitionByOrderBy: Option[WindowSpecDefinition]): DataFrame = { func match { // explode is a client side function - case TF(funcName, args) if funcName.toLowerCase().trim.equals("explode") => + case TableFunctionEx(funcName, args) if funcName.toLowerCase().trim.equals("explode") => // explode has only one argument joinWithExplode(args.head, partitionByOrderBy) case _ => @@ -2172,27 +2274,28 @@ class DataFrame private[snowpark] ( } } - /** Performs a cross join, which returns the cartesian product of the current DataFrame and - * another DataFrame (`right`). - * - * If the current and `right` DataFrames have columns with the same name, and you need to refer - * to one of these columns in the returned DataFrame, use the [[apply]] or [[col]] function on - * the current or `right` DataFrame to disambiguate references to these columns. - * - * For example: - * - * {{{ - * val dfCrossJoin = left.crossJoin(right) - * val project = dfCrossJoin.select(left("common_col") + right("common_col")) - * }}} - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @return - * A [[DataFrame]] - */ + /** + * Performs a cross join, which returns the cartesian product of the current DataFrame and another + * DataFrame (`right`). + * + * If the current and `right` DataFrames have columns with the same name, and you need to refer to + * one of these columns in the returned DataFrame, use the [[apply]] or [[col]] function on the + * current or `right` DataFrame to disambiguate references to these columns. + * + * For example: + * + * {{{ + * val dfCrossJoin = left.crossJoin(right) + * val project = dfCrossJoin.select(left("common_col") + right("common_col")) + * }}} + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. + * @return + * A [[DataFrame]] + */ def crossJoin(right: DataFrame): DataFrame = transformation("crossJoin") { join(right, JoinType("cross"), None) } @@ -2205,101 +2308,105 @@ class DataFrame private[snowpark] ( } - /** Performs a natural join (a default inner join) of the current DataFrame and another DataFrame - * (`right`). - * - * For example: - * {{{ - * val dfNaturalJoin = df.naturalJoin(df2) - * }}} - * - * Note that this is equivalent to: - * {{{ - * val dfNaturalJoin = df.naturalJoin(df2, "inner") - * }}} - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @return - * A [[DataFrame]] - */ + /** + * Performs a natural join (a default inner join) of the current DataFrame and another DataFrame + * (`right`). + * + * For example: + * {{{ + * val dfNaturalJoin = df.naturalJoin(df2) + * }}} + * + * Note that this is equivalent to: + * {{{ + * val dfNaturalJoin = df.naturalJoin(df2, "inner") + * }}} + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. 
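// Editor's illustration (not part of this patch): the Column-based join overload above.
// `df` and the columns "a"/"arr" are hypothetical; `tableFunctions.explode` is assumed to
// be the client-side explode function referenced in `joinTableFunction`.
import com.snowflake.snowpark.functions._
import com.snowflake.snowpark.tableFunctions

val flattened = df.join(tableFunctions.flatten(parse_json(df("a"))))
val exploded = df.join(tableFunctions.explode(df("arr")))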
+ * @return + * A [[DataFrame]] + */ def naturalJoin(right: DataFrame): DataFrame = transformation("naturalJoin") { naturalJoin(right, "inner") } - /** Performs a natural join of the specified type (`joinType`) with the current DataFrame and - * another DataFrame (`right`). - * - * For example: - * - * {{{ - * val dfNaturalJoin = df.naturalJoin(df2, "left") - * }}} - * - * @group transform - * @since 0.1.0 - * @param right - * The other [[DataFrame]] to join. - * @param joinType - * The type of join (e.g. {@code "right"} , {@code "outer"} , etc.). - * @return - * A [[DataFrame]] - */ + /** + * Performs a natural join of the specified type (`joinType`) with the current DataFrame and + * another DataFrame (`right`). + * + * For example: + * + * {{{ + * val dfNaturalJoin = df.naturalJoin(df2, "left") + * }}} + * + * @group transform + * @since 0.1.0 + * @param right + * The other [[DataFrame]] to join. + * @param joinType + * The type of join (e.g. {@code "right"} , {@code "outer"} , etc.). + * @return + * A [[DataFrame]] + */ def naturalJoin(right: DataFrame, joinType: String): DataFrame = transformation("naturalJoin") { withPlan { Join(this.plan, right.plan, NaturalJoin(JoinType(joinType)), None) } } - /** Returns a DataFrame with an additional column with the specified name (`colName`). The column - * is computed by using the specified expression (`col`). - * - * If a column with the same name already exists in the DataFrame, that column is replaced by the - * new column. - * - * This example adds a new column named `mean_price` that contains the mean of the existing - * `price` column in the DataFrame. - * - * {{{ - * val dfWithMeanPriceCol = df.withColumn("mean_price", mean($"price")) - * }}} - * @group transform - * @since 0.1.0 - * @param colName - * The name of the column to add or replace. - * @param col - * The [[Column]] to add or replace. - * @return - * A [[DataFrame]] - */ + /** + * Returns a DataFrame with an additional column with the specified name (`colName`). The column + * is computed by using the specified expression (`col`). + * + * If a column with the same name already exists in the DataFrame, that column is replaced by the + * new column. + * + * This example adds a new column named `mean_price` that contains the mean of the existing + * `price` column in the DataFrame. + * + * {{{ + * val dfWithMeanPriceCol = df.withColumn("mean_price", mean($"price")) + * }}} + * @group transform + * @since 0.1.0 + * @param colName + * The name of the column to add or replace. + * @param col + * The [[Column]] to add or replace. + * @return + * A [[DataFrame]] + */ def withColumn(colName: String, col: Column): DataFrame = transformation("withColumn") { withColumns(Seq(colName), Seq(col)) } - /** Returns a DataFrame with additional columns with the specified names (`colNames`). The columns - * are computed by using the specified expressions (`cols`). - * - * If columns with the same names already exist in the DataFrame, those columns are replaced by - * the new columns. - * - * This example adds new columns named `mean_price` and `avg_price` that contain the mean and - * average of the existing `price` column. - * - * {{{ - * val dfWithAddedColumns = df.withColumn( - * Seq("mean_price", "avg_price"), Seq(mean($"price"), avg($"price") ) - * }}} - * @group transform - * @since 0.1.0 - * @param colNames - * A list of the names of the columns to add or replace. - * @param values - * A list of the [[Column]] objects to add or replace. 
- * @return - * A [[DataFrame]] - */ + /** + * Returns a DataFrame with additional columns with the specified names (`colNames`). The columns + * are computed by using the specified expressions (`cols`). + * + * If columns with the same names already exist in the DataFrame, those columns are replaced by + * the new columns. + * + * This example adds new columns named `mean_price` and `avg_price` that contain the mean and + * average of the existing `price` column. + * + * {{{ + * val dfWithAddedColumns = df.withColumn( + * Seq("mean_price", "avg_price"), Seq(mean($"price"), avg($"price") ) + * }}} + * @group transform + * @since 0.1.0 + * @param colNames + * A list of the names of the columns to add or replace. + * @param values + * A list of the [[Column]] objects to add or replace. + * @return + * A [[DataFrame]] + */ def withColumns(colNames: Seq[String], values: Seq[Column]): DataFrame = transformation("withColumns") { if (colNames.size != values.size) { @@ -2315,23 +2422,24 @@ class DataFrame private[snowpark] ( withPlan(WithColumns(newCols, plan)) } - /** Returns a DataFrame with the specified column `col` renamed as `newName`. - * - * This example renames the column `A` as `NEW_A` in the DataFrame. - * - * {{{ - * val df = session.sql("select 1 as A, 2 as B") - * val dfRenamed = df.rename("NEW_A", col("A")) - * }}} - * @group transform - * @since 0.9.0 - * @param newName - * The new name for the column - * @param col - * The [[Column]] to be renamed - * @return - * A [[DataFrame]] - */ + /** + * Returns a DataFrame with the specified column `col` renamed as `newName`. + * + * This example renames the column `A` as `NEW_A` in the DataFrame. + * + * {{{ + * val df = session.sql("select 1 as A, 2 as B") + * val dfRenamed = df.rename("NEW_A", col("A")) + * }}} + * @group transform + * @since 0.9.0 + * @param newName + * The new name for the column + * @param col + * The [[Column]] to be renamed + * @return + * A [[DataFrame]] + */ def rename(newName: String, col: Column): DataFrame = transformation("rename") { // Normalize the new column name val newQuotedName = quoteName(newName) @@ -2361,113 +2469,121 @@ class DataFrame private[snowpark] ( select(newColumns) } - /** Executes the query representing this DataFrame and returns the result as an Array of [[Row]] - * objects. - * - * @group actions - * @since 0.1.0 - * @return - * An Array of [[Row]] - */ + /** + * Executes the query representing this DataFrame and returns the result as an Array of [[Row]] + * objects. + * + * @group actions + * @since 0.1.0 + * @return + * An Array of [[Row]] + */ def collect(): Array[Row] = action("collect") { session.conn.telemetry.reportActionCollect() session.conn.execute(snowflakePlan) } - /** Executes the query representing this DataFrame and returns an iterator of [[Row]] objects that - * you can use to retrieve the results. - * - * Unlike the [[collect]] method, this method does not load all data into memory at once. - * - * @group actions - * @since 0.5.0 - * @return - * An Iterator of [[Row]] - */ + /** + * Executes the query representing this DataFrame and returns an iterator of [[Row]] objects that + * you can use to retrieve the results. + * + * Unlike the [[collect]] method, this method does not load all data into memory at once. 
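// Editor's illustration (not part of this patch): a well-formed call of the `withColumns`
// overload defined above; the "price" column is hypothetical.
import com.snowflake.snowpark.functions._

val dfWithAdded = df.withColumns(
  Seq("mean_price", "avg_price"),
  Seq(mean(col("price")), avg(col("price"))))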
+ * + * @group actions + * @since 0.5.0 + * @return + * An Iterator of [[Row]] + */ def toLocalIterator: Iterator[Row] = action("toLocalIterator") { session.conn.telemetry.reportActionToLocalIterator() session.conn.getRowIterator(snowflakePlan) } - /** Executes the query representing this DataFrame and returns the number of rows in the result - * (similar to the COUNT function in SQL). - * - * @group actions - * @since 0.1.0 - * @return - * The number of rows. - */ + /** + * Executes the query representing this DataFrame and returns the number of rows in the result + * (similar to the COUNT function in SQL). + * + * @group actions + * @since 0.1.0 + * @return + * The number of rows. + */ def count(): Long = action("count") { session.conn.telemetry.reportActionCount() agg(("*", "count")).collect().head.getLong(0) } - /** Returns a [[DataFrameWriter]] object that you can use to write the data in the DataFrame to - * any supported destination. The Default [[SaveMode]] for the returned [[DataFrameWriter]] is - * [[SaveMode.Append Append]]. - * - * Example: - * {{{ - * df.write.saveAsTable("table1") - * }}} - * - * @group basic - * @since 0.1.0 - * @return - * A [[DataFrameWriter]] - */ + /** + * Returns a [[DataFrameWriter]] object that you can use to write the data in the DataFrame to any + * supported destination. The Default [[SaveMode]] for the returned [[DataFrameWriter]] is + * [[SaveMode.Append Append]]. + * + * Example: + * {{{ + * df.write.saveAsTable("table1") + * }}} + * + * @group basic + * @since 0.1.0 + * @return + * A [[DataFrameWriter]] + */ def write: DataFrameWriter = new DataFrameWriter(this) - /** Returns a [[DataFrameAsyncActor]] object that can be used to execute DataFrame actions - * asynchronously. - * - * Example: - * {{{ - * val asyncJob = df.async.collect() - * // At this point, the thread is not blocked. You can perform additional work before - * // calling asyncJob.getResult() to retrieve the results of the action. - * // NOTE: getResult() is a blocking call. - * val rows = asyncJob.getResult() - * }}} - * - * @since 0.11.0 - * @group basic - * @return - * A [[DataFrameAsyncActor]] object - */ + /** + * Returns a [[DataFrameAsyncActor]] object that can be used to execute DataFrame actions + * asynchronously. + * + * Example: + * {{{ + * val asyncJob = df.async.collect() + * // At this point, the thread is not blocked. You can perform additional work before + * // calling asyncJob.getResult() to retrieve the results of the action. + * // NOTE: getResult() is a blocking call. + * val rows = asyncJob.getResult() + * }}} + * + * @since 0.11.0 + * @group basic + * @return + * A [[DataFrameAsyncActor]] object + */ def async: DataFrameAsyncActor = new DataFrameAsyncActor(this) - /** Evaluates this DataFrame and prints out the first ten rows. - * - * @group actions - * @since 0.1.0 - */ + /** + * Evaluates this DataFrame and prints out the first ten rows. + * + * @group actions + * @since 0.1.0 + */ def show(): Unit = action("show") { show(10) } - /** Evaluates this DataFrame and prints out the first `''n''` rows. - * - * @group actions - * @since 0.1.0 - * @param n - * The number of rows to print out. - */ + /** + * Evaluates this DataFrame and prints out the first `''n''` rows. + * + * @group actions + * @since 0.1.0 + * @param n + * The number of rows to print out. + */ def show(n: Int): Unit = action("show") { show(n, 50) } - /** Evaluates this DataFrame and prints out the first `''n''` rows with the specified maximum - * number of characters per column. 
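// Editor's illustration (not part of this patch): the row-returning actions documented
// above; `df` is a hypothetical DataFrame.
import com.snowflake.snowpark.Row

val rowCount: Long = df.count()
val allRows: Array[Row] = df.collect()
df.toLocalIterator.foreach(row => println(row)) // streams rows instead of loading them all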
- * - * @group actions - * @since 0.5.0 - * @param n - * The number of rows to print out. - * @param maxWidth - * The maximum number of characters to print out for each column. If the number of characters - * exceeds the maximum, the method prints out an ellipsis (...) at the end of the column. - */ + /** + * Evaluates this DataFrame and prints out the first `''n''` rows with the specified maximum + * number of characters per column. + * + * @group actions + * @since 0.5.0 + * @param n + * The number of rows to print out. + * @param maxWidth + * The maximum number of characters to print out for each column. If the number of characters + * exceeds the maximum, the method prints out an ellipsis (...) at the end of the column. + */ def show(n: Int, maxWidth: Int): Unit = action("show") { session.conn.telemetry.reportActionShow() // scalastyle:off println @@ -2516,7 +2632,7 @@ class DataFrame private[snowpark] ( } } lines.append(value.substring(startIndex)) - lines + lines.toSeq } def convertValueToString(value: Any): String = @@ -2597,125 +2713,131 @@ class DataFrame private[snowpark] ( line + rowToString(header) + line + body.map(rowToString).mkString + line } - /** Creates a view that captures the computation expressed by this DataFrame. - * - * For `viewName`, you can include the database and schema name (i.e. specify a fully-qualified - * name). If no database name or schema name are specified, the view will be created in the - * current database or schema. - * - * `viewName` must be a valid - * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. - * - * @since 0.1.0 - * @group actions - * @param viewName - * The name of the view to create or replace. - */ + /** + * Creates a view that captures the computation expressed by this DataFrame. + * + * For `viewName`, you can include the database and schema name (i.e. specify a fully-qualified + * name). If no database name or schema name are specified, the view will be created in the + * current database or schema. + * + * `viewName` must be a valid + * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. + * + * @since 0.1.0 + * @group actions + * @param viewName + * The name of the view to create or replace. + */ def createOrReplaceView(viewName: String): Unit = action("createOrReplaceView") { doCreateOrReplaceView(viewName, PersistedView) } - /** Creates a view that captures the computation expressed by this DataFrame. - * - * In `multipartIdentifer`, you can include the database and schema name to specify a - * fully-qualified name. If no database name or schema name are specified, the view will be - * created in the current database or schema. - * - * The view name must be a valid - * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. - * - * @since 0.5.0 - * @group actions - * @param multipartIdentifier - * A sequence of strings that specifies the database name, schema name, and view name. - */ + /** + * Creates a view that captures the computation expressed by this DataFrame. + * + * In `multipartIdentifer`, you can include the database and schema name to specify a + * fully-qualified name. If no database name or schema name are specified, the view will be + * created in the current database or schema. + * + * The view name must be a valid + * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. 
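// Editor's illustration (not part of this patch): the `show` overloads above and their
// defaults; `df` is hypothetical.
df.show()       // first 10 rows, at most 50 characters per column
df.show(5)      // first 5 rows
df.show(5, 20)  // first 5 rows, at most 20 characters per column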
+ * + * @since 0.5.0 + * @group actions + * @param multipartIdentifier + * A sequence of strings that specifies the database name, schema name, and view name. + */ def createOrReplaceView(multipartIdentifier: Seq[String]): Unit = action("createOrReplaceView") { createOrReplaceView(multipartIdentifier.mkString(".")) } - /** Creates a view that captures the computation expressed by this DataFrame. - * - * In `multipartIdentifer`, you can include the database and schema name to specify a - * fully-qualified name. If no database name or schema name are specified, the view will be - * created in the current database or schema. - * - * The view name must be a valid - * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. - * - * @since 0.5.0 - * @group actions - * @param multipartIdentifier - * A list of strings that specifies the database name, schema name, and view name. - */ + /** + * Creates a view that captures the computation expressed by this DataFrame. + * + * In `multipartIdentifer`, you can include the database and schema name to specify a + * fully-qualified name. If no database name or schema name are specified, the view will be + * created in the current database or schema. + * + * The view name must be a valid + * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. + * + * @since 0.5.0 + * @group actions + * @param multipartIdentifier + * A list of strings that specifies the database name, schema name, and view name. + */ def createOrReplaceView(multipartIdentifier: java.util.List[String]): Unit = action("createOrReplaceView") { - createOrReplaceView(multipartIdentifier.asScala) + createOrReplaceView(multipartIdentifier.asScala.toSeq) } - /** Creates a temporary view that returns the same results as this DataFrame. - * - * You can use the view in subsequent SQL queries and statements during the current session. The - * temporary view is only available in the session in which it is created. - * - * For `viewName`, you can include the database and schema name (i.e. specify a fully-qualified - * name). If no database name or schema name are specified, the view will be created in the - * current database or schema. - * - * `viewName` must be a valid - * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. - * - * @since 0.4.0 - * @group actions - * @param viewName - * The name of the view to create or replace. - */ + /** + * Creates a temporary view that returns the same results as this DataFrame. + * + * You can use the view in subsequent SQL queries and statements during the current session. The + * temporary view is only available in the session in which it is created. + * + * For `viewName`, you can include the database and schema name (i.e. specify a fully-qualified + * name). If no database name or schema name are specified, the view will be created in the + * current database or schema. + * + * `viewName` must be a valid + * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. + * + * @since 0.4.0 + * @group actions + * @param viewName + * The name of the view to create or replace. + */ def createOrReplaceTempView(viewName: String): Unit = action("createOrReplaceTempView") { doCreateOrReplaceView(viewName, LocalTempView) } - /** Creates a temporary view that returns the same results as this DataFrame. - * - * You can use the view in subsequent SQL queries and statements during the current session. 
The - * temporary view is only available in the session in which it is created. - * - * In `multipartIdentifer`, you can include the database and schema name to specify a - * fully-qualified name. If no database name or schema name are specified, the view will be - * created in the current database or schema. - * - * The view name must be a valid - * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. - * - * @since 0.5.0 - * @group actions - * @param multipartIdentifier - * A sequence of strings that specify the database name, schema name, and view name. - */ + /** + * Creates a temporary view that returns the same results as this DataFrame. + * + * You can use the view in subsequent SQL queries and statements during the current session. The + * temporary view is only available in the session in which it is created. + * + * In `multipartIdentifer`, you can include the database and schema name to specify a + * fully-qualified name. If no database name or schema name are specified, the view will be + * created in the current database or schema. + * + * The view name must be a valid + * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. + * + * @since 0.5.0 + * @group actions + * @param multipartIdentifier + * A sequence of strings that specify the database name, schema name, and view name. + */ def createOrReplaceTempView(multipartIdentifier: Seq[String]): Unit = action("createOrReplaceTempView") { createOrReplaceTempView(multipartIdentifier.mkString(".")) } - /** Creates a temporary view that returns the same results as this DataFrame. - * - * You can use the view in subsequent SQL queries and statements during the current session. The - * temporary view is only available in the session in which it is created. - * - * In `multipartIdentifer`, you can include the database and schema name to specify a - * fully-qualified name. If no database name or schema name are specified, the view will be - * created in the current database or schema. - * - * The view name must be a valid - * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. - * - * @since 0.5.0 - * @group actions - * @param multipartIdentifier - * A list of strings that specify the database name, schema name, and view name. - */ + /** + * Creates a temporary view that returns the same results as this DataFrame. + * + * You can use the view in subsequent SQL queries and statements during the current session. The + * temporary view is only available in the session in which it is created. + * + * In `multipartIdentifer`, you can include the database and schema name to specify a + * fully-qualified name. If no database name or schema name are specified, the view will be + * created in the current database or schema. + * + * The view name must be a valid + * [[https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html Snowflake identifier]]. + * + * @since 0.5.0 + * @group actions + * @param multipartIdentifier + * A list of strings that specify the database name, schema name, and view name. 
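// Editor's illustration (not part of this patch): creating views with a simple or
// multipart identifier, per the methods above; the names are hypothetical.
df.createOrReplaceView("my_db.my_schema.my_view")
df.createOrReplaceTempView(Seq("my_db", "my_schema", "my_temp_view"))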
+ */ def createOrReplaceTempView(multipartIdentifier: java.util.List[String]): Unit = action("createOrReplaceTempView") { - createOrReplaceTempView(multipartIdentifier.asScala) + createOrReplaceTempView(multipartIdentifier.asScala.toSeq) } private def doCreateOrReplaceView(viewName: String, viewType: ViewType): Unit = { @@ -2724,28 +2846,30 @@ class DataFrame private[snowpark] ( session.conn.execute(session.analyzer.resolve(CreateViewCommand(viewName, plan, viewType))) } - /** Executes the query representing this DataFrame and returns the first row of results. - * - * @group actions - * @since 0.2.0 - * @return - * The first [[Row]], if the row exists. Otherwise, returns `None`. - */ + /** + * Executes the query representing this DataFrame and returns the first row of results. + * + * @group actions + * @since 0.2.0 + * @return + * The first [[Row]], if the row exists. Otherwise, returns `None`. + */ def first(): Option[Row] = action("first") { first(1).headOption } - /** Executes the query representing this DataFrame and returns the first {@code n} rows of the - * results. - * - * @group actions - * @since 0.2.0 - * @param n - * The number of rows to return. - * @return - * An Array of the first {@code n} [[Row]] objects. If {@code n} is negative or larger than the - * number of rows in the results, returns all rows in the results. - */ + /** + * Executes the query representing this DataFrame and returns the first {@code n} rows of the + * results. + * + * @group actions + * @since 0.2.0 + * @param n + * The number of rows to return. + * @return + * An Array of the first {@code n} [[Row]] objects. If {@code n} is negative or larger than the + * number of rows in the results, returns all rows in the results. + */ def first(n: Int): Array[Row] = action("first") { session.conn.telemetry.reportActionFirst() if (n < 0) { @@ -2755,81 +2879,84 @@ class DataFrame private[snowpark] ( } } - /** Returns a [[DataFrameNaFunctions]] object that provides functions for handling missing values - * in the DataFrame. - * - * @group basic - * @since 0.2.0 - */ + /** + * Returns a [[DataFrameNaFunctions]] object that provides functions for handling missing values + * in the DataFrame. + * + * @group basic + * @since 0.2.0 + */ lazy val na: DataFrameNaFunctions = new DataFrameNaFunctions(this) - /** Returns a [[DataFrameStatFunctions]] object that provides statistic functions. - * - * @group basic - * @since 0.2.0 - */ + /** + * Returns a [[DataFrameStatFunctions]] object that provides statistic functions. + * + * @group basic + * @since 0.2.0 + */ lazy val stat: DataFrameStatFunctions = new DataFrameStatFunctions(this) - /** Returns a new DataFrame with a sample of N rows from the underlying DataFrame. - * - * NOTE: - * - * - If the row count in the DataFrame is larger than the requested number of rows, the method - * returns a DataFrame containing the number of requested rows. - * - If the row count in the DataFrame is smaller than the requested number of rows, the method - * returns a DataFrame containing all rows. - * - * @param num - * The number of rows to sample in the range of 0 to 1,000,000. - * @group transform - * @since 0.2.0 - * @return - * A [[DataFrame]] containing the sample of {@code num} rows. - */ + /** + * Returns a new DataFrame with a sample of N rows from the underlying DataFrame. + * + * NOTE: + * + * - If the row count in the DataFrame is larger than the requested number of rows, the method + * returns a DataFrame containing the number of requested rows. 
+ * - If the row count in the DataFrame is smaller than the requested number of rows, the method + * returns a DataFrame containing all rows. + * + * @param num + * The number of rows to sample in the range of 0 to 1,000,000. + * @group transform + * @since 0.2.0 + * @return + * A [[DataFrame]] containing the sample of {@code num} rows. + */ def sample(num: Long): DataFrame = transformation("sample") { withPlan(SnowflakeSampleNode(None, Some(num), plan)) } - /** Returns a new DataFrame that contains a sampling of rows from the current DataFrame. - * - * NOTE: - * - * - The number of rows returned may be close to (but not exactly equal to) - * {@code (probabilityFraction * totalRowCount)} . - * - The Snowflake - * [[https://docs.snowflake.com/en/sql-reference/constructs/sample.html SAMPLE]] function - * supports specifying 'probability' as a percentage number. The range of 'probability' is - * {@code [0.0, 100.0]} . The conversion formula is - * {@code probability = probabilityFraction * 100} . - * - * @param probabilityFraction - * The fraction of rows to sample. This must be in the range of `0.0` to `1.0`. - * @group transform - * @since 0.2.0 - * @return - * A [[DataFrame]] containing the sample of rows. - */ + /** + * Returns a new DataFrame that contains a sampling of rows from the current DataFrame. + * + * NOTE: + * + * - The number of rows returned may be close to (but not exactly equal to) + * {@code (probabilityFraction * totalRowCount)} . + * - The Snowflake [[https://docs.snowflake.com/en/sql-reference/constructs/sample.html SAMPLE]] + * function supports specifying 'probability' as a percentage number. The range of + * 'probability' is {@code [0.0, 100.0]} . The conversion formula is + * {@code probability = probabilityFraction * 100} . + * + * @param probabilityFraction + * The fraction of rows to sample. This must be in the range of `0.0` to `1.0`. + * @group transform + * @since 0.2.0 + * @return + * A [[DataFrame]] containing the sample of rows. + */ def sample(probabilityFraction: Double): DataFrame = transformation("sample") { withPlan(SnowflakeSampleNode(Some(probabilityFraction), None, plan)) } - /** Randomly splits the current DataFrame into separate DataFrames, using the specified weights. - * - * NOTE: - * - * - If only one weight is specified, the returned DataFrame array only includes the current - * DataFrame. - * - If multiple weights are specified, the current DataFrame will be cached before being - * split. - * - * @param weights - * Weights to use for splitting the DataFrame. If the weights don't add up to 1, the weights - * will be normalized. - * @group actions - * @since 0.2.0 - * @return - * A list of [[DataFrame]] objects - */ + /** + * Randomly splits the current DataFrame into separate DataFrames, using the specified weights. + * + * NOTE: + * + * - If only one weight is specified, the returned DataFrame array only includes the current + * DataFrame. + * - If multiple weights are specified, the current DataFrame will be cached before being split. + * + * @param weights + * Weights to use for splitting the DataFrame. If the weights don't add up to 1, the weights + * will be normalized. 
+ * @group actions + * @since 0.2.0 + * @return + * A list of [[DataFrame]] objects + */ def randomSplit(weights: Array[Double]): Array[DataFrame] = action("randomSplit") { session.conn.telemetry.reportActionRandomSplit() import com.snowflake.snowpark.functions._ @@ -2865,90 +2992,92 @@ class DataFrame private[snowpark] ( } } - /** Flattens (explodes) compound values into multiple rows (similar to the SQL - * [[https://docs.snowflake.com/en/sql-reference/functions/flatten.html FLATTEN]] function). - * - * The `flatten` method adds the following - * [[https://docs.snowflake.com/en/sql-reference/functions/flatten.html#output columns]] to the - * returned DataFrame: - * - * - SEQ - * - KEY - * - PATH - * - INDEX - * - VALUE - * - THIS - * - * If {@code this} DataFrame also has columns with the names above, you can disambiguate the - * columns by using the {@code this("value")} syntax. - * - * For example, if the current DataFrame has a column named `value`: - * {{{ - * val table1 = session.sql( - * "select parse_json(value) as value from values('[1,2]') as T(value)") - * val flattened = table1.flatten(table1("value")) - * flattened.select(table1("value"), flattened("value").as("newValue")).show() - * }}} - * - * @param input - * The expression that will be unseated into rows. The expression must be of data type VARIANT, - * OBJECT, or ARRAY. - * @group transform - * @return - * A [[DataFrame]] containing the flattened values. - * @since 0.2.0 - */ + /** + * Flattens (explodes) compound values into multiple rows (similar to the SQL + * [[https://docs.snowflake.com/en/sql-reference/functions/flatten.html FLATTEN]] function). + * + * The `flatten` method adds the following + * [[https://docs.snowflake.com/en/sql-reference/functions/flatten.html#output columns]] to the + * returned DataFrame: + * + * - SEQ + * - KEY + * - PATH + * - INDEX + * - VALUE + * - THIS + * + * If {@code this} DataFrame also has columns with the names above, you can disambiguate the + * columns by using the {@code this("value")} syntax. + * + * For example, if the current DataFrame has a column named `value`: + * {{{ + * val table1 = session.sql( + * "select parse_json(value) as value from values('[1,2]') as T(value)") + * val flattened = table1.flatten(table1("value")) + * flattened.select(table1("value"), flattened("value").as("newValue")).show() + * }}} + * + * @param input + * The expression that will be unseated into rows. The expression must be of data type VARIANT, + * OBJECT, or ARRAY. + * @group transform + * @return + * A [[DataFrame]] containing the flattened values. + * @since 0.2.0 + */ def flatten(input: Column): DataFrame = transformation("flatten") { flatten(input, "", outer = false, recursive = false, "BOTH") } - /** Flattens (explodes) compound values into multiple rows (similar to the SQL - * [[https://docs.snowflake.com/en/sql-reference/functions/flatten.html FLATTEN]] function). - * - * The `flatten` method adds the following - * [[https://docs.snowflake.com/en/sql-reference/functions/flatten.html#output columns]] to the - * returned DataFrame: - * - * - SEQ - * - KEY - * - PATH - * - INDEX - * - VALUE - * - THIS - * - * If {@code this} DataFrame also has columns with the names above, you can disambiguate the - * columns by using the {@code this("value")} syntax. 
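// Editor's illustration (not part of this patch): sampling and random splitting, per the
// methods documented above; `df` is hypothetical.
val fraction = df.sample(0.1)               // roughly 10% of the rows
val hundredRows = df.sample(100L)           // at most 100 rows
val parts = df.randomSplit(Array(0.8, 0.2)) // parts(0) ~ 80% of rows, parts(1) ~ 20%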
- * - * For example, if the current DataFrame has a column named `value`: - * {{{ - * val table1 = session.sql( - * "select parse_json(value) as value from values('[1,2]') as T(value)") - * val flattened = table1.flatten(table1("value"), "", outer = false, - * recursive = false, "both") - * flattened.select(table1("value"), flattened("value").as("newValue")).show() - * }}} - * - * @param input - * The expression that will be unseated into rows. The expression must be of data type VARIANT, - * OBJECT, or ARRAY. - * @param path - * The path to the element within a VARIANT data structure which needs to be flattened. Can be - * a zero-length string (i.e. empty path) if the outermost element is to be flattened. - * @param outer - * If FALSE, any input rows that cannot be expanded, either because they cannot be accessed in - * the path or because they have zero fields or entries, are completely omitted from the - * output. Otherwise, exactly one row is generated for zero-row expansions (with NULL in the - * KEY, INDEX, and VALUE columns). - * @param recursive - * If FALSE, only the element referenced by PATH is expanded. Otherwise, the expansion is - * performed for all sub-elements recursively. - * @param mode - * Specifies whether only OBJECT, ARRAY, or BOTH should be flattened. - * @group transform - * @return - * A [[DataFrame]] containing the flattened values. - * @since 0.2.0 - */ + /** + * Flattens (explodes) compound values into multiple rows (similar to the SQL + * [[https://docs.snowflake.com/en/sql-reference/functions/flatten.html FLATTEN]] function). + * + * The `flatten` method adds the following + * [[https://docs.snowflake.com/en/sql-reference/functions/flatten.html#output columns]] to the + * returned DataFrame: + * + * - SEQ + * - KEY + * - PATH + * - INDEX + * - VALUE + * - THIS + * + * If {@code this} DataFrame also has columns with the names above, you can disambiguate the + * columns by using the {@code this("value")} syntax. + * + * For example, if the current DataFrame has a column named `value`: + * {{{ + * val table1 = session.sql( + * "select parse_json(value) as value from values('[1,2]') as T(value)") + * val flattened = table1.flatten(table1("value"), "", outer = false, + * recursive = false, "both") + * flattened.select(table1("value"), flattened("value").as("newValue")).show() + * }}} + * + * @param input + * The expression that will be unseated into rows. The expression must be of data type VARIANT, + * OBJECT, or ARRAY. + * @param path + * The path to the element within a VARIANT data structure which needs to be flattened. Can be a + * zero-length string (i.e. empty path) if the outermost element is to be flattened. + * @param outer + * If FALSE, any input rows that cannot be expanded, either because they cannot be accessed in + * the path or because they have zero fields or entries, are completely omitted from the output. + * Otherwise, exactly one row is generated for zero-row expansions (with NULL in the KEY, INDEX, + * and VALUE columns). + * @param recursive + * If FALSE, only the element referenced by PATH is expanded. Otherwise, the expansion is + * performed for all sub-elements recursively. + * @param mode + * Specifies whether only OBJECT, ARRAY, or BOTH should be flattened. + * @group transform + * @return + * A [[DataFrame]] containing the flattened values. 
+ * @since 0.2.0 + */ def flatten( input: Column, path: String, @@ -3066,22 +3195,23 @@ class DataFrame private[snowpark] ( rhs.select(rhs.output.map(_.name).map(aliasIfNeeded(rhs, _, rhsPrefix, commonColNames)))) } - /** Executes the query representing this DataFrame and returns the query ID that represents its - * result. - */ + /** + * Executes the query representing this DataFrame and returns the query ID that represents its + * result. + */ private[snowpark] def executeAndGetQueryId(): String = { executeAndGetQueryId(Map.empty) } - /** Executes the query representing this DataFrame with statement parameters and returns the query - * ID that represents its result. NOTE: The statement parameters are only used for the last - * query. - * - * @param statementParameters - * The statement parameters map - * @return - * the query ID - */ + /** + * Executes the query representing this DataFrame with statement parameters and returns the query + * ID that represents its result. NOTE: The statement parameters are only used for the last query. + * + * @param statementParameters + * The statement parameters map + * @return + * the query ID + */ private[snowpark] def executeAndGetQueryId(statementParameters: Map[String, Any]): String = { // This function is used by java stored proc. // scalastyle:off @@ -3093,7 +3223,7 @@ class DataFrame private[snowpark] ( } lazy private[snowpark] val output: Seq[Attribute] = { - SnowflakePlan.wrapException(plan.children: _*) { + SnowflakePlan.wrapException(plan.children.toSeq: _*) { snowflakePlan.output } } @@ -3113,28 +3243,30 @@ class DataFrame private[snowpark] ( DataFrame.buildMethodChain(this.methodChain, funcName)(func) } -/** A DataFrame that returns cached data. Repeated invocations of actions on this type of dataframe - * are guaranteed to produce the same results. It is returned from `cacheResult` functions (e.g. - * [[DataFrame.cacheResult]]). - * - * @since 0.4.0 - */ +/** + * A DataFrame that returns cached data. Repeated invocations of actions on this type of dataframe + * are guaranteed to produce the same results. It is returned from `cacheResult` functions (e.g. + * [[DataFrame.cacheResult]]). + * + * @since 0.4.0 + */ class HasCachedResult private[snowpark] ( override private[snowpark] val session: Session, override private[snowpark] val plan: LogicalPlan, override private[snowpark] val methodChain: Seq[String]) extends DataFrame(session, plan, methodChain) { - /** Caches the content of this DataFrame to create a new cached DataFrame. - * - * All subsequent operations on the returned cached DataFrame are performed on the cached data - * and have no effect on the original DataFrame. - * - * @since 1.5.0 - * @group actions - * @return - * A [[HasCachedResult]] - */ + /** + * Caches the content of this DataFrame to create a new cached DataFrame. + * + * All subsequent operations on the returned cached DataFrame are performed on the cached data and + * have no effect on the original DataFrame. + * + * @since 1.5.0 + * @group actions + * @return + * A [[HasCachedResult]] + */ override def cacheResult(): HasCachedResult = action("cacheResult") { // cacheResult function of HashCachedResult returns a clone of this // HashCachedResult DataFrame instead of to cache this DataFrame again. @@ -3142,41 +3274,45 @@ class HasCachedResult private[snowpark] ( } } -/** Provides APIs to execute DataFrame actions asynchronously. - * - * @since 0.11.0 - */ +/** + * Provides APIs to execute DataFrame actions asynchronously. 
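// Editor's illustration (not part of this patch): caching a result and running an action
// asynchronously, per `HasCachedResult` and `DataFrameAsyncActor` above; `df` is hypothetical.
import com.snowflake.snowpark.{HasCachedResult, Row}

val cached: HasCachedResult = df.cacheResult() // later actions run against the cached data
val job = df.async.collect()                   // returns immediately
val rows: Array[Row] = job.getResult()         // blocks until the query finishes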
+ * + * @since 0.11.0 + */ class DataFrameAsyncActor private[snowpark] (df: DataFrame) { - /** Executes [[DataFrame.collect]] asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes [[DataFrame.collect]] asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def collect(): TypedAsyncJob[Array[Row]] = action("collect") { df.session.conn.executeAsync[Array[Row]](df.snowflakePlan) } - /** Executes [[DataFrame.toLocalIterator]] asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes [[DataFrame.toLocalIterator]] asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def toLocalIterator(): TypedAsyncJob[Iterator[Row]] = action("toLocalIterator") { df.session.conn.executeAsync[Iterator[Row]](df.snowflakePlan) } - /** Executes [[DataFrame.count]] asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes [[DataFrame.count]] asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def count(): TypedAsyncJob[Long] = action("count") { df.session.conn.executeAsync[Long](df.agg(("*", "count")).snowflakePlan) } diff --git a/src/main/scala/com/snowflake/snowpark/DataFrameNaFunctions.scala b/src/main/scala/com/snowflake/snowpark/DataFrameNaFunctions.scala index 843c3f3f..f332a607 100644 --- a/src/main/scala/com/snowflake/snowpark/DataFrameNaFunctions.scala +++ b/src/main/scala/com/snowflake/snowpark/DataFrameNaFunctions.scala @@ -5,31 +5,33 @@ import com.snowflake.snowpark.types._ import com.snowflake.snowpark.functions.{lit, when} import com.snowflake.snowpark.internal.analyzer.quoteName -/** Provides functions for handling missing values in a DataFrame. - * - * @since 0.2.0 - */ +/** + * Provides functions for handling missing values in a DataFrame. + * + * @since 0.2.0 + */ final class DataFrameNaFunctions private[snowpark] (df: DataFrame) extends Logging { - /** Returns a new DataFrame that excludes all rows containing fewer than {@code minNonNullsPerRow} - * non-null and non-NaN values in the specified columns {@code cols} . - * - * - If {@code minNonNullsPerRow} is greater than the number of the specified columns, the - * method returns an empty DataFrame. - * - If {@code minNonNullsPerRow} is less than 1, the method returns the original DataFrame. - * - If {@code cols} is empty, the method returns the original DataFrame. - * - * @param minNonNullsPerRow - * The minimum number of non-null and non-NaN values that should be in the specified columns in - * order for the row to be included. - * @param cols - * A sequence of the names of columns to check for null and NaN values. - * @return - * A [[DataFrame]] - * @throws SnowparkClientException - * if cols contains any unrecognized column name - * @since 0.2.0 - */ + /** + * Returns a new DataFrame that excludes all rows containing fewer than {@code minNonNullsPerRow} + * non-null and non-NaN values in the specified columns {@code cols} . 
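+ *
+ * A minimal usage sketch (assumes a DataFrame `df` with columns `a` and `b` that may contain null
+ * or NaN values); the bullet points below cover the edge cases:
+ * {{{
+ * // Keep only the rows with at least 2 non-null, non-NaN values across columns "a" and "b".
+ * val cleaned = df.na.drop(2, Seq("a", "b"))
+ * }}}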
+ * + * - If {@code minNonNullsPerRow} is greater than the number of the specified columns, the + * method returns an empty DataFrame. + * - If {@code minNonNullsPerRow} is less than 1, the method returns the original DataFrame. + * - If {@code cols} is empty, the method returns the original DataFrame. + * + * @param minNonNullsPerRow + * The minimum number of non-null and non-NaN values that should be in the specified columns in + * order for the row to be included. + * @param cols + * A sequence of the names of columns to check for null and NaN values. + * @return + * A [[DataFrame]] + * @throws SnowparkClientException + * if cols contains any unrecognized column name + * @since 0.2.0 + */ def drop(minNonNullsPerRow: Int, cols: Seq[String]): DataFrame = transformation("drop") { // translate to // select * from table where @@ -80,25 +82,26 @@ final class DataFrameNaFunctions private[snowpark] (df: DataFrame) extends Loggi } } - /** Returns a new DataFrame that replaces all null and NaN values in the specified columns with - * the values provided. - * - * {@code valueMap} describes which columns will be replaced and what the replacement values are. - * - * - It only supports Long, Int, short, byte, String, Boolean, float, and Double values. - * - If the type of the given value doesn't match the column type (e.g. a Long value for a - * StringType column), the replacement in this column will be skipped. - * - * @param valueMap - * A Map that associates the names of columns with the values that should be used to replace - * null and NaN values in those columns. - * @return - * A [[DataFrame]] - * @throws SnowparkClientException - * if valueMap contains unrecognized columns - * - * @since 0.2.0 - */ + /** + * Returns a new DataFrame that replaces all null and NaN values in the specified columns with the + * values provided. + * + * {@code valueMap} describes which columns will be replaced and what the replacement values are. + * + * - It only supports Long, Int, short, byte, String, Boolean, float, and Double values. + * - If the type of the given value doesn't match the column type (e.g. a Long value for a + * StringType column), the replacement in this column will be skipped. + * + * @param valueMap + * A Map that associates the names of columns with the values that should be used to replace + * null and NaN values in those columns. + * @return + * A [[DataFrame]] + * @throws SnowparkClientException + * if valueMap contains unrecognized columns + * + * @since 0.2.0 + */ def fill(valueMap: Map[String, Any]): DataFrame = transformation("fill") { // translate to // select col, iff(floatCol is null or floatCol == 'NaN', replacement, floatCol), @@ -149,27 +152,28 @@ final class DataFrameNaFunctions private[snowpark] (df: DataFrame) extends Loggi df.select(columns) } - /** Returns a new DataFrame that replaces values in a specified column. - * - * Use the {@code replacement} parameter to specify a Map that associates the values to replace - * with new values. To replace a null value, use None as the key in the Map. - * - * For example, suppose that you pass `col1` for {@code colName} and - * {@code Map(2 -> 3, None -> 2, 4 -> null)} for {@code replacement} . In `col1`, this function - * replaces: - * - * - `2` with `3` - * - null with `2` - * - `4` with null - * - * @param colName - * The name of the column in which the values should be replaced. - * @param replacement - * A Map that associates the original values with the replacement values. 
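+ *
+ * Written as code, the mapping described above would be (a sketch; assumes `df` has a column
+ * `col1` and the replacement map is typed as `Map[Any, Any]`):
+ * {{{
+ * val replaced = df.na.replace("col1", Map[Any, Any](2 -> 3, None -> 2, 4 -> null))
+ * }}}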
- * @throws SnowparkClientException - * if colName is an unrecognized column name - * @since 0.2.0 - */ + /** + * Returns a new DataFrame that replaces values in a specified column. + * + * Use the {@code replacement} parameter to specify a Map that associates the values to replace + * with new values. To replace a null value, use None as the key in the Map. + * + * For example, suppose that you pass `col1` for {@code colName} and + * {@code Map(2 -> 3, None -> 2, 4 -> null)} for {@code replacement} . In `col1`, this function + * replaces: + * + * - `2` with `3` + * - null with `2` + * - `4` with null + * + * @param colName + * The name of the column in which the values should be replaced. + * @param replacement + * A Map that associates the original values with the replacement values. + * @throws SnowparkClientException + * if colName is an unrecognized column name + * @since 0.2.0 + */ def replace(colName: String, replacement: Map[Any, Any]): DataFrame = transformation("replace") { // verify name diff --git a/src/main/scala/com/snowflake/snowpark/DataFrameReader.scala b/src/main/scala/com/snowflake/snowpark/DataFrameReader.scala index a238257d..856a6a1e 100644 --- a/src/main/scala/com/snowflake/snowpark/DataFrameReader.scala +++ b/src/main/scala/com/snowflake/snowpark/DataFrameReader.scala @@ -4,160 +4,163 @@ import com.snowflake.snowpark.internal.analyzer.StagedFileReader import com.snowflake.snowpark.types.StructType // scalastyle:off -/** Provides methods to load data in various supported formats from a Snowflake stage to a - * DataFrame. The paths provided to the DataFrameReader must refer to Snowflake stages. - * - * To use this object: - * - * 1. Access an instance of a DataFrameReader by calling the [[Session.read]] method. - * 1. Specify any - * [[https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#format-type-options-formattypeoptions format-specific options]] - * and - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions copy options]] - * by calling the [[option]] or [[options]] method. These methods return a DataFrameReader - * that is configured with these options. (Note that although specifying copy options can make - * error handling more robust during the reading process, it may have an effect on - * performance.) - * 1. Specify the schema of the data that you plan to load by constructing a [[types.StructType]] - * object and passing it to the [[schema]] method. This method returns a DataFrameReader that - * is configured to read data that uses the specified schema. - * 1. Specify the format of the data by calling the method named after the format (e.g. [[csv]], - * [[json]], etc.). These methods return a [[DataFrame]] that is configured to load data in - * the specified format. - * 1. Call a [[DataFrame]] method that performs an action. - * - For example, to load the data from the file, call [[DataFrame.collect]]. - * - As another example, to save the data from the file to a table, call - * [[CopyableDataFrame.copyInto(tableName:String)*]]. This uses the COPY INTO `` - * command. - * - * The following examples demonstrate how to use a DataFrameReader. - * - * '''Example 1:''' Loading the first two columns of a CSV file and skipping the first header line. - * {{{ - * // Import the package for StructType. - * import com.snowflake.snowpark.types._ - * val filePath = "@mystage1" - * // Define the schema for the data in the CSV file. 
- * val userSchema = StructType(Seq(StructField("a", IntegerType), StructField("b", StringType))) - * // Create a DataFrame that is configured to load data from the CSV file. - * val csvDF = session.read.option("skip_header", 1).schema(userSchema).csv(filePath) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = csvDF.collect() - * }}} - * - * '''Example 2:''' Loading a gzip compressed json file. - * {{{ - * val filePath = "@mystage2/data.json.gz" - * // Create a DataFrame that is configured to load data from the gzipped JSON file. - * val jsonDF = session.read.option("compression", "gzip").json(filePath) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = jsonDF.collect() - * }}} - * - * If you want to load only a subset of files from the stage, you can use the - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#loading-using-pattern-matching pattern]] - * option to specify a regular expression that matches the files that you want to load. - * - * '''Example 3:''' Loading only the CSV files from a stage location. - * {{{ - * import com.snowflake.snowpark.types._ - * // Define the schema for the data in the CSV files. - * val userSchema: StructType = StructType(Seq(StructField("a", IntegerType),StructField("b", StringType))) - * // Create a DataFrame that is configured to load data from the CSV files in the stage. - * val csvDF = session.read.option("pattern", ".*[.]csv").schema(userSchema).csv("@stage_location") - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = csvDF.collect() - * }}} - * - * In addition, if you want to load the files from the stage into a specified table with COPY INTO - * `` command, you can use a `copyInto()` method e.g. - * [[CopyableDataFrame.copyInto(tableName:String)*]]. - * - * '''Example 4:''' Loading data from a JSON file in a stage to a table by using COPY INTO - * ``. - * {{{ - * val filePath = "@mystage1" - * // Create a DataFrame that is configured to load data from the JSON file. - * val jsonDF = session.read.json(filePath) - * // Load the data into the specified table `T1`. - * // The table "T1" should exist before calling copyInto(). - * jsonDF.copyInto("T1") - * }}} - * - * @param session - * Snowflake [[Session]] - * @since 0.1.0 - */ +/** + * Provides methods to load data in various supported formats from a Snowflake stage to a DataFrame. + * The paths provided to the DataFrameReader must refer to Snowflake stages. + * + * To use this object: + * + * 1. Access an instance of a DataFrameReader by calling the [[Session.read]] method. + * 1. Specify any + * [[https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#format-type-options-formattypeoptions format-specific options]] + * and + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions copy options]] + * by calling the [[option]] or [[options]] method. These methods return a DataFrameReader that + * is configured with these options. (Note that although specifying copy options can make error + * handling more robust during the reading process, it may have an effect on performance.) + * 1. Specify the schema of the data that you plan to load by constructing a [[types.StructType]] + * object and passing it to the [[schema]] method. This method returns a DataFrameReader that + * is configured to read data that uses the specified schema. + * 1. 
Specify the format of the data by calling the method named after the format (e.g. [[csv]], + * [[json]], etc.). These methods return a [[DataFrame]] that is configured to load data in the + * specified format. + * 1. Call a [[DataFrame]] method that performs an action. + * - For example, to load the data from the file, call [[DataFrame.collect]]. + * - As another example, to save the data from the file to a table, call + * [[CopyableDataFrame.copyInto(tableName:String)*]]. This uses the COPY INTO `` + * command. + * + * The following examples demonstrate how to use a DataFrameReader. + * + * '''Example 1:''' Loading the first two columns of a CSV file and skipping the first header line. + * {{{ + * // Import the package for StructType. + * import com.snowflake.snowpark.types._ + * val filePath = "@mystage1" + * // Define the schema for the data in the CSV file. + * val userSchema = StructType(Seq(StructField("a", IntegerType), StructField("b", StringType))) + * // Create a DataFrame that is configured to load data from the CSV file. + * val csvDF = session.read.option("skip_header", 1).schema(userSchema).csv(filePath) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = csvDF.collect() + * }}} + * + * '''Example 2:''' Loading a gzip compressed json file. + * {{{ + * val filePath = "@mystage2/data.json.gz" + * // Create a DataFrame that is configured to load data from the gzipped JSON file. + * val jsonDF = session.read.option("compression", "gzip").json(filePath) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = jsonDF.collect() + * }}} + * + * If you want to load only a subset of files from the stage, you can use the + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#loading-using-pattern-matching pattern]] + * option to specify a regular expression that matches the files that you want to load. + * + * '''Example 3:''' Loading only the CSV files from a stage location. + * {{{ + * import com.snowflake.snowpark.types._ + * // Define the schema for the data in the CSV files. + * val userSchema: StructType = StructType(Seq(StructField("a", IntegerType),StructField("b", StringType))) + * // Create a DataFrame that is configured to load data from the CSV files in the stage. + * val csvDF = session.read.option("pattern", ".*[.]csv").schema(userSchema).csv("@stage_location") + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = csvDF.collect() + * }}} + * + * In addition, if you want to load the files from the stage into a specified table with COPY INTO + * `` command, you can use a `copyInto()` method e.g. + * [[CopyableDataFrame.copyInto(tableName:String)*]]. + * + * '''Example 4:''' Loading data from a JSON file in a stage to a table by using COPY INTO + * ``. + * {{{ + * val filePath = "@mystage1" + * // Create a DataFrame that is configured to load data from the JSON file. + * val jsonDF = session.read.json(filePath) + * // Load the data into the specified table `T1`. + * // The table "T1" should exist before calling copyInto(). + * jsonDF.copyInto("T1") + * }}} + * + * @param session + * Snowflake [[Session]] + * @since 0.1.0 + */ // scalastyle:on class DataFrameReader(session: Session) { private val stagedFileReader = new StagedFileReader(session) - /** Returns a [[DataFrame]] that is set up to load data from the specified table. 
- * - * For the {@code name} argument, you can specify an unqualified name (if the table is in the - * current database and schema) or a fully qualified name (`db.schema.name`). - * - * Note that the data is not loaded in the DataFrame until you call a method that performs an - * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). - * - * @since 0.1.0 - * @param name - * Name of the table to use. - * @return - * A [[DataFrame]] - */ + /** + * Returns a [[DataFrame]] that is set up to load data from the specified table. + * + * For the {@code name} argument, you can specify an unqualified name (if the table is in the + * current database and schema) or a fully qualified name (`db.schema.name`). + * + * Note that the data is not loaded in the DataFrame until you call a method that performs an + * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). + * + * @since 0.1.0 + * @param name + * Name of the table to use. + * @return + * A [[DataFrame]] + */ def table(name: String): DataFrame = session.table(name) - /** Returns a DataFrameReader instance with the specified schema configuration for the data to be - * read. - * - * To define the schema for the data that you want to read, use a [[types.StructType]] object. - * - * @since 0.1.0 - * @param schema - * Schema configuration for the data to be read. - * @return - * A [[DataFrameReader]] - */ + /** + * Returns a DataFrameReader instance with the specified schema configuration for the data to be + * read. + * + * To define the schema for the data that you want to read, use a [[types.StructType]] object. + * + * @since 0.1.0 + * @param schema + * Schema configuration for the data to be read. + * @return + * A [[DataFrameReader]] + */ def schema(schema: StructType): DataFrameReader = { stagedFileReader.userSchema(schema) this } - /** Returns a [[CopyableDataFrame]] that is set up to load data from the specified CSV file. - * - * This method only supports reading data from files in Snowflake stages. - * - * Note that the data is not loaded in the DataFrame until you call a method that performs an - * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). - * - * For example: - * {{{ - * val filePath = "@mystage1/myfile.csv" - * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. - * val df = session.read.schema(userSchema).csv(fileInAStage).filter(col("a") < 2) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = df.collect() - * }}} - * - * If you want to use the `COPY INTO ` command to load data from staged files to a - * specified table, call the `copyInto()` method (e.g. - * [[CopyableDataFrame.copyInto(tableName:String)*]]). - * - * For example: The following example loads the CSV files in the stage location specified by - * `path` to the table `T1`. - * {{{ - * // The table "T1" should exist before calling copyInto(). - * session.read.schema(userSchema).csv(path).copyInto("T1") - * }}} - * - * @since 0.1.0 - * @param path - * The path to the CSV file (including the stage name). - * @return - * A [[CopyableDataFrame]] - */ + /** + * Returns a [[CopyableDataFrame]] that is set up to load data from the specified CSV file. + * + * This method only supports reading data from files in Snowflake stages. + * + * Note that the data is not loaded in the DataFrame until you call a method that performs an + * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). 
+ * + * For example: + * {{{ + * val filePath = "@mystage1/myfile.csv" + * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. + * val df = session.read.schema(userSchema).csv(fileInAStage).filter(col("a") < 2) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = df.collect() + * }}} + * + * If you want to use the `COPY INTO ` command to load data from staged files to a + * specified table, call the `copyInto()` method (e.g. + * [[CopyableDataFrame.copyInto(tableName:String)*]]). + * + * For example: The following example loads the CSV files in the stage location specified by + * `path` to the table `T1`. + * {{{ + * // The table "T1" should exist before calling copyInto(). + * session.read.schema(userSchema).csv(path).copyInto("T1") + * }}} + * + * @since 0.1.0 + * @param path + * The path to the CSV file (including the stage name). + * @return + * A [[CopyableDataFrame]] + */ def csv(path: String): CopyableDataFrame = { stagedFileReader .path(path) @@ -166,270 +169,277 @@ class DataFrameReader(session: Session) { new CopyableDataFrame(session, stagedFileReader.createSnowflakePlan(), Seq(), stagedFileReader) } - /** Returns a [[DataFrame]] that is set up to load data from the specified JSON file. - * - * This method only supports reading data from files in Snowflake stages. - * - * Note that the data is not loaded in the DataFrame until you call a method that performs an - * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). - * - * For example: - * {{{ - * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. - * val df = session.read.json(path).where(col("\$1:num") > 1) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = df.collect() - * }}} - * - * If you want to use the `COPY INTO ` command to load data from staged files to a - * specified table, call the `copyInto()` method (e.g. - * [[CopyableDataFrame.copyInto(tableName:String)*]]). - * - * For example: The following example loads the JSON files in the stage location specified by - * `path` to the table `T1`. - * {{{ - * // The table "T1" should exist before calling copyInto(). - * session.read.json(path).copyInto("T1") - * }}} - * - * @since 0.1.0 - * @param path - * The path to the JSON file (including the stage name). - * @return - * A [[CopyableDataFrame]] - */ + /** + * Returns a [[DataFrame]] that is set up to load data from the specified JSON file. + * + * This method only supports reading data from files in Snowflake stages. + * + * Note that the data is not loaded in the DataFrame until you call a method that performs an + * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). + * + * For example: + * {{{ + * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. + * val df = session.read.json(path).where(col("\$1:num") > 1) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = df.collect() + * }}} + * + * If you want to use the `COPY INTO ` command to load data from staged files to a + * specified table, call the `copyInto()` method (e.g. + * [[CopyableDataFrame.copyInto(tableName:String)*]]). + * + * For example: The following example loads the JSON files in the stage location specified by + * `path` to the table `T1`. + * {{{ + * // The table "T1" should exist before calling copyInto(). 
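+ * // copyInto() is an action: it runs the COPY INTO command immediately, so no collect() call is needed.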
+ * session.read.json(path).copyInto("T1") + * }}} + * + * @since 0.1.0 + * @param path + * The path to the JSON file (including the stage name). + * @return + * A [[CopyableDataFrame]] + */ def json(path: String): CopyableDataFrame = readSemiStructuredFile(path, "JSON") - /** Returns a [[DataFrame]] that is set up to load data from the specified Avro file. - * - * This method only supports reading data from files in Snowflake stages. - * - * Note that the data is not loaded in the DataFrame until you call a method that performs an - * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). - * - * For example: - * {{{ - * session.read.avro(path).where(col("\$1:num") > 1) - * }}} - * - * If you want to use the `COPY INTO ` command to load data from staged files to a - * specified table, call the `copyInto()` method (e.g. - * [[CopyableDataFrame.copyInto(tableName:String)*]]). - * - * For example: The following example loads the Avro files in the stage location specified by - * `path` to the table `T1`. - * {{{ - * // The table "T1" should exist before calling copyInto(). - * session.read.avro(path).copyInto("T1") - * }}} - * - * @since 0.1.0 - * @param path - * The path to the Avro file (including the stage name). - * @return - * A [[CopyableDataFrame]] - */ + /** + * Returns a [[DataFrame]] that is set up to load data from the specified Avro file. + * + * This method only supports reading data from files in Snowflake stages. + * + * Note that the data is not loaded in the DataFrame until you call a method that performs an + * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). + * + * For example: + * {{{ + * session.read.avro(path).where(col("\$1:num") > 1) + * }}} + * + * If you want to use the `COPY INTO ` command to load data from staged files to a + * specified table, call the `copyInto()` method (e.g. + * [[CopyableDataFrame.copyInto(tableName:String)*]]). + * + * For example: The following example loads the Avro files in the stage location specified by + * `path` to the table `T1`. + * {{{ + * // The table "T1" should exist before calling copyInto(). + * session.read.avro(path).copyInto("T1") + * }}} + * + * @since 0.1.0 + * @param path + * The path to the Avro file (including the stage name). + * @return + * A [[CopyableDataFrame]] + */ def avro(path: String): CopyableDataFrame = readSemiStructuredFile(path, "AVRO") - /** Returns a [[DataFrame]] that is set up to load data from the specified Parquet file. - * - * This method only supports reading data from files in Snowflake stages. - * - * Note that the data is not loaded in the DataFrame until you call a method that performs an - * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). - * - * For example: - * {{{ - * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. - * val df = session.read.parquet(path).where(col("\$1:num") > 1) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = df.collect() - * }}} - * - * If you want to use the `COPY INTO ` command to load data from staged files to a - * specified table, call the `copyInto()` method (e.g. - * [[CopyableDataFrame.copyInto(tableName:String)*]]). - * - * For example: The following example loads the Parquet files in the stage location specified by - * `path` to the table `T1`. - * {{{ - * // The table "T1" should exist before calling copyInto(). 
- * session.read.parquet(path).copyInto("T1") - * }}} - * - * @since 0.1.0 - * @param path - * The path to the Parquet file (including the stage name). - * @return - * A [[CopyableDataFrame]] - */ + /** + * Returns a [[DataFrame]] that is set up to load data from the specified Parquet file. + * + * This method only supports reading data from files in Snowflake stages. + * + * Note that the data is not loaded in the DataFrame until you call a method that performs an + * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). + * + * For example: + * {{{ + * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. + * val df = session.read.parquet(path).where(col("\$1:num") > 1) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = df.collect() + * }}} + * + * If you want to use the `COPY INTO ` command to load data from staged files to a + * specified table, call the `copyInto()` method (e.g. + * [[CopyableDataFrame.copyInto(tableName:String)*]]). + * + * For example: The following example loads the Parquet files in the stage location specified by + * `path` to the table `T1`. + * {{{ + * // The table "T1" should exist before calling copyInto(). + * session.read.parquet(path).copyInto("T1") + * }}} + * + * @since 0.1.0 + * @param path + * The path to the Parquet file (including the stage name). + * @return + * A [[CopyableDataFrame]] + */ def parquet(path: String): CopyableDataFrame = readSemiStructuredFile(path, "PARQUET") - /** Returns a [[DataFrame]] that is set up to load data from the specified ORC file. - * - * This method only supports reading data from files in Snowflake stages. - * - * Note that the data is not loaded in the DataFrame until you call a method that performs an - * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). - * - * For example: - * {{{ - * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. - * val df = session.read.orc(path).where(col("\$1:num") > 1) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = df.collect() - * }}} - * - * If you want to use the `COPY INTO ` command to load data from staged files to a - * specified table, call the `copyInto()` method (e.g. - * [[CopyableDataFrame.copyInto(tableName:String)*]]). - * - * For example: The following example loads the ORC files in the stage location specified by - * `path` to the table `T1`. - * {{{ - * // The table "T1" should exist before calling copyInto(). - * session.read.orc(path).copyInto("T1") - * }}} - * - * @since 0.1.0 - * @param path - * The path to the ORC file (including the stage name). - * @return - * A [[CopyableDataFrame]] - */ + /** + * Returns a [[DataFrame]] that is set up to load data from the specified ORC file. + * + * This method only supports reading data from files in Snowflake stages. + * + * Note that the data is not loaded in the DataFrame until you call a method that performs an + * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). + * + * For example: + * {{{ + * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. + * val df = session.read.orc(path).where(col("\$1:num") > 1) + * // Load the data into the DataFrame and return an Array of Rows containing the results. 
+ * val results = df.collect() + * }}} + * + * If you want to use the `COPY INTO ` command to load data from staged files to a + * specified table, call the `copyInto()` method (e.g. + * [[CopyableDataFrame.copyInto(tableName:String)*]]). + * + * For example: The following example loads the ORC files in the stage location specified by + * `path` to the table `T1`. + * {{{ + * // The table "T1" should exist before calling copyInto(). + * session.read.orc(path).copyInto("T1") + * }}} + * + * @since 0.1.0 + * @param path + * The path to the ORC file (including the stage name). + * @return + * A [[CopyableDataFrame]] + */ def orc(path: String): CopyableDataFrame = readSemiStructuredFile(path, "ORC") - /** Returns a [[DataFrame]] that is set up to load data from the specified XML file. - * - * This method only supports reading data from files in Snowflake stages. - * - * Note that the data is not loaded in the DataFrame until you call a method that performs an - * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). - * - * For example: - * {{{ - * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. - * val df = session.read.xml(path).where(col("xmlget(\$1, 'num', 0):\"$\"") > 1) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = df.collect() - * }}} - * - * If you want to use the `COPY INTO ` command to load data from staged files to a - * specified table, call the `copyInto()` method (e.g. - * [[CopyableDataFrame.copyInto(tableName:String)*]]). - * - * For example: The following example loads the XML files in the stage location specified by - * `path` to the table `T1`. - * {{{ - * // The table "T1" should exist before calling copyInto(). - * session.read.xml(path).copyInto("T1") - * }}} - * - * @since 0.1.0 - * @param path - * The path to the XML file (including the stage name). - * @return - * A [[CopyableDataFrame]] - */ + /** + * Returns a [[DataFrame]] that is set up to load data from the specified XML file. + * + * This method only supports reading data from files in Snowflake stages. + * + * Note that the data is not loaded in the DataFrame until you call a method that performs an + * action (e.g. [[DataFrame.collect]], [[DataFrame.count]], etc.). + * + * For example: + * {{{ + * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. + * val df = session.read.xml(path).where(col("xmlget(\$1, 'num', 0):\"$\"") > 1) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = df.collect() + * }}} + * + * If you want to use the `COPY INTO ` command to load data from staged files to a + * specified table, call the `copyInto()` method (e.g. + * [[CopyableDataFrame.copyInto(tableName:String)*]]). + * + * For example: The following example loads the XML files in the stage location specified by + * `path` to the table `T1`. + * {{{ + * // The table "T1" should exist before calling copyInto(). + * session.read.xml(path).copyInto("T1") + * }}} + * + * @since 0.1.0 + * @param path + * The path to the XML file (including the stage name). + * @return + * A [[CopyableDataFrame]] + */ def xml(path: String): CopyableDataFrame = readSemiStructuredFile(path, "XML") // scalastyle:off - /** Sets the specified option in the DataFrameReader. 
- * - * Use this method to configure any - * [[https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#format-type-options-formattypeoptions format-specific options]] - * and - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions copy options]]. - * (Note that although specifying copy options can make error handling more robust during the - * reading process, it may have an effect on performance.) - * - * '''Example 1:''' Loading a LZO compressed Parquet file. - * {{{ - * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. - * val df = session.read.option("compression", "lzo").parquet(filePath) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = df.collect() - * }}} - * - * '''Example 2:''' Loading an uncompressed JSON file. - * {{{ - * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. - * val df = session.read.option("compression", "none").json(filePath) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = df.collect() - * }}} - * - * '''Example 3:''' Loading the first two columns of a colon-delimited CSV file in which the - * first line is the header: - * {{{ - * import com.snowflake.snowpark.types._ - * // Define the schema for the data in the CSV files. - * val userSchema = StructType(Seq(StructField("a", IntegerType), StructField("b", StringType))) - * // Create a DataFrame that is configured to load data from the CSV file. - * val csvDF = session.read.option("field_delimiter", ":").option("skip_header", 1).schema(userSchema).csv(filePath) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = csvDF.collect() - * }}} - * - * In addition, if you want to load only a subset of files from the stage, you can use the - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#loading-using-pattern-matching pattern]] - * option to specify a regular expression that matches the files that you want to load. - * - * '''Example 4:''' Loading only the CSV files from a stage location. - * {{{ - * import com.snowflake.snowpark.types._ - * // Define the schema for the data in the CSV files. - * val userSchema: StructType = StructType(Seq(StructField("a", IntegerType),StructField("b", StringType))) - * // Create a DataFrame that is configured to load data from the CSV files in the stage. - * val csvDF = session.read.option("pattern", ".*[.]csv").schema(userSchema).csv("@stage_location") - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = csvDF.collect() - * }}} - * - * @since 0.1.0 - * @param key - * Name of the option (e.g. {@code compression} , {@code skip_header} , etc.). - * @param value - * Value of the option. - * @return - * A [[DataFrameReader]] - */ + /** + * Sets the specified option in the DataFrameReader. + * + * Use this method to configure any + * [[https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#format-type-options-formattypeoptions format-specific options]] + * and + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions copy options]]. + * (Note that although specifying copy options can make error handling more robust during the + * reading process, it may have an effect on performance.) + * + * '''Example 1:''' Loading a LZO compressed Parquet file. 
+ * {{{ + * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. + * val df = session.read.option("compression", "lzo").parquet(filePath) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = df.collect() + * }}} + * + * '''Example 2:''' Loading an uncompressed JSON file. + * {{{ + * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. + * val df = session.read.option("compression", "none").json(filePath) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = df.collect() + * }}} + * + * '''Example 3:''' Loading the first two columns of a colon-delimited CSV file in which the first + * line is the header: + * {{{ + * import com.snowflake.snowpark.types._ + * // Define the schema for the data in the CSV files. + * val userSchema = StructType(Seq(StructField("a", IntegerType), StructField("b", StringType))) + * // Create a DataFrame that is configured to load data from the CSV file. + * val csvDF = session.read.option("field_delimiter", ":").option("skip_header", 1).schema(userSchema).csv(filePath) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = csvDF.collect() + * }}} + * + * In addition, if you want to load only a subset of files from the stage, you can use the + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#loading-using-pattern-matching pattern]] + * option to specify a regular expression that matches the files that you want to load. + * + * '''Example 4:''' Loading only the CSV files from a stage location. + * {{{ + * import com.snowflake.snowpark.types._ + * // Define the schema for the data in the CSV files. + * val userSchema: StructType = StructType(Seq(StructField("a", IntegerType),StructField("b", StringType))) + * // Create a DataFrame that is configured to load data from the CSV files in the stage. + * val csvDF = session.read.option("pattern", ".*[.]csv").schema(userSchema).csv("@stage_location") + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = csvDF.collect() + * }}} + * + * @since 0.1.0 + * @param key + * Name of the option (e.g. {@code compression} , {@code skip_header} , etc.). + * @param value + * Value of the option. + * @return + * A [[DataFrameReader]] + */ // scalastyle:on def option(key: String, value: Any): DataFrameReader = { stagedFileReader.option(key, value) this } // scalastyle:off - /** Sets multiple specified options in the DataFrameReader. - * - * Use this method to configure any - * [[https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#format-type-options-formattypeoptions format-specific options]] - * and - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions copy options]]. - * (Note that although specifying copy options can make error handling more robust during the - * reading process, it may have an effect on performance.) - * - * In addition, if you want to load only a subset of files from the stage, you can use the - * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#loading-using-pattern-matching pattern]] - * option to specify a regular expression that matches the files that you want to load. - * - * '''Example 1:''' Loading a LZO compressed Parquet file and removing any white space from the - * fields. 
- * - * {{{ - * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. - * val df = session.read.option(Map("compression"-> "lzo", "trim_space" -> true)).parquet(filePath) - * // Load the data into the DataFrame and return an Array of Rows containing the results. - * val results = df.collect() - * }}} - * - * @since 0.1.0 - * @param configs - * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and - * their corresponding values. - * @return - * A [[DataFrameReader]] - */ + /** + * Sets multiple specified options in the DataFrameReader. + * + * Use this method to configure any + * [[https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#format-type-options-formattypeoptions format-specific options]] + * and + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions copy options]]. + * (Note that although specifying copy options can make error handling more robust during the + * reading process, it may have an effect on performance.) + * + * In addition, if you want to load only a subset of files from the stage, you can use the + * [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#loading-using-pattern-matching pattern]] + * option to specify a regular expression that matches the files that you want to load. + * + * '''Example 1:''' Loading a LZO compressed Parquet file and removing any white space from the + * fields. + * + * {{{ + * // Create a DataFrame that uses a DataFrameReader to load data from a file in a stage. + * val df = session.read.option(Map("compression"-> "lzo", "trim_space" -> true)).parquet(filePath) + * // Load the data into the DataFrame and return an Array of Rows containing the results. + * val results = df.collect() + * }}} + * + * @since 0.1.0 + * @param configs + * Map of the names of options (e.g. {@code compression} , {@code skip_header} , etc.) and their + * corresponding values. + * @return + * A [[DataFrameReader]] + */ // scalastyle:on def options(configs: Map[String, Any]): DataFrameReader = { stagedFileReader.options(configs) diff --git a/src/main/scala/com/snowflake/snowpark/DataFrameStatFunctions.scala b/src/main/scala/com/snowflake/snowpark/DataFrameStatFunctions.scala index 18930352..e069aa01 100644 --- a/src/main/scala/com/snowflake/snowpark/DataFrameStatFunctions.scala +++ b/src/main/scala/com/snowflake/snowpark/DataFrameStatFunctions.scala @@ -10,12 +10,13 @@ import com.snowflake.snowpark.functions.{ corr => corr_func } -/** Provides eagerly computed statistical functions for DataFrames. - * - * To access an object of this class, use [[DataFrame.stat]]. - * - * @since 0.2.0 - */ +/** + * Provides eagerly computed statistical functions for DataFrames. + * + * To access an object of this class, use [[DataFrame.stat]]. + * + * @since 0.2.0 + */ final class DataFrameStatFunctions private[snowpark] (df: DataFrame) extends Logging { // Used as temporary column name in approxQuantile @@ -25,88 +26,91 @@ final class DataFrameStatFunctions private[snowpark] (df: DataFrame) extends Log // crosstab execution time: 1000 -> 25s, 3000 -> 2.5 min, 5000 -> 10 min. private val maxColumnsPerTable = 1000 - /** Calculates the correlation coefficient for non-null pairs in two numeric columns. 
- * - * For example, the following code: - * {{{ - * import session.implicits._ - * val df = Seq((0.1, 0.5), (0.2, 0.6), (0.3, 0.7)).toDF("a", "b") - * double res = df.stat.corr("a", "b").get - * }}} - * - * prints out the following result: - * {{{ - * res: 0.9999999999999991 - * }}} - * - * @param col1 - * The name of the first numeric column to use. - * @param col2 - * The name of the second numeric column to use. - * @since 0.2.0 - * @return - * The correlation of the two numeric columns. If there is not enough data to generate the - * correlation, the method returns None. - */ + /** + * Calculates the correlation coefficient for non-null pairs in two numeric columns. + * + * For example, the following code: + * {{{ + * import session.implicits._ + * val df = Seq((0.1, 0.5), (0.2, 0.6), (0.3, 0.7)).toDF("a", "b") + * double res = df.stat.corr("a", "b").get + * }}} + * + * prints out the following result: + * {{{ + * res: 0.9999999999999991 + * }}} + * + * @param col1 + * The name of the first numeric column to use. + * @param col2 + * The name of the second numeric column to use. + * @since 0.2.0 + * @return + * The correlation of the two numeric columns. If there is not enough data to generate the + * correlation, the method returns None. + */ def corr(col1: String, col2: String): Option[Double] = action("corr") { val res = df.select(corr_func(Col(col1), Col(col2))).limit(1).collect().head if (res.isNullAt(0)) None else Some(res.getDouble(0)) } - /** Calculates the sample covariance for non-null pairs in two numeric columns. - * - * For example, the following code: - * {{{ - * import session.implicits._ - * val df = Seq((0.1, 0.5), (0.2, 0.6), (0.3, 0.7)).toDF("a", "b") - * double res = df.stat.cov("a", "b").get - * }}} - * - * prints out the following result: - * {{{ - * res: 0.010000000000000037 - * }}} - * - * @param col1 - * The name of the first numeric column to use. - * @param col2 - * The name of the second numeric column to use. - * @since 0.2.0 - * @return - * The sample covariance of the two numeric columns, If there is not enough data to generate - * the covariance, the method returns None. - */ + /** + * Calculates the sample covariance for non-null pairs in two numeric columns. + * + * For example, the following code: + * {{{ + * import session.implicits._ + * val df = Seq((0.1, 0.5), (0.2, 0.6), (0.3, 0.7)).toDF("a", "b") + * double res = df.stat.cov("a", "b").get + * }}} + * + * prints out the following result: + * {{{ + * res: 0.010000000000000037 + * }}} + * + * @param col1 + * The name of the first numeric column to use. + * @param col2 + * The name of the second numeric column to use. + * @since 0.2.0 + * @return + * The sample covariance of the two numeric columns, If there is not enough data to generate the + * covariance, the method returns None. + */ def cov(col1: String, col2: String): Option[Double] = action("cov") { val res = df.select(covar_samp(Col(col1), Col(col2))).limit(1).collect().head if (res.isNullAt(0)) None else Some(res.getDouble(0)) } - /** For a specified numeric column and an array of desired quantiles, returns an approximate value - * for the column at each of the desired quantiles. - * - * This function uses the t-Digest algorithm. 
- * - * For example, the following code: - * {{{ - * import session.implicits._ - * val df = Seq(1, 2, 3, 4, 5, 6, 7, 8, 9, 0).toDF("a") - * val res = df.stat.approxQuantile("a", Array(0, 0.1, 0.4, 0.6, 1)) - * }}} - * - * prints out the following result: - * {{{ - * res: Array(Some(-0.5), Some(0.5), Some(3.5), Some(5.5), Some(9.5)) - * }}} - * - * @param col - * The name of the numeric column. - * @param percentile - * An array of double values greater than or equal to 0.0 and less than 1.0. - * @since 0.2.0 - * @return - * An array of approximate percentile values, If there is not enough data to calculate the - * quantile, the method returns None. - */ + /** + * For a specified numeric column and an array of desired quantiles, returns an approximate value + * for the column at each of the desired quantiles. + * + * This function uses the t-Digest algorithm. + * + * For example, the following code: + * {{{ + * import session.implicits._ + * val df = Seq(1, 2, 3, 4, 5, 6, 7, 8, 9, 0).toDF("a") + * val res = df.stat.approxQuantile("a", Array(0, 0.1, 0.4, 0.6, 1)) + * }}} + * + * prints out the following result: + * {{{ + * res: Array(Some(-0.5), Some(0.5), Some(3.5), Some(5.5), Some(9.5)) + * }}} + * + * @param col + * The name of the numeric column. + * @param percentile + * An array of double values greater than or equal to 0.0 and less than 1.0. + * @since 0.2.0 + * @return + * An array of approximate percentile values, If there is not enough data to calculate the + * quantile, the method returns None. + */ def approxQuantile(col: String, percentile: Array[Double]): Array[Option[Double]] = action("approxQuantile") { if (percentile.isEmpty) { @@ -124,36 +128,36 @@ final class DataFrameStatFunctions private[snowpark] (df: DataFrame) extends Log }.toArray } - /** For an array of numeric columns and an array of desired quantiles, returns a matrix of - * approximate values for each column at each of the desired quantiles. For example, - * `result(0)(1)` contains the approximate value for column `cols(0)` at quantile - * `percentile(1)`. - * - * This function uses the t-Digest algorithm. - * - * For example, the following code: - * {{{ - * import session.implicits._ - * val df = Seq((0.1, 0.5), (0.2, 0.6), (0.3, 0.7)).toDF("a", "b") - * val res = double2.stat.approxQuantile(Array("a", "b"), Array(0, 0.1, 0.6)) - * }}} - * - * prints out the following result: - * {{{ - * res: Array(Array(Some(0.05), Some(0.15000000000000002), Some(0.25)), - * Array(Some(0.45), Some(0.55), Some(0.6499999999999999))) - * }}} - * - * @param cols - * An array of column names. - * @param percentile - * An array of double values greater than or equal to 0.0 and less than 1.0. - * @since 0.2.0 - * @return - * A matrix with the dimensions `(cols.size * percentile.size)` containing the approximate - * percentile values. If there is not enough data to calculate the quantile, the method returns - * None. - */ + /** + * For an array of numeric columns and an array of desired quantiles, returns a matrix of + * approximate values for each column at each of the desired quantiles. For example, + * `result(0)(1)` contains the approximate value for column `cols(0)` at quantile `percentile(1)`. + * + * This function uses the t-Digest algorithm. 
+ * + * For example, the following code: + * {{{ + * import session.implicits._ + * val df = Seq((0.1, 0.5), (0.2, 0.6), (0.3, 0.7)).toDF("a", "b") + * val res = double2.stat.approxQuantile(Array("a", "b"), Array(0, 0.1, 0.6)) + * }}} + * + * prints out the following result: + * {{{ + * res: Array(Array(Some(0.05), Some(0.15000000000000002), Some(0.25)), + * Array(Some(0.45), Some(0.55), Some(0.6499999999999999))) + * }}} + * + * @param cols + * An array of column names. + * @param percentile + * An array of double values greater than or equal to 0.0 and less than 1.0. + * @since 0.2.0 + * @return + * A matrix with the dimensions `(cols.size * percentile.size)` containing the approximate + * percentile values. If there is not enough data to calculate the quantile, the method returns + * None. + */ def approxQuantile(cols: Array[String], percentile: Array[Double]): Array[Array[Option[Double]]] = action("approxQuantile") { if (cols.isEmpty || percentile.isEmpty) { @@ -186,46 +190,47 @@ final class DataFrameStatFunctions private[snowpark] (df: DataFrame) extends Log .toArray } - /** Computes a pair-wise frequency table (a ''contingency table'') for the specified columns. The - * method returns a DataFrame containing this table. - * - * In the returned contingency table: - * - * - The first column of each row contains the distinct values of {@code col1} . - * - The name of the first column is the name of {@code col1} . - * - The rest of the column names are the distinct values of {@code col2} . - * - The counts are returned as Longs. - * - For pairs that have no occurrences, the contingency table contains 0 as the count. - * - * Note: The number of distinct values in {@code col2} should not exceed 1000. - * - * For example, the following code: - * {{{ - * import session.implicits._ - * val df = Seq((1, 1), (1, 2), (2, 1), (2, 1), (2, 3), (3, 2), (3, 3)).toDF("key", "value") - * val ct = df.stat.crosstab("key", "value") - * ct.show() - * }}} - * - * prints out the following result: - * {{{ - * --------------------------------------------------------------------------------------------- - * |"KEY" |"CAST(1 AS NUMBER(38,0))" |"CAST(2 AS NUMBER(38,0))" |"CAST(3 AS NUMBER(38,0))" | - * --------------------------------------------------------------------------------------------- - * |1 |1 |1 |0 | - * |2 |2 |0 |1 | - * |3 |0 |1 |1 | - * --------------------------------------------------------------------------------------------- - * }}} - * - * @param col1 - * The name of the first column to use. - * @param col2 - * The name of the second column to use. - * @since 0.2.0 - * @return - * A DataFrame containing the contingency table. - */ + /** + * Computes a pair-wise frequency table (a ''contingency table'') for the specified columns. The + * method returns a DataFrame containing this table. + * + * In the returned contingency table: + * + * - The first column of each row contains the distinct values of {@code col1} . + * - The name of the first column is the name of {@code col1} . + * - The rest of the column names are the distinct values of {@code col2} . + * - The counts are returned as Longs. + * - For pairs that have no occurrences, the contingency table contains 0 as the count. + * + * Note: The number of distinct values in {@code col2} should not exceed 1000. 
+ * + * For example, the following code: + * {{{ + * import session.implicits._ + * val df = Seq((1, 1), (1, 2), (2, 1), (2, 1), (2, 3), (3, 2), (3, 3)).toDF("key", "value") + * val ct = df.stat.crosstab("key", "value") + * ct.show() + * }}} + * + * prints out the following result: + * {{{ + * --------------------------------------------------------------------------------------------- + * |"KEY" |"CAST(1 AS NUMBER(38,0))" |"CAST(2 AS NUMBER(38,0))" |"CAST(3 AS NUMBER(38,0))" | + * --------------------------------------------------------------------------------------------- + * |1 |1 |1 |0 | + * |2 |2 |0 |1 | + * |3 |0 |1 |1 | + * --------------------------------------------------------------------------------------------- + * }}} + * + * @param col1 + * The name of the first column to use. + * @param col2 + * The name of the second column to use. + * @since 0.2.0 + * @return + * A DataFrame containing the contingency table. + */ def crosstab(col1: String, col2: String): DataFrame = action("crosstab") { // Limit the distinct values of col2 to maxColumnsPerTable. val rowCount = @@ -241,38 +246,39 @@ final class DataFrameStatFunctions private[snowpark] (df: DataFrame) extends Log df.select(col1, col2).pivot(col2, columnNames).agg(count(Col(col2))) } - /** Returns a DataFrame containing a stratified sample without replacement, based on a Map that - * specifies the fraction for each stratum. - * - * For example, the following code: - * {{{ - * import session.implicits._ - * val df = Seq(("Bob", 17), ("Alice", 10), ("Nico", 8), ("Bob", 12)).toDF("name", "age") - * val fractions = Map("Bob" -> 0.5, "Nico" -> 1.0) - * df.stat.sampleBy(col("name"), fractions).show() - * }}} - * - * prints out the following result: - * {{{ - * ------------------ - * |"NAME" |"AGE" | - * ------------------ - * |Bob |17 | - * |Nico |8 | - * ------------------ - * }}} - * - * @param col - * An expression for the column that defines the strata. - * @param fractions - * A Map that specifies the fraction to use for the sample for each stratum. If a stratum is - * not specified in the Map, the method uses 0 as the fraction. - * @tparam T - * The type of the stratum. - * @since 0.2.0 - * @return - * A new DataFrame that contains the stratified sample. - */ + /** + * Returns a DataFrame containing a stratified sample without replacement, based on a Map that + * specifies the fraction for each stratum. + * + * For example, the following code: + * {{{ + * import session.implicits._ + * val df = Seq(("Bob", 17), ("Alice", 10), ("Nico", 8), ("Bob", 12)).toDF("name", "age") + * val fractions = Map("Bob" -> 0.5, "Nico" -> 1.0) + * df.stat.sampleBy(col("name"), fractions).show() + * }}} + * + * prints out the following result: + * {{{ + * ------------------ + * |"NAME" |"AGE" | + * ------------------ + * |Bob |17 | + * |Nico |8 | + * ------------------ + * }}} + * + * @param col + * An expression for the column that defines the strata. + * @param fractions + * A Map that specifies the fraction to use for the sample for each stratum. If a stratum is not + * specified in the Map, the method uses 0 as the fraction. + * @tparam T + * The type of the stratum. + * @since 0.2.0 + * @return + * A new DataFrame that contains the stratified sample. 
+ */ def sampleBy[T](col: Column, fractions: Map[T, Double]): DataFrame = transformation("sampleBy") { if (fractions.isEmpty) { @@ -286,38 +292,39 @@ final class DataFrameStatFunctions private[snowpark] (df: DataFrame) extends Log resDF } - /** Returns a DataFrame containing a stratified sample without replacement, based on a Map that - * specifies the fraction for each stratum. - * - * For example, the following code: - * {{{ - * import session.implicits._ - * val df = Seq(("Bob", 17), ("Alice", 10), ("Nico", 8), ("Bob", 12)).toDF("name", "age") - * val fractions = Map("Bob" -> 0.5, "Nico" -> 1.0) - * df.stat.sampleBy("name", fractions).show() - * }}} - * - * prints out the following result: - * {{{ - * ------------------ - * |"NAME" |"AGE" | - * ------------------ - * |Bob |17 | - * |Nico |8 | - * ------------------ - * }}} - * - * @param col - * The name of the column that defines the strata. - * @param fractions - * A Map that specifies the fraction to use for the sample for each stratum. If a stratum is - * not specified in the Map, the method uses 0 as the fraction. - * @tparam T - * The type of the stratum. - * @since 0.2.0 - * @return - * A new DataFrame that contains the stratified sample. - */ + /** + * Returns a DataFrame containing a stratified sample without replacement, based on a Map that + * specifies the fraction for each stratum. + * + * For example, the following code: + * {{{ + * import session.implicits._ + * val df = Seq(("Bob", 17), ("Alice", 10), ("Nico", 8), ("Bob", 12)).toDF("name", "age") + * val fractions = Map("Bob" -> 0.5, "Nico" -> 1.0) + * df.stat.sampleBy("name", fractions).show() + * }}} + * + * prints out the following result: + * {{{ + * ------------------ + * |"NAME" |"AGE" | + * ------------------ + * |Bob |17 | + * |Nico |8 | + * ------------------ + * }}} + * + * @param col + * The name of the column that defines the strata. + * @param fractions + * A Map that specifies the fraction to use for the sample for each stratum. If a stratum is not + * specified in the Map, the method uses 0 as the fraction. + * @tparam T + * The type of the stratum. + * @since 0.2.0 + * @return + * A new DataFrame that contains the stratified sample. + */ def sampleBy[T](col: String, fractions: Map[T, Double]): DataFrame = transformation("sampleBy") { sampleBy(Col(col), fractions) diff --git a/src/main/scala/com/snowflake/snowpark/DataFrameWriter.scala b/src/main/scala/com/snowflake/snowpark/DataFrameWriter.scala index 7dbba1ac..57f47e51 100644 --- a/src/main/scala/com/snowflake/snowpark/DataFrameWriter.scala +++ b/src/main/scala/com/snowflake/snowpark/DataFrameWriter.scala @@ -14,61 +14,62 @@ import java.util.Locale import scala.collection.JavaConverters._ import scala.collection.mutable -/** Provides methods for writing data from a DataFrame to supported output destinations. - * - * You can write data to the following locations: - * - A Snowflake table - * - A file on a stage - * - * =Saving Data to a Table= - * To use this object to write into a table: - * - * 1. Access an instance of a DataFrameWriter by calling the [[DataFrame.write]] method. - * 1. Specify the save mode to use (overwrite or append) by calling the - * [[mode(saveMode:com\.snowflake\.snowpark\.SaveMode* mode]] method. This method returns a - * DataFrameWriter that is configured to save data using the specified mode. The default - * [[SaveMode]] is [[SaveMode.Append]]. - * 1. (Optional) If you need to set some options for the save operation (e.g. 
columnOrder), call - * the [[options]] or [[option]] method. - * 1. Call a `saveAs*` method to save the data to the specified destination. - * - * For example: - * - * {{{ - * df.write.mode("overwrite").saveAsTable("T") - * }}} - * - * =Saving Data to a File on a Stage= - * To save data to a file on a stage: - * - * 1. Access an instance of a DataFrameWriter by calling the [[DataFrame.write]] method. - * 1. Specify the save mode to use (Overwrite or ErrorIfExists) by calling the - * [[mode(saveMode:com\.snowflake\.snowpark\.SaveMode* mode]] method. This method returns a - * DataFrameWriter that is configured to save data using the specified mode. The default - * [[SaveMode]] is [[SaveMode.ErrorIfExists]] for this case. - * 1. (Optional) If you need to set some options for the save operation (e.g. file format - * options), call the [[options]] or [[option]] method. - * 1. Call the method named after a file format to save the data in the specified format: - * - To save the data in CSV format, call the [[csv]] method. - * - To save the data in JSON format, call the [[json]] method. - * - To save the data in PARQUET format, call the [[parquet]] method. - * - * For example: - * - * '''Example 1:''' Write a DataFrame to a CSV file. - * {{{ - * val result = df.write.csv("@myStage/prefix") - * }}} - * - * '''Example 2:''' Write a DataFrame to a CSV file without compression. - * {{{ - * val result = df.write.option("compression", "none").csv("@myStage/prefix") - * }}} - * - * @param dataFrame - * Input [[DataFrame]] - * @since 0.1.0 - */ +/** + * Provides methods for writing data from a DataFrame to supported output destinations. + * + * You can write data to the following locations: + * - A Snowflake table + * - A file on a stage + * + * =Saving Data to a Table= + * To use this object to write into a table: + * + * 1. Access an instance of a DataFrameWriter by calling the [[DataFrame.write]] method. + * 1. Specify the save mode to use (overwrite or append) by calling the + * [[mode(saveMode:com\.snowflake\.snowpark\.SaveMode* mode]] method. This method returns a + * DataFrameWriter that is configured to save data using the specified mode. The default + * [[SaveMode]] is [[SaveMode.Append]]. + * 1. (Optional) If you need to set some options for the save operation (e.g. columnOrder), call + * the [[options]] or [[option]] method. + * 1. Call a `saveAs*` method to save the data to the specified destination. + * + * For example: + * + * {{{ + * df.write.mode("overwrite").saveAsTable("T") + * }}} + * + * =Saving Data to a File on a Stage= + * To save data to a file on a stage: + * + * 1. Access an instance of a DataFrameWriter by calling the [[DataFrame.write]] method. + * 1. Specify the save mode to use (Overwrite or ErrorIfExists) by calling the + * [[mode(saveMode:com\.snowflake\.snowpark\.SaveMode* mode]] method. This method returns a + * DataFrameWriter that is configured to save data using the specified mode. The default + * [[SaveMode]] is [[SaveMode.ErrorIfExists]] for this case. + * 1. (Optional) If you need to set some options for the save operation (e.g. file format + * options), call the [[options]] or [[option]] method. + * 1. Call the method named after a file format to save the data in the specified format: + * - To save the data in CSV format, call the [[csv]] method. + * - To save the data in JSON format, call the [[json]] method. + * - To save the data in PARQUET format, call the [[parquet]] method. + * + * For example: + * + * '''Example 1:''' Write a DataFrame to a CSV file. 
+ * {{{ + * val result = df.write.csv("@myStage/prefix") + * }}} + * + * '''Example 2:''' Write a DataFrame to a CSV file without compression. + * {{{ + * val result = df.write.option("compression", "none").csv("@myStage/prefix") + * }}} + * + * @param dataFrame + * Input [[DataFrame]] + * @since 0.1.0 + */ class DataFrameWriter(private[snowpark] val dataFrame: DataFrame) { private var saveMode: Option[SaveMode] = None @@ -86,24 +87,25 @@ class DataFrameWriter(private[snowpark] val dataFrame: DataFrame) { dataFrame.session.analyzer.resolve(CopyIntoLocation(stagedFileWriter, dataFrame.plan)) } - /** Saves the contents of the DataFrame to a CSV file on a stage. - * - * '''Example 1:''' Write a DataFrame to a CSV file. - * {{{ - * val result = df.write.csv("@myStage/prefix") - * }}} - * - * '''Example 2:''' Write a DataFrame to a CSV file without compression. - * {{{ - * val result = df.write.option("compression", "none").csv("@myStage/prefix") - * }}} - * - * @since 1.5.0 - * @param path - * The path (including the stage name) to the CSV file. - * @return - * A [[WriteFileResult]] - */ + /** + * Saves the contents of the DataFrame to a CSV file on a stage. + * + * '''Example 1:''' Write a DataFrame to a CSV file. + * {{{ + * val result = df.write.csv("@myStage/prefix") + * }}} + * + * '''Example 2:''' Write a DataFrame to a CSV file without compression. + * {{{ + * val result = df.write.option("compression", "none").csv("@myStage/prefix") + * }}} + * + * @since 1.5.0 + * @param path + * The path (including the stage name) to the CSV file. + * @return + * A [[WriteFileResult]] + */ def csv(path: String): WriteFileResult = action("csv") { val plan = getCopyIntoLocationPlan(path, "CSV") val (rows, attributes) = dataFrame.session.conn.getResultAndMetadata(plan) @@ -111,40 +113,41 @@ class DataFrameWriter(private[snowpark] val dataFrame: DataFrame) { } // scalastyle:off - /** Saves the contents of the DataFrame to a JSON file on a stage. - * - * NOTE: You can call this method only on a DataFrame that contains a column of the type Variant, - * Array, or Map. If the DataFrame does not contain a column of one of these types, you must call - * the `to_variant`, `array_construct`, or `object_construct` to return a DataFrame that contains - * a column of one of these types. - * - * '''Example 1:''' Write a DataFrame with one variant to a JSON file. - * {{{ - * val result = session.sql("select to_variant('a')").write.json("@myStage/prefix") - * }}} - * - * '''Example 2:''' Transform a DataFrame with some columns with array_construct() and write to a - * JSON file without compression. - * {{{ - * val df = Seq((1, 1.1, "a"), (2, 2.2, "b")).toDF("a", "b", "c") - * val df2 = df.select(array_construct(df.schema.names.map(df(_)): _*)) - * val result = df2.write.option("compression", "none").json("@myStage/prefix") - * }}} - * - * '''Example 3:''' Transform a DataFrame with some columns with object_construct() and write to - * a JSON file without compression. - * {{{ - * val df = Seq((1, 1.1, "a"), (2, 2.2, "b")).toDF("a", "b", "c") - * val df2 = df.select(object_construct(df.schema.names.map(x => Seq(lit(x), df(x))).flatten: _*)) - * val result = df2.write.option("compression", "none").json("@myStage/prefix") - * }}} - * - * @since 1.5.0 - * @param path - * The path (including the stage name) to the JSON file. - * @return - * A [[WriteFileResult]] - */ + /** + * Saves the contents of the DataFrame to a JSON file on a stage. 
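The `WriteFileResult` returned by `csv` (and by `json` and `parquet` below) wraps the rows produced by the underlying `COPY INTO <location>` command together with their schema. A minimal sketch of inspecting it, assuming a DataFrame `df` and an existing stage `@myStage`:

{{{
  val result = df.write.option("compression", "none").csv("@myStage/prefix")
  // The column names of the COPY output depend on the copy options used.
  println(result.schema.names.mkString(", "))
  result.rows.foreach(println)
}}}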
+ * + * NOTE: You can call this method only on a DataFrame that contains a column of the type Variant, + * Array, or Map. If the DataFrame does not contain a column of one of these types, you must call + * the `to_variant`, `array_construct`, or `object_construct` to return a DataFrame that contains + * a column of one of these types. + * + * '''Example 1:''' Write a DataFrame with one variant to a JSON file. + * {{{ + * val result = session.sql("select to_variant('a')").write.json("@myStage/prefix") + * }}} + * + * '''Example 2:''' Transform a DataFrame with some columns with array_construct() and write to a + * JSON file without compression. + * {{{ + * val df = Seq((1, 1.1, "a"), (2, 2.2, "b")).toDF("a", "b", "c") + * val df2 = df.select(array_construct(df.schema.names.map(df(_)): _*)) + * val result = df2.write.option("compression", "none").json("@myStage/prefix") + * }}} + * + * '''Example 3:''' Transform a DataFrame with some columns with object_construct() and write to a + * JSON file without compression. + * {{{ + * val df = Seq((1, 1.1, "a"), (2, 2.2, "b")).toDF("a", "b", "c") + * val df2 = df.select(object_construct(df.schema.names.map(x => Seq(lit(x), df(x))).flatten: _*)) + * val result = df2.write.option("compression", "none").json("@myStage/prefix") + * }}} + * + * @since 1.5.0 + * @param path + * The path (including the stage name) to the JSON file. + * @return + * A [[WriteFileResult]] + */ // scalastyle:on def json(path: String): WriteFileResult = action("json") { val plan = getCopyIntoLocationPlan(path, "JSON") @@ -152,24 +155,25 @@ class DataFrameWriter(private[snowpark] val dataFrame: DataFrame) { WriteFileResult(rows, StructType.fromAttributes(attributes)) } - /** Saves the contents of the DataFrame to a Parquet file on a stage. - * - * '''Example 1:''' Write a DataFrame to a parquet file. - * {{{ - * val result = df.write.parquet("@myStage/prefix") - * }}} - * - * '''Example 2:''' Write a DataFrame to a Parquet file without compression. - * {{{ - * val result = df.write.option("compression", "LZO").parquet("@myStage/prefix") - * }}} - * - * @since 1.5.0 - * @param path - * The path (including the stage name) to the Parquet file. - * @return - * A [[WriteFileResult]] - */ + /** + * Saves the contents of the DataFrame to a Parquet file on a stage. + * + * '''Example 1:''' Write a DataFrame to a parquet file. + * {{{ + * val result = df.write.parquet("@myStage/prefix") + * }}} + * + * '''Example 2:''' Write a DataFrame to a Parquet file without compression. + * {{{ + * val result = df.write.option("compression", "LZO").parquet("@myStage/prefix") + * }}} + * + * @since 1.5.0 + * @param path + * The path (including the stage name) to the Parquet file. + * @return + * A [[WriteFileResult]] + */ def parquet(path: String): WriteFileResult = action("parquet") { val plan = getCopyIntoLocationPlan(path, "PARQUET") val (rows, attributes) = dataFrame.session.conn.getResultAndMetadata(plan) @@ -177,46 +181,47 @@ class DataFrameWriter(private[snowpark] val dataFrame: DataFrame) { } // scalastyle:off - /** Sets the specified option in the DataFrameWriter. - * - * =Sets the specified option for saving data to a table= - * - * Use this method to configure options: - * - columnOrder: save data into a table with table's column name order if saveMode is Append - * and target table exists. 
- * - * =Sets the specified option for saving data to a file on a stage= - * - * Use this method to configure options: - * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#format-type-options-formattypeoptions format-specific options]] - * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#copy-options-copyoptions copy options]] - * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#optional-parameters PARTITION BY or HEADER]] - * - * Note that you cannot use the `option` and `options` methods to set the following options: - * - The `TYPE` format type option. - * - The `OVERWRITE` copy option. To set this option, use the - * [[mode(saveMode:com\.snowflake\.snowpark\.SaveMode* mode]] method instead. - * - To set `OVERWRITE` to `TRUE`, use `SaveMode.Overwrite`. - * - To set `OVERWRITE` to `FALSE`, use `SaveMode.ErrorIfExists`. - * - * '''Example 1:''' Write a DataFrame to a CSV file. - * {{{ - * val result = df.write.csv("@myStage/prefix") - * }}} - * - * '''Example 2:''' Write a DataFrame to a CSV file without compression. - * {{{ - * val result = df.write.option("compression", "none").csv("@myStage/prefix") - * }}} - * - * @since 1.4.0 - * @param key - * Name of the option. - * @param value - * Value of the option. - * @return - * A [[DataFrameWriter]] - */ + /** + * Sets the specified option in the DataFrameWriter. + * + * =Sets the specified option for saving data to a table= + * + * Use this method to configure options: + * - columnOrder: save data into a table with table's column name order if saveMode is Append + * and target table exists. + * + * =Sets the specified option for saving data to a file on a stage= + * + * Use this method to configure options: + * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#format-type-options-formattypeoptions format-specific options]] + * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#copy-options-copyoptions copy options]] + * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#optional-parameters PARTITION BY or HEADER]] + * + * Note that you cannot use the `option` and `options` methods to set the following options: + * - The `TYPE` format type option. + * - The `OVERWRITE` copy option. To set this option, use the + * [[mode(saveMode:com\.snowflake\.snowpark\.SaveMode* mode]] method instead. + * - To set `OVERWRITE` to `TRUE`, use `SaveMode.Overwrite`. + * - To set `OVERWRITE` to `FALSE`, use `SaveMode.ErrorIfExists`. + * + * '''Example 1:''' Write a DataFrame to a CSV file. + * {{{ + * val result = df.write.csv("@myStage/prefix") + * }}} + * + * '''Example 2:''' Write a DataFrame to a CSV file without compression. + * {{{ + * val result = df.write.option("compression", "none").csv("@myStage/prefix") + * }}} + * + * @since 1.4.0 + * @param key + * Name of the option. + * @param value + * Value of the option. + * @return + * A [[DataFrameWriter]] + */ // scalastyle:on def option(key: String, value: Any): DataFrameWriter = { this.writeOptions.put(key.toUpperCase(Locale.ROOT), value) @@ -224,61 +229,62 @@ class DataFrameWriter(private[snowpark] val dataFrame: DataFrame) { } // scalastyle:off - /** Sets multiple specified options in the DataFrameWriter. - * - * =Sets the specified options for saving Data to a Table= - * - * Use this method to configure options: - * - columnOrder: save data into a table with table's column name order if saveMode is Append - * and target table exists. 
- * - * =Sets the specified options for saving data to a file on a stage= - * - * Use this method to configure options: - * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#format-type-options-formattypeoptions format-specific options]] - * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#copy-options-copyoptions copy options]] - * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#optional-parameters PARTITION BY or HEADER]] - * - * Note that you cannot use the `option` and `options` methods to set the following options: - * - The `TYPE` format type option. - * - The `OVERWRITE` copy option. To set this option, use the - * [[mode(saveMode:com\.snowflake\.snowpark\.SaveMode* mode]] method instead. - * - To set `OVERWRITE` to `TRUE`, use `SaveMode.Overwrite`. - * - To set `OVERWRITE` to `FALSE`, use `SaveMode.ErrorIfExists`. - * - * '''Example 1:''' Write a DataFrame to a CSV file. - * {{{ - * val result = df.write.csv("@myStage/prefix") - * }}} - * - * '''Example 2:''' Write a DataFrame to a CSV file without compression. - * {{{ - * val result = df.write.option("compression", "none").csv("@myStage/prefix") - * }}} - * - * @since 1.5.0 - * @param configs - * Map of the names of options (e.g. {@code compression} , etc.) and their corresponding - * values. - * @return - * A [[DataFrameWriter]] - */ + /** + * Sets multiple specified options in the DataFrameWriter. + * + * =Sets the specified options for saving Data to a Table= + * + * Use this method to configure options: + * - columnOrder: save data into a table with table's column name order if saveMode is Append + * and target table exists. + * + * =Sets the specified options for saving data to a file on a stage= + * + * Use this method to configure options: + * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#format-type-options-formattypeoptions format-specific options]] + * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#copy-options-copyoptions copy options]] + * - [[https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#optional-parameters PARTITION BY or HEADER]] + * + * Note that you cannot use the `option` and `options` methods to set the following options: + * - The `TYPE` format type option. + * - The `OVERWRITE` copy option. To set this option, use the + * [[mode(saveMode:com\.snowflake\.snowpark\.SaveMode* mode]] method instead. + * - To set `OVERWRITE` to `TRUE`, use `SaveMode.Overwrite`. + * - To set `OVERWRITE` to `FALSE`, use `SaveMode.ErrorIfExists`. + * + * '''Example 1:''' Write a DataFrame to a CSV file. + * {{{ + * val result = df.write.csv("@myStage/prefix") + * }}} + * + * '''Example 2:''' Write a DataFrame to a CSV file without compression. + * {{{ + * val result = df.write.option("compression", "none").csv("@myStage/prefix") + * }}} + * + * @since 1.5.0 + * @param configs + * Map of the names of options (e.g. {@code compression} , etc.) and their corresponding values. + * @return + * A [[DataFrameWriter]] + */ def options(configs: Map[String, Any]): DataFrameWriter = { configs.foreach(e => option(e._1, e._2)) this } - /** Writes the data to the specified table in a Snowflake database. {@code tableName} can be a - * fully-qualified object identifier. - * - * For example: - * {{{ - * df.write.saveAsTable("db1.public_schema.table1") - * }}} - * @param tableName - * Name of the table where the data should be saved. 
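Combining the `columnOrder` option with append mode, here is a short hypothetical sketch of writing into an existing table by column name rather than by position; the table `T`, the DataFrame `df`, and the option value `"name"` are assumptions for illustration:

{{{
  // With columnOrder set to "name", columns are matched to the target table by name,
  // so the DataFrame's column order does not have to match the table's.
  df.write
    .mode(SaveMode.Append)
    .option("columnOrder", "name")
    .saveAsTable("T")
}}}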
- * @since 0.1.0 - */ + /** + * Writes the data to the specified table in a Snowflake database. {@code tableName} can be a + * fully-qualified object identifier. + * + * For example: + * {{{ + * df.write.saveAsTable("db1.public_schema.table1") + * }}} + * @param tableName + * Name of the table where the data should be saved. + * @since 0.1.0 + */ def saveAsTable(tableName: String): Unit = action("saveAsTable") { val writePlan = getWriteTablePlan(tableName) dataFrame.session.conn.execute(writePlan) @@ -317,79 +323,84 @@ class DataFrameWriter(private[snowpark] val dataFrame: DataFrame) { dataFrame.session.analyzer.resolve(plan) } - /** Writes the data to the specified table in a Snowflake database. - * - * For example: - * {{{ - * df.write.saveAsTable(Seq("db_name", "schema_name", "table_name")) - * }}} - * - * @param multipartIdentifier - * A sequence of strings that specify the database name, schema name, and table name (e.g. - * {@code Seq("database_name", "schema_name", "table_name")} ). - * @since 0.5.0 - */ + /** + * Writes the data to the specified table in a Snowflake database. + * + * For example: + * {{{ + * df.write.saveAsTable(Seq("db_name", "schema_name", "table_name")) + * }}} + * + * @param multipartIdentifier + * A sequence of strings that specify the database name, schema name, and table name (e.g. + * {@code Seq("database_name", "schema_name", "table_name")} ). + * @since 0.5.0 + */ def saveAsTable(multipartIdentifier: Seq[String]): Unit = action("saveAsTable") { val writePlan = getWriteTablePlan(multipartIdentifier.mkString(".")) dataFrame.session.conn.execute(writePlan) } - /** Writes the data to the specified table in a Snowflake database. - * - * For example: - * {{{ - * val list = new java.util.ArrayList[String](3) - * list.add(db) - * list.add(sc) - * list.add(tableName) - * df.write.saveAsTable(list) - * }}} - * - * @param multipartIdentifier - * A list of strings that specify the database name, schema name, and table name. - * @since 0.5.0 - */ + /** + * Writes the data to the specified table in a Snowflake database. + * + * For example: + * {{{ + * val list = new java.util.ArrayList[String](3) + * list.add(db) + * list.add(sc) + * list.add(tableName) + * df.write.saveAsTable(list) + * }}} + * + * @param multipartIdentifier + * A list of strings that specify the database name, schema name, and table name. + * @since 0.5.0 + */ def saveAsTable(multipartIdentifier: java.util.List[String]): Unit = action("saveAsTable") { val writePlan = getWriteTablePlan(multipartIdentifier.asScala.mkString(".")) dataFrame.session.conn.execute(writePlan) } - /** Returns a new DataFrameWriter with the specified save mode configuration. - * - * @param saveMode - * One of the following strings: `"APPEND"`, `"OVERWRITE"`, `"ERRORIFEXISTS"`, or `"IGNORE"` - * @since 0.1.0 - */ + /** + * Returns a new DataFrameWriter with the specified save mode configuration. + * + * @param saveMode + * One of the following strings: `"APPEND"`, `"OVERWRITE"`, `"ERRORIFEXISTS"`, or `"IGNORE"` + * @since 0.1.0 + */ def mode(saveMode: String): DataFrameWriter = mode(SaveMode(saveMode)) - /** Returns a new DataFrameWriter with the specified save mode configuration. - * - * @param saveMode - * One of the following save modes: [[SaveMode.Append]], [[SaveMode.Overwrite]], - * [[SaveMode.ErrorIfExists]], [[SaveMode.Ignore]] - * @since 0.1.0 - */ + /** + * Returns a new DataFrameWriter with the specified save mode configuration. 
+ * + * @param saveMode + * One of the following save modes: [[SaveMode.Append]], [[SaveMode.Overwrite]], + * [[SaveMode.ErrorIfExists]], [[SaveMode.Ignore]] + * @since 0.1.0 + */ def mode(saveMode: SaveMode): DataFrameWriter = { this.saveMode = Some(saveMode) this } - /** Returns a [[DataFrameWriterAsyncActor]] object that can be used to execute DataFrameWriter - * actions asynchronously. - * - * Example: - * {{{ - * val asyncJob = df.write.mode(SaveMode.Overwrite).async.saveAsTable(tableName) - * // At this point, the thread is not blocked. You can perform additional work before - * // calling asyncJob.getResult() to retrieve the results of the action. - * // NOTE: getResult() is a blocking call. - * asyncJob.getResult() - * }}} - * - * @since 0.11.0 - * @return - * A [[DataFrameWriterAsyncActor]] object - */ + /** + * Returns a [[DataFrameWriterAsyncActor]] object that can be used to execute DataFrameWriter + * actions asynchronously. + * + * Example: + * {{{ + * val asyncJob = df.write.mode(SaveMode.Overwrite).async.saveAsTable(tableName) + * // At this point, the thread is not blocked. You can perform additional work before + * // calling asyncJob.getResult() to retrieve the results of the action. + * // NOTE: getResult() is a blocking call. + * asyncJob.getResult() + * }}} + * + * @since 0.11.0 + * @return + * A [[DataFrameWriterAsyncActor]] object + */ def async: DataFrameWriterAsyncActor = new DataFrameWriterAsyncActor(this) @inline protected def action[T](funcName: String)(func: => T): T = { @@ -400,93 +411,100 @@ class DataFrameWriter(private[snowpark] val dataFrame: DataFrame) { } -/** Provides APIs to execute DataFrameWriter actions asynchronously. - * - * @since 0.11.0 - */ +/** + * Provides APIs to execute DataFrameWriter actions asynchronously. + * + * @since 0.11.0 + */ class DataFrameWriterAsyncActor private[snowpark] (writer: DataFrameWriter) { - /** Executes `DataFrameWriter.saveAsTable` asynchronously. - * - * @param tableName - * Name of the table where the data should be saved. - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `DataFrameWriter.saveAsTable` asynchronously. + * + * @param tableName + * Name of the table where the data should be saved. + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def saveAsTable(tableName: String): TypedAsyncJob[Unit] = action("saveAsTable") { val writePlan = writer.getWriteTablePlan(tableName) writePlan.session.conn.executeAsync[Unit](writePlan) } - /** Executes `DataFrameWriter.saveAsTable` asynchronously. - * - * @param multipartIdentifier - * A sequence of strings that specify the database name, schema name, and table name (e.g. - * {@code Seq("database_name", "schema_name", "table_name")} ). - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `DataFrameWriter.saveAsTable` asynchronously. + * + * @param multipartIdentifier + * A sequence of strings that specify the database name, schema name, and table name (e.g. + * {@code Seq("database_name", "schema_name", "table_name")} ). + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. 
+ * @since 0.11.0 + */ def saveAsTable(multipartIdentifier: Seq[String]): TypedAsyncJob[Unit] = action("saveAsTable") { val writePlan = writer.getWriteTablePlan(multipartIdentifier.mkString(".")) writePlan.session.conn.executeAsync[Unit](writePlan) } - /** Executes `DataFrameWriter.saveAsTable` asynchronously. - * - * @param multipartIdentifier - * A list of strings that specify the database name, schema name, and table name. - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `DataFrameWriter.saveAsTable` asynchronously. + * + * @param multipartIdentifier + * A list of strings that specify the database name, schema name, and table name. + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def saveAsTable(multipartIdentifier: java.util.List[String]): TypedAsyncJob[Unit] = action("saveAsTable") { val writePlan = writer.getWriteTablePlan(multipartIdentifier.asScala.mkString(".")) writePlan.session.conn.executeAsync[Unit](writePlan) } - /** Executes `DataFrameWriter.csv` asynchronously. - * - * @param path - * The path (including the stage name) to the CSV file. - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 1.5.0 - */ + /** + * Executes `DataFrameWriter.csv` asynchronously. + * + * @param path + * The path (including the stage name) to the CSV file. + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 1.5.0 + */ def csv(path: String): TypedAsyncJob[WriteFileResult] = action("csv") { val writePlan = writer.getCopyIntoLocationPlan(path, "CSV") writePlan.session.conn.executeAsync[WriteFileResult](writePlan) } - /** Executes `DataFrameWriter.json` asynchronously. - * - * @param path - * The path (including the stage name) to the JSON file. - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 1.5.0 - */ + /** + * Executes `DataFrameWriter.json` asynchronously. + * + * @param path + * The path (including the stage name) to the JSON file. + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 1.5.0 + */ def json(path: String): TypedAsyncJob[WriteFileResult] = action("json") { val writePlan = writer.getCopyIntoLocationPlan(path, "JSON") writePlan.session.conn.executeAsync[WriteFileResult](writePlan) } - /** Executes `DataFrameWriter.parquet` asynchronously. - * - * @param path - * The path (including the stage name) to the PARQUET file. - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 1.5.0 - */ + /** + * Executes `DataFrameWriter.parquet` asynchronously. + * + * @param path + * The path (including the stage name) to the PARQUET file. + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. 
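These async variants all follow the same pattern: the call returns immediately with a `TypedAsyncJob`, and `getResult()` blocks until the COPY finishes. A minimal sketch for the CSV case, assuming a DataFrame `df` and an existing stage `@myStage`:

{{{
  val job = df.write.option("compression", "none").async.csv("@myStage/prefix")
  // Do other work here; getResult() blocks until the unload completes.
  val result: WriteFileResult = job.getResult()
  println(s"${result.rows.length} output row(s)")
}}}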
+ * @since 1.5.0 + */ def parquet(path: String): TypedAsyncJob[WriteFileResult] = action { "parquet" } { val writePlan = writer.getCopyIntoLocationPlan(path, "PARQUET") writePlan.session.conn.executeAsync[WriteFileResult](writePlan) @@ -500,22 +518,23 @@ class DataFrameWriterAsyncActor private[snowpark] (writer: DataFrameWriter) { } } -/** Represents the results of writing data from a DataFrame to a file in a stage. - * - * To write the data, the DataFrameWriter effectively executes the `COPY INTO ` command. - * WriteFileResult encapsulates the output returned by the command: - * - `rows` represents the rows of output from the command. - * - `schema` defines the schema for these rows. - * - * For example, if the DETAILED_OUTPUT option is TRUE, each row contains a `file_name`, - * `file_size`, and `row_count` field. `schema` defines the names and types of these fields. If the - * DETAILED_OUTPUT option is not specified (meaning that the option is FALSE), each row contains a - * `rows_unloaded`, `input_bytes`, and `output_bytes` field. - * - * @param rows - * The output rows produced by the `COPY INTO ` command. - * @param schema - * The names and types of the fields in the output rows. - * @since 1.5.0 - */ +/** + * Represents the results of writing data from a DataFrame to a file in a stage. + * + * To write the data, the DataFrameWriter effectively executes the `COPY INTO ` command. + * WriteFileResult encapsulates the output returned by the command: + * - `rows` represents the rows of output from the command. + * - `schema` defines the schema for these rows. + * + * For example, if the DETAILED_OUTPUT option is TRUE, each row contains a `file_name`, `file_size`, + * and `row_count` field. `schema` defines the names and types of these fields. If the + * DETAILED_OUTPUT option is not specified (meaning that the option is FALSE), each row contains a + * `rows_unloaded`, `input_bytes`, and `output_bytes` field. + * + * @param rows + * The output rows produced by the `COPY INTO ` command. + * @param schema + * The names and types of the fields in the output rows. + * @since 1.5.0 + */ case class WriteFileResult(rows: Array[Row], schema: StructType) diff --git a/src/main/scala/com/snowflake/snowpark/FileOperation.scala b/src/main/scala/com/snowflake/snowpark/FileOperation.scala index 56680781..0026708f 100644 --- a/src/main/scala/com/snowflake/snowpark/FileOperation.scala +++ b/src/main/scala/com/snowflake/snowpark/FileOperation.scala @@ -16,53 +16,55 @@ private[snowpark] object FileOperationCommand extends Enumeration { } import FileOperationCommand._ -/** Provides methods for working on files in a stage. - * - * To access an object of this class, use [[Session.file]]. - * - * For example: - * {{{ - * // Upload a file to a stage. - * session.file.put("file:///tmp/file1.csv", "@myStage/prefix1") - * // Download a file from a stage. - * session.file.get("@myStage/prefix1/file1.csv", "file:///tmp") - * }}} - * - * @since 0.4.0 - */ +/** + * Provides methods for working on files in a stage. + * + * To access an object of this class, use [[Session.file]]. + * + * For example: + * {{{ + * // Upload a file to a stage. + * session.file.put("file:///tmp/file1.csv", "@myStage/prefix1") + * // Download a file from a stage. 
+ * session.file.get("@myStage/prefix1/file1.csv", "file:///tmp") + * }}} + * + * @since 0.4.0 + */ final class FileOperation(session: Session) extends Logging { - /** Uploads the local files specified by {@code localFileName} to the stage location specified in - * {@code stageLocation} . - * - * This method returns the results as an Array of [[PutResult]] objects (one for each file). Each - * object represents the results of uploading a file. - * - * For example: - * {{{ - * // Upload a file to a stage without compressing the file. - * val putOptions = Map("AUTO_COMPRESS" -> "FALSE") - * val res1 = session.file.put("file:///tmp/file1.csv", "@myStage", putOptions) - * - * // Upload the CSV files in /tmp with names that start with "file". - * // You can use the wildcard characters "*" and "?" to match multiple files. - * val res2 = session.file.put("file:///tmp/file*.csv", "@myStage/prefix2") - * }}} - * - * @param localFileName - * The path to the local file(s) to upload. Specify the path in the following format: - * `file:///`. (The `file://` prefix is optional.) To match multiple - * files in the path, you can specify the wildcard characters `*` and `?`. - * @param stageLocation - * The stage (and prefix) where you want to upload the file(s). The `@` prefix is optional. - * @param options - * A Map containing the names and values of optional - * [[https://docs.snowflake.com/en/sql-reference/sql/put.html#optional-parameters parameters]] - * for the PUT command. - * @return - * An Array of [[PutResult]] objects (one object for each file uploaded). - * @since 0.4.0 - */ + /** + * Uploads the local files specified by {@code localFileName} to the stage location specified in + * {@code stageLocation} . + * + * This method returns the results as an Array of [[PutResult]] objects (one for each file). Each + * object represents the results of uploading a file. + * + * For example: + * {{{ + * // Upload a file to a stage without compressing the file. + * val putOptions = Map("AUTO_COMPRESS" -> "FALSE") + * val res1 = session.file.put("file:///tmp/file1.csv", "@myStage", putOptions) + * + * // Upload the CSV files in /tmp with names that start with "file". + * // You can use the wildcard characters "*" and "?" to match multiple files. + * val res2 = session.file.put("file:///tmp/file*.csv", "@myStage/prefix2") + * }}} + * + * @param localFileName + * The path to the local file(s) to upload. Specify the path in the following format: + * `file:///`. (The `file://` prefix is optional.) To match multiple + * files in the path, you can specify the wildcard characters `*` and `?`. + * @param stageLocation + * The stage (and prefix) where you want to upload the file(s). The `@` prefix is optional. + * @param options + * A Map containing the names and values of optional + * [[https://docs.snowflake.com/en/sql-reference/sql/put.html#optional-parameters parameters]] + * for the PUT command. + * @return + * An Array of [[PutResult]] objects (one object for each file uploaded). + * @since 0.4.0 + */ def put( localFileName: String, stageLocation: String, @@ -88,42 +90,43 @@ final class FileOperation(session: Session) extends Logging { } } - /** Downloads the specified files from a path in a stage (specified by {@code stageLocation} ) to - * the local directory specified by {@code targetLocation} . - * - * This method returns the results as an Array of [[GetResult]] objects (one for each file). Each - * object represents the results of downloading a file. 
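Each `PutResult` returned by `put` describes one uploaded file, so a quick scan of the array shows what was uploaded and what each upload reported. A minimal sketch, assuming the stage `@myStage` exists and using only fields shown elsewhere in this file:

{{{
  val results = session.file.put("file:///tmp/file*.csv", "@myStage/prefix2")
  results.foreach { r =>
    println(s"${r.sourceFileName} -> ${r.targetFileName}: ${r.message}")
  }
}}}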
- * - * For example: - * {{{ - * // Upload files to a stage. - * session.file.put("file:///tmp/file_1.csv", "@myStage/prefix2") - * session.file.put("file:///tmp/file_2.csv", "@myStage/prefix2") - * - * // Download one file from a stage. - * val res1 = session.file.get("@myStage/prefix2/file_1.csv", "file:///tmp/target") - * // Download all the files from @myStage/prefix2. - * val res2 = session.file.get("@myStage/prefix2", "file:///tmp/target2") - * // Download files with names that match a regular expression pattern. - * val getOptions = Map("PATTERN" -> s"'.*file_.*.csv.gz'") - * val res3 = session.file.get("@myStage/prefix2", "file:///tmp/target3", getOptions) - * }}} - * - * @param stageLocation - * The location (a directory or filename on a stage) from which you want to download the files. - * The `@` prefix is optional. - * @param targetDirectory - * The path to the local directory where the file(s) should be downloaded. Specify the path in - * the following format: `file:///`. If {@code targetDirectory} does - * not already exist, the method creates the directory. - * @param options - * A Map containing the names and values of optional - * [[https://docs.snowflake.com/en/sql-reference/sql/get.html#optional-parameters parameters]] - * for the GET command. - * @return - * An Array of [[PutResult]] objects (one object for each file downloaded). - * @since 0.4.0 - */ + /** + * Downloads the specified files from a path in a stage (specified by {@code stageLocation} ) to + * the local directory specified by {@code targetLocation} . + * + * This method returns the results as an Array of [[GetResult]] objects (one for each file). Each + * object represents the results of downloading a file. + * + * For example: + * {{{ + * // Upload files to a stage. + * session.file.put("file:///tmp/file_1.csv", "@myStage/prefix2") + * session.file.put("file:///tmp/file_2.csv", "@myStage/prefix2") + * + * // Download one file from a stage. + * val res1 = session.file.get("@myStage/prefix2/file_1.csv", "file:///tmp/target") + * // Download all the files from @myStage/prefix2. + * val res2 = session.file.get("@myStage/prefix2", "file:///tmp/target2") + * // Download files with names that match a regular expression pattern. + * val getOptions = Map("PATTERN" -> s"'.*file_.*.csv.gz'") + * val res3 = session.file.get("@myStage/prefix2", "file:///tmp/target3", getOptions) + * }}} + * + * @param stageLocation + * The location (a directory or filename on a stage) from which you want to download the files. + * The `@` prefix is optional. + * @param targetDirectory + * The path to the local directory where the file(s) should be downloaded. Specify the path in + * the following format: `file:///`. If {@code targetDirectory} does not + * already exist, the method creates the directory. + * @param options + * A Map containing the names and values of optional + * [[https://docs.snowflake.com/en/sql-reference/sql/get.html#optional-parameters parameters]] + * for the GET command. + * @return + * An Array of [[PutResult]] objects (one object for each file downloaded). + * @since 0.4.0 + */ def get( stageLocation: String, targetDirectory: String, @@ -145,34 +148,36 @@ final class FileOperation(session: Session) extends Logging { } } - /** Method to compress data from a stream and upload it at a stage location. The data will be - * uploaded as one file. No splitting is done in this method. - * - *
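`get` mirrors this with an array of `GetResult` objects, one per downloaded file. A brief sketch of listing what was downloaded, assuming the stage prefix from the examples above:

{{{
  val downloaded = session.file.get("@myStage/prefix2", "file:///tmp/target")
  downloaded.foreach(r => println(s"${r.fileName} (${r.sizeBytes} bytes)"))
}}}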

caller is responsible for releasing the inputStream after the method is called. - * - * @param stageLocation - * Full stage path to the file - * @param inputStream - * Input stream from which the data will be uploaded - * @param compress - * Compress data or not before uploading stream - * @since 1.4.0 - */ + /** + * Method to compress data from a stream and upload it at a stage location. The data will be + * uploaded as one file. No splitting is done in this method. + * + *

caller is responsible for releasing the inputStream after the method is called. + * + * @param stageLocation + * Full stage path to the file + * @param inputStream + * Input stream from which the data will be uploaded + * @param compress + * Compress data or not before uploading stream + * @since 1.4.0 + */ def uploadStream(stageLocation: String, inputStream: InputStream, compress: Boolean): Unit = { val (stageName, pathName, fileName) = parseStageFileLocation(stageLocation) session.conn.uploadStream(stageName, pathName, inputStream, fileName, compress) } - /** Download file from the given stage and return an input stream - * - * @param stageLocation - * Full stage path to the file - * @param decompress - * True if file compressed - * @return - * An InputStream object - * @since 1.4.0 - */ + /** + * Download file from the given stage and return an input stream + * + * @param stageLocation + * Full stage path to the file + * @param decompress + * True if file compressed + * @return + * An InputStream object + * @since 1.4.0 + */ def downloadStream(stageLocation: String, decompress: Boolean): InputStream = { val (stageName, pathName, fileName) = parseStageFileLocation(stageLocation) // TODO: No need to check file existence once this is fixed: SNOW-565154 @@ -200,10 +205,11 @@ final class FileOperation(session: Session) extends Logging { } -/** Represents the results of uploading a local file to a stage location. - * - * @since 0.4.0 - */ +/** + * Represents the results of uploading a local file to a stage location. + * + * @since 0.4.0 + */ case class PutResult( sourceFileName: String, targetFileName: String, @@ -215,13 +221,14 @@ case class PutResult( encryption: String, message: String) -/** Represents the results of downloading a file from a stage location to the local file system. - * - * NOTE: {@code fileName} is the relative path to the file on the stage. For example, if you - * download `@myStage/prefix1/file1.csv.gz`, {@code fileName} is `prefix1/file1.csv.gz`. - * - * @since 0.4.0 - */ +/** + * Represents the results of downloading a file from a stage location to the local file system. + * + * NOTE: {@code fileName} is the relative path to the file on the stage. For example, if you + * download `@myStage/prefix1/file1.csv.gz`, {@code fileName} is `prefix1/file1.csv.gz`. + * + * @since 0.4.0 + */ case class GetResult( fileName: String, sizeBytes: Long, diff --git a/src/main/scala/com/snowflake/snowpark/GroupingSets.scala b/src/main/scala/com/snowflake/snowpark/GroupingSets.scala index 9b2b1e8f..7001ffdd 100644 --- a/src/main/scala/com/snowflake/snowpark/GroupingSets.scala +++ b/src/main/scala/com/snowflake/snowpark/GroupingSets.scala @@ -2,31 +2,34 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal.analyzer.GroupingSetsExpression -/** Constructors of GroupingSets object. - * - * @since 0.4.0 - */ +/** + * Constructors of GroupingSets object. + * + * @since 0.4.0 + */ object GroupingSets { - /** Creates a GroupingSets object from a list of column/expression sets. - * - * @param set - * a set of DataFrame column, or any expression in the current scope. - * @param sets - * a list of arguments except the first one - * @since 0.4.0 - */ + /** + * Creates a GroupingSets object from a list of column/expression sets. + * + * @param set + * a set of DataFrame column, or any expression in the current scope. 
+ * @param sets + * a list of arguments except the first one + * @since 0.4.0 + */ def apply(set: Set[Column], sets: Set[Column]*): GroupingSets = new GroupingSets(set +: sets) } -/** A Container of grouping sets that you pass to - * [[DataFrame.groupByGroupingSets(groupingSets* DataFrame.groupByGroupingSets]]. - * - * @param sets - * a list of grouping sets - * @since 0.4.0 - */ +/** + * A Container of grouping sets that you pass to + * [[DataFrame.groupByGroupingSets(groupingSets* DataFrame.groupByGroupingSets]]. + * + * @param sets + * a list of grouping sets + * @since 0.4.0 + */ case class GroupingSets(sets: Seq[Set[Column]]) { private[snowpark] val toExpression = GroupingSetsExpression(sets.map(_.map(_.expr))) } diff --git a/src/main/scala/com/snowflake/snowpark/MergeBuilder.scala b/src/main/scala/com/snowflake/snowpark/MergeBuilder.scala index 6bac9453..b719cbaf 100644 --- a/src/main/scala/com/snowflake/snowpark/MergeBuilder.scala +++ b/src/main/scala/com/snowflake/snowpark/MergeBuilder.scala @@ -3,10 +3,11 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal.{ErrorMessage, OpenTelemetry} import com.snowflake.snowpark.internal.analyzer.{MergeExpression, TableMerge} -/** Result of merging a DataFrame into an Updatable DataFrame - * - * @since 0.7.0 - */ +/** + * Result of merging a DataFrame into an Updatable DataFrame + * + * @since 0.7.0 + */ case class MergeResult(rowsInserted: Long, rowsUpdated: Long, rowsDeleted: Long) private[snowpark] object MergeBuilder { @@ -48,13 +49,14 @@ private[snowpark] object MergeBuilder { } } -/** Builder for a merge action. It provides APIs to build matched and not matched clauses. - * - * @groupname actions Actions - * @groupname transform Transformations - * - * @since 0.7.0 - */ +/** + * Builder for a merge action. It provides APIs to build matched and not matched clauses. + * + * @groupname actions Actions + * @groupname transform Transformations + * + * @since 0.7.0 + */ class MergeBuilder private[snowpark] ( private[snowpark] val target: Updatable, private[snowpark] val source: DataFrame, @@ -64,45 +66,47 @@ class MergeBuilder private[snowpark] ( private[snowpark] val updated: Boolean, private[snowpark] val deleted: Boolean) { - /** Adds a matched clause into the merge action. It matches all remaining rows in target that - * satisfy . Returns a [[MatchedClauseBuilder]] which provides APIs to define actions - * to take when a row is matched. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")).whenMatched - * }}} - * - * Adds a matched clause where a row in the [[Updatable]] target is matched if its id equals the - * id of a row in the [[DataFrame]] source. - * - * Caution: Since it matches all remaining rows, no more whenMatched calls will be accepted - * beyond this call. - * - * @group transform - * @since 0.7.0 - * @return - * [[MatchedClauseBuilder]] - */ + /** + * Adds a matched clause into the merge action. It matches all remaining rows in target that + * satisfy . Returns a [[MatchedClauseBuilder]] which provides APIs to define actions to + * take when a row is matched. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")).whenMatched + * }}} + * + * Adds a matched clause where a row in the [[Updatable]] target is matched if its id equals the + * id of a row in the [[DataFrame]] source. + * + * Caution: Since it matches all remaining rows, no more whenMatched calls will be accepted beyond + * this call. 
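For the `GroupingSets` helper defined above, a minimal usage sketch with `DataFrame.groupByGroupingSets`; the sample data and the `sum` aggregate from `com.snowflake.snowpark.functions` are assumptions for illustration:

{{{
  import session.implicits._
  val df = Seq(("a", "x", 1), ("a", "y", 2), ("b", "x", 3)).toDF("c1", "c2", "v")
  // One aggregation pass that groups by c1 and, separately, by c2.
  df.groupByGroupingSets(GroupingSets(Set(col("c1")), Set(col("c2"))))
    .agg(sum(col("v")))
    .show()
}}}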
+ * + * @group transform + * @since 0.7.0 + * @return + * [[MatchedClauseBuilder]] + */ def whenMatched: MatchedClauseBuilder = whenMatched(None) - /** Adds a matched clause into the merge action. It matches all rows in target that satisfy - * while also satisfying . Returns a [[MatchedClauseBuilder]] which - * provides APIs to define actions to take when a row is matched. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")).whenMatched(target("value") === lit(0)) - * }}} - * - * Adds a matched clause where a row in the [[Updatable]] target is matched if its id equals the - * id of a row in the [[DataFrame]] source and its value equals 0. - * - * @group transform - * @since 0.7.0 - * @return - * [[MatchedClauseBuilder]] - */ + /** + * Adds a matched clause into the merge action. It matches all rows in target that satisfy + * while also satisfying . Returns a [[MatchedClauseBuilder]] which provides + * APIs to define actions to take when a row is matched. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")).whenMatched(target("value") === lit(0)) + * }}} + * + * Adds a matched clause where a row in the [[Updatable]] target is matched if its id equals the + * id of a row in the [[DataFrame]] source and its value equals 0. + * + * @group transform + * @since 0.7.0 + * @return + * [[MatchedClauseBuilder]] + */ def whenMatched(condition: Column): MatchedClauseBuilder = whenMatched(Some(condition)) @@ -110,46 +114,48 @@ class MergeBuilder private[snowpark] ( MatchedClauseBuilder(this, condition) } - /** Adds a not matched clause into the merge action. It matches all remaining rows in source that - * do not satisfy . Returns a [[MatchedClauseBuilder]] which provides APIs to define - * actions to take when a row is not matched. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")).whenNotMatched - * }}} - * - * Adds a not matched clause where a row in the [[DataFrame]] source is not matched if its id - * does not equal the id of any row in the [[Updatable]] target. - * - * Caution: Since it matches all remaining rows, no more whenNotMatched calls will be accepted - * beyond this call. - * - * @group transform - * @since 0.7.0 - * @return - * [[NotMatchedClauseBuilder]] - */ + /** + * Adds a not matched clause into the merge action. It matches all remaining rows in source that + * do not satisfy . Returns a [[MatchedClauseBuilder]] which provides APIs to define + * actions to take when a row is not matched. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")).whenNotMatched + * }}} + * + * Adds a not matched clause where a row in the [[DataFrame]] source is not matched if its id does + * not equal the id of any row in the [[Updatable]] target. + * + * Caution: Since it matches all remaining rows, no more whenNotMatched calls will be accepted + * beyond this call. + * + * @group transform + * @since 0.7.0 + * @return + * [[NotMatchedClauseBuilder]] + */ def whenNotMatched: NotMatchedClauseBuilder = whenNotMatched(None) - /** Adds a not matched clause into the merge action. It matches all rows in source that do not - * satisfy but satisfy . Returns a [[MatchedClauseBuilder]] which provides - * APIs to define actions to take when a row is matched. 
- * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")) - * .whenNotMatched(source("value") === lit(0)) - * }}} - * - * Adds a not matched clause where a row in the [[DataFrame]] source is not matched if its id - * does not equal the id of any row in the [[Updatable]] source and its value equals 0. - * - * @group transform - * @since 0.7.0 - * @return - * [[NotMatchedClauseBuilder]] - */ + /** + * Adds a not matched clause into the merge action. It matches all rows in source that do not + * satisfy but satisfy . Returns a [[MatchedClauseBuilder]] which provides + * APIs to define actions to take when a row is matched. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")) + * .whenNotMatched(source("value") === lit(0)) + * }}} + * + * Adds a not matched clause where a row in the [[DataFrame]] source is not matched if its id does + * not equal the id of any row in the [[Updatable]] source and its value equals 0. + * + * @group transform + * @since 0.7.0 + * @return + * [[NotMatchedClauseBuilder]] + */ def whenNotMatched(condition: Column): NotMatchedClauseBuilder = whenNotMatched(Some(condition)) @@ -157,14 +163,15 @@ class MergeBuilder private[snowpark] ( NotMatchedClauseBuilder(this, condition) } - /** Executes the merge action and returns a [[MergeResult]], representing number of rows inserted, - * updated and deleted by this merge action. - * - * @group action - * @since 0.7.0 - * @return - * [[MergeResult]] - */ + /** + * Executes the merge action and returns a [[MergeResult]], representing number of rows inserted, + * updated and deleted by this merge action. + * + * @group action + * @since 0.7.0 + * @return + * [[MergeResult]] + */ def collect(): MergeResult = action("collect") { val rows = getMergeDataFrame().collect() MergeBuilder.getMergeResult(rows, this) @@ -176,29 +183,30 @@ class MergeBuilder private[snowpark] ( DataFrame(target.session, TableMerge(target.tableName, source.plan, joinExpr.expr, clauses)) } - /** Returns a [[MergeBuilderAsyncActor]] object that can be used to execute MergeBuilder actions - * asynchronously. - * - * Example: - * {{{ - * val target = session.table(tableName) - * val source = Seq((10, "new")).toDF("id", "desc") - * val asyncJob = target - * .merge(source, target("id") === source("id")) - * .whenMatched - * .update(Map(target("desc") -> source("desc"))) - * .async - * .collect() - * // At this point, the thread is not blocked. You can perform additional work before - * // calling asyncJob.getResult() to retrieve the results of the action. - * // NOTE: getResult() is a blocking call. - * val mergeResult = asyncJob.getResult() - * }}} - * - * @since 1.3.0 - * @return - * A [[MergeBuilderAsyncActor]] object - */ + /** + * Returns a [[MergeBuilderAsyncActor]] object that can be used to execute MergeBuilder actions + * asynchronously. + * + * Example: + * {{{ + * val target = session.table(tableName) + * val source = Seq((10, "new")).toDF("id", "desc") + * val asyncJob = target + * .merge(source, target("id") === source("id")) + * .whenMatched + * .update(Map(target("desc") -> source("desc"))) + * .async + * .collect() + * // At this point, the thread is not blocked. You can perform additional work before + * // calling asyncJob.getResult() to retrieve the results of the action. + * // NOTE: getResult() is a blocking call. 
+ * val mergeResult = asyncJob.getResult() + * }}} + * + * @since 1.3.0 + * @return + * A [[MergeBuilderAsyncActor]] object + */ def async: MergeBuilderAsyncActor = new MergeBuilderAsyncActor(this) @inline protected def action[T](funcName: String)(func: => T): T = { @@ -206,19 +214,21 @@ class MergeBuilder private[snowpark] ( } } -/** Provides APIs to execute MergeBuilder actions asynchronously. - * - * @since 1.3.0 - */ +/** + * Provides APIs to execute MergeBuilder actions asynchronously. + * + * @since 1.3.0 + */ class MergeBuilderAsyncActor private[snowpark] (mergeBuilder: MergeBuilder) { - /** Executes `MergeBuilder.collect()` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 1.3.0 - */ + /** + * Executes `MergeBuilder.collect()` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 1.3.0 + */ def collect(): TypedAsyncJob[MergeResult] = action("collect") { val newDf = mergeBuilder.getMergeDataFrame() mergeBuilder.target.session.conn diff --git a/src/main/scala/com/snowflake/snowpark/MergeClause.scala b/src/main/scala/com/snowflake/snowpark/MergeClause.scala index fbf8a315..92ad6832 100644 --- a/src/main/scala/com/snowflake/snowpark/MergeClause.scala +++ b/src/main/scala/com/snowflake/snowpark/MergeClause.scala @@ -16,36 +16,38 @@ private[snowpark] object NotMatchedClauseBuilder { new NotMatchedClauseBuilder(mergeBuilder, condition) } -/** Builder for a not matched clause. It provides APIs to build insert actions - * - * @since 0.7.0 - */ +/** + * Builder for a not matched clause. It provides APIs to build insert actions + * + * @since 0.7.0 + */ class NotMatchedClauseBuilder private[snowpark] ( mergeBuilder: MergeBuilder, condition: Option[Column]) { - /** Defines an insert action for the not matched clause, when a row in source is not matched, - * insert a row in target with . Returns an updated [[MergeBuilder]] with the new clause - * added. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")) - * .whenNotMatched.insert(Seq(source("id"), source("value"))) - * }}} - * - * Adds a not matched clause where a row in source is not matched if its id does not equal the id - * of any row in the [[Updatable]] target. For all such rows, insert a row into target whose id - * and value are assigned to the id and value of the not matched row. - * - * Note: This API inserts into all columns in target with values, so the length of must - * equal the number of columns in target. - * - * @group transform - * @since 0.7.0 - * @return - * [[MergeBuilder]] - */ + /** + * Defines an insert action for the not matched clause, when a row in source is not matched, + * insert a row in target with . Returns an updated [[MergeBuilder]] with the new clause + * added. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")) + * .whenNotMatched.insert(Seq(source("id"), source("value"))) + * }}} + * + * Adds a not matched clause where a row in source is not matched if its id does not equal the id + * of any row in the [[Updatable]] target. For all such rows, insert a row into target whose id + * and value are assigned to the id and value of the not matched row. + * + * Note: This API inserts into all columns in target with values, so the length of must + * equal the number of columns in target. 
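An end-to-end sketch that chains the clauses described here, `whenMatched.update` plus `whenNotMatched.insert`, and reads the counts off the resulting `MergeResult`; the table name and its `id`/`desc` columns are assumed for illustration:

{{{
  import session.implicits._
  val target = session.table("my_table")   // assumed to have columns ID and DESC
  val source = Seq((10, "new"), (11, "also new")).toDF("id", "desc")
  val result = target
    .merge(source, target("id") === source("id"))
    .whenMatched.update(Map(target("desc") -> source("desc")))
    .whenNotMatched.insert(Map(target("id") -> source("id"), target("desc") -> source("desc")))
    .collect()
  println(s"inserted=${result.rowsInserted}, updated=${result.rowsUpdated}, deleted=${result.rowsDeleted}")
}}}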
+ * + * @group transform + * @since 0.7.0 + * @return + * [[MergeBuilder]] + */ def insert(values: Seq[Column]): MergeBuilder = { MergeBuilder( mergeBuilder.target, @@ -60,50 +62,52 @@ class NotMatchedClauseBuilder private[snowpark] ( mergeBuilder.deleted) } - /** Defines an insert action for the not matched clause, when a row in source is not matched, - * insert a row in target with , where the key specifies column name and value - * specifies its assigned value. All unspecified columns are set to NULL. Returns an updated - * [[MergeBuilder]] with the new clause added. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")) - * .whenNotMatched.insert(Map("id" -> source("id"))) - * }}} - * - * Adds a not matched clause where a row in source is not matched if its id does not equal the id - * of any row in the [[Updatable]] target. For all such rows, insert a row into target whose id - * is assigned to the id of the not matched row. - * - * @group transform - * @since 0.7.0 - * @return - * [[MergeBuilder]] - */ + /** + * Defines an insert action for the not matched clause, when a row in source is not matched, + * insert a row in target with , where the key specifies column name and value + * specifies its assigned value. All unspecified columns are set to NULL. Returns an updated + * [[MergeBuilder]] with the new clause added. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")) + * .whenNotMatched.insert(Map("id" -> source("id"))) + * }}} + * + * Adds a not matched clause where a row in source is not matched if its id does not equal the id + * of any row in the [[Updatable]] target. For all such rows, insert a row into target whose id is + * assigned to the id of the not matched row. + * + * @group transform + * @since 0.7.0 + * @return + * [[MergeBuilder]] + */ def insert[T: ClassTag](assignments: Map[String, Column]): MergeBuilder = { insert(assignments.map { case (k, v) => (col(k), v) }) } - /** Defines an insert action for the not matched clause, when a row in source is not matched, - * insert a row in target with , where the key specifies column name and value - * specifies its assigned value. All unspecified columns are set to NULL. Returns an updated - * [[MergeBuilder]] with the new clause added. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")) - * .whenNotMatched.insert(Map(target("id") -> source("id"))) - * }}} - * - * Adds a not matched clause where a row in source is not matched if its id does not equal the id - * of any row in the [[Updatable]] target. For all such rows, insert a row into target whose id - * is assigned to the id of the not matched row. - * - * @group transform - * @since 0.7.0 - * @return - * [[MergeBuilder]] - */ + /** + * Defines an insert action for the not matched clause, when a row in source is not matched, + * insert a row in target with , where the key specifies column name and value + * specifies its assigned value. All unspecified columns are set to NULL. Returns an updated + * [[MergeBuilder]] with the new clause added. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")) + * .whenNotMatched.insert(Map(target("id") -> source("id"))) + * }}} + * + * Adds a not matched clause where a row in source is not matched if its id does not equal the id + * of any row in the [[Updatable]] target. For all such rows, insert a row into target whose id is + * assigned to the id of the not matched row. 
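Taken together, the not-matched and matched clause builders compose into a single merge statement that ends with collect(). A minimal sketch, assuming an existing session and using illustrative table and column names that are not taken from this patch:
{{{
  // Illustrative only: "target_table", "id" and "desc" are assumed names.
  import session.implicits._
  val target = session.table("target_table")
  val source = Seq((1, "updated"), (10, "new")).toDF("id", "desc")
  val result = target
    .merge(source, target("id") === source("id"))
    .whenMatched.update(Map(target("desc") -> source("desc")))
    .whenNotMatched.insert(Map(target("id") -> source("id"), target("desc") -> source("desc")))
    .collect()
  // result is a MergeResult summarizing how many rows were inserted, updated and deleted.
}}}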
+ * + * @group transform + * @since 0.7.0 + * @return + * [[MergeBuilder]] + */ def insert(assignments: Map[Column, Column]): MergeBuilder = { MergeBuilder( mergeBuilder.target, @@ -126,55 +130,58 @@ private[snowpark] object MatchedClauseBuilder { new MatchedClauseBuilder(mergeBuilder, condition) } -/** Builder for a matched clause. It provides APIs to build update and delete actions - * - * @since 0.7.0 - */ +/** + * Builder for a matched clause. It provides APIs to build update and delete actions + * + * @since 0.7.0 + */ class MatchedClauseBuilder private[snowpark] ( mergeBuilder: MergeBuilder, condition: Option[Column]) { - /** Defines an update action for the matched clause, when a row in target is matched, update the - * row in target with , where the key specifies column name and value specifies its - * assigned value. Returns an updated [[MergeBuilder]] with the new clause added. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")) - * .whenMatched.update(Map("value" -> source("value"))) - * }}} - * - * Adds a matched clause where a row in target is matched if its id equals the id of a row in the - * [[DataFrame]] source. For all such rows, update its value to the value of the corresponding - * row in source. - * - * @group transform - * @since 0.7.0 - * @return - * [[MergeBuilder]] - */ + /** + * Defines an update action for the matched clause, when a row in target is matched, update the + * row in target with , where the key specifies column name and value specifies its + * assigned value. Returns an updated [[MergeBuilder]] with the new clause added. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")) + * .whenMatched.update(Map("value" -> source("value"))) + * }}} + * + * Adds a matched clause where a row in target is matched if its id equals the id of a row in the + * [[DataFrame]] source. For all such rows, update its value to the value of the corresponding row + * in source. + * + * @group transform + * @since 0.7.0 + * @return + * [[MergeBuilder]] + */ def update[T: ClassTag](assignments: Map[String, Column]): MergeBuilder = update(assignments.map { case (k, v) => (col(k), v) }) - /** Defines an update action for the matched clause, when a row in target is matched, update the - * row in target with , where the key specifies column name and value specifies its - * assigned value. Returns an updated [[MergeBuilder]] with the new clause added. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")) - * .whenMatched.update(Map(target("value") -> source("value"))) - * }}} - * - * Adds a matched clause where a row in target is matched if its id equals the id of a row in the - * [[DataFrame]] source. For all such rows, update its value to the value of the corresponding - * row in source. - * - * @group transform - * @since 0.7.0 - * @return - * [[MergeBuilder]] - */ + /** + * Defines an update action for the matched clause, when a row in target is matched, update the + * row in target with , where the key specifies column name and value specifies its + * assigned value. Returns an updated [[MergeBuilder]] with the new clause added. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")) + * .whenMatched.update(Map(target("value") -> source("value"))) + * }}} + * + * Adds a matched clause where a row in target is matched if its id equals the id of a row in the + * [[DataFrame]] source. 
For all such rows, update its value to the value of the corresponding row + * in source. + * + * @group transform + * @since 0.7.0 + * @return + * [[MergeBuilder]] + */ def update(assignments: Map[Column, Column]): MergeBuilder = { MergeBuilder( mergeBuilder.target, @@ -190,23 +197,24 @@ class MatchedClauseBuilder private[snowpark] ( mergeBuilder.deleted) } - /** Defines a delete action for the matched clause, when a row in target is matched, delete it - * from target. Returns an updated [[MergeBuilder]] with the new clause added. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")) - * .whenMatched.delete() - * }}} - * - * Adds a matched clause where a row in target is matched if its id equals the id of a row in the - * [[DataFrame]] source. For all such rows, delete it from target. - * - * @group transform - * @since 0.7.0 - * @return - * [[MergeBuilder]] - */ + /** + * Defines a delete action for the matched clause, when a row in target is matched, delete it from + * target. Returns an updated [[MergeBuilder]] with the new clause added. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")) + * .whenMatched.delete() + * }}} + * + * Adds a matched clause where a row in target is matched if its id equals the id of a row in the + * [[DataFrame]] source. For all such rows, delete it from target. + * + * @group transform + * @since 0.7.0 + * @return + * [[MergeBuilder]] + */ def delete(): MergeBuilder = { MergeBuilder( mergeBuilder.target, diff --git a/src/main/scala/com/snowflake/snowpark/RelationalGroupedDataFrame.scala b/src/main/scala/com/snowflake/snowpark/RelationalGroupedDataFrame.scala index 2f4d2866..d2dc40c6 100644 --- a/src/main/scala/com/snowflake/snowpark/RelationalGroupedDataFrame.scala +++ b/src/main/scala/com/snowflake/snowpark/RelationalGroupedDataFrame.scala @@ -32,22 +32,23 @@ private[snowpark] object RelationalGroupedDataFrame { } -/** Represents an underlying DataFrame with rows that are grouped by common values. Can be used to - * define aggregations on these grouped DataFrames. - * - * Example: - * {{{ - * val groupedDf: RelationalGroupedDataFrame = df.groupBy("dept") - * val aggDf: DataFrame = groupedDf.agg(groupedDf("salary") -> "mean") - * }}} - * - * The methods [[DataFrame.groupBy(cols:Array[String* DataFrame.groupBy]], - * [[DataFrame.cube(cols:Seq[String* DataFrame.cube]] and - * [[DataFrame.rollup(cols:Array[String* DataFrame.rollup]] return an instance of type - * [[RelationalGroupedDataFrame]] - * - * @since 0.1.0 - */ +/** + * Represents an underlying DataFrame with rows that are grouped by common values. Can be used to + * define aggregations on these grouped DataFrames. + * + * Example: + * {{{ + * val groupedDf: RelationalGroupedDataFrame = df.groupBy("dept") + * val aggDf: DataFrame = groupedDf.agg(groupedDf("salary") -> "mean") + * }}} + * + * The methods [[DataFrame.groupBy(cols:Array[String* DataFrame.groupBy]], + * [[DataFrame.cube(cols:Seq[String* DataFrame.cube]] and + * [[DataFrame.rollup(cols:Array[String* DataFrame.rollup]] return an instance of type + * [[RelationalGroupedDataFrame]] + * + * @since 0.1.0 + */ class RelationalGroupedDataFrame private[snowpark] ( dataFrame: DataFrame, private[snowpark] val groupingExprs: Seq[Expression], @@ -105,219 +106,234 @@ class RelationalGroupedDataFrame private[snowpark] ( (inputExpr: Expression) => exprToFunc(inputExpr) } - /** Returns a DataFrame with computed aggregates. 
The first element of the 'expr' pair is the - * column to aggregate and the second element is the aggregate function to compute. The following - * example computes the mean of the price column and the sum of the sales column. The name of the - * aggregate function to compute must be a valid Snowflake - * [[https://docs.snowflake.com/en/sql-reference/functions-aggregation.html aggregate function]] - * "average" and "mean" can be used to specify "avg". - * - * {{{ - * import com.snowflake.snowpark.functions.col - * df.groupBy("itemType").agg( - * col("price") -> "mean", - * col("sales") -> "sum") - * }}} - * - * @return - * a [[DataFrame]] - * @since 0.1.0 - */ + /** + * Returns a DataFrame with computed aggregates. The first element of the 'expr' pair is the + * column to aggregate and the second element is the aggregate function to compute. The following + * example computes the mean of the price column and the sum of the sales column. The name of the + * aggregate function to compute must be a valid Snowflake + * [[https://docs.snowflake.com/en/sql-reference/functions-aggregation.html aggregate function]] + * "average" and "mean" can be used to specify "avg". + * + * {{{ + * import com.snowflake.snowpark.functions.col + * df.groupBy("itemType").agg( + * col("price") -> "mean", + * col("sales") -> "sum") + * }}} + * + * @return + * a [[DataFrame]] + * @since 0.1.0 + */ def agg(expr: (Column, String), exprs: (Column, String)*): DataFrame = transformation("agg") { agg(expr +: exprs) } - /** Returns a DataFrame with computed aggregates. The first element of the 'expr' pair is the - * column to aggregate and the second element is the aggregate function to compute. The following - * example computes the mean of the price column and the sum of the sales column. The name of the - * aggregate function to compute must be a valid Snowflake - * [[https://docs.snowflake.com/en/sql-reference/functions-aggregation.html aggregate function]] - * "average" and "mean" can be used to specify "avg". - * - * {{{ - * import com.snowflake.snowpark.functions.col - * df.groupBy("itemType").agg(Seq( - * col("price") -> "mean", - * col("sales") -> "sum")) - * }}} - * - * @return - * a [[DataFrame]] - * @since 0.2.0 - */ + /** + * Returns a DataFrame with computed aggregates. The first element of the 'expr' pair is the + * column to aggregate and the second element is the aggregate function to compute. The following + * example computes the mean of the price column and the sum of the sales column. The name of the + * aggregate function to compute must be a valid Snowflake + * [[https://docs.snowflake.com/en/sql-reference/functions-aggregation.html aggregate function]] + * "average" and "mean" can be used to specify "avg". + * + * {{{ + * import com.snowflake.snowpark.functions.col + * df.groupBy("itemType").agg(Seq( + * col("price") -> "mean", + * col("sales") -> "sum")) + * }}} + * + * @return + * a [[DataFrame]] + * @since 0.2.0 + */ def agg(exprs: Seq[(Column, String)]): DataFrame = transformation("agg") { toDF(exprs.map { case (col, expr) => strToExpr(expr)(col.expr) }) } - /** Returns a DataFrame with aggregated computed according to the supplied [[Column]] expressions. - * [[com.snowflake.snowpark.functions]] contains some built-in aggregate functions that can be - * used. 
- * - * {{{ - * impoer com.snowflake.snowpark.functions._ - * df.groupBy("itemType").agg( - * mean($"price"), - * sum($"sales")) - * }}} - * - * @return - * a [[DataFrame]] - * @since 0.1.0 - */ + /** + * Returns a DataFrame with aggregated computed according to the supplied [[Column]] expressions. + * [[com.snowflake.snowpark.functions]] contains some built-in aggregate functions that can be + * used. + * + * {{{ + * impoer com.snowflake.snowpark.functions._ + * df.groupBy("itemType").agg( + * mean($"price"), + * sum($"sales")) + * }}} + * + * @return + * a [[DataFrame]] + * @since 0.1.0 + */ def agg(expr: Column, exprs: Column*): DataFrame = transformation("agg") { agg(expr +: exprs) } - /** Returns a DataFrame with aggregated computed according to the supplied [[Column]] expressions. - * [[com.snowflake.snowpark.functions]] contains some built-in aggregate functions that can be - * used. - * - * {{{ - * impoer com.snowflake.snowpark.functions._ - * df.groupBy("itemType").agg(Seq( - * mean($"price"), - * sum($"sales"))) - * }}} - * - * @return - * a [[DataFrame]] - * @since 0.2.0 - */ + /** + * Returns a DataFrame with aggregated computed according to the supplied [[Column]] expressions. + * [[com.snowflake.snowpark.functions]] contains some built-in aggregate functions that can be + * used. + * + * {{{ + * impoer com.snowflake.snowpark.functions._ + * df.groupBy("itemType").agg(Seq( + * mean($"price"), + * sum($"sales"))) + * }}} + * + * @return + * a [[DataFrame]] + * @since 0.2.0 + */ def agg[T: ClassTag](exprs: Seq[Column]): DataFrame = transformation("agg") { toDF(exprs.map(_.expr)) } - /** Returns a DataFrame with aggregated computed according to the supplied [[Column]] expressions. - * [[com.snowflake.snowpark.functions]] contains some built-in aggregate functions that can be - * used. - * - * @return - * a [[DataFrame]] - * @since 0.9.0 - */ + /** + * Returns a DataFrame with aggregated computed according to the supplied [[Column]] expressions. + * [[com.snowflake.snowpark.functions]] contains some built-in aggregate functions that can be + * used. + * + * @return + * a [[DataFrame]] + * @since 0.9.0 + */ def agg(exprs: Array[Column]): DataFrame = transformation("agg") { agg(exprs.toSeq) } - /** Returns a DataFrame with computed aggregates. The first element of the 'expr' pair is the - * column to aggregate and the second element is the aggregate function to compute. The following - * example computes the mean of the price column and the sum of the sales column. The name of the - * aggregate function to compute must be a valid Snowflake - * [[https://docs.snowflake.com/en/sql-reference/functions-aggregation.html aggregate function]] - * "average" and "mean" can be used to specify "avg". - * - * {{{ - * import com.snowflake.snowpark.functions.col - * df.groupBy("itemType").agg(Map( - * col("price") -> "mean", - * col("sales") -> "sum" - * )) - * }}} - * - * @return - * a [[DataFrame]] - * @since 0.1.0 - */ + /** + * Returns a DataFrame with computed aggregates. The first element of the 'expr' pair is the + * column to aggregate and the second element is the aggregate function to compute. The following + * example computes the mean of the price column and the sum of the sales column. The name of the + * aggregate function to compute must be a valid Snowflake + * [[https://docs.snowflake.com/en/sql-reference/functions-aggregation.html aggregate function]] + * "average" and "mean" can be used to specify "avg". 
+ * + * {{{ + * import com.snowflake.snowpark.functions.col + * df.groupBy("itemType").agg(Map( + * col("price") -> "mean", + * col("sales") -> "sum" + * )) + * }}} + * + * @return + * a [[DataFrame]] + * @since 0.1.0 + */ def agg(exprs: Map[Column, String]): DataFrame = transformation("agg") { toDF(exprs.map { case (col, expr) => strToExpr(expr)(col.expr) }.toSeq) } - /** Return the average for the specified numeric columns. - * - * @since 0.4.0 - * @return - * a [[DataFrame]] - */ + /** + * Return the average for the specified numeric columns. + * + * @since 0.4.0 + * @return + * a [[DataFrame]] + */ def avg(cols: Column*): DataFrame = transformation("avg") { nonEmptyArgumentFunction("avg", cols) } - /** Return the average for the specified numeric columns. Alias of avg - * - * @since 0.4.0 - * @return - * a [[DataFrame]] - */ + /** + * Return the average for the specified numeric columns. Alias of avg + * + * @since 0.4.0 + * @return + * a [[DataFrame]] + */ def mean(cols: Column*): DataFrame = transformation("mean") { avg(cols: _*) } - /** Return the sum for the specified numeric columns. - * - * @since 0.1.0 - * @return - * a [[DataFrame]] - */ + /** + * Return the sum for the specified numeric columns. + * + * @since 0.1.0 + * @return + * a [[DataFrame]] + */ def sum(cols: Column*): DataFrame = transformation("sum") { nonEmptyArgumentFunction("sum", cols) } - /** Return the median for the specified numeric columns. - * - * @since 0.5.0 - * @return - * A [[DataFrame]] - */ + /** + * Return the median for the specified numeric columns. + * + * @since 0.5.0 + * @return + * A [[DataFrame]] + */ def median(cols: Column*): DataFrame = transformation("median") { nonEmptyArgumentFunction("median", cols) } - /** Return the min for the specified numeric columns. - * - * @since 0.1.0 - * @return - * A [[DataFrame]] - */ + /** + * Return the min for the specified numeric columns. + * + * @since 0.1.0 + * @return + * A [[DataFrame]] + */ def min(cols: Column*): DataFrame = transformation("min") { nonEmptyArgumentFunction("min", cols) } - /** Return the max for the specified numeric columns. - * - * @since 0.4.0 - * @return - * A [[DataFrame]] - */ + /** + * Return the max for the specified numeric columns. + * + * @since 0.4.0 + * @return + * A [[DataFrame]] + */ def max(cols: Column*): DataFrame = transformation("max") { nonEmptyArgumentFunction("max", cols) } - /** Returns non-deterministic values for the specified columns. - * - * @since 0.12.0 - * @return - * A [[DataFrame]] - */ + /** + * Returns non-deterministic values for the specified columns. + * + * @since 0.12.0 + * @return + * A [[DataFrame]] + */ def any_value(cols: Column*): DataFrame = transformation("any_value") { nonEmptyArgumentFunction("any_value", cols) } - /** Return the number of rows for each group. - * - * @since 0.1.0 - * @return - * A [[DataFrame]] - */ + /** + * Return the number of rows for each group. + * + * @since 0.1.0 + * @return + * A [[DataFrame]] + */ def count(): DataFrame = transformation("count") { toDF(Seq(Alias(functions.builtin("count")(Literal(1)).expr, "count"))) } - /** Computes the builtin aggregate 'aggName' over the specified columns. Use this function to - * invoke any aggregates not explicitly listed in this class. - * - * For example: - * {{{ - * df.groupBy(col("a")).builtin("max")(col("b")) - * }}} - * - * @since 0.6.0 - * @param aggName - * the Name of an aggregate function. - * @return - * A [[DataFrame]] - */ + /** + * Computes the builtin aggregate 'aggName' over the specified columns. 
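For the shorthand aggregation helpers documented above (avg, mean, sum, median, min, max, any_value, count), a minimal sketch assuming a DataFrame df with itemType, price and sales columns (illustrative names):
{{{
  // Illustrative only: df and its column names are assumed.
  import com.snowflake.snowpark.functions.col
  val grouped = df.groupBy("itemType")
  val avgPrice = grouped.avg(col("price"))                       // AVG(price) per itemType
  val rowCounts = grouped.count()                                // row count per itemType
  val extremes = grouped.agg(col("price") -> "min", col("sales") -> "max")
}}}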
Use this function to + * invoke any aggregates not explicitly listed in this class. + * + * For example: + * {{{ + * df.groupBy(col("a")).builtin("max")(col("b")) + * }}} + * + * @since 0.6.0 + * @param aggName + * the Name of an aggregate function. + * @return + * A [[DataFrame]] + */ def builtin(aggName: String)(cols: Column*): DataFrame = transformation("builtin") { toDF(cols.map(_.expr).map(expr => functions.builtin(aggName)(expr).expr)) } diff --git a/src/main/scala/com/snowflake/snowpark/Row.scala b/src/main/scala/com/snowflake/snowpark/Row.scala index 40ec4ffa..6648944f 100644 --- a/src/main/scala/com/snowflake/snowpark/Row.scala +++ b/src/main/scala/com/snowflake/snowpark/Row.scala @@ -7,23 +7,27 @@ import com.snowflake.snowpark.types.{Geography, Geometry, Variant} import scala.reflect.ClassTag import scala.util.hashing.MurmurHash3 -/** @since 0.1.0 - */ +/** + * @since 0.1.0 + */ object Row { - /** Returns a [[Row]] based on the given values. - * @since 0.1.0 - */ + /** + * Returns a [[Row]] based on the given values. + * @since 0.1.0 + */ def apply(values: Any*): Row = new Row(values.toArray) - /** Return a [[Row]] based on the values in the given Seq. - * @since 0.1.0 - */ + /** + * Return a [[Row]] based on the values in the given Seq. + * @since 0.1.0 + */ def fromSeq(values: Seq[Any]): Row = new Row(values.toArray) - /** Return a [[Row]] based on the values in the given Array. - * @since 0.2.0 - */ + /** + * Return a [[Row]] based on the values in the given Array. + * @since 0.2.0 + */ def fromArray(values: Array[Any]): Row = new Row(values) private[snowpark] def fromMap(map: Map[String, Any]): Row = @@ -36,60 +40,69 @@ private[snowpark] class SnowflakeObject private[snowpark] ( override def toString: String = convertValueToString(this) } -/** Represents a row returned by the evaluation of a [[DataFrame]]. - * - * @groupname getter Getter Functions - * @groupname utl Utility Functions - * @since 0.1.0 - */ +/** + * Represents a row returned by the evaluation of a [[DataFrame]]. + * + * @groupname getter Getter Functions + * @groupname utl Utility Functions + * @since 0.1.0 + */ class Row protected (values: Array[Any]) extends Serializable { - /** Converts this [[Row]] to a Seq - * @since 0.1.0 - * @group utl - */ + /** + * Converts this [[Row]] to a Seq + * @since 0.1.0 + * @group utl + */ def toSeq: Seq[Any] = values.toSeq - /** Total number of [[Column]] in this [[Row]]. Alias of [[length]] - * @group utl - * @since 0.1.0 - */ + /** + * Total number of [[Column]] in this [[Row]]. Alias of [[length]] + * @group utl + * @since 0.1.0 + */ def size: Int = length - /** Total number of [[Column]] in this [[Row]] - * @since 0.1.0 - * @group utl - */ + /** + * Total number of [[Column]] in this [[Row]] + * @since 0.1.0 + * @group utl + */ def length: Int = values.length - /** Returns the value of the column in the row at the given index. Alias of [[get]] - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column in the row at the given index. Alias of [[get]] + * @since 0.1.0 + * @group getter + */ def apply(index: Int): Any = get(index) - /** Returns the value of the column in the row at the given index. - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column in the row at the given index. + * @since 0.1.0 + * @group getter + */ def get(index: Int): Any = values(index) - /** Returns a clone of this row. - * @since 0.1.0 - * @group utl - */ + /** + * Returns a clone of this row. 
+ * @since 0.1.0 + * @group utl + */ def copy(): Row = new Row(values) - /** Returns a clone of this row object. Alias of [[copy]] - * @since 0.1.0 - * @group utl - */ + /** + * Returns a clone of this row object. Alias of [[copy]] + * @since 0.1.0 + * @group utl + */ override def clone(): AnyRef = copy() - /** Returns true iff the given row equals this row. - * @since 0.1.0 - * @group utl - */ + /** + * Returns true iff the given row equals this row. + * @since 0.1.0 + * @group utl + */ override def equals(obj: Any): Boolean = if (!obj.isInstanceOf[Row]) { false @@ -107,10 +120,11 @@ class Row protected (values: Array[Any]) extends Serializable { } } - /** Calculates hashcode of this row. - * @since 0.1.0 - * @group utl - */ + /** + * Calculates hashcode of this row. + * @since 0.1.0 + * @group utl + */ override def hashCode(): Int = { var n = 0 var h = MurmurHash3.seqSeed @@ -122,23 +136,26 @@ class Row protected (values: Array[Any]) extends Serializable { MurmurHash3.finalizeHash(h, n) } - /** Returns true if the value of the column at the given index is null, otherwise, returns false. - * @since 0.1.0 - * @group utl - */ + /** + * Returns true if the value of the column at the given index is null, otherwise, returns false. + * @since 0.1.0 + * @group utl + */ def isNullAt(index: Int): Boolean = get(index) == null - /** Returns the value of the column at the given index as a Boolean value - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Boolean value + * @since 0.1.0 + * @group getter + */ def getBoolean(index: Int): Boolean = getAnyValAs[Boolean](index) - /** Returns the value of the column at the given index as a Byte value. Casts Short, Int, Long - * number to Byte if possible. - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Byte value. Casts Short, Int, Long + * number to Byte if possible. + * @since 0.1.0 + * @group getter + */ def getByte(index: Int): Byte = get(index) match { case byte: Byte => byte case short: Short if short <= Byte.MaxValue && short >= Byte.MinValue => short.toByte @@ -148,11 +165,12 @@ class Row protected (values: Array[Any]) extends Serializable { throw ErrorMessage.MISC_CANNOT_CAST_VALUE(other.getClass.getName, s"$other", "Byte") } - /** Returns the value of the column at the given index as a Short value. Casts Byte, Int, Long - * number to Short if possible. - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Short value. Casts Byte, Int, Long + * number to Short if possible. + * @since 0.1.0 + * @group getter + */ def getShort(index: Int): Short = get(index) match { case byte: Byte => byte.toShort case short: Short => short @@ -162,11 +180,12 @@ class Row protected (values: Array[Any]) extends Serializable { throw ErrorMessage.MISC_CANNOT_CAST_VALUE(other.getClass.getName, s"$other", "Short") } - /** Returns the value of the column at the given index as a Int value. Casts Byte, Short, Long - * number to Int if possible. - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Int value. Casts Byte, Short, Long + * number to Int if possible. 
+ * @since 0.1.0 + * @group getter + */ def getInt(index: Int): Int = get(index) match { case byte: Byte => byte.toInt case short: Short => short.toInt @@ -176,11 +195,12 @@ class Row protected (values: Array[Any]) extends Serializable { throw ErrorMessage.MISC_CANNOT_CAST_VALUE(other.getClass.getName, s"$other", "Int") } - /** Returns the value of the column at the given index as a Long value. Casts Byte, Short, Int - * number to Long if possible. - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Long value. Casts Byte, Short, Int + * number to Long if possible. + * @since 0.1.0 + * @group getter + */ def getLong(index: Int): Long = get(index) match { case byte: Byte => byte.toLong case short: Short => short.toLong @@ -190,11 +210,12 @@ class Row protected (values: Array[Any]) extends Serializable { throw ErrorMessage.MISC_CANNOT_CAST_VALUE(other.getClass.getName, s"$other", "Long") } - /** Returns the value of the column at the given index as a Float value. Casts Byte, Short, Int, - * Long and Double number to Float if possible. - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Float value. Casts Byte, Short, Int, + * Long and Double number to Float if possible. + * @since 0.1.0 + * @group getter + */ def getFloat(index: Int): Float = get(index) match { case float: Float => float case double: Double if double <= Float.MaxValue && double >= Float.MinValue => double.toFloat @@ -206,11 +227,12 @@ class Row protected (values: Array[Any]) extends Serializable { throw ErrorMessage.MISC_CANNOT_CAST_VALUE(other.getClass.getName, s"$other", "Float") } - /** Returns the value of the column at the given index as a Double value. Casts Byte, Short, Int, - * Long, Float number to Double. - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Double value. Casts Byte, Short, Int, + * Long, Float number to Double. + * @since 0.1.0 + * @group getter + */ def getDouble(index: Int): Double = get(index) match { case float: Float => float.toDouble case double: Double => double @@ -222,11 +244,12 @@ class Row protected (values: Array[Any]) extends Serializable { throw ErrorMessage.MISC_CANNOT_CAST_VALUE(other.getClass.getName, s"$other", "Double") } - /** Returns the value of the column at the given index as a String value. Returns geography data - * as string, if geography data of GeoJSON, WKT or EWKT is found. - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a String value. Returns geography data as + * string, if geography data of GeoJSON, WKT or EWKT is found. + * @since 0.1.0 + * @group getter + */ def getString(index: Int): String = { get(index) match { case variant: Variant => variant.toString @@ -239,121 +262,133 @@ class Row protected (values: Array[Any]) extends Serializable { } } - /** Returns the value of the column at the given index as a Byte array value. - * @since 0.2.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Byte array value. 
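The numeric getters above apply the documented widening and range-checked narrowing casts. A small sketch of the behaviour on a hand-built row (values illustrative):
{{{
  // Illustrative only: a row holding a Byte, an Int and a String.
  val row = Row(1.toByte, 42, "3.14")
  row.getByte(0)    // 1: returned as-is
  row.getInt(0)     // 1: Byte widened to Int
  row.getLong(1)    // 42L: Int widened to Long
  row.getShort(1)   // 42: fits in Short, so the narrowing cast succeeds
  row.getString(2)  // "3.14"
}}}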
+ * @since 0.2.0 + * @group getter + */ def getBinary(index: Int): Array[Byte] = getAs[Array[Byte]](index) - /** Returns the value of the column at the given index as a BigDecimal value - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a BigDecimal value + * @since 0.1.0 + * @group getter + */ def getDecimal(index: Int): java.math.BigDecimal = getAs[java.math.BigDecimal](index) - /** Returns the value of the column at the given index as a Date value - * @since 0.1.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Date value + * @since 0.1.0 + * @group getter + */ def getDate(index: Int): Date = getAs[Date](index) - /** Returns the value of the column at the given index as a Time value - * @since 0.2.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Time value + * @since 0.2.0 + * @group getter + */ def getTime(index: Int): Time = getAs[Time](index) - /** Returns the value of the column at the given index as a Timestamp value - * @since 0.2.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Timestamp value + * @since 0.2.0 + * @group getter + */ def getTimestamp(index: Int): Timestamp = getAs[Timestamp](index) - /** Returns the value of the column at the given index as Variant class - * @since 0.2.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as Variant class + * @since 0.2.0 + * @group getter + */ def getVariant(index: Int): Variant = new Variant(getString(index)) - /** Returns the value of the column at the given index as Geography class - * @since 0.2.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as Geography class + * @since 0.2.0 + * @group getter + */ def getGeography(index: Int): Geography = getAs[Geography](index) - /** Returns the value of the column at the given index as Geometry class - * - * @since 1.12.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as Geometry class + * + * @since 1.12.0 + * @group getter + */ def getGeometry(index: Int): Geometry = getAs[Geometry](index) - /** Returns the value of the column at the given index as a Seq of Variant - * @since 0.2.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Seq of Variant + * @since 0.2.0 + * @group getter + */ def getSeqOfVariant(index: Int): Seq[Variant] = new Variant(getString(index)).asSeq() - /** Returns the value of the column at the given index as a java map of Variant - * @since 0.2.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a java map of Variant + * @since 0.2.0 + * @group getter + */ def getMapOfVariant(index: Int): Map[String, Variant] = new Variant(getString(index)).asMap() - /** Returns the Snowflake Object value at the given index as a Row value. - * - * @since 1.13.0 - * @group getter - */ + /** + * Returns the Snowflake Object value at the given index as a Row value. + * + * @since 1.13.0 + * @group getter + */ def getObject(index: Int): Row = getAs[Row](index) - /** Returns the value of the column at the given index as a Seq value. - * - * @since 1.13.0 - * @group getter - */ - def getSeq[T](index: Int): Seq[T] = { + /** + * Returns the value of the column at the given index as a Seq value. 
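The getSeq change in the next hunk swaps an erasure-unchecked pattern match for a ClassTag-bounded element cast, presumably to satisfy the stricter Scala 2.13 compile. Call sites that pass the element type explicitly keep working; a usage sketch with illustrative values:
{{{
  // Illustrative only: a row whose first column holds an array of strings.
  val row = Row(Array("a", "b", "c"))
  val tags: Seq[String] = row.getSeq[String](0)   // Seq("a", "b", "c")
}}}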
+ * + * @since 1.13.0 + * @group getter + */ + def getSeq[T: ClassTag](index: Int): Seq[T] = { val result = getAs[Array[_]](index) - result.map { case x: T => - x - } + result.map(_.asInstanceOf[T]) } - /** Returns the value of the column at the given index as a Map value. - * - * @since 1.13.0 - * @group getter - */ + /** + * Returns the value of the column at the given index as a Map value. + * + * @since 1.13.0 + * @group getter + */ def getMap[T, U](index: Int): Map[T, U] = { getAs[Map[T, U]](index) } - /** Returns the value at the specified column index and casts it to the desired type `T`. - * - * Example: - * {{{ - * val row = Row(1, "Alice", 95.5) - * row.getAs[Int](0) // Returns 1 as an Int - * row.getAs[String](1) // Returns "Alice" as a String - * row.getAs[Double](2) // Returns 95.5 as a Double - * }}} - * - * @param index - * the zero-based column index within the row. - * @tparam T - * the expected type of the value at the specified column index. - * @return - * the value at the specified column index cast to type `T`. - * @throws ClassCastException - * if the value at the given index cannot be cast to type `T`. - * @throws ArrayIndexOutOfBoundsException - * if the column index is out of bounds. - * @group getter - * @since 1.15.0 - */ + /** + * Returns the value at the specified column index and casts it to the desired type `T`. + * + * Example: + * {{{ + * val row = Row(1, "Alice", 95.5) + * row.getAs[Int](0) // Returns 1 as an Int + * row.getAs[String](1) // Returns "Alice" as a String + * row.getAs[Double](2) // Returns 95.5 as a Double + * }}} + * + * @param index + * the zero-based column index within the row. + * @tparam T + * the expected type of the value at the specified column index. + * @return + * the value at the specified column index cast to type `T`. + * @throws ClassCastException + * if the value at the given index cannot be cast to type `T`. + * @throws ArrayIndexOutOfBoundsException + * if the column index is out of bounds. + * @group getter + * @since 1.15.0 + */ def getAs[T](index: Int)(implicit classTag: ClassTag[T]): T = { classTag.runtimeClass match { case _ if isNullAt(index) => get(index).asInstanceOf[T] @@ -390,10 +425,11 @@ class Row protected (values: Array[Any]) extends Serializable { case other => other.toString } - /** Returns a string value to represent the content of this row - * @since 0.1.0 - * @group utl - */ + /** + * Returns a string value to represent the content of this row + * @since 0.1.0 + * @group utl + */ override def toString: String = values .map(convertValueToString) diff --git a/src/main/scala/com/snowflake/snowpark/SProcRegistration.scala b/src/main/scala/com/snowflake/snowpark/SProcRegistration.scala index bd7a9119..3920857d 100644 --- a/src/main/scala/com/snowflake/snowpark/SProcRegistration.scala +++ b/src/main/scala/com/snowflake/snowpark/SProcRegistration.scala @@ -6,56 +6,57 @@ import scala.reflect.runtime.universe.TypeTag import com.snowflake.snowpark.internal.ScalaFunctions._ // scalastyle:off -/** Provides methods to register a SProc (Stored Procedure) in the Snowflake database. - * - * [[Session.sproc]] returns an object of this class. 
- * - * To register anonymous temporary SProcs which work in the current session: - * {{{ - * val sp = session.sproc.registerTemporary((session: Session, num: Int) => s"num: $num") - * session.storedProcedure(sp, 123) - * }}} - * - * To register named temporary SProcs which work in the current session: - * {{{ - * val name = "sproc" - * val sp = session.sproc.registerTemporary(name, - * (session: Session, num: Int) => s"num: $num") - * session.storedProcedure(sp, 123) - * session.storedProcedure(name, 123) - * }}} - * - * It requires a user stage when registering a permanent SProc. Snowpark will upload all JAR files - * for the SProc and any dependencies. It is also required to specify Owner or Caller modes via the - * parameter 'isCallerMode'. - * {{{ - * val name = "sproc" - * val stageName = "" - * val sp = session.sproc.registerPermanent(name, - * (session: Session, num: Int) => s"num: $num", - * stageName, - * isCallerMode = true) - * session.storedProcedure(sp, 123) - * session.storedProcedure(name, 123) - * }}} - * - * This object also provides a convenient methods to execute SProc lambda functions directly with - * current session on the client side. The functions are designed for debugging and development - * only. Since the local and Snowflake server environments are different, the outputs of executing - * a SP function with these test function and on Snowflake server may be different too. - * {{{ - * // a client side Scala lambda - * val func = (session: Session, num: Int) => s"num: $num" - * // register a server side stored procedure - * val sp = session.sproc.registerTemporary(func) - * // execute the lambda function of this SP from the client side - * val localResult = session.sproc.runLocally(func, 123) - * // execute this SP from the server side - * val resultDF = session.storedProcedure(sp, 123) - * }}} - * - * @since 1.8.0 - */ +/** + * Provides methods to register a SProc (Stored Procedure) in the Snowflake database. + * + * [[Session.sproc]] returns an object of this class. + * + * To register anonymous temporary SProcs which work in the current session: + * {{{ + * val sp = session.sproc.registerTemporary((session: Session, num: Int) => s"num: $num") + * session.storedProcedure(sp, 123) + * }}} + * + * To register named temporary SProcs which work in the current session: + * {{{ + * val name = "sproc" + * val sp = session.sproc.registerTemporary(name, + * (session: Session, num: Int) => s"num: $num") + * session.storedProcedure(sp, 123) + * session.storedProcedure(name, 123) + * }}} + * + * It requires a user stage when registering a permanent SProc. Snowpark will upload all JAR files + * for the SProc and any dependencies. It is also required to specify Owner or Caller modes via the + * parameter 'isCallerMode'. + * {{{ + * val name = "sproc" + * val stageName = "" + * val sp = session.sproc.registerPermanent(name, + * (session: Session, num: Int) => s"num: $num", + * stageName, + * isCallerMode = true) + * session.storedProcedure(sp, 123) + * session.storedProcedure(name, 123) + * }}} + * + * This object also provides a convenient methods to execute SProc lambda functions directly with + * current session on the client side. The functions are designed for debugging and development + * only. Since the local and Snowflake server environments are different, the outputs of executing a + * SP function with these test function and on Snowflake server may be different too. 
+ * {{{ + * // a client side Scala lambda + * val func = (session: Session, num: Int) => s"num: $num" + * // register a server side stored procedure + * val sp = session.sproc.registerTemporary(func) + * // execute the lambda function of this SP from the client side + * val localResult = session.sproc.runLocally(func, 123) + * // execute this SP from the server side + * val resultDF = session.storedProcedure(sp, 123) + * }}} + * + * @since 1.8.0 + */ // scalastyle:on class SProcRegistration(session: Session) { @@ -81,11 +82,12 @@ class SProcRegistration(session: Session) { */ // scalastyle:on line.size.limit - /** Registers a Scala closure of 0 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 0 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[RT: TypeTag]( name: String, sp: Function1[Session, RT], @@ -95,11 +97,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 1 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 1 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[RT: TypeTag, A1: TypeTag]( name: String, sp: Function2[Session, A1, RT], @@ -109,11 +112,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 2 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 2 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[RT: TypeTag, A1: TypeTag, A2: TypeTag]( name: String, sp: Function3[Session, A1, A2, RT], @@ -123,11 +127,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 3 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 3 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( name: String, sp: Function4[Session, A1, A2, A3, RT], @@ -137,11 +142,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 4 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 4 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( name: String, sp: Function5[Session, A1, A2, A3, A4, RT], @@ -151,11 +157,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 5 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 5 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. 
+ */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -171,11 +178,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 6 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 6 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -192,11 +200,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 7 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 7 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -214,11 +223,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 8 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 8 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -237,11 +247,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 9 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 9 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -261,11 +272,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 10 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 10 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -286,11 +298,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 11 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 11 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -312,11 +325,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 12 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 12 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -339,11 +353,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 13 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. 
- */ + /** + * Registers a Scala closure of 13 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -367,11 +382,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 14 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 14 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -396,11 +412,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 15 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 15 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -426,11 +443,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 16 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 16 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -475,11 +493,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 17 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 17 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -526,11 +545,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 18 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 18 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -579,11 +599,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 19 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 19 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -634,11 +655,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 20 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 20 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. 
+ */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -691,11 +713,12 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp), Some(stageLocation), isCallerMode) } - /** Registers a Scala closure of 21 arguments as a permanent Stored Procedure. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 21 arguments as a permanent Stored Procedure. + * + * @tparam RT + * Return type of the UDF. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -770,70 +793,76 @@ class SProcRegistration(session: Session) { */ // scalastyle:on line.size.limit - /** Registers a Scala closure of 0 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 0 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag](sp: Function1[Session, RT]): StoredProcedure = sproc("registerTemporary") { register(None, _toSP(sp)) } - /** Registers a Scala closure of 1 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 1 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag, A1: TypeTag](sp: Function2[Session, A1, RT]): StoredProcedure = sproc("registerTemporary") { register(None, _toSP(sp)) } - /** Registers a Scala closure of 2 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 2 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag]( sp: Function3[Session, A1, A2, RT]): StoredProcedure = sproc("registerTemporary") { register(None, _toSP(sp)) } - /** Registers a Scala closure of 3 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 3 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( sp: Function4[Session, A1, A2, A3, RT]): StoredProcedure = sproc("registerTemporary") { register(None, _toSP(sp)) } - /** Registers a Scala closure of 4 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 4 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( sp: Function5[Session, A1, A2, A3, A4, RT]): StoredProcedure = sproc("registerTemporary") { register(None, _toSP(sp)) } - /** Registers a Scala closure of 5 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 5 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. 
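For the anonymous registerTemporary overloads in this family, each arity adds one stored-procedure argument after the leading Session parameter. A minimal sketch with two arguments (names illustrative):
{{{
  // Illustrative only: an anonymous temporary stored procedure with two Int arguments.
  val add = session.sproc.registerTemporary(
    (session: Session, a: Int, b: Int) => s"sum: ${a + b}")
  session.storedProcedure(add, 1, 2).show()
}}}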
+ */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -845,12 +874,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 6 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 6 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -863,12 +893,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 7 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 7 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -882,12 +913,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 8 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 8 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -902,12 +934,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 9 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 9 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -924,12 +957,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 10 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 10 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -947,12 +981,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 11 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 11 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -971,12 +1006,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 12 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 12 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. 
+ */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -996,12 +1032,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 13 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 13 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1023,12 +1060,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 14 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 14 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1051,12 +1089,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 15 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 15 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1080,12 +1119,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 16 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 16 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1127,12 +1167,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 17 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 17 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1176,12 +1217,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 18 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 18 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1227,12 +1269,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 19 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 19 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. 
+ */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1280,12 +1323,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 20 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 20 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1335,12 +1379,13 @@ class SProcRegistration(session: Session) { register(None, _toSP(sp)) } - /** Registers a Scala closure of 21 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 21 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1412,23 +1457,25 @@ class SProcRegistration(session: Session) { */ // scalastyle:on line.size.limit - /** Registers a Scala closure of 0 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 0 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag](name: String, sp: Function1[Session, RT]): StoredProcedure = sproc("registerTemporary", execName = name) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 1 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 1 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag, A1: TypeTag]( name: String, sp: Function2[Session, A1, RT]): StoredProcedure = @@ -1436,12 +1483,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 2 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 2 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag]( name: String, sp: Function3[Session, A1, A2, RT]): StoredProcedure = @@ -1449,12 +1497,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 3 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 3 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( name: String, sp: Function4[Session, A1, A2, A3, RT]): StoredProcedure = @@ -1462,12 +1511,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 4 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. 
- */ + /** + * Registers a Scala closure of 4 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( name: String, sp: Function5[Session, A1, A2, A3, A4, RT]): StoredProcedure = @@ -1475,12 +1525,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 5 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 5 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1492,12 +1543,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 6 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 6 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1512,12 +1564,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 7 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 7 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1533,12 +1586,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 8 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 8 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1555,12 +1609,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 9 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 9 arguments as a temporary Stored Procedure that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1578,12 +1633,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 10 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 10 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1602,12 +1658,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 11 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. 
- */ + /** + * Registers a Scala closure of 11 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1627,12 +1684,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 12 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 12 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1654,12 +1712,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 13 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 13 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1682,12 +1741,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 14 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 14 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1711,12 +1771,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 15 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 15 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1741,12 +1802,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 16 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 16 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1789,12 +1851,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 17 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 17 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1839,12 +1902,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 18 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. 
- */ + /** + * Registers a Scala closure of 18 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1891,12 +1955,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 19 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 19 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1945,12 +2010,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 20 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 20 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -2001,12 +2067,13 @@ class SProcRegistration(session: Session) { register(Some(name), _toSP(sp)) } - /** Registers a Scala closure of 21 arguments as a temporary Stored Procedure that is scoped to - * this session. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Registers a Scala closure of 21 arguments as a temporary Stored Procedure that is scoped to + * this session. + * + * @tparam RT + * Return type of the UDF. + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -2066,40 +2133,43 @@ class SProcRegistration(session: Session) { isCallerMode: Boolean = true): StoredProcedure = handler.registerSP(name, sp, stageLocation, isCallerMode) - /** Executes a Stored Procedure lambda function of 0 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 0 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[RT: TypeTag](sp: Function1[Session, RT]): RT = { sp.apply(this.session) } - /** Executes a Stored Procedure lambda function of 1 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 1 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. 
Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[RT: TypeTag, A1: TypeTag](sp: Function2[Session, A1, RT], a1: A1): RT = { sp.apply(this.session, a1) } - /** Executes a Stored Procedure lambda function of 2 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 2 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[RT: TypeTag, A1: TypeTag, A2: TypeTag]( sp: Function3[Session, A1, A2, RT], @@ -2108,14 +2178,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2) } - /** Executes a Stored Procedure lambda function of 3 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 3 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( sp: Function4[Session, A1, A2, A3, RT], @@ -2125,14 +2196,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3) } - /** Executes a Stored Procedure lambda function of 4 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 4 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. 
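+   *
+   * For example, a minimal sketch (assumes `session` is an existing [[Session]]; the closure runs
+   * in the local JVM and no stored procedure is created on the server):
+   * {{{
+   *   val total: Int = session.sproc.runLocally(
+   *     (_: Session, a: Int, b: Int, c: Int, d: Int) => a + b + c + d,
+   *     1, 2, 3, 4)
+   *   // total == 10
+   * }}}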
+ */ @PublicPreview def runLocally[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( sp: Function5[Session, A1, A2, A3, A4, RT], @@ -2143,14 +2215,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4) } - /** Executes a Stored Procedure lambda function of 5 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 5 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag]( sp: Function6[Session, A1, A2, A3, A4, A5, RT], @@ -2162,14 +2235,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5) } - /** Executes a Stored Procedure lambda function of 6 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 6 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2189,14 +2263,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6) } - /** Executes a Stored Procedure lambda function of 7 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 7 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2218,14 +2293,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7) } - /** Executes a Stored Procedure lambda function of 8 arguments with current Snowpark session in - * the local environment. 
This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 8 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2249,14 +2325,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8) } - /** Executes a Stored Procedure lambda function of 9 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 9 arguments with current Snowpark session in the + * local environment. This is a test function and used for debugging and development only. Since + * the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2282,14 +2359,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8, a9) } - /** Executes a Stored Procedure lambda function of 10 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 10 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2317,14 +2395,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) } - /** Executes a Stored Procedure lambda function of 11 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 11 arguments with current Snowpark session in + * the local environment. 
This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2354,14 +2433,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11) } - /** Executes a Stored Procedure lambda function of 12 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 12 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2393,14 +2473,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12) } - /** Executes a Stored Procedure lambda function of 13 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 13 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2434,14 +2515,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13) } - /** Executes a Stored Procedure lambda function of 14 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 14 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. 
+ */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2477,14 +2559,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14) } - /** Executes a Stored Procedure lambda function of 15 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 15 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2522,14 +2605,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) } - /** Executes a Stored Procedure lambda function of 16 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 16 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2587,14 +2671,15 @@ class SProcRegistration(session: Session) { sp.apply(this.session, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16) } - /** Executes a Stored Procedure lambda function of 17 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 17 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2673,14 +2758,15 @@ class SProcRegistration(session: Session) { a17) } - /** Executes a Stored Procedure lambda function of 18 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. 
- * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 18 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2763,14 +2849,15 @@ class SProcRegistration(session: Session) { a18) } - /** Executes a Stored Procedure lambda function of 19 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 19 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2857,14 +2944,15 @@ class SProcRegistration(session: Session) { a19) } - /** Executes a Stored Procedure lambda function of 20 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 20 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. + * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, @@ -2955,14 +3043,15 @@ class SProcRegistration(session: Session) { a20) } - /** Executes a Stored Procedure lambda function of 21 arguments with current Snowpark session in - * the local environment. This is a test function and used for debugging and development only. - * Since the local and Snowflake server environments are different, the outputs of executing a SP - * function with this test function and on Snowflake server may be different too. - * - * @tparam RT - * Return type of the UDF. - */ + /** + * Executes a Stored Procedure lambda function of 21 arguments with current Snowpark session in + * the local environment. This is a test function and used for debugging and development only. 
+ * Since the local and Snowflake server environments are different, the outputs of executing a SP + * function with this test function and on Snowflake server may be different too. + * + * @tparam RT + * Return type of the UDF. + */ @PublicPreview def runLocally[ RT: TypeTag, diff --git a/src/main/scala/com/snowflake/snowpark/SaveMode.scala b/src/main/scala/com/snowflake/snowpark/SaveMode.scala index c6742b30..9233615d 100644 --- a/src/main/scala/com/snowflake/snowpark/SaveMode.scala +++ b/src/main/scala/com/snowflake/snowpark/SaveMode.scala @@ -1,9 +1,10 @@ package com.snowflake.snowpark -/** SaveMode configures the behavior when data is written from a DataFrame to a data source using a - * [[DataFrameWriter]] instance. - * @since 0.1.0 - */ +/** + * SaveMode configures the behavior when data is written from a DataFrame to a data source using a + * [[DataFrameWriter]] instance. + * @since 0.1.0 + */ object SaveMode { def apply(mode: String): SaveMode = @@ -15,33 +16,38 @@ object SaveMode { case "IGNORE" => Ignore } // scalastyle:on - /** In the Append mode, new data is appended to the datasource. - * @since 0.1.0 - */ + /** + * In the Append mode, new data is appended to the datasource. + * @since 0.1.0 + */ object Append extends SaveMode - /** In the Overwrite mode, existing data is overwritten with the new data. If the datasource is a - * table, then the existing data in the table is replaced. - * @since 0.1.0 - */ + /** + * In the Overwrite mode, existing data is overwritten with the new data. If the datasource is a + * table, then the existing data in the table is replaced. + * @since 0.1.0 + */ object Overwrite extends SaveMode - /** In the ErrorIfExists mode, an error is thrown if the data being written already exists in the - * data source. - * @since 0.1.0 - */ + /** + * In the ErrorIfExists mode, an error is thrown if the data being written already exists in the + * data source. + * @since 0.1.0 + */ object ErrorIfExists extends SaveMode - /** In the Ignore mode, if the data already exists, the write operation is not expected to update - * existing data. - * @since 0.1.0 - */ + /** + * In the Ignore mode, if the data already exists, the write operation is not expected to update + * existing data. + * @since 0.1.0 + */ object Ignore extends SaveMode } -/** Please refer to the companion [[SaveMode$]] object. - * @since 0.1.0 - */ +/** + * Please refer to the companion [[SaveMode$]] object. 
+ * @since 0.1.0 + */ sealed trait SaveMode { override def toString: String = this.getClass.getSimpleName.stripSuffix("$") } diff --git a/src/main/scala/com/snowflake/snowpark/Session.scala b/src/main/scala/com/snowflake/snowpark/Session.scala index a9634d3f..939ddf2e 100644 --- a/src/main/scala/com/snowflake/snowpark/Session.scala +++ b/src/main/scala/com/snowflake/snowpark/Session.scala @@ -7,7 +7,7 @@ import java.util.{Properties, Map => JMap, Set => JSet} import java.util.concurrent.{ConcurrentHashMap, ForkJoinPool, ForkJoinWorkerThread} import com.snowflake.snowpark.internal.analyzer._ import com.snowflake.snowpark.internal._ -import com.snowflake.snowpark.internal.analyzer.{TableFunction => TFunction} +import com.snowflake.snowpark.internal.analyzer.{TableFunctionEx => TFunction} import com.snowflake.snowpark.types._ import com.snowflake.snowpark.functions._ import com.snowflake.snowpark.internal.ErrorMessage.{ @@ -27,37 +27,38 @@ import scala.concurrent.{ExecutionContext, Future} import scala.collection.JavaConverters._ import scala.reflect.runtime.universe.TypeTag -/** Establishes a connection with a Snowflake database and provides methods for creating DataFrames - * and accessing objects for working with files in stages. - * - * When you create a {@code Session} object, you provide configuration settings to establish a - * connection with a Snowflake database (e.g. the URL for the account, a user name, etc.). You can - * specify these settings in a configuration file or in a Map that associates configuration setting - * names with values. - * - * To create a Session from a file: - * {{{ - * val session = Session.builder.configFile("/path/to/file.properties").create - * }}} - * - * To create a Session from a map of configuration properties: - * {{{ - * val configMap = Map( - * "URL" -> "demo.snowflakecomputing.com", - * "USER" -> "testUser", - * "PASSWORD" -> "******", - * "ROLE" -> "myrole", - * "WAREHOUSE" -> "warehouse1", - * "DB" -> "db1", - * "SCHEMA" -> "schema1" - * ) - * Session.builder.configs(configMap).create - * }}} - * - * Session contains functions to construct [[DataFrame]]s like - * [[Session.table(name* Session.table]], [[Session.sql]], and [[Session.read]] - * @since 0.1.0 - */ +/** + * Establishes a connection with a Snowflake database and provides methods for creating DataFrames + * and accessing objects for working with files in stages. + * + * When you create a {@code Session} object, you provide configuration settings to establish a + * connection with a Snowflake database (e.g. the URL for the account, a user name, etc.). You can + * specify these settings in a configuration file or in a Map that associates configuration setting + * names with values. 
+ * + * To create a Session from a file: + * {{{ + * val session = Session.builder.configFile("/path/to/file.properties").create + * }}} + * + * To create a Session from a map of configuration properties: + * {{{ + * val configMap = Map( + * "URL" -> "demo.snowflakecomputing.com", + * "USER" -> "testUser", + * "PASSWORD" -> "******", + * "ROLE" -> "myrole", + * "WAREHOUSE" -> "warehouse1", + * "DB" -> "db1", + * "SCHEMA" -> "schema1" + * ) + * Session.builder.configs(configMap).create + * }}} + * + * Session contains functions to construct [[DataFrame]]s like + * [[Session.table(name* Session.table]], [[Session.sql]], and [[Session.read]] + * @since 0.1.0 + */ class Session private (private[snowpark] val conn: ServerConnection) extends Logging { private val STAGE_PREFIX = "@" // URI and file name with md5 @@ -137,11 +138,12 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log lastActionID } - /** Cancel all action methods that are running currently. This does not affect on any action - * methods called in the future. - * - * @since 0.5.0 - */ + /** + * Cancel all action methods that are running currently. This does not affect on any action + * methods called in the future. + * + * @since 0.5.0 + */ def cancelAll(): Unit = synchronized { logInfo("Canceling all running query") lastCanceledID = lastActionID @@ -151,13 +153,14 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log conn.runQuery(s"select system$$cancel_all_queries(${conn.getJDBCSessionID})") } - /** Returns the list of URLs for all the dependencies that were added for user-defined functions - * (UDFs). This list includes any JAR files that were added automatically by the library. - * - * @return - * Set[URI] - * @since 0.1.0 - */ + /** + * Returns the list of URLs for all the dependencies that were added for user-defined functions + * (UDFs). This list includes any JAR files that were added automatically by the library. + * + * @return + * Set[URI] + * @since 0.1.0 + */ def getDependencies: collection.Set[URI] = { conn.telemetry.reportGetDependency() // make a clone of result, but not just return a pointer @@ -168,11 +171,12 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log getDependencies.filterNot(_.getPath.startsWith(STAGE_PREFIX)) } - /** Returns a Java Set of URLs for all the dependencies that were added for user-defined functions - * (UDFs). This list includes any JAR files that were added automatically by the library. - * - * @since 0.2.0 - */ + /** + * Returns a Java Set of URLs for all the dependencies that were added for user-defined functions + * (UDFs). This list includes any JAR files that were added automatically by the library. + * + * @since 0.2.0 + */ def getDependenciesAsJavaSet: JSet[URI] = getDependencies.asJava private[snowpark] val plans: SnowflakePlanBuilder = new SnowflakePlanBuilder(this) @@ -190,43 +194,44 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log } } - /** Returns the JDBC - * [[https://docs.snowflake.com/en/user-guide/jdbc-api.html#object-connection Connection]] object - * used for the connection to the Snowflake database. - * - * @return - * JDBC Connection object - */ + /** + * Returns the JDBC + * [[https://docs.snowflake.com/en/user-guide/jdbc-api.html#object-connection Connection]] object + * used for the connection to the Snowflake database. 
+ * + * @return + * JDBC Connection object + */ def jdbcConnection: Connection = conn.connection - /** Registers a file in stage or a local file as a dependency of a user-defined function (UDF). - * - * The local file can be a JAR file, a directory, or any other file resource. If you pass the - * path to a local file to {@code addDependency} , the Snowpark library uploads the file to a - * temporary stage and imports the file when executing a UDF. - * - * If you pass the path to a file in a stage to {@code addDependency} , the file is included in - * the imports when executing a UDF. - * - * Note that in most cases, you don't need to add the Snowpark JAR file and the JAR file (or - * directory) of the currently running application as dependencies. The Snowpark library - * automatically attempts to detect and upload these JAR files. However, if this automatic - * detection fails, the Snowpark library reports this in an error message, and you must add these - * JAR files explicitly by calling {@code addDependency} . - * - * The following example demonstrates how to add dependencies on local files and files in a - * stage: - * - * {{{ - * session.addDependency("@my_stage/http-commons.jar") - * session.addDependency("/home/username/lib/language-detector.jar") - * session.addDependency("./resource-dir/") - * session.addDependency("./resource.xml") - * }}} - * @since 0.1.0 - * @param path - * Path to a local directory, local file, or file in a stage. - */ + /** + * Registers a file in stage or a local file as a dependency of a user-defined function (UDF). + * + * The local file can be a JAR file, a directory, or any other file resource. If you pass the path + * to a local file to {@code addDependency} , the Snowpark library uploads the file to a temporary + * stage and imports the file when executing a UDF. + * + * If you pass the path to a file in a stage to {@code addDependency} , the file is included in + * the imports when executing a UDF. + * + * Note that in most cases, you don't need to add the Snowpark JAR file and the JAR file (or + * directory) of the currently running application as dependencies. The Snowpark library + * automatically attempts to detect and upload these JAR files. However, if this automatic + * detection fails, the Snowpark library reports this in an error message, and you must add these + * JAR files explicitly by calling {@code addDependency} . + * + * The following example demonstrates how to add dependencies on local files and files in a stage: + * + * {{{ + * session.addDependency("@my_stage/http-commons.jar") + * session.addDependency("/home/username/lib/language-detector.jar") + * session.addDependency("./resource-dir/") + * session.addDependency("./resource.xml") + * }}} + * @since 0.1.0 + * @param path + * Path to a local directory, local file, or file in a stage. + */ def addDependency(path: String): Unit = { val trimmedPath = path.trim if (trimmedPath.startsWith(STAGE_PREFIX)) { @@ -248,11 +253,12 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log conn.telemetry.reportAddDependency() } - /** Removes a path from the set of dependencies. - * @since 0.1.0 - * @param path - * Path to a local directory, local file, or file in a stage. - */ + /** + * Removes a path from the set of dependencies. + * @since 0.1.0 + * @param path + * Path to a local directory, local file, or file in a stage. 
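+   *
+   * For example, removing a dependency that was added earlier in this session:
+   * {{{
+   *   session.removeDependency("/home/username/lib/language-detector.jar")
+   * }}}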
+ */ def removeDependency(path: String): Unit = { val trimmedPath = path.trim if (trimmedPath.startsWith(STAGE_PREFIX)) { @@ -262,96 +268,103 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log } } - /** Adds a server side JVM package as a dependency of a user-defined function (UDF). - * @param packageName - * Name of the package, formatted as `groupName:packageName:version` - */ + /** + * Adds a server side JVM package as a dependency of a user-defined function (UDF). + * @param packageName + * Name of the package, formatted as `groupName:packageName:version` + */ private[snowpark] def addPackage(packageName: String): Unit = { packageNames.add(packageName.trim.toLowerCase()) } - /** Removes a server side JVM package from the set of dependencies. - * @param packageName - * Name of the package - */ + /** + * Removes a server side JVM package from the set of dependencies. + * @param packageName + * Name of the package + */ private[snowpark] def removePackage(packageName: String): Unit = { packageNames.remove(packageName.trim.toLowerCase()) } - /** List server supported JVM packages - * @return - * Set of supported package names - */ + /** + * List server supported JVM packages + * @return + * Set of supported package names + */ private[snowpark] def listPackages(): Set[String] = serverPackages - /** Sets a query tag for this session. You can use the query tag to find all queries run for this - * session. - * - * If not set, the default value of query tag is the Snowpark library call and the class and - * method in your code that invoked the query (e.g. `com.snowflake.snowpark.DataFrame.collect - * Main$.main(Main.scala:18)`). - * - * @param queryTag - * String to use as the query tag for this session. - * @since 0.1.0 - */ + /** + * Sets a query tag for this session. You can use the query tag to find all queries run for this + * session. + * + * If not set, the default value of query tag is the Snowpark library call and the class and + * method in your code that invoked the query (e.g. `com.snowflake.snowpark.DataFrame.collect + * Main$.main(Main.scala:18)`). + * + * @param queryTag + * String to use as the query tag for this session. + * @since 0.1.0 + */ def setQueryTag(queryTag: String): Unit = synchronized { this.conn.setQueryTag(queryTag) } - /** Unset query_tag parameter for this session. - * - * If not set, the default value of query tag is the Snowpark library call and the class and - * method in your code that invoked the query (e.g. `com.snowflake.snowpark.DataFrame.collect - * Main$.main(Main.scala:18)`). - * - * @since 0.10.0 - */ + /** + * Unset query_tag parameter for this session. + * + * If not set, the default value of query tag is the Snowpark library call and the class and + * method in your code that invoked the query (e.g. `com.snowflake.snowpark.DataFrame.collect + * Main$.main(Main.scala:18)`). + * + * @since 0.10.0 + */ def unsetQueryTag(): Unit = synchronized { this.conn.unsetQueryTag() } - /** Returns the query tag that you set by calling [[setQueryTag]]. - * @since 0.1.0 - */ + /** + * Returns the query tag that you set by calling [[setQueryTag]]. + * @since 0.1.0 + */ def getQueryTag(): Option[String] = this.conn.getQueryTag() - /** Updates the query tag that is a JSON encoded string for the current session. - * - * Keep in mind that assigning a value via [[setQueryTag]] will remove any current query tag - * state. 
- * - * Example 1: - * {{{ - * session.setQueryTag("""{"key1":"value1"}""") - * session.updateQueryTag("""{"key2":"value2"}""") - * print(session.getQueryTag().get) - * {"key1":"value1","key2":"value2"} - * }}} - * - * Example 2: - * {{{ - * session.sql("""ALTER SESSION SET QUERY_TAG = '{"key1":"value1"}'""").collect() - * session.updateQueryTag("""{"key2":"value2"}""") - * print(session.getQueryTag().get) - * {"key1":"value1","key2":"value2"} - * }}} - * - * Example 3: - * {{{ - * session.setQueryTag("") - * session.updateQueryTag("""{"key1":"value1"}""") - * print(session.getQueryTag().get) - * {"key1":"value1"} - * }}} - * - * @param queryTag - * A JSON encoded string that provides updates to the current query tag. - * @throws SnowparkClientException - * If the provided query tag or the query tag of the current session are not valid JSON - * strings; or if it could not serialize the query tag into a JSON string. - * @since 1.13.0 - */ + /** + * Updates the query tag that is a JSON encoded string for the current session. + * + * Keep in mind that assigning a value via [[setQueryTag]] will remove any current query tag + * state. + * + * Example 1: + * {{{ + * session.setQueryTag("""{"key1":"value1"}""") + * session.updateQueryTag("""{"key2":"value2"}""") + * print(session.getQueryTag().get) + * {"key1":"value1","key2":"value2"} + * }}} + * + * Example 2: + * {{{ + * session.sql("""ALTER SESSION SET QUERY_TAG = '{"key1":"value1"}'""").collect() + * session.updateQueryTag("""{"key2":"value2"}""") + * print(session.getQueryTag().get) + * {"key1":"value1","key2":"value2"} + * }}} + * + * Example 3: + * {{{ + * session.setQueryTag("") + * session.updateQueryTag("""{"key1":"value1"}""") + * print(session.getQueryTag().get) + * {"key1":"value1"} + * }}} + * + * @param queryTag + * A JSON encoded string that provides updates to the current query tag. + * @throws SnowparkClientException + * If the provided query tag or the query tag of the current session are not valid JSON strings; + * or if it could not serialize the query tag into a JSON string. + * @since 1.13.0 + */ def updateQueryTag(queryTag: String): Unit = synchronized { val newQueryTagMap = parseJsonString(queryTag) if (newQueryTagMap.isEmpty) { @@ -375,26 +388,28 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log setQueryTag(updatedQueryTagStr.get) } - /** Attempts to parse a JSON-encoded string into a [[scala.collection.immutable.Map]]. - * - * @param jsonString - * The JSON-encoded string to parse. - * @return - * An `Option` containing the `Map` if the parsing of the JSON string was successful, or `None` - * otherwise. - */ + /** + * Attempts to parse a JSON-encoded string into a [[scala.collection.immutable.Map]]. + * + * @param jsonString + * The JSON-encoded string to parse. + * @return + * An `Option` containing the `Map` if the parsing of the JSON string was successful, or `None` + * otherwise. + */ private def parseJsonString(jsonString: String): Option[Map[String, Any]] = { Utils.jsonToMap(jsonString) } - /** Attempts to convert a [[scala.collection.immutable.Map]] into a JSON-encoded string. - * - * @param map - * The `Map` to convert. - * @return - * An `Option` containing the JSON-encoded string if the conversion was successful, or `None` - * otherwise. - */ + /** + * Attempts to convert a [[scala.collection.immutable.Map]] into a JSON-encoded string. + * + * @param map + * The `Map` to convert. 
+ * @return + * An `Option` containing the JSON-encoded string if the conversion was successful, or `None` + * otherwise. + */ private def toJsonString(map: Map[String, Any]): Option[String] = { Utils.mapToJson(map) } @@ -457,13 +472,14 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log } - /** the format of file name on stage is stage/prefix/file - * - * stage: case insensitive, no quote for example: stage -> stage STAGE -> stage "stage" -> stage - * "STAGE" -> stage "sta/ge" -> sta/ge - * - * prefix: case sensitive file: case sensitive - */ + /** + * the format of file name on stage is stage/prefix/file + * + * stage: case insensitive, no quote for example: stage -> stage STAGE -> stage "stage" -> stage + * "STAGE" -> stage "sta/ge" -> sta/ge + * + * prefix: case sensitive file: case sensitive + */ private[snowpark] def listFilesInStage(stageLocation: String): Set[String] = { val normalized = Utils.normalizeStageLocation(stageLocation) @@ -475,64 +491,68 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log fileList.map(_.getString(0).substring(prefixLength)).toSet } - /** Returns an Updatable that points to the specified table. - * - * {@code name} can be a fully qualified identifier and must conform to the rules for a Snowflake - * identifier. - * - * @param name - * Table name that is either a fully qualified name or a name in the current database/schema. - * @return - * A [[Updatable]] - * @since 0.1.0 - */ + /** + * Returns an Updatable that points to the specified table. + * + * {@code name} can be a fully qualified identifier and must conform to the rules for a Snowflake + * identifier. + * + * @param name + * Table name that is either a fully qualified name or a name in the current database/schema. + * @return + * A [[Updatable]] + * @since 0.1.0 + */ def table(name: String): Updatable = { Utils.validateObjectName(name) Updatable(name, this) } - /** Returns an Updatable that points to the specified table. - * - * @param multipartIdentifier - * A sequence of strings that specify the database name, schema name, and table name (e.g. - * {@code Seq("database_name", "schema_name", "table_name")} ). - * @return - * A [[Updatable]] - * @since 0.1.0 - */ + /** + * Returns an Updatable that points to the specified table. + * + * @param multipartIdentifier + * A sequence of strings that specify the database name, schema name, and table name (e.g. + * {@code Seq("database_name", "schema_name", "table_name")} ). + * @return + * A [[Updatable]] + * @since 0.1.0 + */ // [[.].] def table(multipartIdentifier: Seq[String]): Updatable = table(multipartIdentifier.mkString(".")) - /** Returns an Updatable that points to the specified table. - * - * @param multipartIdentifier - * A list of strings that specify the database name, schema name, and table name. - * @return - * A [[Updatable]] - * @since 0.2.0 - */ + /** + * Returns an Updatable that points to the specified table. + * + * @param multipartIdentifier + * A list of strings that specify the database name, schema name, and table name. + * @return + * A [[Updatable]] + * @since 0.2.0 + */ def table(multipartIdentifier: java.util.List[String]): Updatable = - table(multipartIdentifier.asScala) - - /** Returns an Updatable that points to the specified table. - * - * @param multipartIdentifier - * An array of strings that specify the database name, schema name, and table name. 
- * @since 0.7.0 - */ + table(multipartIdentifier.asScala.toSeq) + + /** + * Returns an Updatable that points to the specified table. + * + * @param multipartIdentifier + * An array of strings that specify the database name, schema name, and table name. + * @since 0.7.0 + */ def table(multipartIdentifier: Array[String]): Updatable = { table(multipartIdentifier.mkString(".")) } - /** Returns a dataframe with only columns that are in the result of df.join but not the original - * df - * - * @param df - * The source DataFrame on which the join operation was called - * @param result - * The resulting Dataframe of the join operation - */ + /** + * Returns a dataframe with only columns that are in the result of df.join but not the original df + * + * @param df + * The source DataFrame on which the join operation was called + * @param result + * The resulting Dataframe of the join operation + */ private def tableFunctionResultOnly(df: DataFrame, result: DataFrame): DataFrame = { // Check if the leading result columns are from the source df to confirm positions if (df.schema.indices.exists(i => result.schema(i).name != df.schema(i).name)) { @@ -546,58 +566,60 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log result.select(outputColumns) } - /** Creates a new DataFrame from the given table function and arguments. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * session.tableFunction( - * split_to_table, - * lit("split by space"), - * lit(" ") - * ) - * }}} - * - * @since 0.4.0 - * @param func - * Table function object, can be created from TableFunction class or referred from the built-in - * list from tableFunctions. - * @param firstArg - * the first function argument of the given table function. - * @param remaining - * all remaining function arguments. - */ + /** + * Creates a new DataFrame from the given table function and arguments. + * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * session.tableFunction( + * split_to_table, + * lit("split by space"), + * lit(" ") + * ) + * }}} + * + * @since 0.4.0 + * @param func + * Table function object, can be created from TableFunction class or referred from the built-in + * list from tableFunctions. + * @param firstArg + * the first function argument of the given table function. + * @param remaining + * all remaining function arguments. + */ def tableFunction(func: TableFunction, firstArg: Column, remaining: Column*): DataFrame = tableFunction(func, firstArg +: remaining) - /** Creates a new DataFrame from the given table function and arguments. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * session.tableFunction( - * split_to_table, - * Seq(lit("split by space"), lit(" ")) - * ) - * // Since 1.8.0, DataFrame columns are accepted as table function arguments: - * df = Seq(Seq("split by space", " ")).toDF(Seq("a", "b")) - * session.tableFunction(( - * split_to_table, - * Seq(df("a"), df("b")) - * ) - * }}} - * - * @since 0.4.0 - * @param func - * Table function object, can be created from TableFunction class or referred from the built-in - * list from tableFunctions. - * @param args - * function arguments of the given table function. - */ + /** + * Creates a new DataFrame from the given table function and arguments. 
+ * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * session.tableFunction( + * split_to_table, + * Seq(lit("split by space"), lit(" ")) + * ) + * // Since 1.8.0, DataFrame columns are accepted as table function arguments: + * df = Seq(Seq("split by space", " ")).toDF(Seq("a", "b")) + * session.tableFunction(( + * split_to_table, + * Seq(df("a"), df("b")) + * ) + * }}} + * + * @since 0.4.0 + * @param func + * Table function object, can be created from TableFunction class or referred from the built-in + * list from tableFunctions. + * @param args + * function arguments of the given table function. + */ def tableFunction(func: TableFunction, args: Seq[Column]): DataFrame = { // Use df.join to apply function result if args contains a DF column val sourceDFs = args.flatMap(_.expr.sourceDFs) @@ -605,7 +627,7 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log // explode function requires a special handling since it is a client side function. if (func.funcName.trim.toLowerCase() == "explode") { callExplode(args.head) - } else DataFrame(this, TableFunctionRelation(func.call(args: _*))) + } else DataFrame(this, TableFunctionRelation(func.call(args.toSeq: _*))) } else if (sourceDFs.toSet.size > 1) { throw UDF_CANNOT_ACCEPT_MANY_DF_COLS() } else { @@ -615,33 +637,34 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log } } - /** Creates a new DataFrame from the given table function and arguments. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * session.tableFunction( - * flatten, - * Map("input" -> parse_json(lit("[1,2]"))) - * ) - * // Since 1.8.0, DataFrame columns are accepted as table function arguments: - * df = Seq("[1,2]").toDF("a") - * session.tableFunction(( - * flatten, - * Map("input" -> parse_json(df("a"))) - * ) - * }}} - * - * @since 0.4.0 - * @param func - * Table function object, can be created from TableFunction class or referred from the built-in - * list from tableFunctions. - * @param args - * function arguments map of the given table function. Some functions, like flatten, have named - * parameters. use this map to assign values to the corresponding parameters. - */ + /** + * Creates a new DataFrame from the given table function and arguments. + * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * session.tableFunction( + * flatten, + * Map("input" -> parse_json(lit("[1,2]"))) + * ) + * // Since 1.8.0, DataFrame columns are accepted as table function arguments: + * df = Seq("[1,2]").toDF("a") + * session.tableFunction(( + * flatten, + * Map("input" -> parse_json(df("a"))) + * ) + * }}} + * + * @since 0.4.0 + * @param func + * Table function object, can be created from TableFunction class or referred from the built-in + * list from tableFunctions. + * @param args + * function arguments map of the given table function. Some functions, like flatten, have named + * parameters. use this map to assign values to the corresponding parameters. 
+ */ def tableFunction(func: TableFunction, args: Map[String, Column]): DataFrame = { // Use df.join to apply function result if args contains a DF column val sourceDFs = args.values.flatMap(_.expr.sourceDFs) @@ -668,26 +691,27 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log sourceDF.select(tableFunctions.explode(sourceDF("b"))) } - /** Creates a new DataFrame from the given table function. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * session.tableFunction( - * flatten(parse_json(lit("[1,2]"))) - * ) - * }}} - * - * @since 1.10.0 - * @param func - * Table function object, can be created from TableFunction class or referred from the built-in - * list from tableFunctions. - */ + /** + * Creates a new DataFrame from the given table function. + * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * session.tableFunction( + * flatten(parse_json(lit("[1,2]"))) + * ) + * }}} + * + * @since 1.10.0 + * @param func + * Table function object, can be created from TableFunction class or referred from the built-in + * list from tableFunctions. + */ def tableFunction(func: Column): DataFrame = { func.expr match { - case TFunction(funcName, args) => + case TableFunctionEx(funcName, args) => tableFunction(TableFunction(funcName), args.map(Column(_))) case NamedArgumentsTableFunction(funcName, argMap) => tableFunction( @@ -702,72 +726,75 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log private def createFromStoredProc(spName: String, args: Seq[Any]): DataFrame = DataFrame(this, StoredProcedureRelation(spName, args.map(functions.lit).map(_.expr))) - /** Creates a new DataFrame from the given Stored Procedure and arguments. - * - * {{{ - * session.storedProcedure( - * "sp_name", "arg1", "arg2" - * ).show() - * }}} - * @since 1.8.0 - * @param spName - * The name of stored procedures. - * @param args - * The arguments of the given stored procedure - */ + /** + * Creates a new DataFrame from the given Stored Procedure and arguments. + * + * {{{ + * session.storedProcedure( + * "sp_name", "arg1", "arg2" + * ).show() + * }}} + * @since 1.8.0 + * @param spName + * The name of stored procedures. + * @param args + * The arguments of the given stored procedure + */ def storedProcedure(spName: String, args: Any*): DataFrame = { Utils.validateObjectName(spName) createFromStoredProc(spName, args) } - /** Creates a new DataFrame from the given Stored Procedure and arguments. - * - * {{{ - * val sp = session.sproc.register(...) - * session.storedProcedure( - * sp, "arg1", "arg2" - * ).show() - * }}} - * @since 1.8.0 - * @param sp - * The stored procedures object, can be created by `Session.sproc.register` methods. - * @param args - * The arguments of the given stored procedure - */ + /** + * Creates a new DataFrame from the given Stored Procedure and arguments. + * + * {{{ + * val sp = session.sproc.register(...) + * session.storedProcedure( + * sp, "arg1", "arg2" + * ).show() + * }}} + * @since 1.8.0 + * @param sp + * The stored procedures object, can be created by `Session.sproc.register` methods. + * @param args + * The arguments of the given stored procedure + */ def storedProcedure(sp: StoredProcedure, args: Any*): DataFrame = createFromStoredProc(sp.name.get, args) - /** Creates a new DataFrame containing the specified values. 
Currently, you can use values of the - * following types: - * - * - '''Base types (Int, Short, String etc.).''' The resulting DataFrame has the column name - * "VALUE". - * - '''Tuples consisting of base types.''' The resulting DataFrame has the column names "_1", - * "_2", etc. - * - '''Case classes consisting of base types.''' The resulting DataFrame has column names that - * correspond to the case class constituents. - * - * If you want to create a DataFrame by calling the {@code toDF} method of a {@code Seq} object, - * import `session.implicits._`, where `session` is an object of the `Session` class that you - * created to connect to the Snowflake database. For example: - * - * {{{ - * val session = Session.builder.configFile(..).create - * // Importing this allows you to call the toDF method on a Seq object. - * import session.implicits._ - * // Create a DataFrame from a Seq object. - * val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("numCol", "varcharCol") - * df.show() - * }}} - * - * @param data - * A sequence in which each element represents a row of values in the DataFrame. - * @tparam T - * DataType - * @return - * A [[DataFrame]] - * @since 0.1.0 - */ + /** + * Creates a new DataFrame containing the specified values. Currently, you can use values of the + * following types: + * + * - '''Base types (Int, Short, String etc.).''' The resulting DataFrame has the column name + * "VALUE". + * - '''Tuples consisting of base types.''' The resulting DataFrame has the column names "_1", + * "_2", etc. + * - '''Case classes consisting of base types.''' The resulting DataFrame has column names that + * correspond to the case class constituents. + * + * If you want to create a DataFrame by calling the {@code toDF} method of a {@code Seq} object, + * import `session.implicits._`, where `session` is an object of the `Session` class that you + * created to connect to the Snowflake database. For example: + * + * {{{ + * val session = Session.builder.configFile(..).create + * // Importing this allows you to call the toDF method on a Seq object. + * import session.implicits._ + * // Create a DataFrame from a Seq object. + * val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("numCol", "varcharCol") + * df.show() + * }}} + * + * @param data + * A sequence in which each element represents a row of values in the DataFrame. + * @tparam T + * DataType + * @return + * A [[DataFrame]] + * @since 0.1.0 + */ def createDataFrame[T: TypeTag](data: Seq[T]): DataFrame = { val schema = TypeToSchemaConverter.inferSchema[T]() @@ -784,32 +811,33 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log createDataFrame(rows, schema) } - /** Creates a new DataFrame that uses the specified schema and contains the specified [[Row]] - * objects. - * - * For example, the following code creates a DataFrame containing three columns of the types - * `int`, `string`, and `variant` with a single row of data: - * {{{ - * import com.snowflake.snowpark.types._ - * ... - * // Create a sequence of a single Row object containing data. - * val data = Seq(Row(1, "a", new Variant(1))) - * // Define the schema for the columns in the DataFrame. - * val schema = StructType(Seq(StructField("int", IntegerType), - * StructField("string", StringType), - * StructField("variant", VariantType))) - * // Create the DataFrame. - * val df = session.createDataFrame(data, schema) - * }}} - * - * @param data - * A sequence of [[Row]] objects representing rows of data. 
- * @param schema - * [[types.StructType StructType]] representing the schema for the DataFrame. - * @return - * A [[DataFrame]] - * @since 0.2.0 - */ + /** + * Creates a new DataFrame that uses the specified schema and contains the specified [[Row]] + * objects. + * + * For example, the following code creates a DataFrame containing three columns of the types + * `int`, `string`, and `variant` with a single row of data: + * {{{ + * import com.snowflake.snowpark.types._ + * ... + * // Create a sequence of a single Row object containing data. + * val data = Seq(Row(1, "a", new Variant(1))) + * // Define the schema for the columns in the DataFrame. + * val schema = StructType(Seq(StructField("int", IntegerType), + * StructField("string", StringType), + * StructField("variant", VariantType))) + * // Create the DataFrame. + * val df = session.createDataFrame(data, schema) + * }}} + * + * @param data + * A sequence of [[Row]] objects representing rows of data. + * @param schema + * [[types.StructType StructType]] representing the schema for the DataFrame. + * @return + * A [[DataFrame]] + * @since 0.2.0 + */ def createDataFrame(data: Seq[Row], schema: StructType): DataFrame = { val spAttrs = schema.map { field => { @@ -876,213 +904,227 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log DataFrame(this, SnowflakeValues(spAttrs, converted)).select(projectColumns) } - /** Creates a new DataFrame that uses the specified schema and contains the specified [[Row]] - * objects. - * - * For example, the following code creates a DataFrame containing two columns of the types `int` - * and `string` with two rows of data: - * - * For example - * - * {{{ - * import com.snowflake.snowpark.types._ - * ... - * // Create an array of Row objects containing data. - * val data = Array(Row(1, "a"), Row(2, "b")) - * // Define the schema for the columns in the DataFrame. - * val schema = StructType(Seq(StructField("num", IntegerType), - * StructField("str", StringType))) - * // Create the DataFrame. - * val df = session.createDataFrame(data, schema) - * }}} - * - * @param data - * An array of [[Row]] objects representing rows of data. - * @param schema - * [[types.StructType StructType]] representing the schema for the DataFrame. - * @return - * A [[DataFrame]] - * @since 0.7.0 - */ + /** + * Creates a new DataFrame that uses the specified schema and contains the specified [[Row]] + * objects. + * + * For example, the following code creates a DataFrame containing two columns of the types `int` + * and `string` with two rows of data: + * + * For example + * + * {{{ + * import com.snowflake.snowpark.types._ + * ... + * // Create an array of Row objects containing data. + * val data = Array(Row(1, "a"), Row(2, "b")) + * // Define the schema for the columns in the DataFrame. + * val schema = StructType(Seq(StructField("num", IntegerType), + * StructField("str", StringType))) + * // Create the DataFrame. + * val df = session.createDataFrame(data, schema) + * }}} + * + * @param data + * An array of [[Row]] objects representing rows of data. + * @param schema + * [[types.StructType StructType]] representing the schema for the DataFrame. + * @return + * A [[DataFrame]] + * @since 0.7.0 + */ def createDataFrame(data: Array[Row], schema: StructType): DataFrame = createDataFrame(data.toSeq, schema) - /** Creates a new DataFrame from a range of numbers. The resulting DataFrame has the column name - * "ID" and a row for each number in the sequence. - * - * @param start - * Start of the range. 
- * @param end - * End of the range. - * @param step - * Step function for producing the numbers in the range. - * @return - * A [[DataFrame]] - * @since 0.1.0 - */ + /** + * Creates a new DataFrame from a range of numbers. The resulting DataFrame has the column name + * "ID" and a row for each number in the sequence. + * + * @param start + * Start of the range. + * @param end + * End of the range. + * @param step + * Step function for producing the numbers in the range. + * @return + * A [[DataFrame]] + * @since 0.1.0 + */ def range(start: Long, end: Long, step: Long): DataFrame = DataFrame(this, Range(start, end, step)) - /** Creates a new DataFrame from a range of numbers starting from 0. The resulting DataFrame has - * the column name "ID" and a row for each number in the sequence. - * - * @param end - * End of the range. - * @return - * A [[DataFrame]] - * @since 0.1.0 - */ + /** + * Creates a new DataFrame from a range of numbers starting from 0. The resulting DataFrame has + * the column name "ID" and a row for each number in the sequence. + * + * @param end + * End of the range. + * @return + * A [[DataFrame]] + * @since 0.1.0 + */ def range(end: Long): DataFrame = range(0, end) - /** Creates a new DataFrame from a range of numbers. The resulting DataFrame has the column name - * "ID" and a row for each number in the sequence. - * - * @param start - * Start of the range. - * @param end - * End of the range. - * @return - * A [[DataFrame]] - * @since 0.1.0 - */ + /** + * Creates a new DataFrame from a range of numbers. The resulting DataFrame has the column name + * "ID" and a row for each number in the sequence. + * + * @param start + * Start of the range. + * @param end + * End of the range. + * @return + * A [[DataFrame]] + * @since 0.1.0 + */ def range(start: Long, end: Long): DataFrame = range(start, end, 1) - /** Returns a new DataFrame representing the results of a SQL query. - * - * You can use this method to execute an arbitrary SQL statement. - * - * @param query - * The SQL statement to execute. - * @return - * A [[DataFrame]] - * @since 0.1.0 - */ + /** + * Returns a new DataFrame representing the results of a SQL query. + * + * You can use this method to execute an arbitrary SQL statement. + * + * @param query + * The SQL statement to execute. + * @return + * A [[DataFrame]] + * @since 0.1.0 + */ def sql(query: String): DataFrame = { // PUT and GET command cannot be executed in async mode DataFrame(this, plans.query(query, None, !Utils.isPutOrGetCommand(query))) } - /** Creates a new DataFrame via Generator function. - * - * For example: - * {{{ - * import com.snowflake.snowpark.functions._ - * session.generator(10, Seq(seq4(), uniform(lit(1), lit(5), random()))).show() - * }}} - * - * @param rowCount - * The row count of the result DataFrame. - * @param columns - * the column list of the result DataFrame - * @return - * A [[DataFrame]] - * @since 0.11.0 - */ + /** + * Creates a new DataFrame via Generator function. + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions._ + * session.generator(10, Seq(seq4(), uniform(lit(1), lit(5), random()))).show() + * }}} + * + * @param rowCount + * The row count of the result DataFrame. + * @param columns + * the column list of the result DataFrame + * @return + * A [[DataFrame]] + * @since 0.11.0 + */ def generator(rowCount: Long, columns: Seq[Column]): DataFrame = DataFrame(this, Generator(columns.map(_.expr), rowCount)) - /** Creates a new DataFrame via Generator function. 
- * - * For example: - * {{{ - * import com.snowflake.snowpark.functions._ - * session.generator(10, seq4(), uniform(lit(1), lit(5), random())).show() - * }}} - * - * @param rowCount - * The row count of the result DataFrame. - * @param col - * the column of the result DataFrame - * @param cols - * A list of columns excepts the first column - * @return - * A [[DataFrame]] - * @since 0.11.0 - */ + /** + * Creates a new DataFrame via Generator function. + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions._ + * session.generator(10, seq4(), uniform(lit(1), lit(5), random())).show() + * }}} + * + * @param rowCount + * The row count of the result DataFrame. + * @param col + * the column of the result DataFrame + * @param cols + * A list of columns excepts the first column + * @return + * A [[DataFrame]] + * @since 0.11.0 + */ def generator(rowCount: Long, col: Column, cols: Column*): DataFrame = generator(rowCount, col +: cols) - /** Returns a [[DataFrameReader]] that you can use to read data from various supported sources - * (e.g. a file in a stage) as a DataFrame. - * - * @return - * A [[DataFrameReader]] - * @since 0.1.0 - */ + /** + * Returns a [[DataFrameReader]] that you can use to read data from various supported sources + * (e.g. a file in a stage) as a DataFrame. + * + * @return + * A [[DataFrameReader]] + * @since 0.1.0 + */ def read: DataFrameReader = new DataFrameReader(this) // Run the query directly but don't need to retrieve the result private[snowpark] def runQuery(sql: String, isDDLOnTempObject: Boolean = false): Unit = conn.runQuery(sql, isDDLOnTempObject) - /** Returns the name of the default database configured for this session in [[Session.builder]]. - * - * @return - * The name of the default database - * @since 0.1.0 - */ + /** + * Returns the name of the default database configured for this session in [[Session.builder]]. + * + * @return + * The name of the default database + * @since 0.1.0 + */ def getDefaultDatabase: Option[String] = conn.getDefaultDatabase - /** Returns the name of the default schema configured for this session in [[Session.builder]]. - * - * @return - * The name of the default schema - * @since 0.1.0 - */ + /** + * Returns the name of the default schema configured for this session in [[Session.builder]]. + * + * @return + * The name of the default schema + * @since 0.1.0 + */ def getDefaultSchema: Option[String] = conn.getDefaultSchema - /** Returns the name of the current database for the JDBC session attached to this session. - * - * For example, if you change the current database by executing the following code: - * - * {{{ - * session.sql("use database newDB").collect() - * }}} - * - * the method returns `newDB`. - * - * @return - * The name of the current database for this session. - * @since 0.1.0 - */ + /** + * Returns the name of the current database for the JDBC session attached to this session. + * + * For example, if you change the current database by executing the following code: + * + * {{{ + * session.sql("use database newDB").collect() + * }}} + * + * the method returns `newDB`. + * + * @return + * The name of the current database for this session. + * @since 0.1.0 + */ def getCurrentDatabase: Option[String] = conn.getCurrentDatabase - /** Returns the name of the current schema for the JDBC session attached to this session. 
- * - * For example, if you change the current schema by executing the following code: - * - * {{{ - * session.sql("use schema newSchema").collect() - * }}} - * - * the method returns `newSchema`. - * - * @return - * Current schema in session. - * @since 0.1.0 - */ + /** + * Returns the name of the current schema for the JDBC session attached to this session. + * + * For example, if you change the current schema by executing the following code: + * + * {{{ + * session.sql("use schema newSchema").collect() + * }}} + * + * the method returns `newSchema`. + * + * @return + * Current schema in session. + * @since 0.1.0 + */ def getCurrentSchema: Option[String] = conn.getCurrentSchema - /** Returns the fully qualified name of the current schema for the session. - * - * @return - * The fully qualified name of the schema - * @since 0.2.0 - */ + /** + * Returns the fully qualified name of the current schema for the session. + * + * @return + * The fully qualified name of the schema + * @since 0.2.0 + */ def getFullyQualifiedCurrentSchema: String = conn.getCurrentDatabase.get + "." + conn.getCurrentSchema.get private[snowpark] def getResultAttributes(sql: String): Seq[Attribute] = conn.getResultAttributes(sql) - /** Returns the name of the temporary stage created by the Snowpark library for uploading and - * store temporary artifacts for this session. These artifacts include classes for UDFs that you - * define in this session and dependencies that you add when calling [[addDependency]]. - * - * @return - * The name of stage. - * @since 0.1.0 - */ + /** + * Returns the name of the temporary stage created by the Snowpark library for uploading and store + * temporary artifacts for this session. These artifacts include classes for UDFs that you define + * in this session and dependencies that you add when calling [[addDependency]]. + * + * @return + * The name of stage. + * @since 0.1.0 + */ def getSessionStage: String = synchronized { val qualifiedStageName = s"$getFullyQualifiedCurrentSchema.$sessionStage" if (!stageCreated) { @@ -1094,127 +1136,134 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log "@" + qualifiedStageName } - /** Returns a [[UDFRegistration]] object that you can use to register UDFs. For example: - * {{{ - * session.udf.registerTemporary("mydoubleudf", (x: Int) => 2 * x) - * session.sql(s"SELECT mydoubleudf(c) FROM table") - * }}} - * @since 0.1.0 - */ + /** + * Returns a [[UDFRegistration]] object that you can use to register UDFs. For example: + * {{{ + * session.udf.registerTemporary("mydoubleudf", (x: Int) => 2 * x) + * session.sql(s"SELECT mydoubleudf(c) FROM table") + * }}} + * @since 0.1.0 + */ lazy val udf = new UDFRegistration(this) - /** Returns a [[UDTFRegistration]] object that you can use to register UDTFs. For example: - * {{{ - * class MyWordSplitter extends UDTF1[String] { - * override def process(input: String): Iterable[Row] = input.split(" ").map(Row(_)) - * override def endPartition(): Iterable[Row] = Array.empty[Row] - * override def outputSchema(): StructType = StructType(StructField("word", StringType)) - * } - * val tableFunction = session.udtf.registerTemporary(new MyWordSplitter) - * session.tableFunction(tableFunction, lit("My name is Snow Park")).show() - * }}} - * @since 1.2.0 - */ + /** + * Returns a [[UDTFRegistration]] object that you can use to register UDTFs. 
For example: + * {{{ + * class MyWordSplitter extends UDTF1[String] { + * override def process(input: String): Iterable[Row] = input.split(" ").map(Row(_)) + * override def endPartition(): Iterable[Row] = Array.empty[Row] + * override def outputSchema(): StructType = StructType(StructField("word", StringType)) + * } + * val tableFunction = session.udtf.registerTemporary(new MyWordSplitter) + * session.tableFunction(tableFunction, lit("My name is Snow Park")).show() + * }}} + * @since 1.2.0 + */ lazy val udtf: UDTFRegistration = new UDTFRegistration(this) - /** Returns a [[SProcRegistration]] object that you can use to register Stored Procedures. For - * example: - * {{{ - * val sp = session.sproc.registerTemporary((session: Session, num: Int) => num * 2) - * session.storedProcedure(sp, 100).show() - * }}} - * @since 1.8.0 - */ + /** + * Returns a [[SProcRegistration]] object that you can use to register Stored Procedures. For + * example: + * {{{ + * val sp = session.sproc.registerTemporary((session: Session, num: Int) => num * 2) + * session.storedProcedure(sp, 100).show() + * }}} + * @since 1.8.0 + */ @PublicPreview lazy val sproc: SProcRegistration = new SProcRegistration(this) - /** Returns a [[FileOperation]] object that you can use to perform file operations on stages. For - * example: - * {{{ - * session.file.put("file:///tmp/file1.csv", "@myStage/prefix1") - * session.file.get("@myStage/prefix1", "file:///tmp") - * }}} - * - * @since 0.4.0 - */ + /** + * Returns a [[FileOperation]] object that you can use to perform file operations on stages. For + * example: + * {{{ + * session.file.put("file:///tmp/file1.csv", "@myStage/prefix1") + * session.file.get("@myStage/prefix1", "file:///tmp") + * }}} + * + * @since 0.4.0 + */ lazy val file = new FileOperation(this) - /** Provides implicit methods for convert Scala objects to Snowpark DataFrame and Column objects. - * - * To use this, import {@code session.implicits._} : - * {{{ - * val session = Session.builder.configFile(..).create - * import session.implicits._ - * }}} - * - * After you import this, you can call the {@code toDF} method of a {@code Seq} to convert a - * sequence to a DataFrame: - * {{{ - * // Create a DataFrame from a local sequence of integers. - * val df = (1 to 10).toDF("a") - * val df = Seq((1, "one"), (2, "two")).toDF("a", "b") - * }}} - * - * You can also refer to columns in DataFrames by using `$"colName"` and `'colName`: - * {{{ - * // Refer to a column in a DataFrame by using $"colName". - * val df = session.table("T").filter($"a" > 1) - * // Refer to columns by using 'colName. - * val df = session.table("T").filter('a > 1) - * }}} - * @since 0.1.0 - */ + /** + * Provides implicit methods for convert Scala objects to Snowpark DataFrame and Column objects. + * + * To use this, import {@code session.implicits._} : + * {{{ + * val session = Session.builder.configFile(..).create + * import session.implicits._ + * }}} + * + * After you import this, you can call the {@code toDF} method of a {@code Seq} to convert a + * sequence to a DataFrame: + * {{{ + * // Create a DataFrame from a local sequence of integers. + * val df = (1 to 10).toDF("a") + * val df = Seq((1, "one"), (2, "two")).toDF("a", "b") + * }}} + * + * You can also refer to columns in DataFrames by using `$"colName"` and `'colName`: + * {{{ + * // Refer to a column in a DataFrame by using $"colName". + * val df = session.table("T").filter($"a" > 1) + * // Refer to columns by using 'colName. 
+ * val df = session.table("T").filter('a > 1) + * }}} + * @since 0.1.0 + */ // scalastyle:off object implicits extends Implicits with Serializable { protected override def _session: Session = Session.this } // scalastyle:on - /** Creates a new DataFrame by flattening compound values into multiple rows. - * - * For example: - * {{{ - * import com.snowflake.snowpark.functions._ - * val df = session.flatten(parse_json(lit("""{"a":[1,2]}"""))) - * }}} - * - * @param input - * The expression that will be unseated into rows. The expression must be of data type VARIANT, - * OBJECT, or ARRAY. - * @return - * A [[DataFrame]]. - * @since 0.2.0 - */ + /** + * Creates a new DataFrame by flattening compound values into multiple rows. + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions._ + * val df = session.flatten(parse_json(lit("""{"a":[1,2]}"""))) + * }}} + * + * @param input + * The expression that will be unseated into rows. The expression must be of data type VARIANT, + * OBJECT, or ARRAY. + * @return + * A [[DataFrame]]. + * @since 0.2.0 + */ def flatten(input: Column): DataFrame = flatten(input, "", outer = false, recursive = false, "BOTH") - /** Creates a new DataFrame by flattening compound values into multiple rows. - * - * for example: - * {{{ - * import com.snowflake.snowpark.functions._ - * val df = session.flatten(parse_json(lit("""{"a":[1,2]}""")), "a", false, false, "BOTH") - * }}} - * - * @param input - * The expression that will be unseated into rows. The expression must be of data type VARIANT, - * OBJECT, or ARRAY. - * @param path - * The path to the element within a VARIANT data structure which needs to be flattened. Can be - * a zero-length string (i.e. empty path) if the outermost element is to be flattened. - * @param outer - * If {@code false} , any input rows that cannot be expanded, either because they cannot be - * accessed in the path or because they have zero fields or entries, are completely omitted - * from the output. Otherwise, exactly one row is generated for zero-row expansions (with NULL - * in the KEY, INDEX, and VALUE columns). - * @param recursive - * If {@code false} , only the element referenced by PATH is expanded. Otherwise, the expansion - * is performed for all sub-elements recursively. - * @param mode - * Specifies which types should be flattened ({@code "OBJECT"}, {@code "ARRAY"} , or - * {@code "BOTH"} ). - * @since 0.2.0 - */ + /** + * Creates a new DataFrame by flattening compound values into multiple rows. + * + * for example: + * {{{ + * import com.snowflake.snowpark.functions._ + * val df = session.flatten(parse_json(lit("""{"a":[1,2]}""")), "a", false, false, "BOTH") + * }}} + * + * @param input + * The expression that will be unseated into rows. The expression must be of data type VARIANT, + * OBJECT, or ARRAY. + * @param path + * The path to the element within a VARIANT data structure which needs to be flattened. Can be a + * zero-length string (i.e. empty path) if the outermost element is to be flattened. + * @param outer + * If {@code false} , any input rows that cannot be expanded, either because they cannot be + * accessed in the path or because they have zero fields or entries, are completely omitted from + * the output. Otherwise, exactly one row is generated for zero-row expansions (with NULL in the + * KEY, INDEX, and VALUE columns). + * @param recursive + * If {@code false} , only the element referenced by PATH is expanded. Otherwise, the expansion + * is performed for all sub-elements recursively. 
+ * @param mode + * Specifies which types should be flattened ({@code "OBJECT"}, {@code "ARRAY"} , or + * {@code "BOTH"} ). + * @since 0.2.0 + */ def flatten( input: Column, path: String, @@ -1297,8 +1346,9 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log } } - /** This api is for Stored Procedure internal usage only. Do not call this api. - */ + /** + * This api is for Stored Procedure internal usage only. Do not call this api. + */ private[snowpark] def dropAllTempObjects(): Unit = { tempObjectsMap.foreach(v => { this.runQuery(s"drop ${v._2} if exists ${v._1}", true) @@ -1309,10 +1359,11 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log private[snowpark] def getTempObjectMap: scala.collection.concurrent.Map[String, TempObjectType] = tempObjectsMap - /** Close this session. - * - * @since 0.7.0 - */ + /** + * Close this session. + * + * @since 0.7.0 + */ def close(): Unit = synchronized { // The users should not close a session used by stored procedure. if (conn.isStoredProc) { @@ -1352,36 +1403,39 @@ class Session private (private[snowpark] val conn: ServerConnection) extends Log } } - /** Get the session information. - * - * @since 0.11.0 - */ + /** + * Get the session information. + * + * @since 0.11.0 + */ def getSessionInfo(): String = sessionInfo - /** Returns an [[AsyncJob]] object that you can use to track the status and get the results of the - * asynchronous query specified by the query ID. - * - * For example, create an AsyncJob by specifying a valid ``, check whether the query is - * running or not, and get the result rows. - * {{{ - * val asyncJob = session.createAsyncJob() - * println(s"Is query \${asyncJob.getQueryId} running? \${asyncJob.isRunning()}") - * val rows = asyncJob.getRows() - * }}} - * - * @since 0.11.0 - * @param queryID - * A valid query ID - * @return - * An [[AsyncJob]] object - */ + /** + * Returns an [[AsyncJob]] object that you can use to track the status and get the results of the + * asynchronous query specified by the query ID. + * + * For example, create an AsyncJob by specifying a valid ``, check whether the query is + * running or not, and get the result rows. + * {{{ + * val asyncJob = session.createAsyncJob() + * println(s"Is query \${asyncJob.getQueryId} running? \${asyncJob.isRunning()}") + * val rows = asyncJob.getRows() + * }}} + * + * @since 0.11.0 + * @param queryID + * A valid query ID + * @return + * An [[AsyncJob]] object + */ def createAsyncJob(queryID: String): AsyncJob = new AsyncJob(queryID, this, None) } -/** Companion object to [[Session! Session]] that you use to build and create a session. - * @since 0.1.0 - */ +/** + * Companion object to [[Session! Session]] that you use to build and create a session. + * @since 0.1.0 + */ object Session extends Logging { Utils.checkScalaVersionCompatibility() @@ -1393,11 +1447,12 @@ object Session extends Logging { disableStderr() } - /** This api is for Stored Procedure internal usage only. Do not create a Session with this api. - * - * @return - * [[Session]] - */ + /** + * This api is for Stored Procedure internal usage only. Do not create a Session with this api. + * + * @return + * [[Session]] + */ private[snowpark] def apply(connection: SnowflakeConnectionV1): Session = { Session.builder.createInternal(Some(connection)) } @@ -1414,11 +1469,12 @@ object Session extends Logging { options } - /** Returns a builder you can use to set configuration properties and create a [[Session]] object. 
- * @return - * [[SessionBuilder]] - * @since 0.1.0 - */ + /** + * Returns a builder you can use to set configuration properties and create a [[Session]] object. + * @return + * [[SessionBuilder]] + * @since 0.1.0 + */ def builder: SessionBuilder = new SessionBuilder private val activeSession: InheritableThreadLocal[Session] = @@ -1450,12 +1506,13 @@ object Session extends Logging { logInfo(s"reset global stored proc session") } - /** Returns the active session in this thread, if any. - * - * @return - * [[Session]] - * @since 0.1.0 - */ + /** + * Returns the active session in this thread, if any. + * + * @return + * [[Session]] + * @since 0.1.0 + */ private[snowpark] def getActiveSession: Option[Session] = { if (globalStoredProcSession.isDefined) { logInfo(s"global stored proc session is defined, returned it instead of the active session") @@ -1472,9 +1529,10 @@ object Session extends Logging { logInfo("Done closing stderr and redirecting to stdout") } - /** Provides methods to set configuration properties and create a [[Session]]. - * @since 0.1.0 - */ + /** + * Provides methods to set configuration properties and create a [[Session]]. + * @since 0.1.0 + */ class SessionBuilder { private var options: Map[String, String] = Map() @@ -1494,95 +1552,101 @@ object Session extends Logging { this } - /** Adds the app name to set in the query_tag after session creation. - * - * Since version 1.13.0, the app name is set to the query tag in JSON format. For example: - * {{{ - * val session = Session.builder.appName("myApp").configFile(myConfigFile).create - * print(session.getQueryTag().get) - * {"APPNAME":"myApp"} - * }}} - * - * In previous versions it is set using a key=value format. For example: - * {{{ - * val session = Session.builder.appName("myApp").configFile(myConfigFile).create - * print(session.getQueryTag().get) - * APPNAME=myApp - * }}} - * - * @param appName - * Name of the app. - * @return - * A [[SessionBuilder]] - * @since 1.12.0 - */ + /** + * Adds the app name to set in the query_tag after session creation. + * + * Since version 1.13.0, the app name is set to the query tag in JSON format. For example: + * {{{ + * val session = Session.builder.appName("myApp").configFile(myConfigFile).create + * print(session.getQueryTag().get) + * {"APPNAME":"myApp"} + * }}} + * + * In previous versions it is set using a key=value format. For example: + * {{{ + * val session = Session.builder.appName("myApp").configFile(myConfigFile).create + * print(session.getQueryTag().get) + * APPNAME=myApp + * }}} + * + * @param appName + * Name of the app. + * @return + * A [[SessionBuilder]] + * @since 1.12.0 + */ def appName(appName: String): SessionBuilder = { this.appName = Some(appName) this } - /** Adds the configuration properties in the specified file to the SessionBuilder configuration. - * - * @param file - * Path to the file containing the configuration properties. - * @return - * A [[SessionBuilder]] - * @since 0.1.0 - */ + /** + * Adds the configuration properties in the specified file to the SessionBuilder configuration. + * + * @param file + * Path to the file containing the configuration properties. + * @return + * A [[SessionBuilder]] + * @since 0.1.0 + */ def configFile(file: String): SessionBuilder = { configs(loadConfFromFile(file)) } - /** Adds the specified configuration property and value to the SessionBuilder configuration. - * - * @param key - * Name of the configuration property. - * @param value - * Value of the configuration property. 
- * @return - * A [[SessionBuilder]] - * @since 0.1.0 - */ + /** + * Adds the specified configuration property and value to the SessionBuilder configuration. + * + * @param key + * Name of the configuration property. + * @param value + * Value of the configuration property. + * @return + * A [[SessionBuilder]] + * @since 0.1.0 + */ def config(key: String, value: String): SessionBuilder = synchronized { options = options + (key -> value) this } - /** Adds the specified {@code Map} of configuration properties to the SessionBuilder - * configuration. - * - * Note that calling this method overwrites any existing configuration properties that you have - * already set in the SessionBuilder. - * - * @param configs - * Map of the names and values of configuration properties. - * @return - * A [[SessionBuilder]] - * @since 0.1.0 - */ + /** + * Adds the specified {@code Map} of configuration properties to the SessionBuilder + * configuration. + * + * Note that calling this method overwrites any existing configuration properties that you have + * already set in the SessionBuilder. + * + * @param configs + * Map of the names and values of configuration properties. + * @return + * A [[SessionBuilder]] + * @since 0.1.0 + */ def configs(configs: Map[String, String]): SessionBuilder = synchronized { options = options ++ configs this } - /** Adds the specified Java {@code Map} of configuration properties to the SessionBuilder - * configuration. - * - * Note that calling this method overwrites any existing configuration properties that you have - * already set in the SessionBuilder. - * - * @since 0.2.0 - */ + /** + * Adds the specified Java {@code Map} of configuration properties to the SessionBuilder + * configuration. + * + * Note that calling this method overwrites any existing configuration properties that you have + * already set in the SessionBuilder. + * + * @since 0.2.0 + */ def configs(javaMap: java.util.Map[String, String]): SessionBuilder = { configs(javaMap.asScala.toMap) } - /** Creates a new Session. - * - * @return - * A [[Session]] - * @since 0.1.0 - */ + /** + * Creates a new Session. + * + * @return + * A [[Session]] + * @since 0.1.0 + */ def create: Session = { val session = createInternal(None) val appName = this.appName @@ -1593,12 +1657,13 @@ object Session extends Logging { session } - /** Returns the existing session if already exists or create it if not. - * - * @return - * A [[Session]] - * @since 1.10.0 - */ + /** + * Returns the existing session if already exists or create it if not. + * + * @return + * A [[Session]] + * @since 1.10.0 + */ def getOrCreate: Session = { Session.getActiveSession.getOrElse(create) } diff --git a/src/main/scala/com/snowflake/snowpark/SnowparkClientException.scala b/src/main/scala/com/snowflake/snowpark/SnowparkClientException.scala index bbfebe82..6d1c8955 100644 --- a/src/main/scala/com/snowflake/snowpark/SnowparkClientException.scala +++ b/src/main/scala/com/snowflake/snowpark/SnowparkClientException.scala @@ -1,9 +1,10 @@ package com.snowflake.snowpark -/** Represents a Snowpark client side exception. - * - * @since 0.1.0 - */ +/** + * Represents a Snowpark client side exception. 
+ * + * @since 0.1.0 + */ class SnowparkClientException private[snowpark] ( val message: String, val errorCode: String, diff --git a/src/main/scala/com/snowflake/snowpark/StoredProcedure.scala b/src/main/scala/com/snowflake/snowpark/StoredProcedure.scala index d7a54326..697f639a 100644 --- a/src/main/scala/com/snowflake/snowpark/StoredProcedure.scala +++ b/src/main/scala/com/snowflake/snowpark/StoredProcedure.scala @@ -2,21 +2,22 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal.UdfColumnSchema -/** The reference to a Stored Procedure which can be created by `Session.sproc.register` methods, - * and used in `Session.storedProcedure` method. - * - * For example: - * {{{ - * val sp = session.sproc.registerTemporary( - * (session: Session, num: Int) => { - * val result = session.sql(s"select $num").collect().head.getInt(0) - * result + 100 - * }) - * session.storedProcedure(sp, 123).show() - * }}} - * - * @since 1.8.0 - */ +/** + * The reference to a Stored Procedure which can be created by `Session.sproc.register` methods, and + * used in `Session.storedProcedure` method. + * + * For example: + * {{{ + * val sp = session.sproc.registerTemporary( + * (session: Session, num: Int) => { + * val result = session.sql(s"select $num").collect().head.getInt(0) + * result + 100 + * }) + * session.storedProcedure(sp, 123).show() + * }}} + * + * @since 1.8.0 + */ case class StoredProcedure private[snowpark] ( sp: AnyRef, private[snowpark] val returnType: UdfColumnSchema, diff --git a/src/main/scala/com/snowflake/snowpark/TableFunction.scala b/src/main/scala/com/snowflake/snowpark/TableFunction.scala index db226cab..cfd52d2f 100644 --- a/src/main/scala/com/snowflake/snowpark/TableFunction.scala +++ b/src/main/scala/com/snowflake/snowpark/TableFunction.scala @@ -6,32 +6,33 @@ import com.snowflake.snowpark.internal.analyzer.{ TableFunctionExpression } -/** Looks up table functions by funcName and returns tableFunction object which can be used in - * DataFrame.join and Session.tableFunction methods. - * - * It can reference both system-defined table function and user-defined table functions. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.TableFunction - * - * session.tableFunction( - * TableFunction("flatten"), - * Map("input" -> parse_json(lit("[1,2]"))) - * ) - * - * df.join(TableFunction("split_to_table"), df("a"), lit(",")) - * }}} - * - * @param funcName - * table function name, can be a short name like func or a fully qualified name like - * database.schema.func - * @since 0.4.0 - */ +/** + * Looks up table functions by funcName and returns tableFunction object which can be used in + * DataFrame.join and Session.tableFunction methods. + * + * It can reference both system-defined table function and user-defined table functions. 
+ * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.TableFunction + * + * session.tableFunction( + * TableFunction("flatten"), + * Map("input" -> parse_json(lit("[1,2]"))) + * ) + * + * df.join(TableFunction("split_to_table"), df("a"), lit(",")) + * }}} + * + * @param funcName + * table function name, can be a short name like func or a fully qualified name like + * database.schema.func + * @since 0.4.0 + */ case class TableFunction(funcName: String) { private[snowpark] def call(args: Column*): TableFunctionExpression = - analyzer.TableFunction(funcName, args.map(_.expr)) + analyzer.TableFunctionEx(funcName, args.map(_.expr)) private[snowpark] def call(args: Map[String, Column]): TableFunctionExpression = NamedArgumentsTableFunction( @@ -40,24 +41,26 @@ case class TableFunction(funcName: String) { key -> value.expr }) - /** Create a Column reference by passing arguments in the TableFunction object. - * - * @param args - * A list of Column objects representing the arguments of the given table function - * @return - * A Column reference - * @since 1.10.0 - */ + /** + * Create a Column reference by passing arguments in the TableFunction object. + * + * @param args + * A list of Column objects representing the arguments of the given table function + * @return + * A Column reference + * @since 1.10.0 + */ def apply(args: Column*): Column = Column(this.call(args: _*)) - /** Create a Column reference by passing arguments in the TableFunction object. - * - * @param args - * function arguments map of the given table function. Some functions, like flatten, have named - * parameters. use this map to assign values to the corresponding parameters. - * @return - * A Column reference - * @since 1.10.0 - */ + /** + * Create a Column reference by passing arguments in the TableFunction object. + * + * @param args + * function arguments map of the given table function. Some functions, like flatten, have named + * parameters. use this map to assign values to the corresponding parameters. + * @return + * A Column reference + * @since 1.10.0 + */ def apply(args: Map[String, Column]): Column = Column(this.call(args)) } diff --git a/src/main/scala/com/snowflake/snowpark/UDFRegistration.scala b/src/main/scala/com/snowflake/snowpark/UDFRegistration.scala index 61f4cb99..4da2cc90 100644 --- a/src/main/scala/com/snowflake/snowpark/UDFRegistration.scala +++ b/src/main/scala/com/snowflake/snowpark/UDFRegistration.scala @@ -5,57 +5,58 @@ import com.snowflake.snowpark.internal._ import scala.reflect.runtime.universe.TypeTag // scalastyle:off -/** Provides methods to register lambdas and functions as UDFs in the Snowflake database. - * - * [[Session.udf]] returns an object of this class. - * - * You can use this object to register temporary UDFs that you plan to use in the current session: - * {{{ - * session.udf.registerTemporary("mydoubleudf", (x: Int) => x * x) - * session.sql(s"SELECT mydoubleudf(c) from T) - * }}} - * - * You can also register permanent UDFs that you can use in subsequent sessions. When registering a - * permanent UDF, you must specify a stage where the registration method will upload the JAR files - * for the UDF and any dependencies. - * {{{ - * session.udf.registerPermanent("mydoubleudf", (x: Int) => x * x, "mystage") - * session.sql(s"SELECT mydoubleudf(c) from T) - * }}} - * - * The methods that register a UDF return a [[UserDefinedFunction]] object, which you can use in - * [[Column]] expressions. 
- * {{{ - * val myUdf = session.udf.registerTemporary("mydoubleudf", (x: Int) => x * x) - * session.table("T").select(myUdf(col("c"))) - * }}} - * - * If you do not need to refer to a UDF by name, use - * [[com.snowflake.snowpark.functions.udf[RT](* com.snowflake.snowpark.functions.udf]] to create an - * anonymous UDF instead. - * - * Snowflake supports the following data types for the parameters for a UDF: - * - * | SQL Type | Scala Type | Notes | - * |:----------|:--------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------| - * | NUMBER | Short or Option[Short] | Supported | - * | NUMBER | Int or Option[Int] | Supported | - * | NUMBER | Long or Option[Long] | Supported | - * | FLOAT | Float or Option[Float] | Supported | - * | DOUBLE | Double or Option[Double] | Supported | - * | NUMBER | java.math.BigDecimal | Supported | - * | VARCHAR | String or java.lang.String | Supported | - * | BOOL | Boolean or Option[Boolean] | Supported | - * | DATE | java.sql.Date | Supported | - * | TIMESTAMP | java.sql.Timestamp | Supported | - * | BINARY | Array[Byte] | Supported | - * | ARRAY | Array[String] or Array[Variant] | Supported array of type Array[String] or Array[Variant] | - * | OBJECT | Map[String, String] or Map[String, Variant] | Supported mutable map of type scala.collection.mutable.Map[String, String] or scala.collection.mutable.Map[String, Variant] | - * | GEOGRAPHY | com.snowflake.snowpark.types.Geography | Supported | - * | VARIANT | com.snowflake.snowpark.types.Variant | Supported | - * - * @since 0.1.0 - */ +/** + * Provides methods to register lambdas and functions as UDFs in the Snowflake database. + * + * [[Session.udf]] returns an object of this class. + * + * You can use this object to register temporary UDFs that you plan to use in the current session: + * {{{ + * session.udf.registerTemporary("mydoubleudf", (x: Int) => x * x) + * session.sql(s"SELECT mydoubleudf(c) from T) + * }}} + * + * You can also register permanent UDFs that you can use in subsequent sessions. When registering a + * permanent UDF, you must specify a stage where the registration method will upload the JAR files + * for the UDF and any dependencies. + * {{{ + * session.udf.registerPermanent("mydoubleudf", (x: Int) => x * x, "mystage") + * session.sql(s"SELECT mydoubleudf(c) from T) + * }}} + * + * The methods that register a UDF return a [[UserDefinedFunction]] object, which you can use in + * [[Column]] expressions. + * {{{ + * val myUdf = session.udf.registerTemporary("mydoubleudf", (x: Int) => x * x) + * session.table("T").select(myUdf(col("c"))) + * }}} + * + * If you do not need to refer to a UDF by name, use + * [[com.snowflake.snowpark.functions.udf[RT](* com.snowflake.snowpark.functions.udf]] to create an + * anonymous UDF instead. 
+ * + * Snowflake supports the following data types for the parameters for a UDF: + * + * | SQL Type | Scala Type | Notes | + * |:----------|:--------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------| + * | NUMBER | Short or Option[Short] | Supported | + * | NUMBER | Int or Option[Int] | Supported | + * | NUMBER | Long or Option[Long] | Supported | + * | FLOAT | Float or Option[Float] | Supported | + * | DOUBLE | Double or Option[Double] | Supported | + * | NUMBER | java.math.BigDecimal | Supported | + * | VARCHAR | String or java.lang.String | Supported | + * | BOOL | Boolean or Option[Boolean] | Supported | + * | DATE | java.sql.Date | Supported | + * | TIMESTAMP | java.sql.Timestamp | Supported | + * | BINARY | Array[Byte] | Supported | + * | ARRAY | Array[String] or Array[Variant] | Supported array of type Array[String] or Array[Variant] | + * | OBJECT | Map[String, String] or Map[String, Variant] | Supported mutable map of type scala.collection.mutable.Map[String, String] or scala.collection.mutable.Map[String, Variant] | + * | GEOGRAPHY | com.snowflake.snowpark.types.Geography | Supported | + * | VARIANT | com.snowflake.snowpark.types.Variant | Supported | + * + * @since 0.1.0 + */ // scalastyle:on class UDFRegistration(session: Session) extends Logging { private[snowpark] val handler = new UDXRegistrationHandler(session) @@ -82,76 +83,82 @@ class UDFRegistration(session: Session) extends Logging { } */ - /** Registers a Scala closure of 0 argument as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 0 argument as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[RT: TypeTag](func: Function0[RT]): UserDefinedFunction = udf("registerTemporary") { register(None, _toUdf(func)) } - /** Registers a Scala closure of 1 argument as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 1 argument as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[RT: TypeTag, A1: TypeTag](func: Function1[A1, RT]): UserDefinedFunction = udf("registerTemporary") { register(None, _toUdf(func)) } - /** Registers a Scala closure of 2 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 2 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag]( func: Function2[A1, A2, RT]): UserDefinedFunction = udf("registerTemporary") { register(None, _toUdf(func)) } - /** Registers a Scala closure of 3 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 3 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. 
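For illustration, a temporary anonymous UDF registered from a closure; per the type table above, an Option[Int] parameter maps to a nullable NUMBER column. The DataFrame df and column num are assumed to exist:
{{{
  // NULL inputs arrive as None; the UDF is scoped to the current session.
  val plusOne = session.udf.registerTemporary((x: Option[Int]) => x.getOrElse(0) + 1)
  df.select(plusOne(df("num"))).show()
}}}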
+ * @since 0.6.0 + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( func: Function3[A1, A2, A3, RT]): UserDefinedFunction = udf("registerTemporary") { register(None, _toUdf(func)) } - /** Registers a Scala closure of 4 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 4 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = udf("registerTemporary") { register(None, _toUdf(func)) } - /** Registers a Scala closure of 5 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 5 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -163,13 +170,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 6 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 6 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -182,13 +190,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 7 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 7 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -202,13 +211,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 8 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 8 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -223,13 +233,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 9 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 9 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -245,13 +256,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 10 arguments as a temporary anonymous UDF that is scoped to this - * session. 
- * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - */ + /** + * Registers a Scala closure of 10 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -269,13 +281,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 11 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 11 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -294,13 +307,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 12 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 12 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -320,13 +334,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 13 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 13 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -347,13 +362,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 14 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 14 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -376,13 +392,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 15 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 15 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -406,13 +423,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 16 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 16 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. 
+ * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -437,13 +455,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 17 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 17 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -486,13 +505,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 18 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 18 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -537,13 +557,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 19 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 19 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -590,13 +611,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 20 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 20 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -645,13 +667,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 21 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 21 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -702,13 +725,14 @@ class UDFRegistration(session: Session) extends Logging { register(None, _toUdf(func)) } - /** Registers a Scala closure of 22 arguments as a temporary anonymous UDF that is scoped to this - * session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 22 arguments as a temporary anonymous UDF that is scoped to this + * session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -782,25 +806,27 @@ class UDFRegistration(session: Session) extends Logging { |}""".stripMargin) } */ - /** Registers a Scala closure of 0 argument as a temporary Snowflake Java UDF that you plan to use - * in the current session. - * - * @tparam RT - * Return type of the UDF. 
- * @since 0.1.0 - */ + /** + * Registers a Scala closure of 0 argument as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[RT: TypeTag](name: String, func: Function0[RT]): UserDefinedFunction = udf("registerTemporary", execName = name) { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 1 argument as a temporary Snowflake Java UDF that you plan to use - * in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 1 argument as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[RT: TypeTag, A1: TypeTag]( name: String, func: Function1[A1, RT]): UserDefinedFunction = @@ -808,13 +834,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 2 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 2 arguments as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag]( name: String, func: Function2[A1, A2, RT]): UserDefinedFunction = @@ -822,13 +849,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 3 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 3 arguments as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( name: String, func: Function3[A1, A2, A3, RT]): UserDefinedFunction = @@ -836,13 +864,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 4 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 4 arguments as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( name: String, func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = @@ -850,13 +879,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 5 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 5 arguments as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. 
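A sketch of the named variant; once registered, the temporary UDF can also be invoked by name, for example through callUDF or in SQL text, for the lifetime of the session. The UDF name and column names here are illustrative:
{{{
  import com.snowflake.snowpark.functions._

  session.udf.registerTemporary("full_name", (first: String, last: String) => first + " " + last)
  df.select(callUDF("full_name", df("first"), df("last"))).show()
}}}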
+ * @since 0.1.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -868,13 +898,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 6 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 6 arguments as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -887,13 +918,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 7 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 7 arguments as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -909,13 +941,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 8 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 8 arguments as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -932,13 +965,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 9 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 9 arguments as a temporary Snowflake Java UDF that you plan to use + * in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -956,13 +990,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 10 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 10 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.1.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -981,13 +1016,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 11 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 11 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. 
+ * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1007,13 +1043,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 12 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 12 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1035,13 +1072,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 13 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 13 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1064,13 +1102,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 14 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 14 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1094,13 +1133,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 15 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 15 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1125,13 +1165,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 16 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 16 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1157,13 +1198,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 17 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 17 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. 
+ * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1207,13 +1249,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 18 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 18 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1259,13 +1302,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 19 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 19 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1313,13 +1357,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 20 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 20 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1369,13 +1414,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 21 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 21 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1427,13 +1473,14 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func)) } - /** Registers a Scala closure of 22 arguments as a temporary Snowflake Java UDF that you plan to - * use in the current session. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 22 arguments as a temporary Snowflake Java UDF that you plan to + * use in the current session. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + */ def registerTemporary[ RT: TypeTag, A1: TypeTag, @@ -1516,22 +1563,23 @@ class UDFRegistration(session: Session) extends Logging { |}""".stripMargin) } */ - /** Registers a Scala closure of 0 argument as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. 
- * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 0 argument as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[RT: TypeTag]( name: String, func: Function0[RT], @@ -1540,22 +1588,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 1 argument as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 1 argument as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[RT: TypeTag, A1: TypeTag]( name: String, func: Function1[A1, RT], @@ -1564,22 +1613,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 2 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 2 arguments as a Snowflake Java UDF. 
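For illustration, a permanent registration against a stage; the stage name my_udf_stage is a placeholder for an existing stage that the current role can write to:
{{{
  session.udf.registerPermanent("my_double", (x: Int) => x * 2, "my_udf_stage")
  // Available to later sessions as well, for example from SQL:
  session.sql("SELECT my_double(c) FROM T").show()
}}}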
+ * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[RT: TypeTag, A1: TypeTag, A2: TypeTag]( name: String, func: Function2[A1, A2, RT], @@ -1588,22 +1638,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 3 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 3 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( name: String, func: Function3[A1, A2, A3, RT], @@ -1612,22 +1663,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 4 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 4 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. 
+ * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( name: String, func: Function4[A1, A2, A3, A4, RT], @@ -1636,22 +1688,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 5 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 5 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1666,22 +1719,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 6 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 6 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. 
+ * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1697,22 +1751,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 7 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 7 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1729,22 +1784,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 8 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 8 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. 
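A sketch of how a session-level dependency interacts with this upload behavior (the JAR path and names are hypothetical): JARs added via session.addDependency are uploaded with the UDF, and because uploads are keyed by the file's MD5 checksum, registering further UDFs against the same stage reuses the copy that is already there.
{{{
  session.addDependency("/tmp/libs/text-utils.jar")
  session.udf.registerPermanent("normalize", (s: String) => s.trim.toLowerCase, "my_udf_stage")
  // A second UDF on the same stage reuses the dependency JAR uploaded above.
  session.udf.registerPermanent("shout", (s: String) => s.toUpperCase, "my_udf_stage")
}}}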
+ */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1762,22 +1818,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 9 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 9 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1796,22 +1853,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 10 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.6.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 10 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.6.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1831,22 +1889,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 11 arguments as a Snowflake Java UDF. 
- * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 11 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1867,22 +1926,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 12 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 12 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1904,22 +1964,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 13 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. 
- * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 13 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1942,22 +2003,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 14 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 14 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -1981,22 +2043,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 15 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. 
- * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 15 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -2021,22 +2084,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 16 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 16 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -2062,22 +2126,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 17 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 17 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. 
Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -2122,22 +2187,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 18 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 18 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -2184,22 +2250,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 19 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 19 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. 
The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -2248,22 +2315,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 20 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 20 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -2314,22 +2382,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 21 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 21 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. 
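A minimal sketch of calling one of these `registerPermanent` overloads (here the two-argument one), assuming an existing connection profile and stage; the stage `@my_udf_stage` and the table and column names are hypothetical:
{{{
import com.snowflake.snowpark.Session
import com.snowflake.snowpark.functions.col

// Hypothetical profile, stage, table, and column names, for illustration only.
val session = Session.builder.configFile("profile.properties").create
val normalize = session.udf.registerPermanent(
  "normalize_amount",
  (amount: Double, rate: Double) => amount * rate,
  "@my_udf_stage")
session.table("orders").select(normalize(col("amount"), col("fx_rate"))).show()
}}}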
+ */ def registerPermanent[ RT: TypeTag, A1: TypeTag, @@ -2382,22 +2451,23 @@ class UDFRegistration(session: Session) extends Logging { register(Some(name), _toUdf(func), Some(stageLocation)) } - /** Registers a Scala closure of 22 arguments as a Snowflake Java UDF. - * - * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDF code itself will - * be uploaded to a subdirectory named after the UDF. - * - * @tparam RT - * Return type of the UDF. - * @since 0.12.0 - * @param stageLocation - * Stage location where the JAR files for the UDF and its and its dependencies should be - * uploaded. - */ + /** + * Registers a Scala closure of 22 arguments as a Snowflake Java UDF. + * + * The function uploads the JAR files that the UDF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDFs and specify the same stage location, any dependent JAR files used + * by those functions will only be uploaded once. The JAR file for the UDF code itself will be + * uploaded to a subdirectory named after the UDF. + * + * @tparam RT + * Return type of the UDF. + * @since 0.12.0 + * @param stageLocation + * Stage location where the JAR files for the UDF and its and its dependencies should be + * uploaded. + */ def registerPermanent[ RT: TypeTag, A1: TypeTag, diff --git a/src/main/scala/com/snowflake/snowpark/UDTFRegistration.scala b/src/main/scala/com/snowflake/snowpark/UDTFRegistration.scala index bf520bf5..d937802e 100644 --- a/src/main/scala/com/snowflake/snowpark/UDTFRegistration.scala +++ b/src/main/scala/com/snowflake/snowpark/UDTFRegistration.scala @@ -5,211 +5,215 @@ import com.snowflake.snowpark.udtf.UDTF import com.snowflake.snowpark_java.udtf.JavaUDTF // scalastyle:off -/** Provides methods to register a UDTF (user-defined table function) in the Snowflake database. - * - * [[Session.udtf]] returns an object of this class. - * - * To register an UDTF, you must: - * - * 1. Define a UDTF class. - * 1. Create an instance of that class, and register that instance as a UDTF. - * - * The next sections describe these steps in more detail. - * - * =Defining the UDTF Class= - * - * Define a class that inherits from one of the `UDTF[N]` classes (e.g. `UDTF0`, `UDTF1`, etc.), - * where ''n'' specifies the number of input arguments for your UDTF. For example, if your UDTF - * passes in 3 input arguments, extend the `UDTF3` class. - * - * In your class, override the following three methods: - * - `process()` , which is called once for each row in the input partition. - * - `endPartition()`, which is called once for each partition after all rows have been passed to - * `process()`. - * - `outputSchema()`, which returns a [[types.StructType]] object that describes the schema for - * the returned rows. - * - * When a UDTF is called, the rows are grouped into partitions before they are passed to the UDTF: - * - If the statement that calls the UDTF specifies the PARTITION clause (explicit partitions), - * that clause determines how the rows are partitioned. - * - If the statement does not specify the PARTITION clause (implicit partitions), Snowflake - * determines how best to partition the rows. 
- * - * For an explanation of partitions, see - * [[https://docs.snowflake.com/en/developer-guide/udf/java/udf-java-tabular-functions.html#label-udf-java-partitions Table Functions and Partitions]] - * - * ==Defining the process() Method== - * - * This method is invoked once for each row in the input partition. - * - * The arguments passed to the registered UDTF are passed to `process()`. For each argument passed - * to the UDTF, you must have a corresponding argument in the signature of the `process()` method. - * Make sure that the type of the argument in the `process()` method matches the Snowflake data - * type of the corresponding argument in the UDTF. - * - * Snowflake supports the following data types for the parameters for a UDTF: - * - * | SQL Type | Scala Type | Notes | - * |:----------|:--------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------| - * | NUMBER | Short or Option[Short] | Supported | - * | NUMBER | Int or Option[Int] | Supported | - * | NUMBER | Long or Option[Long] | Supported | - * | FLOAT | Float or Option[Float] | Supported | - * | DOUBLE | Double or Option[Double] | Supported | - * | NUMBER | java.math.BigDecimal | Supported | - * | VARCHAR | String or java.lang.String | Supported | - * | BOOL | Boolean or Option[Boolean] | Supported | - * | DATE | java.sql.Date | Supported | - * | TIMESTAMP | java.sql.Timestamp | Supported | - * | BINARY | Array[Byte] | Supported | - * | ARRAY | Array[String] or Array[Variant] | Supported array of type Array[String] or Array[Variant] | - * | OBJECT | Map[String, String] or Map[String, Variant] | Supported mutable map of type scala.collection.mutable.Map[String, String] or scala.collection.mutable.Map[String, Variant] | - * | VARIANT | com.snowflake.snowpark.types.Variant | Supported | - * - * ==Defining the endPartition() Method== - * - * This method is invoked once for each partition, after all rows in that partition have been - * passed to the `process()` method. - * - * You can use this method to generate output rows, based on any state information that you - * aggregate in the `process()` method. - * - * ==Defining the outputSchema() Method== - * - * In this method, define the output schema for the rows returned by the `process()` and - * `endPartition()` methods. - * - * Construct and return a [[types.StructType]] object that uses an Array of [[types.StructField]] - * objects to specify the Snowflake data type of each field in a returned row. 
- * - * Snowflake supports the following DataTypes for the output schema for a UDTF: - * - * | DataType | SQL Type | Notes | - * |:---------------------------------|:----------|:----------| - * | BooleanType | Boolean | Supported | - * | ShortType | NUMBER | Supported | - * | IntegerType | NUMBER | Supported | - * | LongType | NUMBER | Supported | - * | DecimalType | NUMBER | Supported | - * | FloatType | FLOAT | Supported | - * | DoubleType | DOUBLE | Supported | - * | StringType | VARCHAR | Supported | - * | BinaryType | BINARY | Supported | - * | TimeType | TIME | Supported | - * | DateType | DATE | Supported | - * | TimestampType | TIMESTAMP | Supported | - * | VariantType | VARIANT | Supported | - * | ArrayType(StringType) | ARRAY | Supported | - * | ArrayType(VariantType) | ARRAY | Supported | - * | MapType(StringType, StringType) | OBJECT | Supported | - * | MapType(StringType, VariantType) | OBJECT | Supported | - * - * ==Example of a UDTF Class== - * - * The following is an example of a UDTF class that generates a range of rows. - * - * The UDTF passes in 2 arguments, so the class extends `UDTF2`. - * - * The arguments `start` and `count` specify the starting number for the row and the number of rows - * to generate. - * - * {{{ - * class MyRangeUdtf extends UDTF2[Int, Int] { - * override def process(start: Int, count: Int): Iterable[Row] = - * (start until (start + count)).map(Row(_)) - * override def endPartition(): Iterable[Row] = Array.empty[Row] - * override def outputSchema(): StructType = StructType(StructField("C1", IntegerType)) - * } - * }}} - * - * =Registering the UDTF= - * - * Next, create an instance of the new class, and register the class by calling one of the - * [[UDTFRegistration]] methods. You can register a temporary or permanent UDTF by name. If you - * don't need to call the UDTF by name, you can register an anonymous UDTF. - * - * ==Registering a Temporary UDTF By Name== - * - * To register a temporary UDTF by name, call `registerTemporary`, passing in a name for the UDTF - * and an instance of the UDTF class. For example: - * {{{ - * // Use the MyRangeUdtf defined in previous example. - * val tableFunction = session.udtf.registerTemporary("myUdtf", new MyRangeUdtf()) - * session.tableFunction(tableFunction, lit(10), lit(5)).show - * }}} - * - * ==Registering a Permanent UDTF By Name== - * - * If you need to use the UDTF in subsequent sessions, register a permanent UDTF. - * - * When registering a permanent UDTF, you must specify a stage where the registration method will - * upload the JAR files for the UDTF and its dependencies. For example: - * {{{ - * val tableFunction = session.udtf.registerPermanent("myUdtf", new MyRangeUdtf(), "@myStage") - * session.tableFunction(tableFunction, lit(10), lit(5)).show - * }}} - * - * ==Registering an Anonymous Temporary UDTF== - * - * If you do not need to refer to a UDTF by name, use [[registerTemporary(udtf* UDTF)]] to create - * an anonymous UDTF instead. - * - * ==Calling a UDTF== - * The methods that register a UDTF return a [[TableFunction]] object, which you can use in - * [[Session.tableFunction]]. - * {{{ - * val tableFunction = session.udtf.registerTemporary("myUdtf", new MyRangeUdtf()) - * session.tableFunction(tableFunction, lit(10), lit(5)).show - * }}} - * - * @since 1.2.0 - */ +/** + * Provides methods to register a UDTF (user-defined table function) in the Snowflake database. + * + * [[Session.udtf]] returns an object of this class. + * + * To register an UDTF, you must: + * + * 1. Define a UDTF class. 
+ * 1. Create an instance of that class, and register that instance as a UDTF. + * + * The next sections describe these steps in more detail. + * + * =Defining the UDTF Class= + * + * Define a class that inherits from one of the `UDTF[N]` classes (e.g. `UDTF0`, `UDTF1`, etc.), + * where ''n'' specifies the number of input arguments for your UDTF. For example, if your UDTF + * passes in 3 input arguments, extend the `UDTF3` class. + * + * In your class, override the following three methods: + * - `process()` , which is called once for each row in the input partition. + * - `endPartition()`, which is called once for each partition after all rows have been passed to + * `process()`. + * - `outputSchema()`, which returns a [[types.StructType]] object that describes the schema for + * the returned rows. + * + * When a UDTF is called, the rows are grouped into partitions before they are passed to the UDTF: + * - If the statement that calls the UDTF specifies the PARTITION clause (explicit partitions), + * that clause determines how the rows are partitioned. + * - If the statement does not specify the PARTITION clause (implicit partitions), Snowflake + * determines how best to partition the rows. + * + * For an explanation of partitions, see + * [[https://docs.snowflake.com/en/developer-guide/udf/java/udf-java-tabular-functions.html#label-udf-java-partitions Table Functions and Partitions]] + * + * ==Defining the process() Method== + * + * This method is invoked once for each row in the input partition. + * + * The arguments passed to the registered UDTF are passed to `process()`. For each argument passed + * to the UDTF, you must have a corresponding argument in the signature of the `process()` method. + * Make sure that the type of the argument in the `process()` method matches the Snowflake data type + * of the corresponding argument in the UDTF. + * + * Snowflake supports the following data types for the parameters for a UDTF: + * + * | SQL Type | Scala Type | Notes | + * |:----------|:--------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------| + * | NUMBER | Short or Option[Short] | Supported | + * | NUMBER | Int or Option[Int] | Supported | + * | NUMBER | Long or Option[Long] | Supported | + * | FLOAT | Float or Option[Float] | Supported | + * | DOUBLE | Double or Option[Double] | Supported | + * | NUMBER | java.math.BigDecimal | Supported | + * | VARCHAR | String or java.lang.String | Supported | + * | BOOL | Boolean or Option[Boolean] | Supported | + * | DATE | java.sql.Date | Supported | + * | TIMESTAMP | java.sql.Timestamp | Supported | + * | BINARY | Array[Byte] | Supported | + * | ARRAY | Array[String] or Array[Variant] | Supported array of type Array[String] or Array[Variant] | + * | OBJECT | Map[String, String] or Map[String, Variant] | Supported mutable map of type scala.collection.mutable.Map[String, String] or scala.collection.mutable.Map[String, Variant] | + * | VARIANT | com.snowflake.snowpark.types.Variant | Supported | + * + * ==Defining the endPartition() Method== + * + * This method is invoked once for each partition, after all rows in that partition have been passed + * to the `process()` method. + * + * You can use this method to generate output rows, based on any state information that you + * aggregate in the `process()` method. 
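The role of `endPartition()` is easiest to see with a stateful sketch, assuming `UDTF1` follows the same pattern as the `UDTF2` example later in this comment; the class, column, and field names are hypothetical:
{{{
import com.snowflake.snowpark.Row
import com.snowflake.snowpark.types.{LongType, StructField, StructType}
import com.snowflake.snowpark.udtf.UDTF1

class CountRowsUdtf extends UDTF1[String] {
  // State accumulated across the rows of one partition.
  private var rowCount: Long = 0L

  // Called once per input row: update the state, emit no per-row output.
  override def process(value: String): Iterable[Row] = {
    rowCount += 1
    Seq.empty[Row]
  }

  // Called once per partition: emit a single row built from the accumulated state.
  override def endPartition(): Iterable[Row] = Seq(Row(rowCount))

  override def outputSchema(): StructType = StructType(StructField("ROW_COUNT", LongType))
}
}}}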
+ * + * ==Defining the outputSchema() Method== + * + * In this method, define the output schema for the rows returned by the `process()` and + * `endPartition()` methods. + * + * Construct and return a [[types.StructType]] object that uses an Array of [[types.StructField]] + * objects to specify the Snowflake data type of each field in a returned row. + * + * Snowflake supports the following DataTypes for the output schema for a UDTF: + * + * | DataType | SQL Type | Notes | + * |:---------------------------------|:----------|:----------| + * | BooleanType | Boolean | Supported | + * | ShortType | NUMBER | Supported | + * | IntegerType | NUMBER | Supported | + * | LongType | NUMBER | Supported | + * | DecimalType | NUMBER | Supported | + * | FloatType | FLOAT | Supported | + * | DoubleType | DOUBLE | Supported | + * | StringType | VARCHAR | Supported | + * | BinaryType | BINARY | Supported | + * | TimeType | TIME | Supported | + * | DateType | DATE | Supported | + * | TimestampType | TIMESTAMP | Supported | + * | VariantType | VARIANT | Supported | + * | ArrayType(StringType) | ARRAY | Supported | + * | ArrayType(VariantType) | ARRAY | Supported | + * | MapType(StringType, StringType) | OBJECT | Supported | + * | MapType(StringType, VariantType) | OBJECT | Supported | + * + * ==Example of a UDTF Class== + * + * The following is an example of a UDTF class that generates a range of rows. + * + * The UDTF passes in 2 arguments, so the class extends `UDTF2`. + * + * The arguments `start` and `count` specify the starting number for the row and the number of rows + * to generate. + * + * {{{ + * class MyRangeUdtf extends UDTF2[Int, Int] { + * override def process(start: Int, count: Int): Iterable[Row] = + * (start until (start + count)).map(Row(_)) + * override def endPartition(): Iterable[Row] = Array.empty[Row] + * override def outputSchema(): StructType = StructType(StructField("C1", IntegerType)) + * } + * }}} + * + * =Registering the UDTF= + * + * Next, create an instance of the new class, and register the class by calling one of the + * [[UDTFRegistration]] methods. You can register a temporary or permanent UDTF by name. If you + * don't need to call the UDTF by name, you can register an anonymous UDTF. + * + * ==Registering a Temporary UDTF By Name== + * + * To register a temporary UDTF by name, call `registerTemporary`, passing in a name for the UDTF + * and an instance of the UDTF class. For example: + * {{{ + * // Use the MyRangeUdtf defined in previous example. + * val tableFunction = session.udtf.registerTemporary("myUdtf", new MyRangeUdtf()) + * session.tableFunction(tableFunction, lit(10), lit(5)).show + * }}} + * + * ==Registering a Permanent UDTF By Name== + * + * If you need to use the UDTF in subsequent sessions, register a permanent UDTF. + * + * When registering a permanent UDTF, you must specify a stage where the registration method will + * upload the JAR files for the UDTF and its dependencies. For example: + * {{{ + * val tableFunction = session.udtf.registerPermanent("myUdtf", new MyRangeUdtf(), "@myStage") + * session.tableFunction(tableFunction, lit(10), lit(5)).show + * }}} + * + * ==Registering an Anonymous Temporary UDTF== + * + * If you do not need to refer to a UDTF by name, use [[registerTemporary(udtf* UDTF)]] to create an + * anonymous UDTF instead. + * + * ==Calling a UDTF== + * The methods that register a UDTF return a [[TableFunction]] object, which you can use in + * [[Session.tableFunction]]. 
+ * {{{ + * val tableFunction = session.udtf.registerTemporary("myUdtf", new MyRangeUdtf()) + * session.tableFunction(tableFunction, lit(10), lit(5)).show + * }}} + * + * @since 1.2.0 + */ // scalastyle:on class UDTFRegistration(session: Session) extends Logging { private[snowpark] val handler = new UDXRegistrationHandler(session) - /** Registers an UDTF instance as a temporary anonymous UDTF that is scoped to this session. - * - * @param udtf - * The UDTF instance to be registered - * @since 1.2.0 - */ + /** + * Registers an UDTF instance as a temporary anonymous UDTF that is scoped to this session. + * + * @param udtf + * The UDTF instance to be registered + * @since 1.2.0 + */ def registerTemporary(udtf: UDTF): TableFunction = tableFunction("registerTemporary") { handler.registerUDTF(None, udtf) } - /** Registers an UDTF instance as a temporary Snowflake Java UDTF that you plan to use in the - * current session. - * - * @param funcName - * The UDTF function name - * @param udtf - * The UDTF instance to be registered - * @since 1.2.0 - */ + /** + * Registers an UDTF instance as a temporary Snowflake Java UDTF that you plan to use in the + * current session. + * + * @param funcName + * The UDTF function name + * @param udtf + * The UDTF instance to be registered + * @since 1.2.0 + */ def registerTemporary(funcName: String, udtf: UDTF): TableFunction = tableFunction("registerTemporary", execName = funcName) { handler.registerUDTF(Some(funcName), udtf) } - /** Registers an UDTF instance as a Snowflake Java UDTF. - * - * The function uploads the JAR files that the UDTF depends upon to the specified stage. Each JAR - * file is uploaded to a subdirectory named after the MD5 checksum for the file. - * - * If you register multiple UDTFs and specify the same stage location, any dependent JAR files - * used by those functions will only be uploaded once. The JAR file for the UDTF code itself will - * be uploaded to a subdirectory named after the UDTF. - * - * @param funcName - * The UDTF function name - * @param udtf - * The UDTF instance to be registered. - * @param stageLocation - * Stage location where the JAR files for the UDTF and its and its dependencies should be - * uploaded - * @since 1.2.0 - */ + /** + * Registers an UDTF instance as a Snowflake Java UDTF. + * + * The function uploads the JAR files that the UDTF depends upon to the specified stage. Each JAR + * file is uploaded to a subdirectory named after the MD5 checksum for the file. + * + * If you register multiple UDTFs and specify the same stage location, any dependent JAR files + * used by those functions will only be uploaded once. The JAR file for the UDTF code itself will + * be uploaded to a subdirectory named after the UDTF. + * + * @param funcName + * The UDTF function name + * @param udtf + * The UDTF instance to be registered. 
+ * @param stageLocation + * Stage location where the JAR files for the UDTF and its and its dependencies should be + * uploaded + * @since 1.2.0 + */ def registerPermanent(funcName: String, udtf: UDTF, stageLocation: String): TableFunction = tableFunction("registerPermanent", execName = funcName, execFilePath = stageLocation) { handler.registerUDTF(Some(funcName), udtf, Some(stageLocation)) diff --git a/src/main/scala/com/snowflake/snowpark/Updatable.scala b/src/main/scala/com/snowflake/snowpark/Updatable.scala index 26c5942e..8517a398 100644 --- a/src/main/scala/com/snowflake/snowpark/Updatable.scala +++ b/src/main/scala/com/snowflake/snowpark/Updatable.scala @@ -23,36 +23,39 @@ private[snowpark] object Updatable extends Logging { } -/** Result of updating rows in an Updatable - * - * @since 0.7.0 - */ +/** + * Result of updating rows in an Updatable + * + * @since 0.7.0 + */ case class UpdateResult(rowsUpdated: Long, multiJoinedRowsUpdated: Long) -/** Result of deleting rows in an Updatable - * - * @since 0.7.0 - */ +/** + * Result of deleting rows in an Updatable + * + * @since 0.7.0 + */ case class DeleteResult(rowsDeleted: Long) -/** Represents a lazily-evaluated Updatable. It extends [[DataFrame]] so all [[DataFrame]] - * operations can be applied on it. - * - * '''Creating an Updatable''' - * - * You can create an Updatable by calling [[Session.table(name* session.table]] with the name of - * the Updatable. - * - * Example 1: Creating a Updatable by reading a table. - * {{{ - * val dfPrices = session.table("itemsdb.publicschema.prices") - * }}} - * - * @groupname actions Actions - * @groupname basic Basic DataFrame Functions - * - * @since 0.7.0 - */ +/** + * Represents a lazily-evaluated Updatable. It extends [[DataFrame]] so all [[DataFrame]] operations + * can be applied on it. + * + * '''Creating an Updatable''' + * + * You can create an Updatable by calling [[Session.table(name* session.table]] with the name of the + * Updatable. + * + * Example 1: Creating a Updatable by reading a table. + * {{{ + * val dfPrices = session.table("itemsdb.publicschema.prices") + * }}} + * + * @groupname actions Actions + * @groupname basic Basic DataFrame Functions + * + * @since 0.7.0 + */ class Updatable private[snowpark] ( private[snowpark] val tableName: String, override private[snowpark] val session: Session, @@ -62,118 +65,123 @@ class Updatable private[snowpark] ( session.analyzer.resolve(UnresolvedRelation(tableName)), methodChain) { - /** Updates all rows in the Updatable with specified assignments and returns a [[UpdateResult]], - * representing number of rows modified and number of multi-joined rows modified. - * - * For example: - * {{{ - * updatable.update(Map(col("b") -> lit(0))) - * }}} - * - * Assign value 0 to column b in all rows in updatable. - * - * {{{ - * updatable.update(Map(col("c") -> (col("a") + col("b")))) - * }}} - * - * Assign the sum of column a and column b to column c in all rows in updatable - * - * @group actions - * @since 0.7.0 - * @return - * [[UpdateResult]] - */ + /** + * Updates all rows in the Updatable with specified assignments and returns a [[UpdateResult]], + * representing number of rows modified and number of multi-joined rows modified. + * + * For example: + * {{{ + * updatable.update(Map(col("b") -> lit(0))) + * }}} + * + * Assign value 0 to column b in all rows in updatable. 
+ * + * {{{ + * updatable.update(Map(col("c") -> (col("a") + col("b")))) + * }}} + * + * Assign the sum of column a and column b to column c in all rows in updatable + * + * @group actions + * @since 0.7.0 + * @return + * [[UpdateResult]] + */ def update(assignments: Map[Column, Column]): UpdateResult = action("update") { val newDf = getUpdateDataFrameWithColumn(assignments, None, None) Updatable.getUpdateResult(newDf.collect()) } - /** Updates all rows in the updatable with specified assignments and returns a [[UpdateResult]], - * representing number of rows modified and number of multi-joined rows modified. - * - * For example: - * {{{ - * updatable.update(Map("b" -> lit(0))) - * }}} - * - * Assign value 0 to column b in all rows in updatable. - * - * {{{ - * updatable.update(Map("c" -> (col("a") + col("b")))) - * }}} - * - * Assign the sum of column a and column b to column c in all rows in updatable - * - * @group actions - * @since 0.7.0 - * @return - * [[UpdateResult]] - */ + /** + * Updates all rows in the updatable with specified assignments and returns a [[UpdateResult]], + * representing number of rows modified and number of multi-joined rows modified. + * + * For example: + * {{{ + * updatable.update(Map("b" -> lit(0))) + * }}} + * + * Assign value 0 to column b in all rows in updatable. + * + * {{{ + * updatable.update(Map("c" -> (col("a") + col("b")))) + * }}} + * + * Assign the sum of column a and column b to column c in all rows in updatable + * + * @group actions + * @since 0.7.0 + * @return + * [[UpdateResult]] + */ def update[T: ClassTag](assignments: Map[String, Column]): UpdateResult = action("update") { val newDf = getUpdateDataFrameWithString(assignments, None, None) Updatable.getUpdateResult(newDf.collect()) } - /** Updates all rows in the updatable that satisfy specified condition with specified assignments - * and returns a [[UpdateResult]], representing number of rows modified and number of - * multi-joined rows modified. - * - * For example: - * {{{ - * updatable.update(Map(col("b") -> lit(0)), col("a") === 1) - * }}} - * - * Assign value 0 to column b in all rows where column a has value 1. - * - * @group actions - * @since 0.7.0 - * @return - * [[UpdateResult]] - */ + /** + * Updates all rows in the updatable that satisfy specified condition with specified assignments + * and returns a [[UpdateResult]], representing number of rows modified and number of multi-joined + * rows modified. + * + * For example: + * {{{ + * updatable.update(Map(col("b") -> lit(0)), col("a") === 1) + * }}} + * + * Assign value 0 to column b in all rows where column a has value 1. + * + * @group actions + * @since 0.7.0 + * @return + * [[UpdateResult]] + */ def update(assignments: Map[Column, Column], condition: Column): UpdateResult = action("update") { val newDf = getUpdateDataFrameWithColumn(assignments, Some(condition), None) Updatable.getUpdateResult(newDf.collect()) } - /** Updates all rows in the updatable that satisfy specified condition with specified assignments - * and returns a [[UpdateResult]], representing number of rows modified and number of - * multi-joined rows modified. - * - * For example: - * {{{ - * updatable.update(Map("b" -> lit(0)), col("a") === 1) - * }}} - * - * Assign value 0 to column b in all rows where column a has value 1. 
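A short sketch of how the returned [[UpdateResult]] (defined earlier in this file) is typically consumed; the session, table, and column names are hypothetical:
{{{
import com.snowflake.snowpark.functions.{col, lit}

// Assumes an existing Session named `session` and a table named "prices".
val prices = session.table("prices")
val result = prices.update(Map(col("flag") -> lit(0)), col("amount") < lit(0))
println(s"rows updated: ${result.rowsUpdated}, multi-joined rows: ${result.multiJoinedRowsUpdated}")
}}}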
- * - * @group actions - * @since 0.7.0 - * @return - * [[UpdateResult]] - */ + /** + * Updates all rows in the updatable that satisfy specified condition with specified assignments + * and returns a [[UpdateResult]], representing number of rows modified and number of multi-joined + * rows modified. + * + * For example: + * {{{ + * updatable.update(Map("b" -> lit(0)), col("a") === 1) + * }}} + * + * Assign value 0 to column b in all rows where column a has value 1. + * + * @group actions + * @since 0.7.0 + * @return + * [[UpdateResult]] + */ def update[T: ClassTag](assignments: Map[String, Column], condition: Column): UpdateResult = action("update") { val newDf = getUpdateDataFrameWithString(assignments, Some(condition), None) Updatable.getUpdateResult(newDf.collect()) } - /** Updates all rows in the updatable that satisfy specified condition where condition includes - * columns in other [[DataFrame]], and returns a [[UpdateResult]], representing number of rows - * modified and number of multi-joined rows modified. - * - * For example: - * {{{ - * t1.update(Map(col("b") -> lit(0)), t1("a") === t2("a"), t2) - * }}} - * - * Assign value 0 to column b in all rows in t1 where column a in t1 equals column a in t2. - * - * @group actions - * @since 0.7.0 - * @return - * [[UpdateResult]] - */ + /** + * Updates all rows in the updatable that satisfy specified condition where condition includes + * columns in other [[DataFrame]], and returns a [[UpdateResult]], representing number of rows + * modified and number of multi-joined rows modified. + * + * For example: + * {{{ + * t1.update(Map(col("b") -> lit(0)), t1("a") === t2("a"), t2) + * }}} + * + * Assign value 0 to column b in all rows in t1 where column a in t1 equals column a in t2. + * + * @group actions + * @since 0.7.0 + * @return + * [[UpdateResult]] + */ def update( assignments: Map[Column, Column], condition: Column, @@ -182,22 +190,23 @@ class Updatable private[snowpark] ( Updatable.getUpdateResult(newDf.collect()) } - /** Updates all rows in the updatable that satisfy specified condition where condition includes - * columns in other [[DataFrame]], and returns a [[UpdateResult]], representing number of rows - * modified and number of multi-joined rows modified. - * - * For example: - * {{{ - * t1.update(Map("b" -> lit(0)), t1("a") === t2("a"), t2) - * }}} - * - * Assign value 0 to column b in all rows in t1 where column a in t1 equals column a in t2. - * - * @group actions - * @since 0.7.0 - * @return - * [[UpdateResult]] - */ + /** + * Updates all rows in the updatable that satisfy specified condition where condition includes + * columns in other [[DataFrame]], and returns a [[UpdateResult]], representing number of rows + * modified and number of multi-joined rows modified. + * + * For example: + * {{{ + * t1.update(Map("b" -> lit(0)), t1("a") === t2("a"), t2) + * }}} + * + * Assign value 0 to column b in all rows in t1 where column a in t1 equals column a in t2. + * + * @group actions + * @since 0.7.0 + * @return + * [[UpdateResult]] + */ def update[T: ClassTag]( assignments: Map[String, Column], condition: Column, @@ -228,62 +237,65 @@ class Updatable private[snowpark] ( sourceData.map(disambiguate(this, _, JoinType("left"), Seq.empty)._2.plan))) } - /** Deletes all rows in the updatable and returns a [[DeleteResult]], representing number of rows - * deleted. - * - * For example: - * {{{ - * updatable.delete() - * }}} - * - * Deletes all rows in updatable. 
- * - * @group actions - * @since 0.7.0 - * @return - * [[DeleteResult]] - */ + /** + * Deletes all rows in the updatable and returns a [[DeleteResult]], representing number of rows + * deleted. + * + * For example: + * {{{ + * updatable.delete() + * }}} + * + * Deletes all rows in updatable. + * + * @group actions + * @since 0.7.0 + * @return + * [[DeleteResult]] + */ def delete(): DeleteResult = action("delete") { val newDf = getDeleteDataFrame(None, None) Updatable.getDeleteResult(newDf.collect()) } - /** Deletes all rows in the updatable that satisfy specified condition and returns a - * [[DeleteResult]], representing number of rows deleted. - * - * For example: - * {{{ - * updatable.delete(col("a") === 1) - * }}} - * - * Deletes all rows where column a has value 1. - * - * @group actions - * @since 0.7.0 - * @return - * [[DeleteResult]] - */ + /** + * Deletes all rows in the updatable that satisfy specified condition and returns a + * [[DeleteResult]], representing number of rows deleted. + * + * For example: + * {{{ + * updatable.delete(col("a") === 1) + * }}} + * + * Deletes all rows where column a has value 1. + * + * @group actions + * @since 0.7.0 + * @return + * [[DeleteResult]] + */ def delete(condition: Column): DeleteResult = action("delete") { val newDf = getDeleteDataFrame(Some(condition), None) Updatable.getDeleteResult(newDf.collect()) } - /** Deletes all rows in the updatable that satisfy specified condition where condition includes - * columns in other [[DataFrame]], and returns a [[DeleteResult]], representing number of rows - * deleted. - * - * For example: - * {{{ - * t1.delete(t1("a") === t2("a"), t2) - * }}} - * - * Deletes all rows in t1 where column a in t1 equals column a in t2. - * - * @group actions - * @since 0.7.0 - * @return - * [[DeleteResult]] - */ + /** + * Deletes all rows in the updatable that satisfy specified condition where condition includes + * columns in other [[DataFrame]], and returns a [[DeleteResult]], representing number of rows + * deleted. + * + * For example: + * {{{ + * t1.delete(t1("a") === t2("a"), t2) + * }}} + * + * Deletes all rows in t1 where column a in t1 equals column a in t2. + * + * @group actions + * @since 0.7.0 + * @return + * [[DeleteResult]] + */ def delete(condition: Column, sourceData: DataFrame): DeleteResult = action("delete") { val newDf = getDeleteDataFrame(Some(condition), Some(sourceData)) Updatable.getDeleteResult(newDf.collect()) @@ -300,22 +312,23 @@ class Updatable private[snowpark] ( sourceData.map(disambiguate(this, _, JoinType("left"), Seq.empty)._2.plan))) } - /** Initiates a merge action for this updatable with [[DataFrame]] source on specified join - * expression. Returns a [[MergeBuilder]] which provides APIs to define merge clauses. - * - * For example: - * {{{ - * target.merge(source, target("id") === source("id")) - * }}} - * - * Initiates a merge action for target with source where the expression target.id = source.id is - * used to join target and source. - * - * @group actions - * @since 0.7.0 - * @return - * [[MergeBuilder]] - */ + /** + * Initiates a merge action for this updatable with [[DataFrame]] source on specified join + * expression. Returns a [[MergeBuilder]] which provides APIs to define merge clauses. + * + * For example: + * {{{ + * target.merge(source, target("id") === source("id")) + * }}} + * + * Initiates a merge action for target with source where the expression target.id = source.id is + * used to join target and source. 
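A sketch of how the returned [[MergeBuilder]] can be extended with clauses and executed; the `whenMatched`, `whenNotMatched`, and `collect` calls reflect the MergeBuilder/MergeClause API as understood here rather than anything shown in this hunk, and the session, table, and column names are hypothetical:
{{{
import com.snowflake.snowpark.functions.col

// Assumes an existing Session named `session` and two existing tables.
val target = session.table("target_table")
val source = session.table("source_table")
val mergeResult = target
  .merge(source, target("id") === source("id"))
  .whenMatched.update(Map(col("value") -> source("value")))
  .whenNotMatched.insert(Map(col("id") -> source("id"), col("value") -> source("value")))
  .collect()
}}}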
+ * + * @group actions + * @since 0.7.0 + * @return + * [[MergeBuilder]] + */ def merge(source: DataFrame, joinExpr: Column): MergeBuilder = { session.conn.telemetry.reportActionMerge() MergeBuilder( @@ -328,34 +341,36 @@ class Updatable private[snowpark] ( deleted = false) } - /** Returns a clone of this Updatable. - * - * @return - * A [[Updatable]] - * @since 0.10.0 - * @group basic - */ + /** + * Returns a clone of this Updatable. + * + * @return + * A [[Updatable]] + * @since 0.10.0 + * @group basic + */ override def clone: Updatable = action("clone") { new Updatable(tableName, session, Seq()) } - /** Returns an [[UpdatableAsyncActor]] object that can be used to execute Updatable actions - * asynchronously. - * - * Example: - * {{{ - * val updatable = session.table(tableName) - * val asyncJob = updatable.async.update(Map(col("b") -> lit(0)), col("a") === 1) - * // At this point, the thread is not blocked. You can perform additional work before - * // calling asyncJob.getResult() to retrieve the results of the action. - * // NOTE: getResult() is a blocking call. - * val updateResult = asyncJob.getResult() - * }}} - * - * @since 0.11.0 - * @return - * A [[UpdatableAsyncActor]] object - */ + /** + * Returns an [[UpdatableAsyncActor]] object that can be used to execute Updatable actions + * asynchronously. + * + * Example: + * {{{ + * val updatable = session.table(tableName) + * val asyncJob = updatable.async.update(Map(col("b") -> lit(0)), col("a") === 1) + * // At this point, the thread is not blocked. You can perform additional work before + * // calling asyncJob.getResult() to retrieve the results of the action. + * // NOTE: getResult() is a blocking call. + * val updateResult = asyncJob.getResult() + * }}} + * + * @since 0.11.0 + * @return + * A [[UpdatableAsyncActor]] object + */ override def async: UpdatableAsyncActor = new UpdatableAsyncActor(this) @inline override protected def action[T](funcName: String)(func: => T): T = { @@ -363,59 +378,64 @@ class Updatable private[snowpark] ( } } -/** Provides APIs to execute Updatable actions asynchronously. - * - * @since 0.11.0 - */ +/** + * Provides APIs to execute Updatable actions asynchronously. + * + * @since 0.11.0 + */ class UpdatableAsyncActor private[snowpark] (updatable: Updatable) extends DataFrameAsyncActor(updatable) { - /** Executes `Updatable.update` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.update` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def update(assignments: Map[Column, Column]): TypedAsyncJob[UpdateResult] = action("update") { val newDf = updatable.getUpdateDataFrameWithColumn(assignments, None, None) updatable.session.conn.executeAsync[UpdateResult](newDf.snowflakePlan) } - /** Executes `Updatable.update` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.update` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. 
+ * @since 0.11.0 + */ def update[T: ClassTag](assignments: Map[String, Column]): TypedAsyncJob[UpdateResult] = action("update") { val newDf = updatable.getUpdateDataFrameWithString(assignments, None, None) updatable.session.conn.executeAsync[UpdateResult](newDf.snowflakePlan) } - /** Executes `Updatable.update` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.update` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def update(assignments: Map[Column, Column], condition: Column): TypedAsyncJob[UpdateResult] = action("update") { val newDf = updatable.getUpdateDataFrameWithColumn(assignments, Some(condition), None) updatable.session.conn.executeAsync[UpdateResult](newDf.snowflakePlan) } - /** Executes `Updatable.update` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.update` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def update[T: ClassTag]( assignments: Map[String, Column], condition: Column): TypedAsyncJob[UpdateResult] = @@ -424,13 +444,14 @@ class UpdatableAsyncActor private[snowpark] (updatable: Updatable) updatable.session.conn.executeAsync[UpdateResult](newDf.snowflakePlan) } - /** Executes `Updatable.update` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.update` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def update( assignments: Map[Column, Column], condition: Column, @@ -440,13 +461,14 @@ class UpdatableAsyncActor private[snowpark] (updatable: Updatable) updatable.session.conn.executeAsync[UpdateResult](newDf.snowflakePlan) } - /** Executes `Updatable.update` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.update` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def update[T: ClassTag]( assignments: Map[String, Column], condition: Column, @@ -456,37 +478,40 @@ class UpdatableAsyncActor private[snowpark] (updatable: Updatable) updatable.session.conn.executeAsync[UpdateResult](newDf.snowflakePlan) } - /** Executes `Updatable.delete` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.delete` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def delete(): TypedAsyncJob[DeleteResult] = action("delete") { val newDf = updatable.getDeleteDataFrame(None, None) updatable.session.conn.executeAsync[DeleteResult](newDf.snowflakePlan) } - /** Executes `Updatable.delete` asynchronously. 
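A sketch of the non-blocking pattern for these asynchronous variants, using the [[DeleteResult]] case class defined earlier in this file; the session, table, and column names are hypothetical:
{{{
import com.snowflake.snowpark.functions.{col, lit}

// Assumes an existing Session named `session` and a table named "events".
val asyncJob = session.table("events").async.delete(col("status") === lit("expired"))
// The calling thread is not blocked here; other work can run before fetching the result.
val deleteResult = asyncJob.getResult() // getResult() is a blocking call
println(s"rows deleted: ${deleteResult.rowsDeleted}")
}}}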
- * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.delete` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def delete(condition: Column): TypedAsyncJob[DeleteResult] = action("delete") { val newDf = updatable.getDeleteDataFrame(Some(condition), None) updatable.session.conn.executeAsync[DeleteResult](newDf.snowflakePlan) } - /** Executes `Updatable.delete` asynchronously. - * - * @return - * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the - * results. - * @since 0.11.0 - */ + /** + * Executes `Updatable.delete` asynchronously. + * + * @return + * A [[TypedAsyncJob]] object that you can use to check the status of the action and get the + * results. + * @since 0.11.0 + */ def delete(condition: Column, sourceData: DataFrame): TypedAsyncJob[DeleteResult] = action("delete") { val newDf = updatable.getDeleteDataFrame(Some(condition), Some(sourceData)) diff --git a/src/main/scala/com/snowflake/snowpark/UserDefinedFunction.scala b/src/main/scala/com/snowflake/snowpark/UserDefinedFunction.scala index f8d65c15..892fbf0c 100644 --- a/src/main/scala/com/snowflake/snowpark/UserDefinedFunction.scala +++ b/src/main/scala/com/snowflake/snowpark/UserDefinedFunction.scala @@ -4,28 +4,30 @@ import com.snowflake.snowpark.internal.ErrorMessage import com.snowflake.snowpark.internal.analyzer.Expression import com.snowflake.snowpark.internal.{SnowflakeUDF, UdfColumnSchema} -/** Encapsulates a user defined lambda or function that is returned by - * [[UDFRegistration.registerTemporary[RT](name* UDFRegistration.registerTemporary]] or by - * [[com.snowflake.snowpark.functions.udf[RT](* com.snowflake.snowpark.functions.udf]]. - * - * Use [[UserDefinedFunction!.apply UserDefinedFunction.apply]] to generate [[Column]] expressions - * from an instance. - * {{{ - * import com.snowflake.snowpark.functions._ - * val myUdf = udf((x: Int, y: String) => y + x) - * df.select(myUdf(col("i"), col("s"))) - * }}} - * @since 0.1.0 - */ +/** + * Encapsulates a user defined lambda or function that is returned by + * [[UDFRegistration.registerTemporary[RT](name* UDFRegistration.registerTemporary]] or by + * [[com.snowflake.snowpark.functions.udf[RT](* com.snowflake.snowpark.functions.udf]]. + * + * Use [[UserDefinedFunction!.apply UserDefinedFunction.apply]] to generate [[Column]] expressions + * from an instance. + * {{{ + * import com.snowflake.snowpark.functions._ + * val myUdf = udf((x: Int, y: String) => y + x) + * df.select(myUdf(col("i"), col("s"))) + * }}} + * @since 0.1.0 + */ case class UserDefinedFunction private[snowpark] ( f: AnyRef, private[snowpark] val returnType: UdfColumnSchema, private[snowpark] val inputTypes: Seq[UdfColumnSchema] = Nil, name: Option[String] = None) { - /** Apply the UDF to one or more columns to generate a [[Column]] expression. - * @since 0.1.0 - */ + /** + * Apply the UDF to one or more columns to generate a [[Column]] expression. 
+ * @since 0.1.0 + */ def apply(exprs: Column*): Column = { new Column(createUDFExpression(exprs.map(_.expr))) } diff --git a/src/main/scala/com/snowflake/snowpark/Window.scala b/src/main/scala/com/snowflake/snowpark/Window.scala index 554a66f1..ea646321 100644 --- a/src/main/scala/com/snowflake/snowpark/Window.scala +++ b/src/main/scala/com/snowflake/snowpark/Window.scala @@ -2,48 +2,56 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal.analyzer.UnspecifiedFrame -/** Contains functions to form [[WindowSpec]]. - * - * @since 0.1.0 - */ +/** + * Contains functions to form [[WindowSpec]]. + * + * @since 0.1.0 + */ object Window { - /** Returns [[WindowSpec]] object with partition by clause. - * @since 0.1.0 - */ + /** + * Returns [[WindowSpec]] object with partition by clause. + * @since 0.1.0 + */ def partitionBy(cols: Column*): WindowSpec = spec.partitionBy(cols: _*) - /** Returns [[WindowSpec]] object with order by clause. - * @since 0.1.0 - */ + /** + * Returns [[WindowSpec]] object with order by clause. + * @since 0.1.0 + */ def orderBy(cols: Column*): WindowSpec = spec.orderBy(cols: _*) - /** Returns a value representing unbounded preceding. - * @since 0.1.0 - */ + /** + * Returns a value representing unbounded preceding. + * @since 0.1.0 + */ def unboundedPreceding: Long = Long.MinValue - /** Returns a value representing unbounded following. - * @since 0.1.0 - */ + /** + * Returns a value representing unbounded following. + * @since 0.1.0 + */ def unboundedFollowing: Long = Long.MaxValue - /** Returns a value representing current row. - * @since 0.1.0 - */ + /** + * Returns a value representing current row. + * @since 0.1.0 + */ def currentRow: Long = 0 - /** Returns [[WindowSpec]] object with row frame clause. - * @since 0.1.0 - */ + /** + * Returns [[WindowSpec]] object with row frame clause. + * @since 0.1.0 + */ def rowsBetween(start: Long, end: Long): WindowSpec = spec.rowsBetween(start, end) - /** Returns [[WindowSpec]] object with range frame clause. - * @since 0.1.0 - */ + /** + * Returns [[WindowSpec]] object with range frame clause. + * @since 0.1.0 + */ def rangeBetween(start: Long, end: Long): WindowSpec = spec.rangeBetween(start, end) diff --git a/src/main/scala/com/snowflake/snowpark/WindowSpec.scala b/src/main/scala/com/snowflake/snowpark/WindowSpec.scala index e5758ec7..910c76e2 100644 --- a/src/main/scala/com/snowflake/snowpark/WindowSpec.scala +++ b/src/main/scala/com/snowflake/snowpark/WindowSpec.scala @@ -3,22 +3,25 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal.analyzer._ import com.snowflake.snowpark.internal.ErrorMessage -/** Represents a window frame clause. - * @since 0.1.0 - */ +/** + * Represents a window frame clause. + * @since 0.1.0 + */ class WindowSpec private[snowpark] ( partitionSpec: Seq[Expression], orderSpec: Seq[SortOrder], frame: WindowFrame) { - /** Returns a new [[WindowSpec]] object with the new partition by clause. - * @since 0.1.0 - */ + /** + * Returns a new [[WindowSpec]] object with the new partition by clause. + * @since 0.1.0 + */ def partitionBy(cols: Column*): WindowSpec = new WindowSpec(cols.map(_.expr), orderSpec, frame) - /** Returns a new [[WindowSpec]] object with the new order by clause. - * @since 0.1.0 - */ + /** + * Returns a new [[WindowSpec]] object with the new order by clause. 
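A sketch of composing a [[WindowSpec]] from the [[Window]] object and applying it through `Column.over`; the DataFrame and column names are hypothetical, and `desc`, `sum`, and `over` come from elsewhere in the Snowpark API rather than from this hunk:
{{{
import com.snowflake.snowpark.Window
import com.snowflake.snowpark.functions.{col, sum}

// Assumes an existing DataFrame named `df` with "dept" and "salary" columns.
val runningTotal = Window
  .partitionBy(col("dept"))
  .orderBy(col("salary").desc)
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)
df.select(col("dept"), col("salary"), sum(col("salary")).over(runningTotal).as("running_total")).show()
}}}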
+ * @since 0.1.0 + */ def orderBy(cols: Column*): WindowSpec = { val sortOrder: Seq[SortOrder] = cols.map { col => col.expr match { @@ -29,9 +32,10 @@ class WindowSpec private[snowpark] ( new WindowSpec(partitionSpec, sortOrder, frame) } - /** Returns a new [[WindowSpec]] object with the new row frame clause. - * @since 0.1.0 - */ + /** + * Returns a new [[WindowSpec]] object with the new row frame clause. + * @since 0.1.0 + */ def rowsBetween(start: Long, end: Long): WindowSpec = { val boundaryStart = start match { case 0 => CurrentRow @@ -53,9 +57,10 @@ class WindowSpec private[snowpark] ( SpecifiedWindowFrame(RowFrame, boundaryStart, boundaryEnd)) } - /** Returns a new [[WindowSpec]] object with the new range frame clause. - * @since 0.1.0 - */ + /** + * Returns a new [[WindowSpec]] object with the new range frame clause. + * @since 0.1.0 + */ def rangeBetween(start: Long, end: Long): WindowSpec = { val boundaryStart = start match { case 0 => CurrentRow diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala index 69970c5f..ddb4f59a 100644 --- a/src/main/scala/com/snowflake/snowpark/functions.scala +++ b/src/main/scala/com/snowflake/snowpark/functions.scala @@ -8,122 +8,127 @@ import com.snowflake.snowpark.types._ import scala.reflect.runtime.universe.TypeTag import scala.util.Random -/** Provides utility functions that generate [[Column]] expressions that you can pass to - * [[DataFrame]] transformation methods. These functions generate references to columns, literals, - * and SQL expressions (e.g. "c + 1"). - * - * This object also provides functions that correspond to Snowflake - * [[https://docs.snowflake.com/en/sql-reference-functions.html system-defined functions]] - * (built-in functions), including functions for aggregation and window functions. - * - * The following examples demonstrate the use of some of these functions: - * - * {{{ - * // Use columns and literals in expressions. - * df.select(col("c") + lit(1)) - * - * // Call system-defined (built-in) functions. - * // This example calls the function that corresponds to the ADD_MONTHS() SQL function. - * df.select(add_months(col("d"), lit(3))) - * - * // Call system-defined functions that have no corresponding function in the functions object. - * // This example calls the RADIANS() SQL function, passing in values from the column "e". - * df.select(callBuiltin("radians", col("e"))) - * - * // Call a user-defined function (UDF) by name. - * df.select(callUDF("some_func", col("c"))) - * - * // Register and call an anonymous UDF. - * val myudf = udf((x:Int) => x + x) - * df.select(myudf(col("c"))) - * - * // Evaluate an SQL expression - * df.select(sqlExpr("c + 1")) - * }}} - * - * For functions that accept scala types, e.g. 
callUdf, callBuiltin, lit(), the mapping from scala - * types to Snowflake types is as follows: - * {{{ - * String => String - * Byte => TinyInt - * Int => Int - * Short => SmallInt - * Long => BigInt - * Float => Float - * Double => Double - * Decimal => Number - * Boolean => Boolean - * Array => Array - * Timestamp => Timestamp - * Date => Date - * }}} - * - * @groupname client_func Client-side Functions - * @groupname sort_func Sorting Functions - * @groupname agg_func Aggregate Functions - * @groupname win_func Window Functions - * @groupname con_func Conditional Expression Functions - * @groupname num_func Numeric Functions - * @groupname gen_func Data Generation Functions - * @groupname bit_func Bitwise Expression Functions - * @groupname str_func String and Binary Functions - * @groupname utl_func Utility and Hash Functions - * @groupname date_func Date and Time Functions - * @groupname cont_func Context Functions - * @groupname semi_func Semi-structured Data Functions - * @groupname udf_func Anonymous UDF Registration and Invocation Functions - * @since 0.1.0 - */ +/** + * Provides utility functions that generate [[Column]] expressions that you can pass to + * [[DataFrame]] transformation methods. These functions generate references to columns, literals, + * and SQL expressions (e.g. "c + 1"). + * + * This object also provides functions that correspond to Snowflake + * [[https://docs.snowflake.com/en/sql-reference-functions.html system-defined functions]] (built-in + * functions), including functions for aggregation and window functions. + * + * The following examples demonstrate the use of some of these functions: + * + * {{{ + * // Use columns and literals in expressions. + * df.select(col("c") + lit(1)) + * + * // Call system-defined (built-in) functions. + * // This example calls the function that corresponds to the ADD_MONTHS() SQL function. + * df.select(add_months(col("d"), lit(3))) + * + * // Call system-defined functions that have no corresponding function in the functions object. + * // This example calls the RADIANS() SQL function, passing in values from the column "e". + * df.select(callBuiltin("radians", col("e"))) + * + * // Call a user-defined function (UDF) by name. + * df.select(callUDF("some_func", col("c"))) + * + * // Register and call an anonymous UDF. + * val myudf = udf((x:Int) => x + x) + * df.select(myudf(col("c"))) + * + * // Evaluate an SQL expression + * df.select(sqlExpr("c + 1")) + * }}} + * + * For functions that accept scala types, e.g. 
callUdf, callBuiltin, lit(), the mapping from scala + * types to Snowflake types is as follows: + * {{{ + * String => String + * Byte => TinyInt + * Int => Int + * Short => SmallInt + * Long => BigInt + * Float => Float + * Double => Double + * Decimal => Number + * Boolean => Boolean + * Array => Array + * Timestamp => Timestamp + * Date => Date + * }}} + * + * @groupname client_func Client-side Functions + * @groupname sort_func Sorting Functions + * @groupname agg_func Aggregate Functions + * @groupname win_func Window Functions + * @groupname con_func Conditional Expression Functions + * @groupname num_func Numeric Functions + * @groupname gen_func Data Generation Functions + * @groupname bit_func Bitwise Expression Functions + * @groupname str_func String and Binary Functions + * @groupname utl_func Utility and Hash Functions + * @groupname date_func Date and Time Functions + * @groupname cont_func Context Functions + * @groupname semi_func Semi-structured Data Functions + * @groupname udf_func Anonymous UDF Registration and Invocation Functions + * @since 0.1.0 + */ // scalastyle:off object functions { // scalastyle:on - /** Returns the [[Column]] with the specified name. - * - * @group client_func - * @since 0.1.0 - */ + /** + * Returns the [[Column]] with the specified name. + * + * @group client_func + * @since 0.1.0 + */ def col(colName: String): Column = Column(colName) - /** Returns a [[Column]] with the specified name. Alias for col. - * - * @group client_func - * @since 0.1.0 - */ + /** + * Returns a [[Column]] with the specified name. Alias for col. + * + * @group client_func + * @since 0.1.0 + */ def column(colName: String): Column = Column(colName) - /** Generate a [[Column]] representing the result of the input DataFrame. The parameter `df` - * should have one column and must produce one row. Is an alias of [[toScalar]]. - * - * For Example: - * {{{ - * import functions._ - * val df1 = session.sql("select * from values(1,1,1),(2,2,3) as T(c1,c2,c3)") - * val df2 = session.sql("select * from values(2) as T(a)") - * df1.select(Column("c1"), col(df2)).show() - * df1.filter(Column("c1") < col(df2)).show() - * }}} - * - * @group client_func - * @since 0.2.0 - */ + /** + * Generate a [[Column]] representing the result of the input DataFrame. The parameter `df` should + * have one column and must produce one row. Is an alias of [[toScalar]]. + * + * For Example: + * {{{ + * import functions._ + * val df1 = session.sql("select * from values(1,1,1),(2,2,3) as T(c1,c2,c3)") + * val df2 = session.sql("select * from values(2) as T(a)") + * df1.select(Column("c1"), col(df2)).show() + * df1.filter(Column("c1") < col(df2)).show() + * }}} + * + * @group client_func + * @since 0.2.0 + */ def col(df: DataFrame): Column = toScalar(df) - /** Generate a [[Column]] representing the result of the input DataFrame. The parameter `df` - * should have one column and must produce one row. - * - * For Example: - * {{{ - * import functions._ - * val df1 = session.sql("select * from values(1,1,1),(2,2,3) as T(c1,c2,c3)") - * val df2 = session.sql("select * from values(2) as T(a)") - * df1.select(Column("c1"), toScalar(df2)).show() - * df1.filter(Column("c1") < toScalar(df2)).show() - * }}} - * - * @group client_func - * @since 0.4.0 - */ + /** + * Generate a [[Column]] representing the result of the input DataFrame. The parameter `df` should + * have one column and must produce one row. 
+ * + * For Example: + * {{{ + * import functions._ + * val df1 = session.sql("select * from values(1,1,1),(2,2,3) as T(c1,c2,c3)") + * val df2 = session.sql("select * from values(2) as T(a)") + * df1.select(Column("c1"), toScalar(df2)).show() + * df1.filter(Column("c1") < toScalar(df2)).show() + * }}} + * + * @group client_func + * @since 0.4.0 + */ def toScalar(df: DataFrame): Column = { if (df.output.size != 1) { throw ErrorMessage.DF_DATAFRAME_IS_NOT_QUALIFIED_FOR_SCALAR_QUERY( @@ -134,1042 +139,1138 @@ object functions { Column(ScalarSubquery(df.snowflakePlan)) } - /** Creates a [[Column]] expression for a literal value. - * - * @group client_func - * @since 0.1.0 - */ + /** + * Creates a [[Column]] expression for a literal value. + * + * @group client_func + * @since 0.1.0 + */ def lit(literal: Any): Column = typedLit(literal) - /** Creates a [[Column]] expression for a literal value. - * - * @group client_func - * @since 0.1.0 - */ + /** + * Creates a [[Column]] expression for a literal value. + * + * @group client_func + * @since 0.1.0 + */ def typedLit[T: TypeTag](literal: T): Column = literal match { case c: Column => c case s: Symbol => Column(s.name) case _ => Column(Literal(literal)) } - /** Creates a [[Column]] expression from raw SQL text. - * - * Note that the function does not interpret or check the SQL text. - * - * @group client_func - * @since 0.1.0 - */ + /** + * Creates a [[Column]] expression from raw SQL text. + * + * Note that the function does not interpret or check the SQL text. + * + * @group client_func + * @since 0.1.0 + */ def sqlExpr(sqlText: String): Column = Column.expr(sqlText) - /** Uses HyperLogLog to return an approximation of the distinct cardinality of the input (i.e. - * returns an approximation of `COUNT(DISTINCT col)`). - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Uses HyperLogLog to return an approximation of the distinct cardinality of the input (i.e. + * returns an approximation of `COUNT(DISTINCT col)`). + * + * @group agg_func + * @since 0.1.0 + */ def approx_count_distinct(e: Column): Column = builtin("approx_count_distinct")(e) - /** Returns the average of non-NULL records. If all records inside a group are NULL, the function - * returns NULL. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the average of non-NULL records. If all records inside a group are NULL, the function + * returns NULL. + * + * @group agg_func + * @since 0.1.0 + */ def avg(e: Column): Column = builtin("avg")(e) - /** Returns the correlation coefficient for non-null pairs in a group. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the correlation coefficient for non-null pairs in a group. + * + * @group agg_func + * @since 0.1.0 + */ def corr(column1: Column, column2: Column): Column = { builtin("corr")(column1, column2) } - /** Returns either the number of non-NULL records for the specified columns, or the total number - * of records. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns either the number of non-NULL records for the specified columns, or the total number of + * records. + * + * @group agg_func + * @since 0.1.0 + */ def count(e: Column): Column = e.expr match { // Turn count(*) into count(1) case _: Star => builtin("count")(Literal(1)) case _ => builtin("count")(e) } - /** Returns either the number of non-NULL distinct records for the specified columns, or the total - * number of the distinct records. An alias of count_distinct. 
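As a quick usage sketch of the literal, raw-SQL, and counting helpers documented above (the `session` object is taken from the surrounding examples; the `orders` table and its columns are hypothetical):

{{{
  import com.snowflake.snowpark.functions._

  val df = session.table("orders")                      // hypothetical table
  // Mix a literal and a raw SQL expression in one projection.
  df.select(col("amount") + lit(1), sqlExpr("amount * 0.1")).show()
  // Exact count vs. HyperLogLog-based approximate distinct count.
  df.select(count(col("customer_id")), approx_count_distinct(col("customer_id"))).show()
}}}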
- * - * @group agg_func - * @since 1.13.0 - */ + /** + * Returns either the number of non-NULL distinct records for the specified columns, or the total + * number of the distinct records. An alias of count_distinct. + * + * @group agg_func + * @since 1.13.0 + */ def countDistinct(colName: String, colNames: String*): Column = count_distinct(col(colName), colNames.map(Column.apply): _*) - /** Returns either the number of non-NULL distinct records for the specified columns, or the total - * number of the distinct records. An alias of count_distinct. - * - * @group agg_func - * @since 1.13.0 - */ + /** + * Returns either the number of non-NULL distinct records for the specified columns, or the total + * number of the distinct records. An alias of count_distinct. + * + * @group agg_func + * @since 1.13.0 + */ def countDistinct(expr: Column, exprs: Column*): Column = count_distinct(expr, exprs: _*) - /** Returns either the number of non-NULL distinct records for the specified columns, or the total - * number of the distinct records. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns either the number of non-NULL distinct records for the specified columns, or the total + * number of the distinct records. + * + * @group agg_func + * @since 0.1.0 + */ def count_distinct(expr: Column, exprs: Column*): Column = Column(FunctionExpression("count", (expr +: exprs).map(_.expr), isDistinct = true)) - /** Returns the population covariance for non-null pairs in a group. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the population covariance for non-null pairs in a group. + * + * @group agg_func + * @since 0.1.0 + */ def covar_pop(column1: Column, column2: Column): Column = { builtin("covar_pop")(column1, column2) } - /** Returns the sample covariance for non-null pairs in a group. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the sample covariance for non-null pairs in a group. + * + * @group agg_func + * @since 0.1.0 + */ def covar_samp(column1: Column, column2: Column): Column = { builtin("covar_samp")(column1, column2) } - /** Describes which of a list of expressions are grouped in a row produced by a GROUP BY query. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Describes which of a list of expressions are grouped in a row produced by a GROUP BY query. + * + * @group agg_func + * @since 0.1.0 + */ def grouping(e: Column): Column = builtin("grouping")(e) - /** Describes which of a list of expressions are grouped in a row produced by a GROUP BY query. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Describes which of a list of expressions are grouped in a row produced by a GROUP BY query. + * + * @group agg_func + * @since 0.1.0 + */ def grouping_id(cols: Column*): Column = builtin("grouping_id")(cols: _*) - /** Returns the population excess kurtosis of non-NULL records. If all records inside a group are - * NULL, the function returns NULL. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the population excess kurtosis of non-NULL records. If all records inside a group are + * NULL, the function returns NULL. + * + * @group agg_func + * @since 0.1.0 + */ def kurtosis(e: Column): Column = builtin("kurtosis")(e) - /** Returns the maximum value for the records in a group. NULL values are ignored unless all the - * records are NULL, in which case a NULL value is returned. 
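A minimal sketch combining the distinct-count, covariance, and kurtosis aggregates described above; the table and column names are placeholders:

{{{
  import com.snowflake.snowpark.functions._

  val df = session.table("measurements")                // hypothetical table
  df.select(
    count_distinct(col("sensor_id")),
    covar_samp(col("temp"), col("humidity")),
    kurtosis(col("temp"))).show()
}}}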
- * - * Example: - * {{{ - * val df = session.createDataFrame(Seq(1, 3, 10, 1, 3)).toDF("x") - * df.select(max("x")).show() - * - * ---------------- - * |"MAX(""X"")" | - * ---------------- - * |10 | - * ---------------- - * }}} - * - * @param colName - * The name of the column - * @return - * The maximum value of the given column - * @group agg_func - * @since 1.13.0 - */ + /** + * Returns the maximum value for the records in a group. NULL values are ignored unless all the + * records are NULL, in which case a NULL value is returned. + * + * Example: + * {{{ + * val df = session.createDataFrame(Seq(1, 3, 10, 1, 3)).toDF("x") + * df.select(max("x")).show() + * + * ---------------- + * |"MAX(""X"")" | + * ---------------- + * |10 | + * ---------------- + * }}} + * + * @param colName + * The name of the column + * @return + * The maximum value of the given column + * @group agg_func + * @since 1.13.0 + */ def max(colName: String): Column = max(col(colName)) - /** Returns the maximum value for the records in a group. NULL values are ignored unless all the - * records are NULL, in which case a NULL value is returned. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the maximum value for the records in a group. NULL values are ignored unless all the + * records are NULL, in which case a NULL value is returned. + * + * @group agg_func + * @since 0.1.0 + */ def max(e: Column): Column = builtin("max")(e) - /** Returns a non-deterministic value for the specified column. - * - * @group agg_func - * @since 0.12.0 - */ + /** + * Returns a non-deterministic value for the specified column. + * + * @group agg_func + * @since 0.12.0 + */ def any_value(e: Column): Column = builtin("any_value")(e) - /** Returns the average of non-NULL records. If all records inside a group are NULL, the function - * returns NULL. Alias of avg. - * - * Example: - * {{{ - * val df = session.createDataFrame(Seq(1, 3, 10, 1, 3)).toDF("x") - * df.select(mean("x")).show() - * - * ---------------- - * |"AVG(""X"")" | - * ---------------- - * |3.600000 | - * ---------------- - * }}} - * - * @param colName - * The name of the column - * @return - * The average value of the given column - * @group agg_func - * @since 1.13.0 - */ + /** + * Returns the average of non-NULL records. If all records inside a group are NULL, the function + * returns NULL. Alias of avg. + * + * Example: + * {{{ + * val df = session.createDataFrame(Seq(1, 3, 10, 1, 3)).toDF("x") + * df.select(mean("x")).show() + * + * ---------------- + * |"AVG(""X"")" | + * ---------------- + * |3.600000 | + * ---------------- + * }}} + * + * @param colName + * The name of the column + * @return + * The average value of the given column + * @group agg_func + * @since 1.13.0 + */ def mean(colName: String): Column = mean(col(colName)) - /** Returns the average of non-NULL records. If all records inside a group are NULL, the function - * returns NULL. Alias of avg - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the average of non-NULL records. If all records inside a group are NULL, the function + * returns NULL. Alias of avg + * + * @group agg_func + * @since 0.1.0 + */ def mean(e: Column): Column = avg(e) - /** Returns the median value for the records in a group. NULL values are ignored unless all the - * records are NULL, in which case a NULL value is returned. - * - * @group agg_func - * @since 0.5.0 - */ + /** + * Returns the median value for the records in a group. 
NULL values are ignored unless all the + * records are NULL, in which case a NULL value is returned. + * + * @group agg_func + * @since 0.5.0 + */ def median(e: Column): Column = { builtin("median")(e) } - /** Returns the minimum value for the records in a group. NULL values are ignored unless all the - * records are NULL, in which case a NULL value is returned. - * - * Example: - * {{{ - * val df = session.createDataFrame(Seq(1, 3, 10, 1, 3)).toDF("x") - * df.select(min("x")).show() - * - * ---------------- - * |"MIN(""X"")" | - * ---------------- - * |1 | - * ---------------- - * }}} - * - * @param colName - * The name of the column - * @return - * The minimum value of the given column - * @group agg_func - * @since 1.13.0 - */ + /** + * Returns the minimum value for the records in a group. NULL values are ignored unless all the + * records are NULL, in which case a NULL value is returned. + * + * Example: + * {{{ + * val df = session.createDataFrame(Seq(1, 3, 10, 1, 3)).toDF("x") + * df.select(min("x")).show() + * + * ---------------- + * |"MIN(""X"")" | + * ---------------- + * |1 | + * ---------------- + * }}} + * + * @param colName + * The name of the column + * @return + * The minimum value of the given column + * @group agg_func + * @since 1.13.0 + */ def min(colName: String): Column = min(col(colName)) - /** Returns the minimum value for the records in a group. NULL values are ignored unless all the - * records are NULL, in which case a NULL value is returned. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the minimum value for the records in a group. NULL values are ignored unless all the + * records are NULL, in which case a NULL value is returned. + * + * @group agg_func + * @since 0.1.0 + */ def min(e: Column): Column = builtin("min")(e) - /** Returns the sample skewness of non-NULL records. If all records inside a group are NULL, the - * function returns NULL. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the sample skewness of non-NULL records. If all records inside a group are NULL, the + * function returns NULL. + * + * @group agg_func + * @since 0.1.0 + */ def skew(e: Column): Column = builtin("skew")(e) - /** Returns the sample standard deviation (square root of sample variance) of non-NULL values. If - * all records inside a group are NULL, returns NULL. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the sample standard deviation (square root of sample variance) of non-NULL values. If + * all records inside a group are NULL, returns NULL. + * + * @group agg_func + * @since 0.1.0 + */ def stddev(e: Column): Column = builtin("stddev")(e) - /** Returns the sample standard deviation (square root of sample variance) of non-NULL values. If - * all records inside a group are NULL, returns NULL. Alias of stddev - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the sample standard deviation (square root of sample variance) of non-NULL values. If + * all records inside a group are NULL, returns NULL. Alias of stddev + * + * @group agg_func + * @since 0.1.0 + */ def stddev_samp(e: Column): Column = builtin("stddev_samp")(e) - /** Returns the population standard deviation (square root of variance) of non-NULL values. If all - * records inside a group are NULL, returns NULL. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the population standard deviation (square root of variance) of non-NULL values. If all + * records inside a group are NULL, returns NULL. 
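The basic order and dispersion statistics above combine naturally in a single projection; this sketch reuses the sample data from the `max`/`mean` examples:

{{{
  import com.snowflake.snowpark.functions._

  val df = session.createDataFrame(Seq(1, 3, 10, 1, 3)).toDF("x")
  df.select(min("x"), max("x"), mean("x"), median(col("x")), stddev(col("x"))).show()
}}}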
+ * + * @group agg_func + * @since 0.1.0 + */ def stddev_pop(e: Column): Column = builtin("stddev_pop")(e) - /** Returns the sum of non-NULL records in a group. If all records inside a group are NULL, the - * function returns NULL. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the sum of non-NULL records in a group. If all records inside a group are NULL, the + * function returns NULL. + * + * @group agg_func + * @since 0.1.0 + */ def sum(e: Column): Column = builtin("sum")(e) - /** Returns the sum of non-NULL records in a group. If all records inside a group are NULL, the - * function returns NULL. - * - * @group agg_func - * @since 1.12.0 - * @param colName - * The input column name - * @return - * The result column - */ + /** + * Returns the sum of non-NULL records in a group. If all records inside a group are NULL, the + * function returns NULL. + * + * @group agg_func + * @since 1.12.0 + * @param colName + * The input column name + * @return + * The result column + */ def sum(colName: String): Column = sum(col(colName)) - /** Returns the sum of non-NULL distinct records in a group. You can use the DISTINCT keyword to - * compute the sum of unique non-null values. If all records inside a group are NULL, the - * function returns NULL. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the sum of non-NULL distinct records in a group. You can use the DISTINCT keyword to + * compute the sum of unique non-null values. If all records inside a group are NULL, the function + * returns NULL. + * + * @group agg_func + * @since 0.1.0 + */ def sum_distinct(e: Column): Column = internalBuiltinFunction(true, "sum", e) - /** Returns the sample variance of non-NULL records in a group. If all records inside a group are - * NULL, a NULL is returned. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the sample variance of non-NULL records in a group. If all records inside a group are + * NULL, a NULL is returned. + * + * @group agg_func + * @since 0.1.0 + */ def variance(e: Column): Column = builtin("variance")(e) - /** Returns the sample variance of non-NULL records in a group. If all records inside a group are - * NULL, a NULL is returned. Alias of var_samp - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the sample variance of non-NULL records in a group. If all records inside a group are + * NULL, a NULL is returned. Alias of var_samp + * + * @group agg_func + * @since 0.1.0 + */ def var_samp(e: Column): Column = variance(e) - /** Returns the population variance of non-NULL records in a group. If all records inside a group - * are NULL, a NULL is returned. - * - * @group agg_func - * @since 0.1.0 - */ + /** + * Returns the population variance of non-NULL records in a group. If all records inside a group + * are NULL, a NULL is returned. + * + * @group agg_func + * @since 0.1.0 + */ def var_pop(e: Column): Column = builtin("var_pop")(e) - /** Returns an approximated value for the desired percentile. This function uses the t-Digest - * algorithm. - * - * @group agg_func - * @since 0.2.0 - */ + /** + * Returns an approximated value for the desired percentile. This function uses the t-Digest + * algorithm. + * + * @group agg_func + * @since 0.2.0 + */ def approx_percentile(col: Column, percentile: Double): Column = { builtin("approx_percentile")(col, sqlExpr(percentile.toString)) } - /** Returns the internal representation of the t-Digest state (as a JSON object) at the end of - * aggregation. This function uses the t-Digest algorithm. 
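For reference, a short sketch contrasting `sum` with `sum_distinct` and showing an approximate percentile; the data is illustrative:

{{{
  import com.snowflake.snowpark.functions._

  val df = session.createDataFrame(Seq(1, 1, 2, 3, 3)).toDF("x")
  df.select(sum(col("x")), sum_distinct(col("x")), var_pop(col("x"))).show()
  // Approximate median via the t-Digest based APPROX_PERCENTILE.
  df.select(approx_percentile(col("x"), 0.5)).show()
}}}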
- * - * @group agg_func - * @since 0.2.0 - */ + /** + * Returns the internal representation of the t-Digest state (as a JSON object) at the end of + * aggregation. This function uses the t-Digest algorithm. + * + * @group agg_func + * @since 0.2.0 + */ def approx_percentile_accumulate(col: Column): Column = { builtin("approx_percentile_accumulate")(col) } - /** Returns the desired approximated percentile value for the specified t-Digest state. - * APPROX_PERCENTILE_ESTIMATE(APPROX_PERCENTILE_ACCUMULATE(.)) is equivalent to - * APPROX_PERCENTILE(.). - * - * @group agg_func - * @since 0.2.0 - */ + /** + * Returns the desired approximated percentile value for the specified t-Digest state. + * APPROX_PERCENTILE_ESTIMATE(APPROX_PERCENTILE_ACCUMULATE(.)) is equivalent to + * APPROX_PERCENTILE(.). + * + * @group agg_func + * @since 0.2.0 + */ def approx_percentile_estimate(state: Column, percentile: Double): Column = { builtin("approx_percentile_estimate")(state, sqlExpr(percentile.toString)) } - /** Combines (merges) percentile input states into a single output state. - * - * This allows scenarios where APPROX_PERCENTILE_ACCUMULATE is run over horizontal partitions of - * the same table, producing an algorithm state for each table partition. These states can later - * be combined using APPROX_PERCENTILE_COMBINE, producing the same output state as a single run - * of APPROX_PERCENTILE_ACCUMULATE over the entire table. - * - * @group agg_func - * @since 0.2.0 - */ + /** + * Combines (merges) percentile input states into a single output state. + * + * This allows scenarios where APPROX_PERCENTILE_ACCUMULATE is run over horizontal partitions of + * the same table, producing an algorithm state for each table partition. These states can later + * be combined using APPROX_PERCENTILE_COMBINE, producing the same output state as a single run of + * APPROX_PERCENTILE_ACCUMULATE over the entire table. + * + * @group agg_func + * @since 0.2.0 + */ def approx_percentile_combine(state: Column): Column = { builtin("approx_percentile_combine")(state) } - /** Finds the cumulative distribution of a value with regard to other values within the same - * window partition. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Finds the cumulative distribution of a value with regard to other values within the same window + * partition. + * + * @group win_func + * @since 0.1.0 + */ def cume_dist(): Column = builtin("cume_dist")() - /** Returns the rank of a value within a group of values, without gaps in the ranks. The rank - * value starts at 1 and continues up sequentially. If two values are the same, they will have - * the same rank. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Returns the rank of a value within a group of values, without gaps in the ranks. The rank value + * starts at 1 and continues up sequentially. If two values are the same, they will have the same + * rank. + * + * @group win_func + * @since 0.1.0 + */ def dense_rank(): Column = builtin("dense_rank")() - /** Accesses data in a previous row in the same result set without having to join the table to - * itself. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Accesses data in a previous row in the same result set without having to join the table to + * itself. + * + * @group win_func + * @since 0.1.0 + */ def lag(e: Column, offset: Int, defaultValue: Column): Column = builtin("lag")(e, Literal(offset), defaultValue) - /** Accesses data in a previous row in the same result set without having to join the table to - * itself. 
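One possible two-phase use of the t-Digest state functions described above: accumulate a state, merge states, then estimate a percentile from the merged state (the `events` table and `latency` column are assumptions):

{{{
  import com.snowflake.snowpark.functions._

  // Phase 1: build a t-Digest state over the raw values (hypothetical table/column).
  val states = session.table("events")
    .select(approx_percentile_accumulate(col("latency")).as("state"))
  // Phase 2: merge the states and estimate the 50th percentile from the result.
  states
    .select(approx_percentile_combine(col("state")).as("combined"))
    .select(approx_percentile_estimate(col("combined"), 0.5))
    .show()
}}}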
- * - * @group win_func - * @since 0.1.0 - */ + /** + * Accesses data in a previous row in the same result set without having to join the table to + * itself. + * + * @group win_func + * @since 0.1.0 + */ def lag(e: Column, offset: Int): Column = lag(e, offset, lit(null)) - /** Accesses data in a previous row in the same result set without having to join the table to - * itself. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Accesses data in a previous row in the same result set without having to join the table to + * itself. + * + * @group win_func + * @since 0.1.0 + */ def lag(e: Column): Column = lag(e, 1) - /** Accesses data in a subsequent row in the same result set without having to join the table to - * itself. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Accesses data in a subsequent row in the same result set without having to join the table to + * itself. + * + * @group win_func + * @since 0.1.0 + */ def lead(e: Column, offset: Int, defaultValue: Column): Column = builtin("lead")(e, Literal(offset), defaultValue) - /** Accesses data in a subsequent row in the same result set without having to join the table to - * itself. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Accesses data in a subsequent row in the same result set without having to join the table to + * itself. + * + * @group win_func + * @since 0.1.0 + */ def lead(e: Column, offset: Int): Column = lead(e, offset, lit(null)) - /** Accesses data in a subsequent row in the same result set without having to join the table to - * itself. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Accesses data in a subsequent row in the same result set without having to join the table to + * itself. + * + * @group win_func + * @since 0.1.0 + */ def lead(e: Column): Column = lead(e, 1) - /** Divides an ordered data set equally into the number of buckets specified by n. Buckets are - * sequentially numbered 1 through n. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Divides an ordered data set equally into the number of buckets specified by n. Buckets are + * sequentially numbered 1 through n. + * + * @group win_func + * @since 0.1.0 + */ def ntile(n: Column): Column = builtin("ntile")(n) - /** Returns the relative rank of a value within a group of values, specified as a percentage - * ranging from 0.0 to 1.0. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Returns the relative rank of a value within a group of values, specified as a percentage + * ranging from 0.0 to 1.0. + * + * @group win_func + * @since 0.1.0 + */ def percent_rank(): Column = builtin("percent_rank")() - /** Returns the rank of a value within an ordered group of values. The rank value starts at 1 and - * continues up. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Returns the rank of a value within an ordered group of values. The rank value starts at 1 and + * continues up. + * + * @group win_func + * @since 0.1.0 + */ def rank(): Column = builtin("rank")() - /** Returns a unique row number for each row within a window partition. The row number starts at 1 - * and continues up sequentially. - * - * @group win_func - * @since 0.1.0 - */ + /** + * Returns a unique row number for each row within a window partition. The row number starts at 1 + * and continues up sequentially. + * + * @group win_func + * @since 0.1.0 + */ def row_number(): Column = builtin("row_number")() - /** Returns the first non-NULL expression among its arguments, or NULL if all its arguments are - * NULL. 
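The ranking and offset functions above are typically paired with a [[WindowSpec]]; a minimal sketch, assuming a `metrics` table with `dept`, `ts`, and `value` columns:

{{{
  import com.snowflake.snowpark.Window
  import com.snowflake.snowpark.functions._

  val w = Window.partitionBy(col("dept")).orderBy(col("ts"))
  session.table("metrics").select(
    col("value"),
    lag(col("value")).over(w),        // previous value within the department
    lead(col("value")).over(w),       // next value within the department
    row_number().over(w)).show()
}}}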
- * - * @group con_func - * @since 0.1.0 - */ + /** + * Returns the first non-NULL expression among its arguments, or NULL if all its arguments are + * NULL. + * + * @group con_func + * @since 0.1.0 + */ def coalesce(e: Column*): Column = builtin("coalesce")(e: _*) - /** Return true if the value in the column is not a number (NaN). - * - * @group con_func - * @since 0.1.0 - */ + /** + * Return true if the value in the column is not a number (NaN). + * + * @group con_func + * @since 0.1.0 + */ def equal_nan(e: Column): Column = withExpr { IsNaN(e.expr) } - /** Return true if the value in the column is null. - * - * @group con_func - * @since 0.1.0 - */ + /** + * Return true if the value in the column is null. + * + * @group con_func + * @since 0.1.0 + */ def is_null(e: Column): Column = withExpr { IsNull(e.expr) } - /** Returns the negation of the value in the column (equivalent to a unary minus). - * - * @group client_func - * @since 0.1.0 - */ + /** + * Returns the negation of the value in the column (equivalent to a unary minus). + * + * @group client_func + * @since 0.1.0 + */ def negate(e: Column): Column = -e - /** Returns the inverse of a boolean expression. - * - * @group client_func - * @since 0.1.0 - */ + /** + * Returns the inverse of a boolean expression. + * + * @group client_func + * @since 0.1.0 + */ def not(e: Column): Column = !e - /** Each call returns a pseudo-random 64-bit integer. - * - * @group gen_func - * @since 0.1.0 - */ + /** + * Each call returns a pseudo-random 64-bit integer. + * + * @group gen_func + * @since 0.1.0 + */ def random(seed: Long): Column = builtin("random")(Literal(seed)) - /** Each call returns a pseudo-random 64-bit integer. - * - * @group gen_func - * @since 0.1.0 - */ + /** + * Each call returns a pseudo-random 64-bit integer. + * + * @group gen_func + * @since 0.1.0 + */ def random(): Column = random(Random.nextLong()) - /** Returns the bitwise negation of a numeric expression. - * - * @group bit_func - * @since 0.1.0 - */ + /** + * Returns the bitwise negation of a numeric expression. + * + * @group bit_func + * @since 0.1.0 + */ def bitnot(e: Column): Column = builtin("bitnot")(e) - /** Converts an input expression to a decimal - * - * @group num_func - * @since 0.5.0 - */ + /** + * Converts an input expression to a decimal + * + * @group num_func + * @since 0.5.0 + */ def to_decimal(expr: Column, precision: Int, scale: Int): Column = { builtin("to_decimal")(expr, sqlExpr(precision.toString), sqlExpr(scale.toString)) } - /** Performs division like the division operator (/), but returns 0 when the divisor is 0 (rather - * than reporting an error). - * - * @group num_func - * @since 0.1.0 - */ + /** + * Performs division like the division operator (/), but returns 0 when the divisor is 0 (rather + * than reporting an error). + * + * @group num_func + * @since 0.1.0 + */ def div0(dividend: Column, divisor: Column): Column = builtin("div0")(dividend, divisor) - /** Returns the square-root of a non-negative numeric expression. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Returns the square-root of a non-negative numeric expression. + * + * @group num_func + * @since 0.1.0 + */ def sqrt(e: Column): Column = builtin("sqrt")(e) - /** Returns the absolute value of a numeric expression. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Returns the absolute value of a numeric expression. 
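A brief sketch of the null-handling and safe-division helpers documented above, using a hypothetical `payments` table:

{{{
  import com.snowflake.snowpark.functions._

  val df = session.table("payments")                    // hypothetical table
  df.select(
    coalesce(col("discount"), lit(0)),                  // fall back to 0 when NULL
    is_null(col("discount")),
    div0(col("total"), col("quantity")),                // 0 instead of a division-by-zero error
    to_decimal(col("total"), 38, 2)).show()
}}}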
+ * + * @group num_func + * @since 0.1.0 + */ def abs(e: Column): Column = builtin("abs")(e) - /** Computes the inverse cosine (arc cosine) of its input; the result is a number in the interval - * [-pi, pi]. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the inverse cosine (arc cosine) of its input; the result is a number in the interval + * [-pi, pi]. + * + * @group num_func + * @since 0.1.0 + */ def acos(e: Column): Column = builtin("acos")(e) - /** Computes the inverse sine (arc sine) of its argument; the result is a number in the interval - * [-pi, pi]. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the inverse sine (arc sine) of its argument; the result is a number in the interval + * [-pi, pi]. + * + * @group num_func + * @since 0.1.0 + */ def asin(e: Column): Column = builtin("asin")(e) - /** Computes the inverse tangent (arc tangent) of its argument; the result is a number in the - * interval [-pi, pi]. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the inverse tangent (arc tangent) of its argument; the result is a number in the + * interval [-pi, pi]. + * + * @group num_func + * @since 0.1.0 + */ def atan(e: Column): Column = builtin("atan")(e) - /** Computes the inverse tangent (arc tangent) of the ratio of its two arguments. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the inverse tangent (arc tangent) of the ratio of its two arguments. + * + * @group num_func + * @since 0.1.0 + */ def atan2(y: Column, x: Column): Column = builtin("atan2")(y, x) - /** Returns values from the specified column rounded to the nearest equal or larger integer. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Returns values from the specified column rounded to the nearest equal or larger integer. + * + * @group num_func + * @since 0.1.0 + */ def ceil(e: Column): Column = builtin("ceil")(e) - /** Computes the cosine of its argument; the argument should be expressed in radians. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the cosine of its argument; the argument should be expressed in radians. + * + * @group num_func + * @since 0.1.0 + */ def cos(e: Column): Column = builtin("cos")(e) - /** Computes the hyperbolic cosine of its argument. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the hyperbolic cosine of its argument. + * + * @group num_func + * @since 0.1.0 + */ def cosh(e: Column): Column = builtin("cosh")(e) - /** Computes Euler's number e raised to a floating-point value. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes Euler's number e raised to a floating-point value. + * + * @group num_func + * @since 0.1.0 + */ def exp(e: Column): Column = builtin("exp")(e) - /** Computes the factorial of its input. The input argument must be an integer expression in the - * range of 0 to 33. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the factorial of its input. The input argument must be an integer expression in the + * range of 0 to 33. + * + * @group num_func + * @since 0.1.0 + */ def factorial(e: Column): Column = builtin("factorial")(e) - /** Returns values from the specified column rounded to the nearest equal or smaller integer. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Returns values from the specified column rounded to the nearest equal or smaller integer. + * + * @group num_func + * @since 0.1.0 + */ def floor(e: Column): Column = builtin("floor")(e) - /** Returns the largest value from a list of expressions. 
If any of the argument values is NULL, - * the result is NULL. GREATEST supports all data types, including VARIANT. - * - * @group con_func - * @since 0.1.0 - */ + /** + * Returns the largest value from a list of expressions. If any of the argument values is NULL, + * the result is NULL. GREATEST supports all data types, including VARIANT. + * + * @group con_func + * @since 0.1.0 + */ def greatest(exprs: Column*): Column = builtin("greatest")(exprs: _*) - /** Returns the smallest value from a list of expressions. LEAST supports all data types, - * including VARIANT. - * - * @group con_func - * @since 0.1.0 - */ + /** + * Returns the smallest value from a list of expressions. LEAST supports all data types, including + * VARIANT. + * + * @group con_func + * @since 0.1.0 + */ def least(exprs: Column*): Column = builtin("least")(exprs: _*) - /** Returns the logarithm of a numeric expression. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Returns the logarithm of a numeric expression. + * + * @group num_func + * @since 0.1.0 + */ def log(base: Column, a: Column): Column = builtin("log")(base, a) - /** Returns a number (l) raised to the specified power (r). - * - * @group num_func - * @since 0.1.0 - */ + /** + * Returns a number (l) raised to the specified power (r). + * + * @group num_func + * @since 0.1.0 + */ def pow(l: Column, r: Column): Column = builtin("pow")(l, r) - /** Returns a number (l) raised to the specified power (r). - * - * Example: - * {{{ - * val df = session.sql( - * "select * from (values (0.1, 2), (2, 3), (2, 0.5), (2, -1)) as T(base, exponent)") - * df.select(col("base"), col("exponent"), pow(col("base"), "exponent").as("result")).show() - * - * ---------------------------------------------- - * |"BASE" |"EXPONENT" |"RESULT" | - * ---------------------------------------------- - * |0.1 |2.0 |0.010000000000000002 | - * |2.0 |3.0 |8.0 | - * |2.0 |0.5 |1.4142135623730951 | - * |2.0 |-1.0 |0.5 | - * ---------------------------------------------- - * }}} - * - * @param l - * The numeric column representing the base. - * @param r - * The name of the numeric column representing the exponent. - * @return - * A column containing the result of raising `l` to the power of `r`. - * @group num_func - * @since 1.15.0 - */ + /** + * Returns a number (l) raised to the specified power (r). + * + * Example: + * {{{ + * val df = session.sql( + * "select * from (values (0.1, 2), (2, 3), (2, 0.5), (2, -1)) as T(base, exponent)") + * df.select(col("base"), col("exponent"), pow(col("base"), "exponent").as("result")).show() + * + * ---------------------------------------------- + * |"BASE" |"EXPONENT" |"RESULT" | + * ---------------------------------------------- + * |0.1 |2.0 |0.010000000000000002 | + * |2.0 |3.0 |8.0 | + * |2.0 |0.5 |1.4142135623730951 | + * |2.0 |-1.0 |0.5 | + * ---------------------------------------------- + * }}} + * + * @param l + * The numeric column representing the base. + * @param r + * The name of the numeric column representing the exponent. + * @return + * A column containing the result of raising `l` to the power of `r`. + * @group num_func + * @since 1.15.0 + */ def pow(l: Column, r: String): Column = pow(l, col(r)) - /** Returns a number (l) raised to the specified power (r). 
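The `greatest`, `least`, and `log` functions above have no inline examples, so here is an illustrative projection over a small VALUES clause:

{{{
  import com.snowflake.snowpark.functions._

  val df = session.sql("select * from (values (2, 10, 7)) as T(a, b, c)")
  df.select(
    greatest(col("a"), col("b"), col("c")),   // 10
    least(col("a"), col("b"), col("c")),      // 2
    log(lit(2), col("b"))).show()             // log base 2 of 10
}}}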
- * - * Example: - * {{{ - * val df = session.sql( - * "select * from (values (0.1, 2), (2, 3), (2, 0.5), (2, -1)) as T(base, exponent)") - * df.select(col("base"), col("exponent"), pow("base", col("exponent")).as("result")).show() - * - * ---------------------------------------------- - * |"BASE" |"EXPONENT" |"RESULT" | - * ---------------------------------------------- - * |0.1 |2.0 |0.010000000000000002 | - * |2.0 |3.0 |8.0 | - * |2.0 |0.5 |1.4142135623730951 | - * |2.0 |-1.0 |0.5 | - * ---------------------------------------------- - * }}} - * - * @param l - * The name of the numeric column representing the base. - * @param r - * The numeric column representing the exponent. - * @return - * A column containing the result of raising `l` to the power of `r`. - * @group num_func - * @since 1.15.0 - */ + /** + * Returns a number (l) raised to the specified power (r). + * + * Example: + * {{{ + * val df = session.sql( + * "select * from (values (0.1, 2), (2, 3), (2, 0.5), (2, -1)) as T(base, exponent)") + * df.select(col("base"), col("exponent"), pow("base", col("exponent")).as("result")).show() + * + * ---------------------------------------------- + * |"BASE" |"EXPONENT" |"RESULT" | + * ---------------------------------------------- + * |0.1 |2.0 |0.010000000000000002 | + * |2.0 |3.0 |8.0 | + * |2.0 |0.5 |1.4142135623730951 | + * |2.0 |-1.0 |0.5 | + * ---------------------------------------------- + * }}} + * + * @param l + * The name of the numeric column representing the base. + * @param r + * The numeric column representing the exponent. + * @return + * A column containing the result of raising `l` to the power of `r`. + * @group num_func + * @since 1.15.0 + */ def pow(l: String, r: Column): Column = pow(col(l), r) - /** Returns a number (l) raised to the specified power (r). - * - * Example: - * {{{ - * val df = session.sql( - * "select * from (values (0.1, 2), (2, 3), (2, 0.5), (2, -1)) as T(base, exponent)") - * df.select(col("base"), col("exponent"), pow("base", "exponent").as("result")).show() - * - * ---------------------------------------------- - * |"BASE" |"EXPONENT" |"RESULT" | - * ---------------------------------------------- - * |0.1 |2.0 |0.010000000000000002 | - * |2.0 |3.0 |8.0 | - * |2.0 |0.5 |1.4142135623730951 | - * |2.0 |-1.0 |0.5 | - * ---------------------------------------------- - * }}} - * - * @param l - * The name of the numeric column representing the base. - * @param r - * The name of the numeric column representing the exponent. - * @return - * A column containing the result of raising `l` to the power of `r`. - * @group num_func - * @since 1.15.0 - */ + /** + * Returns a number (l) raised to the specified power (r). + * + * Example: + * {{{ + * val df = session.sql( + * "select * from (values (0.1, 2), (2, 3), (2, 0.5), (2, -1)) as T(base, exponent)") + * df.select(col("base"), col("exponent"), pow("base", "exponent").as("result")).show() + * + * ---------------------------------------------- + * |"BASE" |"EXPONENT" |"RESULT" | + * ---------------------------------------------- + * |0.1 |2.0 |0.010000000000000002 | + * |2.0 |3.0 |8.0 | + * |2.0 |0.5 |1.4142135623730951 | + * |2.0 |-1.0 |0.5 | + * ---------------------------------------------- + * }}} + * + * @param l + * The name of the numeric column representing the base. + * @param r + * The name of the numeric column representing the exponent. + * @return + * A column containing the result of raising `l` to the power of `r`. 
+ * @group num_func + * @since 1.15.0 + */ def pow(l: String, r: String): Column = pow(col(l), col(r)) - /** Returns a number (l) raised to the specified power (r). - * - * Example: - * {{{ - * val df = session.sql("select * from (values (0.5), (2), (2.5), (4)) as T(base)") - * df.select(col("base"), lit(2.0).as("exponent"), pow(col("base"), 2.0).as("result")).show() - * - * ---------------------------------- - * |"BASE" |"EXPONENT" |"RESULT" | - * ---------------------------------- - * |0.5 |2.0 |0.25 | - * |2.0 |2.0 |4.0 | - * |2.5 |2.0 |6.25 | - * |4.0 |2.0 |16.0 | - * ---------------------------------- - * }}} - * - * @param l - * The numeric column representing the base. - * @param r - * The value of the exponent. - * @return - * A column containing the result of raising `l` to the power of `r`. - * @group num_func - * @since 1.15.0 - */ + /** + * Returns a number (l) raised to the specified power (r). + * + * Example: + * {{{ + * val df = session.sql("select * from (values (0.5), (2), (2.5), (4)) as T(base)") + * df.select(col("base"), lit(2.0).as("exponent"), pow(col("base"), 2.0).as("result")).show() + * + * ---------------------------------- + * |"BASE" |"EXPONENT" |"RESULT" | + * ---------------------------------- + * |0.5 |2.0 |0.25 | + * |2.0 |2.0 |4.0 | + * |2.5 |2.0 |6.25 | + * |4.0 |2.0 |16.0 | + * ---------------------------------- + * }}} + * + * @param l + * The numeric column representing the base. + * @param r + * The value of the exponent. + * @return + * A column containing the result of raising `l` to the power of `r`. + * @group num_func + * @since 1.15.0 + */ def pow(l: Column, r: Double): Column = pow(l, lit(r)) - /** Returns a number (l) raised to the specified power (r). - * - * Example: - * {{{ - * val df = session.sql("select * from (values (0.5), (2), (2.5), (4)) as T(base)") - * df.select(col("base"), lit(2.0).as("exponent"), pow("base", 2.0).as("result")).show() - * - * ---------------------------------- - * |"BASE" |"EXPONENT" |"RESULT" | - * ---------------------------------- - * |0.5 |2.0 |0.25 | - * |2.0 |2.0 |4.0 | - * |2.5 |2.0 |6.25 | - * |4.0 |2.0 |16.0 | - * ---------------------------------- - * }}} - * - * @param l - * The name of the numeric column representing the base. - * @param r - * The value of the exponent. - * @return - * A column containing the result of raising `l` to the power of `r`. - * @group num_func - * @since 1.15.0 - */ + /** + * Returns a number (l) raised to the specified power (r). + * + * Example: + * {{{ + * val df = session.sql("select * from (values (0.5), (2), (2.5), (4)) as T(base)") + * df.select(col("base"), lit(2.0).as("exponent"), pow("base", 2.0).as("result")).show() + * + * ---------------------------------- + * |"BASE" |"EXPONENT" |"RESULT" | + * ---------------------------------- + * |0.5 |2.0 |0.25 | + * |2.0 |2.0 |4.0 | + * |2.5 |2.0 |6.25 | + * |4.0 |2.0 |16.0 | + * ---------------------------------- + * }}} + * + * @param l + * The name of the numeric column representing the base. + * @param r + * The value of the exponent. + * @return + * A column containing the result of raising `l` to the power of `r`. + * @group num_func + * @since 1.15.0 + */ def pow(l: String, r: Double): Column = pow(col(l), r) - /** Returns a number (l) raised to the specified power (r). 
- * - * Example: - * {{{ - * val df = session.sql("select * from (values (0.5), (2), (2.5), (4)) as T(exponent)") - * df.select(lit(2.0).as("base"), col("exponent"), pow(2.0, col("exponent")).as("result")) - * .show() - * - * -------------------------------------------- - * |"BASE" |"EXPONENT" |"RESULT" | - * -------------------------------------------- - * |2.0 |0.5 |1.4142135623730951 | - * |2.0 |2.0 |4.0 | - * |2.0 |2.5 |5.656854249492381 | - * |2.0 |4.0 |16.0 | - * -------------------------------------------- - * }}} - * - * @param l - * The value of the base. - * @param r - * The numeric column representing the exponent. - * @return - * A column containing the result of raising `l` to the power of `r`. - * @group num_func - * @since 1.15.0 - */ + /** + * Returns a number (l) raised to the specified power (r). + * + * Example: + * {{{ + * val df = session.sql("select * from (values (0.5), (2), (2.5), (4)) as T(exponent)") + * df.select(lit(2.0).as("base"), col("exponent"), pow(2.0, col("exponent")).as("result")) + * .show() + * + * -------------------------------------------- + * |"BASE" |"EXPONENT" |"RESULT" | + * -------------------------------------------- + * |2.0 |0.5 |1.4142135623730951 | + * |2.0 |2.0 |4.0 | + * |2.0 |2.5 |5.656854249492381 | + * |2.0 |4.0 |16.0 | + * -------------------------------------------- + * }}} + * + * @param l + * The value of the base. + * @param r + * The numeric column representing the exponent. + * @return + * A column containing the result of raising `l` to the power of `r`. + * @group num_func + * @since 1.15.0 + */ def pow(l: Double, r: Column): Column = pow(lit(l), r) - /** Returns a number (l) raised to the specified power (r). - * - * Example: - * {{{ - * val df = session.sql("select * from (values (0.5), (2), (2.5), (4)) as T(exponent)") - * df.select(lit(2.0).as("base"), col("exponent"), pow(2.0, "exponent").as("result")).show() - * - * -------------------------------------------- - * |"BASE" |"EXPONENT" |"RESULT" | - * -------------------------------------------- - * |2.0 |0.5 |1.4142135623730951 | - * |2.0 |2.0 |4.0 | - * |2.0 |2.5 |5.656854249492381 | - * |2.0 |4.0 |16.0 | - * -------------------------------------------- - * }}} - * - * @param l - * The value of the base. - * @param r - * The name of the numeric column representing the exponent. - * @return - * A column containing the result of raising `l` to the power of `r`. - * @group num_func - * @since 1.15.0 - */ + /** + * Returns a number (l) raised to the specified power (r). + * + * Example: + * {{{ + * val df = session.sql("select * from (values (0.5), (2), (2.5), (4)) as T(exponent)") + * df.select(lit(2.0).as("base"), col("exponent"), pow(2.0, "exponent").as("result")).show() + * + * -------------------------------------------- + * |"BASE" |"EXPONENT" |"RESULT" | + * -------------------------------------------- + * |2.0 |0.5 |1.4142135623730951 | + * |2.0 |2.0 |4.0 | + * |2.0 |2.5 |5.656854249492381 | + * |2.0 |4.0 |16.0 | + * -------------------------------------------- + * }}} + * + * @param l + * The value of the base. + * @param r + * The name of the numeric column representing the exponent. + * @return + * A column containing the result of raising `l` to the power of `r`. + * @group num_func + * @since 1.15.0 + */ def pow(l: Double, r: String): Column = pow(l, col(r)) - /** Rounds the numeric values of the given column `e` to the `scale` decimal places using the half - * away from zero rounding mode. 
- * - * Example: - * {{{ - * val df = session.sql( - * "select * from (values (-3.78), (-2.55), (1.23), (2.55), (3.78)) as T(a)") - * df.select(round(col("a"), lit(1)).alias("round")).show() - * - * ----------- - * |"ROUND" | - * ----------- - * |-3.8 | - * |-2.6 | - * |1.2 | - * |2.6 | - * |3.8 | - * ----------- - * }}} - * - * @param e - * The column of numeric values to round. - * @param scale - * A column representing the number of decimal places to which `e` should be rounded. - * @return - * A new column containing the rounded numeric values. - * @group num_func - * @since 0.1.0 - */ + /** + * Rounds the numeric values of the given column `e` to the `scale` decimal places using the half + * away from zero rounding mode. + * + * Example: + * {{{ + * val df = session.sql( + * "select * from (values (-3.78), (-2.55), (1.23), (2.55), (3.78)) as T(a)") + * df.select(round(col("a"), lit(1)).alias("round")).show() + * + * ----------- + * |"ROUND" | + * ----------- + * |-3.8 | + * |-2.6 | + * |1.2 | + * |2.6 | + * |3.8 | + * ----------- + * }}} + * + * @param e + * The column of numeric values to round. + * @param scale + * A column representing the number of decimal places to which `e` should be rounded. + * @return + * A new column containing the rounded numeric values. + * @group num_func + * @since 0.1.0 + */ def round(e: Column, scale: Column): Column = builtin("round")(e, scale) - /** Rounds the numeric values of the given column `e` to 0 decimal places using the half away from - * zero rounding mode. - * - * Example: - * {{{ - * val df = session.sql("select * from (values (-3.7), (-2.5), (1.2), (2.5), (3.7)) as T(a)") - * df.select(round(col("a")).alias("round")).show() - * - * ----------- - * |"ROUND" | - * ----------- - * |-4 | - * |-3 | - * |1 | - * |3 | - * |4 | - * ----------- - * }}} - * - * @param e - * The column of numeric values to round. - * @return - * A new column containing the rounded numeric values. - * @group num_func - * @since 0.1.0 - */ + /** + * Rounds the numeric values of the given column `e` to 0 decimal places using the half away from + * zero rounding mode. + * + * Example: + * {{{ + * val df = session.sql("select * from (values (-3.7), (-2.5), (1.2), (2.5), (3.7)) as T(a)") + * df.select(round(col("a")).alias("round")).show() + * + * ----------- + * |"ROUND" | + * ----------- + * |-4 | + * |-3 | + * |1 | + * |3 | + * |4 | + * ----------- + * }}} + * + * @param e + * The column of numeric values to round. + * @return + * A new column containing the rounded numeric values. + * @group num_func + * @since 0.1.0 + */ def round(e: Column): Column = round(e, lit(0)) - /** Rounds the numeric values of the given column `e` to the `scale` decimal places using the half - * away from zero rounding mode. - * - * Example: - * {{{ - * val df = session.sql( - * "select * from (values (-3.78), (-2.55), (1.23), (2.55), (3.78)) as T(a)") - * df.select(round(col("a"), 1).alias("round")).show() - * - * ----------- - * |"ROUND" | - * ----------- - * |-3.8 | - * |-2.6 | - * |1.2 | - * |2.6 | - * |3.8 | - * ----------- - * }}} - * - * @param e - * The column of numeric values to round. - * @param scale - * The number of decimal places to which `e` should be rounded. - * @return - * A new column containing the rounded numeric values. - * @group num_func - * @since 1.14.0 - */ + /** + * Rounds the numeric values of the given column `e` to the `scale` decimal places using the half + * away from zero rounding mode. 
+ * + * Example: + * {{{ + * val df = session.sql( + * "select * from (values (-3.78), (-2.55), (1.23), (2.55), (3.78)) as T(a)") + * df.select(round(col("a"), 1).alias("round")).show() + * + * ----------- + * |"ROUND" | + * ----------- + * |-3.8 | + * |-2.6 | + * |1.2 | + * |2.6 | + * |3.8 | + * ----------- + * }}} + * + * @param e + * The column of numeric values to round. + * @param scale + * The number of decimal places to which `e` should be rounded. + * @return + * A new column containing the rounded numeric values. + * @group num_func + * @since 1.14.0 + */ def round(e: Column, scale: Int): Column = round(e, lit(scale)) - /** Shifts the bits for a numeric expression numBits positions to the left. - * - * @group bit_func - * @since 0.1.0 - */ + /** + * Shifts the bits for a numeric expression numBits positions to the left. + * + * @group bit_func + * @since 0.1.0 + */ def bitshiftleft(e: Column, numBits: Column): Column = withExpr { ShiftLeft(e.expr, numBits.expr) } - /** Shifts the bits for a numeric expression numBits positions to the right. - * - * @group bit_func - * @since 0.1.0 - */ + /** + * Shifts the bits for a numeric expression numBits positions to the right. + * + * @group bit_func + * @since 0.1.0 + */ def bitshiftright(e: Column, numBits: Column): Column = withExpr { ShiftRight(e.expr, numBits.expr) } - /** Computes the sine of its argument; the argument should be expressed in radians. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the sine of its argument; the argument should be expressed in radians. + * + * @group num_func + * @since 0.1.0 + */ def sin(e: Column): Column = builtin("sin")(e) - /** Computes the hyperbolic sine of its argument. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the hyperbolic sine of its argument. + * + * @group num_func + * @since 0.1.0 + */ def sinh(e: Column): Column = builtin("sinh")(e) - /** Computes the tangent of its argument; the argument should be expressed in radians. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the tangent of its argument; the argument should be expressed in radians. + * + * @group num_func + * @since 0.1.0 + */ def tan(e: Column): Column = builtin("tan")(e) - /** Computes the hyperbolic tangent of its argument. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Computes the hyperbolic tangent of its argument. + * + * @group num_func + * @since 0.1.0 + */ def tanh(e: Column): Column = builtin("tanh")(e) - /** Converts radians to degrees. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Converts radians to degrees. + * + * @group num_func + * @since 0.1.0 + */ def degrees(e: Column): Column = builtin("degrees")(e) - /** Converts degrees to radians. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Converts degrees to radians. + * + * @group num_func + * @since 0.1.0 + */ def radians(e: Column): Column = builtin("radians")(e) - /** Returns a 32-character hex-encoded string containing the 128-bit MD5 message digest. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns a 32-character hex-encoded string containing the 128-bit MD5 message digest. + * + * @group str_func + * @since 0.1.0 + */ def md5(e: Column): Column = builtin("md5")(e) - /** Returns a 40-character hex-encoded string containing the 160-bit SHA-1 message digest. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns a 40-character hex-encoded string containing the 160-bit SHA-1 message digest. 
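A small sketch of the bit-shift and angle-conversion functions documented above, over an inline VALUES clause:

{{{
  import com.snowflake.snowpark.functions._

  val df = session.sql("select * from (values (1), (4)) as T(n)")
  df.select(
    bitshiftleft(col("n"), lit(2)),       // n * 4
    bitshiftright(col("n"), lit(1)),      // n / 2, truncated
    degrees(radians(lit(180)))).show()    // round-trips back to 180
}}}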
+ * + * @group str_func + * @since 0.1.0 + */ def sha1(e: Column): Column = builtin("sha1")(e) - /** Returns a hex-encoded string containing the N-bit SHA-2 message digest, where N is the - * specified output digest size. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns a hex-encoded string containing the N-bit SHA-2 message digest, where N is the + * specified output digest size. + * + * @group str_func + * @since 0.1.0 + */ def sha2(e: Column, numBits: Int): Column = { require( Seq(0, 224, 256, 384, 512).contains(numBits), @@ -1177,355 +1278,395 @@ object functions { builtin("sha2")(e, Literal(numBits)) } - /** Returns a signed 64-bit hash value. Note that HASH never returns NULL, even for NULL inputs. - * - * @group utl_func - * @since 0.1.0 - */ + /** + * Returns a signed 64-bit hash value. Note that HASH never returns NULL, even for NULL inputs. + * + * @group utl_func + * @since 0.1.0 + */ def hash(cols: Column*): Column = builtin("hash")(cols: _*) - /** Returns the ASCII code for the first character of a string. If the string is empty, a value of - * 0 is returned. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the ASCII code for the first character of a string. If the string is empty, a value of + * 0 is returned. + * + * @group str_func + * @since 0.1.0 + */ def ascii(e: Column): Column = builtin("ascii")(e) - /** Concatenates two or more strings, or concatenates two or more binary values. If any of the - * values is null, the result is also null. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Concatenates two or more strings, or concatenates two or more binary values. If any of the + * values is null, the result is also null. + * + * @group str_func + * @since 0.1.0 + */ def concat_ws(separator: Column, exprs: Column*): Column = { val args = Seq(separator) ++ exprs builtin("concat_ws")(args: _*) } - /** Returns the input string with the first letter of each word in uppercase and the subsequent - * letters in lowercase. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the input string with the first letter of each word in uppercase and the subsequent + * letters in lowercase. + * + * @group str_func + * @since 0.1.0 + */ def initcap(e: Column): Column = builtin("initcap")(e) - /** Returns the length of an input string or binary value. For strings, the length is the number - * of characters, and UTF-8 characters are counted as a single character. For binary, the length - * is the number of bytes. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the length of an input string or binary value. For strings, the length is the number of + * characters, and UTF-8 characters are counted as a single character. For binary, the length is + * the number of bytes. + * + * @group str_func + * @since 0.1.0 + */ def length(e: Column): Column = builtin("length")(e) - /** Returns the input string with all characters converted to lowercase. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the input string with all characters converted to lowercase. + * + * @group str_func + * @since 0.1.0 + */ def lower(e: Column): Column = builtin("lower")(e) - /** Left-pads a string with characters from another string, or left-pads a binary value with bytes - * from another binary value. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Left-pads a string with characters from another string, or left-pads a binary value with bytes + * from another binary value. 
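The string and digest helpers above compose in one projection; the sample rows are illustrative:

{{{
  import com.snowflake.snowpark.functions._

  val df = session.createDataFrame(Seq(("john", "doe"))).toDF("first", "last")
  df.select(
    concat_ws(lit(" "), col("first"), col("last")),
    initcap(col("first")),
    length(col("last")),
    sha2(col("last"), 256)).show()
}}}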
+ * + * @group str_func + * @since 0.1.0 + */ def lpad(str: Column, len: Column, pad: Column): Column = builtin("lpad")(str, len, pad) - /** Removes leading characters, including whitespace, from a string. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Removes leading characters, including whitespace, from a string. + * + * @group str_func + * @since 0.1.0 + */ def ltrim(e: Column, trimString: Column): Column = builtin("ltrim")(e, trimString) - /** Removes leading characters, including whitespace, from a string. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Removes leading characters, including whitespace, from a string. + * + * @group str_func + * @since 0.1.0 + */ def ltrim(e: Column): Column = builtin("ltrim")(e) - /** Right-pads a string with characters from another string, or right-pads a binary value with - * bytes from another binary value. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Right-pads a string with characters from another string, or right-pads a binary value with + * bytes from another binary value. + * + * @group str_func + * @since 0.1.0 + */ def rpad(str: Column, len: Column, pad: Column): Column = builtin("rpad")(str, len, pad) - /** Builds a string by repeating the input for the specified number of times. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Builds a string by repeating the input for the specified number of times. + * + * @group str_func + * @since 0.1.0 + */ def repeat(str: Column, n: Column): Column = withExpr { StringRepeat(str.expr, n.expr) } - /** Removes trailing characters, including whitespace, from a string. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Removes trailing characters, including whitespace, from a string. + * + * @group str_func + * @since 0.1.0 + */ def rtrim(e: Column, trimString: Column): Column = builtin("rtrim")(e, trimString) - /** Removes trailing characters, including whitespace, from a string. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Removes trailing characters, including whitespace, from a string. + * + * @group str_func + * @since 0.1.0 + */ def rtrim(e: Column): Column = builtin("rtrim")(e) - /** Returns a string that contains a phonetic representation of the input string. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns a string that contains a phonetic representation of the input string. + * + * @group str_func + * @since 0.1.0 + */ def soundex(e: Column): Column = builtin("soundex")(e) - /** Splits a given string with a given separator and returns the result in an array of strings. To - * specify a string separator, use the lit() function. 
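A short sketch of the padding and trimming functions described above (the input string is arbitrary):

{{{
  import com.snowflake.snowpark.functions._

  val df = session.createDataFrame(Seq("  42  ")).toDF("s")
  df.select(
    lpad(ltrim(col("s")), lit(5), lit("0")),   // strip leading spaces, then left-pad with zeros
    rpad(rtrim(col("s")), lit(10), lit(".")),  // strip trailing spaces, then right-pad with dots
    repeat(lit("ab"), lit(3))).show()          // "ababab"
}}}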
- * - * Example 1: - * {{{ - * val df = session.createDataFrame( - * Seq(("many-many-words", "-"), ("hello--hello", "--"))).toDF("V", "D") - * df.select(split(col("V"), col("D"))).show() - * }}} - * ------------------------- \|"SPLIT(""V"", ""D"")" | ------------------------- - * | [ | - * |:---------| - * | "many", | - * | "many", | - * | "words" | - * | ] | - * | [ | - * | "hello", | - * | "hello" | - * | ] | - * ------------------------- - * - * Example 2: - * {{{ - * val df = session.createDataFrame(Seq("many-many-words", "hello-hi-hello")).toDF("V") - * df.select(split(col("V"), lit("-"))).show() - * }}} - * ------------------------- \|"SPLIT(""V"", ""D"")" | ------------------------- - * | [ | - * |:---------| - * | "many", | - * | "many", | - * | "words" | - * | ] | - * | [ | - * | "hello", | - * | "hello" | - * | ] | - * ------------------------- - * - * @group str_func - * @since 0.1.0 - */ + /** + * Splits a given string with a given separator and returns the result in an array of strings. To + * specify a string separator, use the lit() function. + * + * Example 1: + * {{{ + * val df = session.createDataFrame( + * Seq(("many-many-words", "-"), ("hello--hello", "--"))).toDF("V", "D") + * df.select(split(col("V"), col("D"))).show() + * }}} + * ------------------------- \|"SPLIT(""V"", ""D"")" | ------------------------- + * | [ | + * |:---------| + * | "many", | + * | "many", | + * | "words" | + * | ] | + * | [ | + * | "hello", | + * | "hello" | + * | ] | + * ------------------------- + * + * Example 2: + * {{{ + * val df = session.createDataFrame(Seq("many-many-words", "hello-hi-hello")).toDF("V") + * df.select(split(col("V"), lit("-"))).show() + * }}} + * ------------------------- \|"SPLIT(""V"", ""D"")" | ------------------------- + * | [ | + * |:---------| + * | "many", | + * | "many", | + * | "words" | + * | ] | + * | [ | + * | "hello", | + * | "hello" | + * | ] | + * ------------------------- + * + * @group str_func + * @since 0.1.0 + */ def split(str: Column, pattern: Column): Column = builtin("split")(str, pattern) - /** Returns the portion of the string or binary value str, starting from the character/byte - * specified by pos, with limited length. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the portion of the string or binary value str, starting from the character/byte + * specified by pos, with limited length. + * + * @group str_func + * @since 0.1.0 + */ def substring(str: Column, pos: Column, len: Column): Column = builtin("substring")(str, pos, len) - /** Translates src from the characters in matchingString to the characters in replaceString. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Translates src from the characters in matchingString to the characters in replaceString. + * + * @group str_func + * @since 0.1.0 + */ def translate(src: Column, matchingString: Column, replaceString: Column): Column = builtin("translate")(src, matchingString, replaceString) - /** Removes leading and trailing characters from a string. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Removes leading and trailing characters from a string. + * + * @group str_func + * @since 0.1.0 + */ def trim(e: Column, trimString: Column): Column = builtin("trim")(e, trimString) - /** Returns the input string with all characters converted to uppercase. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the input string with all characters converted to uppercase. 
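Beyond the split examples above, the neighbouring substring, translate, and trim overloads compose the same way; a minimal sketch with made-up data:
{{{
  val words = session.createDataFrame(Seq("many-many-words")).toDF("v")   // hypothetical sample data
  words.select(
    substring(col("v"), lit(1), lit(4)),      // "many" (positions are 1-based)
    translate(col("v"), lit("-"), lit(" ")),  // "many many words"
    trim(col("v"), lit("ms"))).show()         // leading/trailing "m" and "s" characters removed
}}}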
+ * + * @group str_func + * @since 0.1.0 + */ def upper(e: Column): Column = builtin("upper")(e) - /** Returns true if col contains str. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns true if col contains str. + * + * @group str_func + * @since 0.1.0 + */ def contains(col: Column, str: Column): Column = builtin("contains")(col, str) - /** Returns true if col starts with str. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns true if col starts with str. + * + * @group str_func + * @since 0.1.0 + */ def startswith(col: Column, str: Column): Column = builtin("startswith")(col, str) - /** Converts a Unicode code point (including 7-bit ASCII) into the character that matches the - * input Unicode. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Converts a Unicode code point (including 7-bit ASCII) into the character that matches the input + * Unicode. + * + * @group str_func + * @since 0.1.0 + */ def char(col: Column): Column = builtin("char")(col) - /** Adds or subtracts a specified number of months to a date or timestamp, preserving the - * end-of-month information. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Adds or subtracts a specified number of months to a date or timestamp, preserving the + * end-of-month information. + * + * @group date_func + * @since 0.1.0 + */ def add_months(startDate: Column, numMonths: Column): Column = builtin("add_months")(startDate, numMonths) - /** Returns the current date of the system. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the current date of the system. + * + * @group cont_func + * @since 0.1.0 + */ def current_date(): Column = builtin("current_date")() - /** Returns the current timestamp for the system. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the current timestamp for the system. + * + * @group cont_func + * @since 0.1.0 + */ def current_timestamp(): Column = builtin("current_timestamp")() - /** Returns the name of the region for the account where the current user is logged in. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the name of the region for the account where the current user is logged in. + * + * @group cont_func + * @since 0.1.0 + */ def current_region(): Column = builtin("current_region")() - /** Returns the current time for the system. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the current time for the system. + * + * @group cont_func + * @since 0.1.0 + */ def current_time(): Column = builtin("current_time")() - /** Returns the current Snowflake version. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the current Snowflake version. + * + * @group cont_func + * @since 0.1.0 + */ def current_version(): Column = builtin("current_version")() - /** Returns the account used by the user's current session. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the account used by the user's current session. + * + * @group cont_func + * @since 0.1.0 + */ def current_account(): Column = builtin("current_account")() - /** Returns the name of the role in use for the current session. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the name of the role in use for the current session. + * + * @group cont_func + * @since 0.1.0 + */ def current_role(): Column = builtin("current_role")() - /** Returns a JSON string that lists all roles granted to the current user. 
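A quick sketch combining the predicate and context functions above (startswith, contains, add_months, current_date); the single input row is illustrative only and a live `session` is assumed.
{{{
  val names = session.createDataFrame(Seq("Snowpark")).toDF("name")   // hypothetical sample data
  names.select(
    upper(col("name")),
    startswith(col("name"), lit("Snow")),      // true
    contains(col("name"), lit("park")),        // true
    add_months(current_date(), lit(3))).show() // three months from today
}}}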
- * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns a JSON string that lists all roles granted to the current user. + * + * @group cont_func + * @since 0.1.0 + */ def current_available_roles(): Column = builtin("current_available_roles")() - /** Returns a unique system identifier for the Snowflake session corresponding to the present - * connection. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns a unique system identifier for the Snowflake session corresponding to the present + * connection. + * + * @group cont_func + * @since 0.1.0 + */ def current_session(): Column = builtin("current_session")() - /** Returns the SQL text of the statement that is currently executing. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the SQL text of the statement that is currently executing. + * + * @group cont_func + * @since 0.1.0 + */ def current_statement(): Column = builtin("current_statement")() - /** Returns the name of the user currently logged into the system. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the name of the user currently logged into the system. + * + * @group cont_func + * @since 0.1.0 + */ def current_user(): Column = builtin("current_user")() - /** Returns the name of the database in use for the current session. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the name of the database in use for the current session. + * + * @group cont_func + * @since 0.1.0 + */ def current_database(): Column = builtin("current_database")() - /** Returns the name of the schema in use by the current session. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the name of the schema in use by the current session. + * + * @group cont_func + * @since 0.1.0 + */ def current_schema(): Column = builtin("current_schema")() - /** Returns active search path schemas. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns active search path schemas. + * + * @group cont_func + * @since 0.1.0 + */ def current_schemas(): Column = builtin("current_schemas")() - /** Returns the name of the warehouse in use for the current session. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the name of the warehouse in use for the current session. + * + * @group cont_func + * @since 0.1.0 + */ def current_warehouse(): Column = builtin("current_warehouse")() - /** Returns the current timestamp for the system, but in the UTC time zone. - * - * @group cont_func - * @since 0.1.0 - */ + /** + * Returns the current timestamp for the system, but in the UTC time zone. + * + * @group cont_func + * @since 0.1.0 + */ def sysdate(): Column = builtin("sysdate")() // scalastyle:off - /** Converts the given sourceTimestampNTZ from sourceTimeZone to targetTimeZone. - * - * Supported time zones are listed - * [[https://docs.snowflake.com/en/sql-reference/functions/convert_timezone.html#usage-notes here]] - * - * Example - * {{{ - * timestampNTZ.select(convert_timezone(lit("America/Los_Angeles"), lit("America/New_York"), col("time"))) - * }}} - * - * @group date_func - * @since 0.1.0 - */ + /** + * Converts the given sourceTimestampNTZ from sourceTimeZone to targetTimeZone. 
+ * + * Supported time zones are listed + * [[https://docs.snowflake.com/en/sql-reference/functions/convert_timezone.html#usage-notes here]] + * + * Example + * {{{ + * timestampNTZ.select(convert_timezone(lit("America/Los_Angeles"), lit("America/New_York"), col("time"))) + * }}} + * + * @group date_func + * @since 0.1.0 + */ // scalastyle:on def convert_timezone( sourceTimeZone: Column, @@ -1534,177 +1675,199 @@ object functions { builtin("convert_timezone")(sourceTimeZone, targetTimeZone, sourceTimestampNTZ) // scalastyle:off - /** Converts the given sourceTimestampNTZ to targetTimeZone. - * - * Supported time zones are listed - * [[https://docs.snowflake.com/en/sql-reference/functions/convert_timezone.html#usage-notes here]] - * - * Example - * {{{ - * timestamp.select(convert_timezone(lit("America/New_York"), col("time"))) - * }}} - * - * @group date_func - * @since 0.1.0 - */ + /** + * Converts the given sourceTimestampNTZ to targetTimeZone. + * + * Supported time zones are listed + * [[https://docs.snowflake.com/en/sql-reference/functions/convert_timezone.html#usage-notes here]] + * + * Example + * {{{ + * timestamp.select(convert_timezone(lit("America/New_York"), col("time"))) + * }}} + * + * @group date_func + * @since 0.1.0 + */ // scalastyle:on def convert_timezone(targetTimeZone: Column, sourceTimestamp: Column): Column = builtin("convert_timezone")(targetTimeZone, sourceTimestamp) - /** Extracts the year from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the year from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def year(e: Column): Column = builtin("year")(e) - /** Extracts the quarter from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the quarter from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def quarter(e: Column): Column = builtin("quarter")(e) - /** Extracts the month from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the month from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def month(e: Column): Column = builtin("month")(e) - /** Extracts the day of week from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the day of week from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def dayofweek(e: Column): Column = builtin("dayofweek")(e) - /** Extracts the day of month from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the day of month from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def dayofmonth(e: Column): Column = builtin("dayofmonth")(e) - /** Extracts the day of year from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the day of year from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def dayofyear(e: Column): Column = builtin("dayofyear")(e) - /** Extracts the hour from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the hour from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def hour(e: Column): Column = builtin("hour")(e) - /** Returns the last day of the specified date part for a date or timestamp. Commonly used to - * return the last day of the month for a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Returns the last day of the specified date part for a date or timestamp. 
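The date-part extractors above all take a DATE or TIMESTAMP column; a minimal sketch using current_timestamp() (documented earlier in this file) as the input, over a throwaway single-row frame:
{{{
  val t = current_timestamp()
  session.createDataFrame(Seq("x")).toDF("dummy").select(   // single-row frame, illustrative only
    year(t), quarter(t), month(t), dayofweek(t), hour(t),
    convert_timezone(lit("America/Los_Angeles"), t)).show()
}}}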
Commonly used to + * return the last day of the month for a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def last_day(e: Column): Column = builtin("last_day")(e) - /** Extracts the minute from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the minute from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def minute(e: Column): Column = builtin("minute")(e) - /** Returns the date of the first specified DOW (day of week) that occurs after the input date. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Returns the date of the first specified DOW (day of week) that occurs after the input date. + * + * @group date_func + * @since 0.1.0 + */ def next_day(date: Column, dayOfWeek: Column): Column = withExpr { NextDay(date.expr, lit(dayOfWeek).expr) } - /** Returns the date of the first specified DOW (day of week) that occurs before the input date. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Returns the date of the first specified DOW (day of week) that occurs before the input date. + * + * @group date_func + * @since 0.1.0 + */ def previous_day(date: Column, dayOfWeek: Column): Column = builtin("previous_day")(date, dayOfWeek) - /** Extracts the second from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the second from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def second(e: Column): Column = builtin("second")(e) - /** Extracts the week of year from a date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the week of year from a date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def weekofyear(e: Column): Column = builtin("weekofyear")(e) - /** Converts an input expression into the corresponding timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Converts an input expression into the corresponding timestamp. + * + * @group date_func + * @since 0.1.0 + */ def to_timestamp(s: Column): Column = builtin("to_timestamp")(s) - /** Converts an input expression into the corresponding timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Converts an input expression into the corresponding timestamp. + * + * @group date_func + * @since 0.1.0 + */ def to_timestamp(s: Column, fmt: Column): Column = builtin("to_timestamp")(s, fmt) - /** Converts an input expression to a date. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Converts an input expression to a date. + * + * @group date_func + * @since 0.1.0 + */ def to_date(e: Column): Column = builtin("to_date")(e) - /** Converts an input expression to a date. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Converts an input expression to a date. + * + * @group date_func + * @since 0.1.0 + */ def to_date(e: Column, fmt: Column): Column = builtin("to_date")(e, fmt) - /** Creates a date from individual numeric components that represent the year, month, and day of - * the month. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a date from individual numeric components that represent the year, month, and day of + * the month. + * + * @group date_func + * @since 0.1.0 + */ def date_from_parts(year: Column, month: Column, day: Column): Column = builtin("date_from_parts")(year, month, day) - /** Creates a time from individual numeric components. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a time from individual numeric components. 
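A short sketch of the conversion and construction functions above (to_date, last_day, next_day, weekofyear, date_from_parts); the sample string and format are illustrative.
{{{
  val d = session.createDataFrame(Seq("2024-10-01")).toDF("s")   // hypothetical sample data
  d.select(
    to_date(col("s"), lit("YYYY-MM-DD")),
    last_day(to_date(col("s"))),             // last day of that month
    next_day(to_date(col("s")), lit("FR")),  // first Friday after that date
    weekofyear(to_date(col("s"))),
    date_from_parts(lit(2024), lit(10), lit(1))).show()
}}}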
+ * + * @group date_func + * @since 0.1.0 + */ def time_from_parts(hour: Column, minute: Column, second: Column, nanoseconds: Column): Column = builtin("time_from_parts")(hour, minute, second, nanoseconds) - /** Creates a time from individual numeric components. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a time from individual numeric components. + * + * @group date_func + * @since 0.1.0 + */ def time_from_parts(hour: Column, minute: Column, second: Column): Column = builtin("time_from_parts")(hour, minute, second) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_from_parts( year: Column, month: Column, @@ -1714,12 +1877,13 @@ object functions { second: Column): Column = builtin("timestamp_from_parts")(year, month, day, hour, minute, second) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_from_parts( year: Column, month: Column, @@ -1730,21 +1894,23 @@ object functions { nanosecond: Column): Column = builtin("timestamp_from_parts")(year, month, day, hour, minute, second, nanosecond) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_from_parts(dateExpr: Column, timeExpr: Column): Column = builtin("timestamp_from_parts")(dateExpr, timeExpr) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_ltz_from_parts( year: Column, month: Column, @@ -1754,12 +1920,13 @@ object functions { second: Column): Column = builtin("timestamp_ltz_from_parts")(year, month, day, hour, minute, second) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. 
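The *_from_parts constructors above assemble TIME and TIMESTAMP values from numeric pieces, or from a date plus a time; a minimal sketch over a throwaway single-row frame:
{{{
  session.createDataFrame(Seq("x")).toDF("dummy").select(   // single-row frame, illustrative only
    time_from_parts(lit(23), lit(59), lit(59)),
    timestamp_from_parts(lit(2024), lit(10), lit(1), lit(17), lit(51), lit(36)),
    timestamp_from_parts(                                   // date expression plus time expression
      date_from_parts(lit(2024), lit(10), lit(1)),
      time_from_parts(lit(17), lit(51), lit(36)))).show()
}}}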
If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_ltz_from_parts( year: Column, month: Column, @@ -1770,12 +1937,13 @@ object functions { nanosecond: Column): Column = builtin("timestamp_ltz_from_parts")(year, month, day, hour, minute, second, nanosecond) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_ntz_from_parts( year: Column, month: Column, @@ -1785,12 +1953,13 @@ object functions { second: Column): Column = builtin("timestamp_ntz_from_parts")(year, month, day, hour, minute, second) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_ntz_from_parts( year: Column, month: Column, @@ -1801,21 +1970,23 @@ object functions { nanosecond: Column): Column = builtin("timestamp_ntz_from_parts")(year, month, day, hour, minute, second, nanosecond) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_ntz_from_parts(dateExpr: Column, timeExpr: Column): Column = builtin("timestamp_ntz_from_parts")(dateExpr, timeExpr) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_tz_from_parts( year: Column, month: Column, @@ -1825,12 +1996,13 @@ object functions { second: Column): Column = builtin("timestamp_tz_from_parts")(year, month, day, hour, minute, second) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. 
+ * + * @group date_func + * @since 0.1.0 + */ def timestamp_tz_from_parts( year: Column, month: Column, @@ -1841,12 +2013,13 @@ object functions { nanosecond: Column): Column = builtin("timestamp_tz_from_parts")(year, month, day, hour, minute, second, nanosecond) - /** Creates a timestamp from individual numeric components. If no time zone is in effect, the - * function can be used to create a timestamp from a date expression and a time expression. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Creates a timestamp from individual numeric components. If no time zone is in effect, the + * function can be used to create a timestamp from a date expression and a time expression. + * + * @group date_func + * @since 0.1.0 + */ def timestamp_tz_from_parts( year: Column, month: Column, @@ -1858,1339 +2031,1454 @@ object functions { timeZone: Column): Column = builtin("timestamp_tz_from_parts")(year, month, day, hour, minute, second, nanosecond, timeZone) - /** Extracts the three-letter day-of-week name from the specified date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the three-letter day-of-week name from the specified date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def dayname(expr: Column): Column = builtin("dayname")(expr) - /** Extracts the three-letter month name from the specified date or timestamp. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Extracts the three-letter month name from the specified date or timestamp. + * + * @group date_func + * @since 0.1.0 + */ def monthname(expr: Column): Column = builtin("monthname")(expr) // scalastyle:off - /** Adds the specified value for the specified date or time art to date or time expr. - * - * Supported date and time parts are listed - * [[https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts here]] - * - * Example: add one year on dates - * {{{ - * date.select(dateadd("year", lit(1), col("date_col"))) - * }}} - * - * @group date_func - * @since 0.1.0 - */ + /** + * Adds the specified value for the specified date or time art to date or time expr. + * + * Supported date and time parts are listed + * [[https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts here]] + * + * Example: add one year on dates + * {{{ + * date.select(dateadd("year", lit(1), col("date_col"))) + * }}} + * + * @group date_func + * @since 0.1.0 + */ // scalastyle:on def dateadd(part: String, value: Column, expr: Column): Column = builtin("dateadd")(part, value, expr) // scalastyle:off - /** Calculates the difference between two date, time, or timestamp columns based on the date or - * time part requested. - * - * Supported date and time parts are listed - * [[https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts here]] - * - * Example: year difference between two date columns - * {{{ - * date.select(datediff("year", col("date_col1"), col("date_col2"))), - * }}} - * - * @group date_func - * @since 0.1.0 - */ + /** + * Calculates the difference between two date, time, or timestamp columns based on the date or + * time part requested. 
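In addition to the dateadd example above, dayname and monthname extract short names from the same inputs; a quick sketch reusing current_timestamp():
{{{
  val t = current_timestamp()
  session.createDataFrame(Seq("x")).toDF("dummy").select(   // single-row frame, illustrative only
    dayname(t),                   // e.g. "Tue"
    monthname(t),                 // e.g. "Oct"
    dateadd("year", lit(1), t),
    dateadd("day", lit(-7), t)).show()
}}}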
+ * + * Supported date and time parts are listed + * [[https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts here]] + * + * Example: year difference between two date columns + * {{{ + * date.select(datediff("year", col("date_col1"), col("date_col2"))), + * }}} + * + * @group date_func + * @since 0.1.0 + */ // scalastyle:on def datediff(part: String, col1: Column, col2: Column): Column = builtin("datediff")(part, col1, col2) - /** Rounds the input expression down to the nearest (or equal) integer closer to zero, or to the - * nearest equal or smaller value with the specified number of places after the decimal point. - * - * @group num_func - * @since 0.1.0 - */ + /** + * Rounds the input expression down to the nearest (or equal) integer closer to zero, or to the + * nearest equal or smaller value with the specified number of places after the decimal point. + * + * @group num_func + * @since 0.1.0 + */ def trunc(expr: Column, scale: Column): Column = withExpr { Trunc(expr.expr, scale.expr) } - /** Truncates a DATE, TIME, or TIMESTAMP to the specified precision. - * - * @group date_func - * @since 0.1.0 - */ + /** + * Truncates a DATE, TIME, or TIMESTAMP to the specified precision. + * + * @group date_func + * @since 0.1.0 + */ def date_trunc(format: String, timestamp: Column): Column = withExpr { DateTrunc(Literal(format), timestamp.expr) } - /** Concatenates one or more strings, or concatenates one or more binary values. If any of the - * values is null, the result is also null. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Concatenates one or more strings, or concatenates one or more binary values. If any of the + * values is null, the result is also null. + * + * @group str_func + * @since 0.1.0 + */ def concat(exprs: Column*): Column = builtin("concat")(exprs: _*) - /** Compares whether two arrays have at least one element in common. Returns TRUE if there is at - * least one element in common; otherwise returns FALSE. The function is NULL-safe, meaning it - * treats NULLs as known values for comparing equality. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Compares whether two arrays have at least one element in common. Returns TRUE if there is at + * least one element in common; otherwise returns FALSE. The function is NULL-safe, meaning it + * treats NULLs as known values for comparing equality. + * + * @group semi_func + * @since 0.1.0 + */ def arrays_overlap(a1: Column, a2: Column): Column = withExpr { ArraysOverlap(a1.expr, a2.expr) } - /** Returns TRUE if expr ends with str. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns TRUE if expr ends with str. + * + * @group str_func + * @since 0.1.0 + */ def endswith(expr: Column, str: Column): Column = builtin("endswith")(expr, str) - /** Replaces a substring of the specified length, starting at the specified position, with a new - * string or binary value. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Replaces a substring of the specified length, starting at the specified position, with a new + * string or binary value. + * + * @group str_func + * @since 0.1.0 + */ def insert(baseExpr: Column, position: Column, length: Column, insertExpr: Column): Column = builtin("insert")(baseExpr, position, length, insertExpr) - /** Returns a left most substring of strExpr. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns a left most substring of strExpr. 
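A small sketch tying together the difference, truncation, and string helpers above (datediff, date_trunc, trunc, concat, endswith, insert); the literal values are only for illustration.
{{{
  val t = current_timestamp()
  session.createDataFrame(Seq("snowpark")).toDF("s").select(    // hypothetical sample data
    datediff("day", date_trunc("month", t), t),                 // days elapsed in the current month
    trunc(lit(3.14159), lit(2)),                                // 3.14
    concat(col("s"), lit("-"), lit("scala")),                   // "snowpark-scala"
    endswith(col("s"), lit("park")),                            // true
    insert(col("s"), lit(1), lit(4), lit("SNOW"))).show()       // "SNOWpark"
}}}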
+ * + * @group str_func + * @since 0.1.0 + */ def left(strExpr: Column, lengthExpr: Column): Column = builtin("left")(strExpr, lengthExpr) - /** Returns a right most substring of strExpr. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns a right most substring of strExpr. + * + * @group str_func + * @since 0.1.0 + */ def right(strExpr: Column, lengthExpr: Column): Column = builtin("right")(strExpr, lengthExpr) // scalastyle:off - /** Returns the number of times that a pattern occurs in a strExpr. - * - * Pattern syntax is specified - * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-general-usage-notes here]] - * - * Parameter detail is specified - * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-parameters-argument here]] - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the number of times that a pattern occurs in a strExpr. + * + * Pattern syntax is specified + * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-general-usage-notes here]] + * + * Parameter detail is specified + * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-parameters-argument here]] + * + * @group str_func + * @since 0.1.0 + */ // scalastyle:on def regexp_count(strExpr: Column, pattern: Column, position: Column, parameters: Column): Column = builtin("regexp_count")(strExpr, pattern, position, parameters) // scalastyle:off - /** Returns the number of times that a pattern occurs in a strExpr. - * - * Pattern syntax is specified - * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-general-usage-notes here]] - * - * Parameter detail is specified - * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-parameters-argument here]] - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the number of times that a pattern occurs in a strExpr. + * + * Pattern syntax is specified + * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-general-usage-notes here]] + * + * Parameter detail is specified + * [[https://docs.snowflake.com/en/sql-reference/functions-regexp.html#label-regexp-parameters-argument here]] + * + * @group str_func + * @since 0.1.0 + */ // scalastyle:on def regexp_count(strExpr: Column, pattern: Column): Column = builtin("regexp_count")(strExpr, pattern) - /** Returns the subject with the specified pattern (or all occurrences of the pattern) removed. If - * no matches are found, returns the original subject. - * - * @group str_func - * @since 1.9.0 - */ + /** + * Returns the subject with the specified pattern (or all occurrences of the pattern) removed. If + * no matches are found, returns the original subject. + * + * @group str_func + * @since 1.9.0 + */ def regexp_replace(strExpr: Column, pattern: Column): Column = builtin("regexp_replace")(strExpr, pattern) - /** Returns the subject with the specified pattern (or all occurrences of the pattern) replaced by - * a replacement string. If no matches are found, returns the original subject. - * - * @group str_func - * @since 1.9.0 - */ + /** + * Returns the subject with the specified pattern (or all occurrences of the pattern) replaced by + * a replacement string. If no matches are found, returns the original subject. 
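A minimal sketch of the substring and regular-expression helpers above (left, right, regexp_count, the two-argument regexp_replace); the pattern and data are illustrative.
{{{
  val s = session.createDataFrame(Seq("abc123def456")).toDF("s")   // hypothetical sample data
  s.select(
    left(col("s"), lit(3)),                          // "abc"
    right(col("s"), lit(3)),                         // "456"
    regexp_count(col("s"), lit("[0-9]+")),           // 2
    regexp_replace(col("s"), lit("[0-9]+"))).show()  // "abcdef" (matches removed)
}}}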
+ * + * @group str_func + * @since 1.9.0 + */ def regexp_replace(strExpr: Column, pattern: Column, replacement: Column): Column = builtin("regexp_replace")(strExpr, pattern, replacement) - /** Removes all occurrences of a specified strExpr, and optionally replaces them with replacement. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Removes all occurrences of a specified strExpr, and optionally replaces them with replacement. + * + * @group str_func + * @since 0.1.0 + */ def replace(strExpr: Column, pattern: Column, replacement: Column): Column = builtin("replace")(strExpr, pattern, replacement) - /** Removes all occurrences of a specified strExpr, and optionally replaces them with replacement. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Removes all occurrences of a specified strExpr, and optionally replaces them with replacement. + * + * @group str_func + * @since 0.1.0 + */ def replace(strExpr: Column, pattern: Column): Column = builtin("replace")(strExpr, pattern) - /** Searches for targetExpr in sourceExpr and, if successful, returns the position (1-based) of - * the targetExpr in sourceExpr. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Searches for targetExpr in sourceExpr and, if successful, returns the position (1-based) of the + * targetExpr in sourceExpr. + * + * @group str_func + * @since 0.1.0 + */ def charindex(targetExpr: Column, sourceExpr: Column): Column = builtin("charindex")(targetExpr, sourceExpr) - /** Searches for targetExpr in sourceExpr and, if successful, returns the position (1-based) of - * the targetExpr in sourceExpr. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Searches for targetExpr in sourceExpr and, if successful, returns the position (1-based) of the + * targetExpr in sourceExpr. + * + * @group str_func + * @since 0.1.0 + */ def charindex(targetExpr: Column, sourceExpr: Column, position: Column): Column = builtin("charindex")(targetExpr, sourceExpr, position) // scalastyle:off - /** Returns a copy of expr, but with the specified collationSpec property instead of the original - * collation specification property. - * - * Collation Specification is specified - * [[https://docs.snowflake.com/en/sql-reference/collation.html#label-collation-specification here]] - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns a copy of expr, but with the specified collationSpec property instead of the original + * collation specification property. + * + * Collation Specification is specified + * [[https://docs.snowflake.com/en/sql-reference/collation.html#label-collation-specification here]] + * + * @group str_func + * @since 0.1.0 + */ // scalastyle:on def collate(expr: Column, collationSpec: String): Column = builtin("collate")(expr, collationSpec) - /** Returns the collation specification of expr. - * - * @group str_func - * @since 0.1.0 - */ + /** + * Returns the collation specification of expr. + * + * @group str_func + * @since 0.1.0 + */ def collation(expr: Column): Column = builtin("collation")(expr) - /** Returns an ARRAY that contains the matching elements in the two input ARRAYs. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns an ARRAY that contains the matching elements in the two input ARRAYs. + * + * @group semi_func + * @since 0.1.0 + */ def array_intersection(col1: Column, col2: Column): Column = withExpr { ArrayIntersect(col1.expr, col2.expr) } - /** Returns true if the specified VARIANT column contains an ARRAY value. 
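A short sketch of the replacement, search, and collation helpers above (the three-argument regexp_replace, replace, charindex, collate, collation); the collation spec "en-ci" and the sample text are illustrative.
{{{
  val s = session.createDataFrame(Seq("foo  bar foo")).toDF("s")   // hypothetical sample data
  s.select(
    regexp_replace(col("s"), lit("\\s+"), lit(" ")),   // collapse runs of whitespace
    replace(col("s"), lit("foo"), lit("baz")),         // "baz  bar baz"
    charindex(lit("bar"), col("s")),                   // 6 (1-based; 0 when not found)
    collation(collate(col("s"), "en-ci"))).show()      // "en-ci"
}}}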
- * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains an ARRAY value. + * + * @group semi_func + * @since 0.1.0 + */ def is_array(col: Column): Column = { builtin("is_array")(col) } - /** Returns true if the specified VARIANT column contains a Boolean value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a Boolean value. + * + * @group semi_func + * @since 0.1.0 + */ def is_boolean(col: Column): Column = { builtin("is_boolean")(col) } - /** Returns true if the specified VARIANT column contains a binary value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a binary value. + * + * @group semi_func + * @since 0.1.0 + */ def is_binary(col: Column): Column = { builtin("is_binary")(col) } - /** Returns true if the specified VARIANT column contains a string value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a string value. + * + * @group semi_func + * @since 0.1.0 + */ def is_char(col: Column): Column = { builtin("is_char")(col) } - /** Returns true if the specified VARIANT column contains a string value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a string value. + * + * @group semi_func + * @since 0.1.0 + */ def is_varchar(col: Column): Column = { builtin("is_varchar")(col) } - /** Returns true if the specified VARIANT column contains a DATE value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a DATE value. + * + * @group semi_func + * @since 0.1.0 + */ def is_date(col: Column): Column = { builtin("is_date")(col) } - /** Returns true if the specified VARIANT column contains a DATE value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a DATE value. + * + * @group semi_func + * @since 0.1.0 + */ def is_date_value(col: Column): Column = { builtin("is_date_value")(col) } - /** Returns true if the specified VARIANT column contains a fixed-point decimal value or integer. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a fixed-point decimal value or integer. + * + * @group semi_func + * @since 0.1.0 + */ def is_decimal(col: Column): Column = { builtin("is_decimal")(col) } - /** Returns true if the specified VARIANT column contains a floating-point value, fixed-point - * decimal, or integer. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a floating-point value, fixed-point + * decimal, or integer. + * + * @group semi_func + * @since 0.1.0 + */ def is_double(col: Column): Column = { builtin("is_double")(col) } - /** Returns true if the specified VARIANT column contains a floating-point value, fixed-point - * decimal, or integer. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a floating-point value, fixed-point + * decimal, or integer. + * + * @group semi_func + * @since 0.1.0 + */ def is_real(col: Column): Column = { builtin("is_real")(col) } - /** Returns true if the specified VARIANT column contains an integer value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains an integer value. 
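The is_* predicates above inspect the underlying type held by a VARIANT column; a minimal sketch that builds the VARIANT with parse_json (documented further down in this file), using made-up JSON:
{{{
  val df = session.createDataFrame(Seq("[1, 2, 3]")).toDF("raw")   // hypothetical sample data
    .select(parse_json(col("raw")).as("v"))
  df.select(
    is_array(col("v")),     // true
    is_varchar(col("v")),   // false
    is_decimal(col("v")),   // false
    is_double(col("v"))).show()
}}}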
+ * + * @group semi_func + * @since 0.1.0 + */ def is_integer(col: Column): Column = { builtin("is_integer")(col) } - /** Returns true if the specified VARIANT column is a JSON null value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column is a JSON null value. + * + * @group semi_func + * @since 0.1.0 + */ def is_null_value(col: Column): Column = { builtin("is_null_value")(col) } - /** Returns true if the specified VARIANT column contains an OBJECT value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains an OBJECT value. + * + * @group semi_func + * @since 0.1.0 + */ def is_object(col: Column): Column = { builtin("is_object")(col) } - /** Returns true if the specified VARIANT column contains a TIME value. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a TIME value. + * + * @group semi_func + * @since 0.1.0 + */ def is_time(col: Column): Column = { builtin("is_time")(col) } - /** Returns true if the specified VARIANT column contains a TIMESTAMP value to be interpreted - * using the local time zone. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a TIMESTAMP value to be interpreted using + * the local time zone. + * + * @group semi_func + * @since 0.1.0 + */ def is_timestamp_ltz(col: Column): Column = { builtin("is_timestamp_ltz")(col) } - /** Returns true if the specified VARIANT column contains a TIMESTAMP value with no time zone. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a TIMESTAMP value with no time zone. + * + * @group semi_func + * @since 0.1.0 + */ def is_timestamp_ntz(col: Column): Column = { builtin("is_timestamp_ntz")(col) } - /** Returns true if the specified VARIANT column contains a TIMESTAMP value with a time zone. - * - * @group semi_func - * @since 0.1.0 - */ + /** + * Returns true if the specified VARIANT column contains a TIMESTAMP value with a time zone. + * + * @group semi_func + * @since 0.1.0 + */ def is_timestamp_tz(col: Column): Column = { builtin("is_timestamp_tz")(col) } - /** Checks the validity of a JSON document. If the input string is a valid JSON document or a NULL - * (i.e. no error would occur when parsing the input string), the function returns NULL. In case - * of a JSON parsing error, the function returns a string that contains the error message. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Checks the validity of a JSON document. If the input string is a valid JSON document or a NULL + * (i.e. no error would occur when parsing the input string), the function returns NULL. In case + * of a JSON parsing error, the function returns a string that contains the error message. + * + * @group semi_func + * @since 0.2.0 + */ def check_json(col: Column): Column = { builtin("check_json")(col) } - /** Checks the validity of an XML document. If the input string is a valid XML document or a NULL - * (i.e. no error would occur when parsing the input string), the function returns NULL. In case - * of an XML parsing error, the output string contains the error message. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Checks the validity of an XML document. If the input string is a valid XML document or a NULL + * (i.e. no error would occur when parsing the input string), the function returns NULL. 
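check_json above is handy for validating rows before handing them to parse_json (documented just below); a small sketch with deliberately malformed input:
{{{
  val raw = session.createDataFrame(Seq("""{"a": 1}""", "{oops")).toDF("j")   // hypothetical sample data
  raw.select(check_json(col("j"))).show()   // NULL for the valid document, an error message for "{oops"
  session.createDataFrame(Seq("""{"a": 1}""")).toDF("j")
    .select(is_object(parse_json(col("j"))), is_integer(parse_json(col("j")))).show()   // true, false
}}}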
In case + * of an XML parsing error, the output string contains the error message. + * + * @group semi_func + * @since 0.2.0 + */ def check_xml(col: Column): Column = { builtin("check_xml")(col) } - /** Parses a JSON string and returns the value of an element at a specified path in the resulting - * JSON document. - * - * @param col - * Column containing the JSON string that should be parsed. - * @param path - * Column containing the path to the element that should be extracted. - * @group semi_func - * @since 0.2.0 - */ + /** + * Parses a JSON string and returns the value of an element at a specified path in the resulting + * JSON document. + * + * @param col + * Column containing the JSON string that should be parsed. + * @param path + * Column containing the path to the element that should be extracted. + * @group semi_func + * @since 0.2.0 + */ def json_extract_path_text(col: Column, path: Column): Column = { builtin("json_extract_path_text")(col, path) } - /** Parse the value of the specified column as a JSON string and returns the resulting JSON - * document. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Parse the value of the specified column as a JSON string and returns the resulting JSON + * document. + * + * @group semi_func + * @since 0.2.0 + */ def parse_json(col: Column): Column = { builtin("parse_json")(col) } - /** Parse the value of the specified column as a JSON string and returns the resulting XML - * document. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Parse the value of the specified column as a JSON string and returns the resulting XML + * document. + * + * @group semi_func + * @since 0.2.0 + */ def parse_xml(col: Column): Column = { builtin("parse_xml")(col) } - /** Converts a JSON "null" value in the specified column to a SQL NULL value. All other VARIANT - * values in the column are returned unchanged. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Converts a JSON "null" value in the specified column to a SQL NULL value. All other VARIANT + * values in the column are returned unchanged. + * + * @group semi_func + * @since 0.2.0 + */ def strip_null_value(col: Column): Column = { builtin("strip_null_value")(col) } - /** Returns the input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is - * returned. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns the input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is + * returned. + * + * @group semi_func + * @since 0.2.0 + */ def array_agg(col: Column): Column = { builtin("array_agg")(col) } - /** Returns an ARRAY containing all elements from the source ARRAYas well as the new element. The - * new element is located at end of the ARRAY. - * - * @param array - * The column containing the source ARRAY. - * @param element - * The column containing the element to be appended. The element may be of almost any data - * type. The data type does not need to match the data type(s) of the existing elements in the - * ARRAY. - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an ARRAY containing all elements from the source ARRAYas well as the new element. The + * new element is located at end of the ARRAY. + * + * @param array + * The column containing the source ARRAY. + * @param element + * The column containing the element to be appended. The element may be of almost any data type. + * The data type does not need to match the data type(s) of the existing elements in the ARRAY. 
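A minimal sketch of the semi-structured parsing helpers above (parse_json, json_extract_path_text, strip_null_value); the JSON document and path are illustrative.
{{{
  val df = session.createDataFrame(Seq("""{"a": {"b": 1}}""")).toDF("raw")   // hypothetical sample data
  df.select(
    parse_json(col("raw")),                            // VARIANT holding the parsed document
    json_extract_path_text(col("raw"), lit("a.b")),    // "1"
    strip_null_value(parse_json(lit("null")))).show()  // JSON null becomes SQL NULL
}}}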
+ * @group semi_func + * @since 0.2.0 + */ def array_append(array: Column, element: Column): Column = { builtin("array_append")(array, element) } - /** Returns the concatenation of two ARRAYs. - * - * @param array1 - * Column containing the source ARRAY. - * @param array2 - * Column containing the ARRAY to be appended to {@code array1} . - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns the concatenation of two ARRAYs. + * + * @param array1 + * Column containing the source ARRAY. + * @param array2 + * Column containing the ARRAY to be appended to {@code array1} . + * @group semi_func + * @since 0.2.0 + */ def array_cat(array1: Column, array2: Column): Column = { builtin("array_cat")(array1, array2) } - /** Returns a compacted ARRAY with missing and null values removed, effectively converting sparse - * arrays into dense arrays. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns a compacted ARRAY with missing and null values removed, effectively converting sparse + * arrays into dense arrays. + * + * @group semi_func + * @since 0.2.0 + */ def array_compact(array: Column): Column = { builtin("array_compact")(array) } - /** Returns an ARRAY constructed from zero, one, or more inputs. - * - * @param cols - * Columns containing the values (or expressions that evaluate to values). The values do not - * all need to be of the same data type. - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an ARRAY constructed from zero, one, or more inputs. + * + * @param cols + * Columns containing the values (or expressions that evaluate to values). The values do not all + * need to be of the same data type. + * @group semi_func + * @since 0.2.0 + */ def array_construct(cols: Column*): Column = { builtin("array_construct")(cols: _*) } - /** Returns an ARRAY constructed from zero, one, or more inputs; the constructed ARRAY omits any - * NULL input values. - * - * @param cols - * Columns containing the values (or expressions that evaluate to values). The values do not - * all need to be of the same data type. - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an ARRAY constructed from zero, one, or more inputs; the constructed ARRAY omits any + * NULL input values. + * + * @param cols + * Columns containing the values (or expressions that evaluate to values). The values do not all + * need to be of the same data type. + * @group semi_func + * @since 0.2.0 + */ def array_construct_compact(cols: Column*): Column = { builtin("array_construct_compact")(cols: _*) } - /** Returns {@code true} if the specified VARIANT is found in the specified ARRAY. - * - * @param variant - * Column containing the VARIANT to find. - * @param array - * Column containing the ARRAY to search. - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns {@code true} if the specified VARIANT is found in the specified ARRAY. + * + * @param variant + * Column containing the VARIANT to find. + * @param array + * Column containing the ARRAY to search. + * @group semi_func + * @since 0.2.0 + */ def array_contains(variant: Column, array: Column): Column = { builtin("array_contains")(variant, array) } - /** Returns an ARRAY containing all elements from the source ARRAY as well as the new element. - * - * @param array - * Column containing the source ARRAY. - * @param pos - * Column containing a (zero-based) position in the source ARRAY. The new element is inserted - * at this position. 
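The ARRAY constructors above can be nested freely; a small sketch over a throwaway single-row frame, with made-up values:
{{{
  session.createDataFrame(Seq("x")).toDF("dummy").select(          // single-row frame, illustrative only
    array_construct(lit(1), lit(2)),                               // [1, 2]
    array_append(array_construct(lit(1), lit(2)), lit(3)),         // [1, 2, 3]
    array_cat(array_construct(lit(1)), array_construct(lit(2))),   // [1, 2]
    array_compact(parse_json(lit("[1, null, 2]")))).show()         // [1, 2]
}}}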
The original element from this position (if any) and all subsequent - * elements (if any) are shifted by one position to the right in the resulting array (i.e. - * inserting at position 0 has the same effect as using [[array_prepend]]). A negative position - * is interpreted as an index from the back of the array (e.g. {@code -1} results in insertion - * before the last element in the array). - * @param element - * Column containing the element to be inserted. The new element is located at position - * {@code pos} . The relative order of the other elements from the source array is preserved. - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an ARRAY containing all elements from the source ARRAY as well as the new element. + * + * @param array + * Column containing the source ARRAY. + * @param pos + * Column containing a (zero-based) position in the source ARRAY. The new element is inserted at + * this position. The original element from this position (if any) and all subsequent elements + * (if any) are shifted by one position to the right in the resulting array (i.e. inserting at + * position 0 has the same effect as using [[array_prepend]]). A negative position is + * interpreted as an index from the back of the array (e.g. {@code -1} results in insertion + * before the last element in the array). + * @param element + * Column containing the element to be inserted. The new element is located at position + * {@code pos} . The relative order of the other elements from the source array is preserved. + * @group semi_func + * @since 0.2.0 + */ def array_insert(array: Column, pos: Column, element: Column): Column = { builtin("array_insert")(array, pos, element) } - /** Returns the index of the first occurrence of an element in an ARRAY. - * - * @param variant - * Column containing the VARIANT value that you want to find. The function searches for the - * first occurrence of this value in the array. - * @param array - * Column containing the ARRAY to be searched. - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns the index of the first occurrence of an element in an ARRAY. + * + * @param variant + * Column containing the VARIANT value that you want to find. The function searches for the + * first occurrence of this value in the array. + * @param array + * Column containing the ARRAY to be searched. + * @group semi_func + * @since 0.2.0 + */ def array_position(variant: Column, array: Column): Column = { builtin("array_position")(variant, array) } - /** Returns an ARRAY containing the new element as well as all elements from the source ARRAY. The - * new element is positioned at the beginning of the ARRAY. - * - * @param array - * Column containing the source ARRAY. - * @param element - * Column containing the element to be prepended. - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an ARRAY containing the new element as well as all elements from the source ARRAY. The + * new element is positioned at the beginning of the ARRAY. + * + * @param array + * Column containing the source ARRAY. + * @param element + * Column containing the element to be prepended. + * @group semi_func + * @since 0.2.0 + */ def array_prepend(array: Column, element: Column): Column = { builtin("array_prepend")(array, element) } - /** Returns the size of the input ARRAY. - * - * If the specified column contains a VARIANT value that contains an ARRAY, the size of the ARRAY - * is returned; otherwise, NULL is returned if the value is not an ARRAY. 
- * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns the size of the input ARRAY. + * + * If the specified column contains a VARIANT value that contains an ARRAY, the size of the ARRAY + * is returned; otherwise, NULL is returned if the value is not an ARRAY. + * + * @group semi_func + * @since 0.2.0 + */ def array_size(array: Column): Column = { builtin("array_size")(array) } - /** Returns an ARRAY constructed from a specified subset of elements of the input ARRAY. - * - * @param array - * Column containing the source ARRAY. - * @param from - * Column containing a position in the source ARRAY. The position of the first element is - * {@code 0} . Elements from positions less than this parameter are not included in the - * resulting ARRAY. - * @param to - * Column containing a position in the source ARRAY. Elements from positions equal to or - * greater than this parameter are not included in the resulting array. - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an ARRAY constructed from a specified subset of elements of the input ARRAY. + * + * @param array + * Column containing the source ARRAY. + * @param from + * Column containing a position in the source ARRAY. The position of the first element is + * {@code 0} . Elements from positions less than this parameter are not included in the + * resulting ARRAY. + * @param to + * Column containing a position in the source ARRAY. Elements from positions equal to or greater + * than this parameter are not included in the resulting array. + * @group semi_func + * @since 0.2.0 + */ def array_slice(array: Column, from: Column, to: Column): Column = { builtin("array_slice")(array, from, to) } - /** Returns an input ARRAY converted to a string by casting all values to strings (using - * TO_VARCHAR) and concatenating them (using the string from the second argument to separate the - * elements). - * - * @param array - * Column containing the ARRAY of elements to convert to a string. - * @param separator - * Column containing the string to put between each element (e.g. a space, comma, or other - * human-readable separator). - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an input ARRAY converted to a string by casting all values to strings (using + * TO_VARCHAR) and concatenating them (using the string from the second argument to separate the + * elements). + * + * @param array + * Column containing the ARRAY of elements to convert to a string. + * @param separator + * Column containing the string to put between each element (e.g. a space, comma, or other + * human-readable separator). + * @group semi_func + * @since 0.2.0 + */ def array_to_string(array: Column, separator: Column): Column = { builtin("array_to_string")(array, separator) } - /** Returns one OBJECT per group. For each (key, value) input pair, where key must be a VARCHAR - * and value must be a VARIANT, the resulting OBJECT contains a key:value field. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns one OBJECT per group. For each (key, value) input pair, where key must be a VARCHAR and + * value must be a VARIANT, the resulting OBJECT contains a key:value field. + * + * @group semi_func + * @since 0.2.0 + */ def objectagg(key: Column, value: Column): Column = { builtin("objectagg")(key, value) } - /** Returns an OBJECT constructed from the arguments. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an OBJECT constructed from the arguments. 
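A quick sketch of the slicing, stringifying, and OBJECT construction helpers above (array_size, array_slice, array_to_string, object_construct); all values are made up.
{{{
  session.createDataFrame(Seq("x")).toDF("dummy").select(                           // single-row frame, illustrative only
    array_size(array_construct(lit(1), lit(2), lit(3))),                            // 3
    array_slice(array_construct(lit(1), lit(2), lit(3), lit(4)), lit(1), lit(3)),   // [2, 3]
    array_to_string(array_construct(lit("a"), lit("b")), lit(", ")),                // "a, b"
    object_construct(lit("k1"), lit(1), lit("k2"), lit(2))).show()                  // {"k1": 1, "k2": 2}
}}}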
+ * + * @group semi_func + * @since 0.2.0 + */ def object_construct(key_values: Column*): Column = { builtin("object_construct")(key_values: _*) } - /** Returns an object containing the contents of the input (i.e.source) object with one or more - * keys removed. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an object containing the contents of the input (i.e.source) object with one or more + * keys removed. + * + * @group semi_func + * @since 0.2.0 + */ def object_delete(obj: Column, key1: Column, keys: Column*): Column = { val args = Seq(obj, key1) ++ keys builtin("object_delete")(args: _*) } - /** Returns an object consisting of the input object with a new key-value pair inserted. The input - * key must not exist in the object. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an object consisting of the input object with a new key-value pair inserted. The input + * key must not exist in the object. + * + * @group semi_func + * @since 0.2.0 + */ def object_insert(obj: Column, key: Column, value: Column): Column = { builtin("object_insert")(obj, key, value) } - /** Returns an object consisting of the input object with a new key-value pair inserted (or an - * existing key updated with a new value). - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an object consisting of the input object with a new key-value pair inserted (or an + * existing key updated with a new value). + * + * @group semi_func + * @since 0.2.0 + */ def object_insert(obj: Column, key: Column, value: Column, update_flag: Column): Column = { builtin("object_insert")(obj, key, value, update_flag) } - /** Returns a new OBJECT containing some of the key-value pairs from an existing object. - * - * To identify the key-value pairs to include in the new object, pass in the keys as arguments, - * or pass in an array containing the keys. - * - * If a specified key is not present in the input object, the key is ignored. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns a new OBJECT containing some of the key-value pairs from an existing object. + * + * To identify the key-value pairs to include in the new object, pass in the keys as arguments, or + * pass in an array containing the keys. + * + * If a specified key is not present in the input object, the key is ignored. + * + * @group semi_func + * @since 0.2.0 + */ def object_pick(obj: Column, key1: Column, keys: Column*): Column = { val args = Seq(obj, key1) ++ keys builtin("object_pick")(args: _*) } - /** Casts a VARIANT value to an array. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to an array. + * + * @group semi_func + * @since 0.2.0 + */ def as_array(variant: Column): Column = { builtin("as_array")(variant) } - /** Casts a VARIANT value to a binary string. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a binary string. + * + * @group semi_func + * @since 0.2.0 + */ def as_binary(variant: Column): Column = { builtin("as_binary")(variant) } - /** Casts a VARIANT value to a string. Does not convert values of other types into string. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a string. Does not convert values of other types into string. + * + * @group semi_func + * @since 0.2.0 + */ def as_char(variant: Column): Column = { builtin("as_char")(variant) } - /** Casts a VARIANT value to a string. Does not convert values of other types into string. 
- * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a string. Does not convert values of other types into string. + * + * @group semi_func + * @since 0.2.0 + */ def as_varchar(variant: Column): Column = { builtin("as_varchar")(variant) } - /** Casts a VARIANT value to a date. Does not convert from timestamps. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a date. Does not convert from timestamps. + * + * @group semi_func + * @since 0.2.0 + */ def as_date(variant: Column): Column = { builtin("as_date")(variant) } - /** Casts a VARIANT value to a fixed-point decimal (does not match floating-point values). - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a fixed-point decimal (does not match floating-point values). + * + * @group semi_func + * @since 0.2.0 + */ def as_decimal(variant: Column): Column = { builtin("as_decimal")(variant) } - /** Casts a VARIANT value to a fixed-point decimal (does not match floating-point values), with - * precision. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a fixed-point decimal (does not match floating-point values), with + * precision. + * + * @group semi_func + * @since 0.2.0 + */ def as_decimal(variant: Column, precision: Int): Column = { builtin("as_decimal")(variant, sqlExpr(precision.toString)) } - /** Casts a VARIANT value to a fixed-point decimal (does not match floating-point values), with - * precision and scale. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a fixed-point decimal (does not match floating-point values), with + * precision and scale. + * + * @group semi_func + * @since 0.2.0 + */ def as_decimal(variant: Column, precision: Int, scale: Int): Column = { builtin("as_decimal")(variant, sqlExpr(precision.toString), sqlExpr(scale.toString)) } - /** Casts a VARIANT value to a fixed-point decimal (does not match floating-point values). - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a fixed-point decimal (does not match floating-point values). + * + * @group semi_func + * @since 0.2.0 + */ def as_number(variant: Column): Column = { builtin("as_number")(variant) } - /** Casts a VARIANT value to a fixed-point decimal (does not match floating-point values), with - * precision. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a fixed-point decimal (does not match floating-point values), with + * precision. + * + * @group semi_func + * @since 0.2.0 + */ def as_number(variant: Column, precision: Int): Column = { builtin("as_number")(variant, sqlExpr(precision.toString)) } - /** Casts a VARIANT value to a fixed-point decimal (does not match floating-point values), with - * precision and scale. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a fixed-point decimal (does not match floating-point values), with + * precision and scale. + * + * @group semi_func + * @since 0.2.0 + */ def as_number(variant: Column, precision: Int, scale: Int): Column = { builtin("as_number")(variant, sqlExpr(precision.toString), sqlExpr(scale.toString)) } - /** Casts a VARIANT value to a floating-point value. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a floating-point value. + * + * @group semi_func + * @since 0.2.0 + */ def as_double(variant: Column): Column = { builtin("as_double")(variant) } - /** Casts a VARIANT value to a floating-point value. 
- * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a floating-point value. + * + * @group semi_func + * @since 0.2.0 + */ def as_real(variant: Column): Column = { builtin("as_real")(variant) } - /** Casts a VARIANT value to an integer. Does not match non-integer values. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to an integer. Does not match non-integer values. + * + * @group semi_func + * @since 0.2.0 + */ def as_integer(variant: Column): Column = { builtin("as_integer")(variant) } - /** Casts a VARIANT value to an object. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to an object. + * + * @group semi_func + * @since 0.2.0 + */ def as_object(variant: Column): Column = { builtin("as_object")(variant) } - /** Casts a VARIANT value to a time value. Does not convert from timestamps. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a time value. Does not convert from timestamps. + * + * @group semi_func + * @since 0.2.0 + */ def as_time(variant: Column): Column = { builtin("as_time")(variant) } - /** Casts a VARIANT value to a TIMESTAMP value with local timezone. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a TIMESTAMP value with local timezone. + * + * @group semi_func + * @since 0.2.0 + */ def as_timestamp_ltz(variant: Column): Column = { builtin("as_timestamp_ltz")(variant) } - /** Casts a VARIANT value to a TIMESTAMP value with no timezone. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a TIMESTAMP value with no timezone. + * + * @group semi_func + * @since 0.2.0 + */ def as_timestamp_ntz(variant: Column): Column = { builtin("as_timestamp_ntz")(variant) } - /** Casts a VARIANT value to a TIMESTAMP value with timezone. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Casts a VARIANT value to a TIMESTAMP value with timezone. + * + * @group semi_func + * @since 0.2.0 + */ def as_timestamp_tz(variant: Column): Column = { builtin("as_timestamp_tz")(variant) } - /** Tokenizes the given string using the given set of delimiters and returns the tokens as an - * array. If either parameter is a NULL, a NULL is returned. An empty array is returned if - * tokenization produces no tokens. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Tokenizes the given string using the given set of delimiters and returns the tokens as an + * array. If either parameter is a NULL, a NULL is returned. An empty array is returned if + * tokenization produces no tokens. + * + * @group semi_func + * @since 0.2.0 + */ def strtok_to_array(array: Column): Column = { builtin("strtok_to_array")(array) } - /** Tokenizes the given string using the given set of delimiters and returns the tokens as an - * array. If either parameter is a NULL, a NULL is returned. An empty array is returned if - * tokenization produces no tokens. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Tokenizes the given string using the given set of delimiters and returns the tokens as an + * array. If either parameter is a NULL, a NULL is returned. An empty array is returned if + * tokenization produces no tokens. + * + * @group semi_func + * @since 0.2.0 + */ def strtok_to_array(array: Column, delimiter: Column): Column = { builtin("strtok_to_array")(array, delimiter) } - /** Converts the input expression into an array: - * - * If the input is an ARRAY, or VARIANT containing an array value, the result is unchanged. 
For - * NULL or a JSON null input, returns NULL. For any other value, the result is a single-element - * array containing this value. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Converts the input expression into an array: + * + * If the input is an ARRAY, or VARIANT containing an array value, the result is unchanged. For + * NULL or a JSON null input, returns NULL. For any other value, the result is a single-element + * array containing this value. + * + * @group semi_func + * @since 0.2.0 + */ def to_array(col: Column): Column = { builtin("to_array")(col) } - /** Converts any VARIANT value to a string containing the JSON representation of the value. If the - * input is NULL, the result is also NULL. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Converts any VARIANT value to a string containing the JSON representation of the value. If the + * input is NULL, the result is also NULL. + * + * @group semi_func + * @since 0.2.0 + */ def to_json(col: Column): Column = { builtin("to_json")(col) } - /** Converts the input value to an object: - * - * For a variant value containing an object, returns this object (in a value of type OBJECT). For - * a variant value containing JSON null or for NULL input, returns NULL. For all other input - * values, reports an error. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Converts the input value to an object: + * + * For a variant value containing an object, returns this object (in a value of type OBJECT). For + * a variant value containing JSON null or for NULL input, returns NULL. For all other input + * values, reports an error. + * + * @group semi_func + * @since 0.2.0 + */ def to_object(col: Column): Column = { builtin("to_object")(col) } - /** Converts any value to VARIANT value or NULL (if input is NULL). - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Converts any value to VARIANT value or NULL (if input is NULL). + * + * @group semi_func + * @since 0.2.0 + */ def to_variant(col: Column): Column = { builtin("to_variant")(col) } - /** Converts any VARIANT value to a string containing the XML representation of the value. If the - * input is NULL, the result is also NULL. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Converts any VARIANT value to a string containing the XML representation of the value. If the + * input is NULL, the result is also NULL. + * + * @group semi_func + * @since 0.2.0 + */ def to_xml(col: Column): Column = { builtin("to_xml")(col) } - /** Extracts a value from an object or array; returns NULL if either of the arguments is NULL. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Extracts a value from an object or array; returns NULL if either of the arguments is NULL. + * + * @group semi_func + * @since 0.2.0 + */ def get(col1: Column, col2: Column): Column = { builtin("get")(col1, col2) } - /** Extracts a field value from an object; returns NULL if either of the arguments is NULL. This - * function is similar to GET but applies case-insensitive matching to field names. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Extracts a field value from an object; returns NULL if either of the arguments is NULL. This + * function is similar to GET but applies case-insensitive matching to field names. + * + * @group semi_func + * @since 0.2.0 + */ def get_ignore_case(obj: Column, field: Column): Column = { builtin("get_ignore_case")(obj, field) } - /** Returns an array containing the list of keys in the input object. 
- * - * @group semi_func - * @since 0.2.0 - */ + /** + * Returns an array containing the list of keys in the input object. + * + * @group semi_func + * @since 0.2.0 + */ def object_keys(obj: Column): Column = { builtin("object_keys")(obj) } - /** Extracts an XML element object (often referred to as simply a tag) from a content of outer XML - * element object by the name of the tag and its instance number (counting from 0). - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Extracts an XML element object (often referred to as simply a tag) from a content of outer XML + * element object by the name of the tag and its instance number (counting from 0). + * + * @group semi_func + * @since 0.2.0 + */ def xmlget(xml: Column, tag: Column, instance: Column): Column = { builtin("xmlget")(xml, tag, instance) } - /** Extracts the first XML element object (often referred to as simply a tag) from a content of - * outer XML element object by the name of the tag - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Extracts the first XML element object (often referred to as simply a tag) from a content of + * outer XML element object by the name of the tag + * + * @group semi_func + * @since 0.2.0 + */ def xmlget(xml: Column, tag: Column): Column = { builtin("xmlget")(xml, tag) } - /** Extracts a value from semi-structured data using a path name. - * - * @group semi_func - * @since 0.2.0 - */ + /** + * Extracts a value from semi-structured data using a path name. + * + * @group semi_func + * @since 0.2.0 + */ def get_path(col: Column, path: Column): Column = { builtin("get_path")(col, path) } - /** Works like a cascading if-then-else statement. A series of conditions are evaluated in - * sequence. When a condition evaluates to TRUE, the evaluation stops and the associated result - * (after THEN) is returned. If none of the conditions evaluate to TRUE, then the result after - * the optional OTHERWISE is returned, if present; otherwise NULL is returned. For Example: - * {{{ - * import functions._ - * df.select( - * when(col("col").is_null, lit(1)) - * .when(col("col") === 1, lit(2)) - * .otherwise(lit(3)) - * ) - * }}} - * - * @group con_func - * @since 0.2.0 - */ + /** + * Works like a cascading if-then-else statement. A series of conditions are evaluated in + * sequence. When a condition evaluates to TRUE, the evaluation stops and the associated result + * (after THEN) is returned. If none of the conditions evaluate to TRUE, then the result after the + * optional OTHERWISE is returned, if present; otherwise NULL is returned. For Example: + * {{{ + * import functions._ + * df.select( + * when(col("col").is_null, lit(1)) + * .when(col("col") === 1, lit(2)) + * .otherwise(lit(3)) + * ) + * }}} + * + * @group con_func + * @since 0.2.0 + */ def when(condition: Column, value: Column): CaseExpr = new CaseExpr(Seq((condition.expr, value.expr))) - /** Returns one of two specified expressions, depending on a condition. - * - * This is equivalent to an `if-then-else` expression. If `condition` evaluates to TRUE, the - * function returns `expr1`. Otherwise, the function returns `expr2`. - * - * @group con_func - * @param condition - * The condition to evaluate. - * @param expr1 - * The expression to return if the condition evaluates to TRUE. - * @param expr2 - * The expression to return if the condition is not TRUE (i.e. if it is FALSE or NULL). - * @since 0.9.0 - */ + /** + * Returns one of two specified expressions, depending on a condition. + * + * This is equivalent to an `if-then-else` expression. 
If `condition` evaluates to TRUE, the + * function returns `expr1`. Otherwise, the function returns `expr2`. + * + * @group con_func + * @param condition + * The condition to evaluate. + * @param expr1 + * The expression to return if the condition evaluates to TRUE. + * @param expr2 + * The expression to return if the condition is not TRUE (i.e. if it is FALSE or NULL). + * @since 0.9.0 + */ def iff(condition: Column, expr1: Column, expr2: Column): Column = builtin("iff")(condition, expr1, expr2) - /** Returns a conditional expression that you can pass to the filter or where method to perform - * the equivalent of a WHERE ... IN query that matches rows containing a sequence of values. - * - * The expression evaluates to true if the values in a row matches the values in one of the - * specified sequences. - * - * For example, the following code returns a DataFrame that contains the rows in which the - * columns `c1` and `c2` contain the values: - * - `1` and `"a"`, or - * - `2` and `"b"` This is equivalent to `SELECT * FROM table WHERE (c1, c2) IN ((1, 'a'), (2, - * 'b'))`. - * {{{ - * val df2 = df.filter(functions.in(Seq(df("c1"), df("c2")), Seq(Seq(1, "a"), Seq(2, "b")))) - * }}} - * @group con_func - * @param columns - * A sequence of the columns to compare for the IN operation. - * @param values - * A sequence containing the sequences of values to compare for the IN operation. - * @since 0.10.0 - */ + /** + * Returns a conditional expression that you can pass to the filter or where method to perform the + * equivalent of a WHERE ... IN query that matches rows containing a sequence of values. + * + * The expression evaluates to true if the values in a row matches the values in one of the + * specified sequences. + * + * For example, the following code returns a DataFrame that contains the rows in which the columns + * `c1` and `c2` contain the values: + * - `1` and `"a"`, or + * - `2` and `"b"` This is equivalent to `SELECT * FROM table WHERE (c1, c2) IN ((1, 'a'), (2, + * 'b'))`. + * {{{ + * val df2 = df.filter(functions.in(Seq(df("c1"), df("c2")), Seq(Seq(1, "a"), Seq(2, "b")))) + * }}} + * @group con_func + * @param columns + * A sequence of the columns to compare for the IN operation. + * @param values + * A sequence containing the sequences of values to compare for the IN operation. + * @since 0.10.0 + */ def in(columns: Seq[Column], values: Seq[Seq[Any]]): Column = Column(MultipleExpression(columns.map(_.expr))).in(values) - /** Returns a conditional expression that you can pass to the filter or where method to perform - * the equivalent of a WHERE ... IN query with the subquery represented by the specified - * DataFrame. - * - * The expression evaluates to true if the value in the column is one of the values in the column - * of the same name in a specified DataFrame. - * - * For example, the following code returns a DataFrame that contains the rows where the values of - * the columns `c1` and `c2` in `df2` match the values of the columns `a` and `b` in `df1`. This - * is equivalent to SELECT * FROM table2 WHERE (c1, c2) IN (SELECT a, b FROM table1). - * {{{ - * val df1 = session.sql("select a, b from table1"). - * val df2 = session.table(table2) - * val dfFilter = df2.filter(functions.in(Seq(col("c1"), col("c2")), df1)) - * }}} - * - * @group con_func - * @param columns - * A sequence of the columns to compare for the IN operation. 
- * @param df - * The DataFrame used as the values for the IN operation - * @since 0.10.0 - */ + /** + * Returns a conditional expression that you can pass to the filter or where method to perform the + * equivalent of a WHERE ... IN query with the subquery represented by the specified DataFrame. + * + * The expression evaluates to true if the value in the column is one of the values in the column + * of the same name in a specified DataFrame. + * + * For example, the following code returns a DataFrame that contains the rows where the values of + * the columns `c1` and `c2` in `df2` match the values of the columns `a` and `b` in `df1`. This + * is equivalent to SELECT * FROM table2 WHERE (c1, c2) IN (SELECT a, b FROM table1). + * {{{ + * val df1 = session.sql("select a, b from table1"). + * val df2 = session.table(table2) + * val dfFilter = df2.filter(functions.in(Seq(col("c1"), col("c2")), df1)) + * }}} + * + * @group con_func + * @param columns + * A sequence of the columns to compare for the IN operation. + * @param df + * The DataFrame used as the values for the IN operation + * @since 0.10.0 + */ def in(columns: Seq[Column], df: DataFrame): Column = { Column(MultipleExpression(columns.map(_.expr))).in(df) } - /** Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around - * occurs after the largest representable integer of the integer width 1 byte. the sequence - * continues at 0 after wrap-around. - * - * @since 0.11.0 - * @group gen_func - */ + /** + * Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around occurs + * after the largest representable integer of the integer width 1 byte. the sequence continues at + * 0 after wrap-around. + * + * @since 0.11.0 + * @group gen_func + */ def seq1(): Column = seq1(true) - /** Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around - * occurs after the largest representable integer of the integer width 1 byte. - * - * @param startsFromZero - * if true, the sequence continues at 0 after wrap-around, otherwise, continues at the smallest - * representable number based on the given integer width. - * @since 0.11.0 - * @group gen_func - */ + /** + * Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around occurs + * after the largest representable integer of the integer width 1 byte. + * + * @param startsFromZero + * if true, the sequence continues at 0 after wrap-around, otherwise, continues at the smallest + * representable number based on the given integer width. + * @since 0.11.0 + * @group gen_func + */ def seq1(startsFromZero: Boolean): Column = builtin("seq1")(if (startsFromZero) 0 else 1) - /** Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around - * occurs after the largest representable integer of the integer width 2 byte. the sequence - * continues at 0 after wrap-around. - * - * @since 0.11.0 - * @group gen_func - */ + /** + * Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around occurs + * after the largest representable integer of the integer width 2 byte. the sequence continues at + * 0 after wrap-around. + * + * @since 0.11.0 + * @group gen_func + */ def seq2(): Column = seq2(true) - /** Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around - * occurs after the largest representable integer of the integer width 2 byte. 
- * - * @param startsFromZero - * if true, the sequence continues at 0 after wrap-around, otherwise, continues at the smallest - * representable number based on the given integer width. - * @since 0.11.0 - * @group gen_func - */ + /** + * Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around occurs + * after the largest representable integer of the integer width 2 byte. + * + * @param startsFromZero + * if true, the sequence continues at 0 after wrap-around, otherwise, continues at the smallest + * representable number based on the given integer width. + * @since 0.11.0 + * @group gen_func + */ def seq2(startsFromZero: Boolean): Column = builtin("seq2")(if (startsFromZero) 0 else 1) - /** Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around - * occurs after the largest representable integer of the integer width 4 byte. the sequence - * continues at 0 after wrap-around. - * - * @since 0.11.0 - * @group gen_func - */ + /** + * Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around occurs + * after the largest representable integer of the integer width 4 byte. the sequence continues at + * 0 after wrap-around. + * + * @since 0.11.0 + * @group gen_func + */ def seq4(): Column = seq4(true) - /** Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around - * occurs after the largest representable integer of the integer width 4 byte. - * - * @param startsFromZero - * if true, the sequence continues at 0 after wrap-around, otherwise, continues at the smallest - * representable number based on the given integer width. - * @since 0.11.0 - * @group gen_func - */ + /** + * Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around occurs + * after the largest representable integer of the integer width 4 byte. + * + * @param startsFromZero + * if true, the sequence continues at 0 after wrap-around, otherwise, continues at the smallest + * representable number based on the given integer width. + * @since 0.11.0 + * @group gen_func + */ def seq4(startsFromZero: Boolean): Column = builtin("seq4")(if (startsFromZero) 0 else 1) - /** Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around - * occurs after the largest representable integer of the integer width 8 byte. the sequence - * continues at 0 after wrap-around. - * - * @since 0.11.0 - * @group gen_func - */ + /** + * Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around occurs + * after the largest representable integer of the integer width 8 byte. the sequence continues at + * 0 after wrap-around. + * + * @since 0.11.0 + * @group gen_func + */ def seq8(): Column = seq8(true) - /** Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around - * occurs after the largest representable integer of the integer width 8 byte. - * - * @param startsFromZero - * if true, the sequence continues at 0 after wrap-around, otherwise, continues at the smallest - * representable number based on the given integer width. - * @since 0.11.0 - * @group gen_func - */ + /** + * Generates a sequence of monotonically increasing integers, with wrap-around. Wrap-around occurs + * after the largest representable integer of the integer width 8 byte. 
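+   *
+   * A short usage sketch (assumes an active `session`; it mirrors the `session.generator` example
+   * shown for `uniform` later in this file):
+   * {{{
+   *   // three rows of a wrapping 8-byte sequence that restarts at 0 after wrap-around
+   *   session.generator(3, seq8(true)).show()
+   * }}}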
+ * + * @param startsFromZero + * if true, the sequence continues at 0 after wrap-around, otherwise, continues at the smallest + * representable number based on the given integer width. + * @since 0.11.0 + * @group gen_func + */ def seq8(startsFromZero: Boolean): Column = builtin("seq8")(if (startsFromZero) 0 else 1) // scalastyle:off - /** Returns a uniformly random number, in the inclusive range (`min`, `max`) - * - * For example: - * {{{ - * import com.snowflake.snowpark.functions._ - * session.generator(10, seq4(), uniform(lit(1), lit(5), random())).show() - * }}} - * - * @param min - * The lower bound - * @param max - * The upper bound - * @param gen - * The generator expression for the function. for more information, see - * [[https://docs.snowflake.com/en/sql-reference/functions-data-generation.html#label-rand-dist-functions]] - * @since 0.11.0 - * @group gen_func - */ + /** + * Returns a uniformly random number, in the inclusive range (`min`, `max`) + * + * For example: + * {{{ + * import com.snowflake.snowpark.functions._ + * session.generator(10, seq4(), uniform(lit(1), lit(5), random())).show() + * }}} + * + * @param min + * The lower bound + * @param max + * The upper bound + * @param gen + * The generator expression for the function. for more information, see + * [[https://docs.snowflake.com/en/sql-reference/functions-data-generation.html#label-rand-dist-functions]] + * @since 0.11.0 + * @group gen_func + */ // scalastyle:on def uniform(min: Column, max: Column, gen: Column): Column = builtin("uniform")(min, max, gen) - /** Returns the concatenated input values, separated by `delimiter` string. - * - * For example: - * {{{ - * df.groupBy(df.col("col1")).agg(listagg(df.col("col2"), ",") - * .withinGroup(df.col("col2").asc)) - * - * df.select(listagg(df.col("col2"), ",", false)) - * }}} - * - * @param col - * The expression (typically a Column) that determines the values to be put into the list. The - * expression should evaluate to a string, or to a data type that can be cast to string. - * @param delimiter - * A string delimiter. - * @param isDistinct - * Whether the input expression is distinct. - * @since 0.12.0 - * @group agg_func - */ + /** + * Returns the concatenated input values, separated by `delimiter` string. + * + * For example: + * {{{ + * df.groupBy(df.col("col1")).agg(listagg(df.col("col2"), ",") + * .withinGroup(df.col("col2").asc)) + * + * df.select(listagg(df.col("col2"), ",", false)) + * }}} + * + * @param col + * The expression (typically a Column) that determines the values to be put into the list. The + * expression should evaluate to a string, or to a data type that can be cast to string. + * @param delimiter + * A string delimiter. + * @param isDistinct + * Whether the input expression is distinct. + * @since 0.12.0 + * @group agg_func + */ def listagg(col: Column, delimiter: String, isDistinct: Boolean): Column = Column(ListAgg(col.expr, delimiter, isDistinct)) - /** Returns the concatenated input values, separated by `delimiter` string. - * - * For example: - * {{{ - * df.groupBy(df.col("col1")).agg(listagg(df.col("col2"), ",") - * .withinGroup(df.col("col2").asc)) - * - * df.select(listagg(df.col("col2"), ",", false)) - * }}} - * - * @param col - * The expression (typically a Column) that determines the values to be put into the list. The - * expression should evaluate to a string, or to a data type that can be cast to string. - * @param delimiter - * A string delimiter. 
- * @since 0.12.0 - * @group agg_func - */ + /** + * Returns the concatenated input values, separated by `delimiter` string. + * + * For example: + * {{{ + * df.groupBy(df.col("col1")).agg(listagg(df.col("col2"), ",") + * .withinGroup(df.col("col2").asc)) + * + * df.select(listagg(df.col("col2"), ",", false)) + * }}} + * + * @param col + * The expression (typically a Column) that determines the values to be put into the list. The + * expression should evaluate to a string, or to a data type that can be cast to string. + * @param delimiter + * A string delimiter. + * @since 0.12.0 + * @group agg_func + */ def listagg(col: Column, delimiter: String): Column = listagg(col, delimiter, isDistinct = false) - /** Returns the concatenated input values, separated by empty string. - * - * For example: - * {{{ - * df.groupBy(df.col("col1")).agg(listagg(df.col("col2"), ",") - * .withinGroup(df.col("col2").asc)) - * - * df.select(listagg(df.col("col2"), ",", false)) - * }}} - * - * @param col - * The expression (typically a Column) that determines the values to be put into the list. The - * expression should evaluate to a string, or to a data type that can be cast to string. - * @since 0.12.0 - * @group agg_func - */ + /** + * Returns the concatenated input values, separated by empty string. + * + * For example: + * {{{ + * df.groupBy(df.col("col1")).agg(listagg(df.col("col2"), ",") + * .withinGroup(df.col("col2").asc)) + * + * df.select(listagg(df.col("col2"), ",", false)) + * }}} + * + * @param col + * The expression (typically a Column) that determines the values to be put into the list. The + * expression should evaluate to a string, or to a data type that can be cast to string. + * @since 0.12.0 + * @group agg_func + */ def listagg(col: Column): Column = listagg(col, "", isDistinct = false) - /** Wrapper for Snowflake built-in reverse function. Gets the reversed string. Reverses the order - * of characters in a string, or of bytes in a binary value. The returned value is the same - * length as the input, but with the characters/bytes in reverse order. If subject is NULL, the - * result is also NULL. Example: SELECT REVERSE('Hello, world!'); - * | REVERSE('HELLO, WORLD!') | - * |:-------------------------| - * | !dlrow ,olleH | - * - * @since 1.14.0 - * @param c - * Column to be reverse. - * @return - * Column object. - */ + /** + * Wrapper for Snowflake built-in reverse function. Gets the reversed string. Reverses the order + * of characters in a string, or of bytes in a binary value. The returned value is the same length + * as the input, but with the characters/bytes in reverse order. If subject is NULL, the result is + * also NULL. Example: SELECT REVERSE('Hello, world!'); + * | REVERSE('HELLO, WORLD!') | + * |:-------------------------| + * | !dlrow ,olleH | + * + * @since 1.14.0 + * @param c + * Column to be reverse. + * @return + * Column object. + */ def reverse(c: Column): Column = builtin("reverse")(c) - /** Wrapper for Snowflake built-in isnull function. Gets a boolean depending if value is NULL or - * not. Return true if the value in the column is null. Example:: >>> from - * snowflake.snowpark.functions import is_null >>> df = session.create_dataframe([1.2, - * float("nan"), None, 1.0], schema=["a"]) >>> df.select(is_null("a").as_("a")).collect() - * [Row(A=False), Row(A=False), Row(A=True), Row(A=False)] - * @since 1.14.0 - * @param c - * Column to qnalize if it is null value. - * @return - * Column object. - */ + /** + * Wrapper for Snowflake built-in isnull function. 
Gets a boolean depending on whether the value is NULL or
+   * not. Returns true if the value in the column is null. Example:: >>> from
+   * snowflake.snowpark.functions import is_null >>> df = session.create_dataframe([1.2,
+   * float("nan"), None, 1.0], schema=["a"]) >>> df.select(is_null("a").as_("a")).collect()
+   * [Row(A=False), Row(A=False), Row(A=True), Row(A=False)]
+   * @since 1.14.0
+   * @param c
+   * Column to check for null values.
+   * @return
+   * Column object.
+   */
  def isnull(c: Column): Column = is_null(c)

-  /** Returns the current Unix timestamp (in seconds) as a long. Extracts a specified date or time
-   * portion from a date, time, or timestamp. how: EXTRACT , HOUR / MINUTE / SECOND , YEAR* / DAY*
-   * / WEEK* / MONTH / QUARTER Construction - DATE_PART( ,
-   * ) SELECT TO_TIMESTAMP('2013-05-08T23:39:20.123-07:00') AS "TIME_STAMP1",
-   * DATE_PART(EPOCH_SECOND, "TIME_STAMP1") AS "EXTRACTED EPOCH SECOND";
-   * | TIME_STAMP1 | EXTRACTED EPOCH SECOND |
-   * |:---------------------------------------------------|:-----------------------|
-   * | -------------------------+------------------------ | |
-   * | 2013-05-08 23:39:20.123 | 1368056360 |
-   * @since 1.14.0
-   * @note
-   * All calls of `unix_timestamp` within the same query return the same value
-   */
+  /**
+   * Returns the current Unix timestamp (in seconds) as a long. Extracts a specified date or time
+   * portion from a date, time, or timestamp. how: EXTRACT , HOUR / MINUTE / SECOND , YEAR* / DAY* /
+   * WEEK* / MONTH / QUARTER Construction - DATE_PART( date_or_time_part, date_or_time_expr )
+   * SELECT TO_TIMESTAMP('2013-05-08T23:39:20.123-07:00') AS "TIME_STAMP1", DATE_PART(EPOCH_SECOND,
+   * "TIME_STAMP1") AS "EXTRACTED EPOCH SECOND";
+   * | TIME_STAMP1              | EXTRACTED EPOCH SECOND |
+   * |:-------------------------|:-----------------------|
+   * | 2013-05-08 23:39:20.123  | 1368056360             |
+   * @since 1.14.0
+   * @note
+   * All calls of `unix_timestamp` within the same query return the same value
+   */
  def unix_timestamp(c: Column): Column = {
    builtin("date_part")("epoch_second", c)
  }

-  /** Signature - snowflake.snowpark.functions.regexp_extract (value: Union[Column, str], regexp:
-   * Union[Column, str], idx: int) Column Extract a specific group matched by a regex, from the
-   * specified string column. If the regex did not match, or the specified group did not match, an
-   * empty string is returned. Example: from snowflake.snowpark.functions import regexp_extract
-   * df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], ["id", "age"])
-   * df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show() --------- \|"RES"
-   * \| ---------
-   * | 20 |
-   * |:---|
-   * | 40 |
-   * --------- Note: non-greedy tokens such as are not supported
-   * @since 1.14.0
-   * @return
-   * Column object.
-   */
+  /**
+   * Signature - snowflake.snowpark.functions.regexp_extract (value: Union[Column, str], regexp:
+   * Union[Column, str], idx: int) Column Extracts a specific group matched by a regex from the
+   * specified string column. If the regex did not match, or the specified group did not match, an
+   * empty string is returned. Example: from snowflake.snowpark.functions import regexp_extract
+   * df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], ["id", "age"])
+   * df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show()
+   * | "RES" |
+   * |:------|
+   * | 20    |
+   * | 40    |
+   * Note: non-greedy tokens are not supported
+   * @since 1.14.0
+   * @return
+   * Column object.
+ */ def regexp_extract( colName: Column, exp: String, @@ -3210,69 +3498,73 @@ object functions { lit(""))) } - /** Returns the sign of its argument as mentioned : - * - * - -1 if the argument is negative. - * - 1 if it is positive. - * - 0 if it is 0. - * - * Args: col: The column to evaluate its sign Example:: >>> df = - * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>> - * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"), - * sign("c").alias("c_sign")).show() ---------------------------------- \|"A_SIGN" |"B_SIGN" - * \|"C_SIGN" | ---------------------------------- \|-1 |1 |0 | - * ---------------------------------- - * @since 1.14.0 - * @param e - * Column to calculate the sign. - * @return - * Column object. - */ + /** + * Returns the sign of its argument as mentioned : + * + * - -1 if the argument is negative. + * - 1 if it is positive. + * - 0 if it is 0. + * + * Args: col: The column to evaluate its sign Example:: >>> df = + * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>> + * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"), + * sign("c").alias("c_sign")).show() ---------------------------------- \|"A_SIGN" |"B_SIGN" + * \|"C_SIGN" | ---------------------------------- \|-1 |1 |0 | ---------------------------------- + * + * @since 1.14.0 + * @param e + * Column to calculate the sign. + * @return + * Column object. + */ def sign(colName: Column): Column = { builtin("SIGN")(colName) } - /** Returns the sign of its argument: - * - * - -1 if the argument is negative. - * - 1 if it is positive. - * - 0 if it is 0. - * - * Args: col: The column to evaluate its sign Example:: >>> df = - * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>> - * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"), - * sign("c").alias("c_sign")).show() ---------------------------------- \|"A_SIGN" |"B_SIGN" - * \|"C_SIGN" | ---------------------------------- \|-1 |1 |0 | - * ---------------------------------- - * @since 1.14.0 - * @param e - * Column to calculate the sign. - * @return - * Column object. - */ + /** + * Returns the sign of its argument: + * + * - -1 if the argument is negative. + * - 1 if it is positive. + * - 0 if it is 0. + * + * Args: col: The column to evaluate its sign Example:: >>> df = + * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>> + * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"), + * sign("c").alias("c_sign")).show() ---------------------------------- \|"A_SIGN" |"B_SIGN" + * \|"C_SIGN" | ---------------------------------- \|-1 |1 |0 | ---------------------------------- + * + * @since 1.14.0 + * @param e + * Column to calculate the sign. + * @return + * Column object. + */ def signum(colName: Column): Column = { builtin("SIGN")(colName) } - /** Returns the sign of the given column. Returns either 1 for positive, 0 for 0 or NaN, -1 for - * negative and null for null. NOTE: if string values are provided snowflake will attempts to - * cast. If it casts correctly, returns the calculation, if not an error will be thrown - * @since 1.14.0 - * @param columnName - * Name of the column to calculate the sign. - * @return - * Column object. - */ + /** + * Returns the sign of the given column. Returns either 1 for positive, 0 for 0 or NaN, -1 for + * negative and null for null. NOTE: if string values are provided snowflake will attempts to + * cast. 
If it casts correctly, returns the calculation; if not, an error will be thrown.
+   * @since 1.14.0
+   * @param columnName
+   * Name of the column to calculate the sign.
+   * @return
+   * Column object.
+   */
  def signum(columnName: String): Column = {
    signum(col(columnName))
  }

-  /** Returns the substring from string str before count occurrences of the delimiter delim. If
-   * count is positive, everything the left of the final delimiter (counting from left) is
-   * returned. If count is negative, every to the right of the final delimiter (counting from the
-   * right) is returned. substring_index performs a case-sensitive match when searching for delim.
-   * @since 1.14.0
-   */
+  /**
+   * Returns the substring from string str before count occurrences of the delimiter delim. If count
+   * is positive, everything to the left of the final delimiter (counting from the left) is returned.
+   * If count is negative, everything to the right of the final delimiter (counting from the right)
+   * is returned. substring_index performs a case-sensitive match when searching for delim.
+   * @since 1.14.0
+   */
  def substring_index(str: String, delim: String, count: Int): Column = {
    when(
      lit(count) < lit(0),
@@ -3294,265 +3586,278 @@ object functions {
          callBuiltin("regexp_instr", lit(str), lit(delim), 1, lit(count), 1)))
  }

-  /** Returns the input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is
-   * returned. Example:: >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"])
-   * >>> df.select(array_agg("a", True).alias("result")).show() ------------ \|"RESULT" |
-   * ------------
-   * | [ |
-   * |:---|
-   * | 1, |
-   * | 2, |
-   * | 3 |
-   * | ] |
-   * ------------
-   * @since 1.14.0
-   * @param c
-   * Column to be collect.
-   * @return
-   * The array.
-   */
+  /**
+   * Returns the input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is
+   * returned. Example:: >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"])
+   * >>> df.select(array_agg("a", True).alias("result")).show() ------------ \|"RESULT" |
+   * ------------
+   * | [ |
+   * |:---|
+   * | 1, |
+   * | 2, |
+   * | 3 |
+   * | ] |
+   * ------------
+   * @since 1.14.0
+   * @param c
+   * Column to be collected.
+   * @return
+   * The array.
+   */
  def collect_list(c: Column): Column = array_agg(c)

-  /** Returns the input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is
-   * returned.
-   *
-   * Example:: >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) >>>
-   * df.select(array_agg("a", True).alias("result")).show() ------------ \|"RESULT" | ------------
-   * | [ |
-   * |:---|
-   * | 1, |
-   * | 2, |
-   * | 3 |
-   * | ] |
-   * ------------
-   * @since 1.14.0
-   * @param s
-   * Column name to be collected.
-   * @return
-   * The array.
-   */
+  /**
+   * Returns the input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is
+   * returned.
+   *
+   * Example:: >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) >>>
+   * df.select(array_agg("a", True).alias("result")).show() ------------ \|"RESULT" | ------------
+   * | [ |
+   * |:---|
+   * | 1, |
+   * | 2, |
+   * | 3 |
+   * | ] |
+   * ------------
+   * @since 1.14.0
+   * @param s
+   * Column name to be collected.
+   * @return
+   * The array.
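+   *
+   * A usage sketch (the DataFrame `df` and the column name "a" are illustrative only):
+   * {{{
+   *   // collect all values of column "a" into a single ARRAY
+   *   df.select(collect_list("a").as("result")).show()
+   * }}}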
+ */ def collect_list(s: String): Column = array_agg(col(s)) - /** Returns the date that is `days` days after `start` Usage - DATE_ADD( date_or_time_part, value, - * date_or_time_expr ) Example:: SELECT TO_DATE('2013-05-08') AS v1, DATE_ADD(year, 2, - * TO_DATE('2013-05-08')) AS v; - * | V1 | V | - * |:--------------------------|:-----------| - * | ------------+------------ | | - * | 2013-05-08 | 2015-05-08 | - * - * @since 1.15.0 - * @param start - * Column name - * @param days - * Int . - * @return - * Column. - */ + /** + * Returns the date that is `days` days after `start` Usage - DATE_ADD( date_or_time_part, value, + * date_or_time_expr ) Example:: SELECT TO_DATE('2013-05-08') AS v1, DATE_ADD(year, 2, + * TO_DATE('2013-05-08')) AS v; + * | V1 | V | + * |:--------------------------|:-----------| + * | ------------+------------ | | + * | 2013-05-08 | 2015-05-08 | + * + * @since 1.15.0 + * @param start + * Column name + * @param days + * Int . + * @return + * Column. + */ def date_add(days: Int, start: Column): Column = dateadd("day", lit(days), start) - /** Returns the date that is `days` days after `start` Usage - DATE_ADD( date_or_time_part, value, - * date_or_time_expr ) Example:: SELECT TO_DATE('2013-05-08') AS v1, DATE_ADD(year, 2, - * TO_DATE('2013-05-08')) AS v; - * | V1 | V | - * |:--------------------------|:-----------| - * | ------------+------------ | | - * | 2013-05-08 | 2015-05-08 | - * - * @since 1.15.0 - * @param start - * A date, timestamp or string. If a string, the data must be in a format that can be cast to a - * date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` - * @param days - * The number of days to add to `start`, can be negative to subtract days - * @return - * A date, or null if `start` was a string that could not be cast to a date - */ + /** + * Returns the date that is `days` days after `start` Usage - DATE_ADD( date_or_time_part, value, + * date_or_time_expr ) Example:: SELECT TO_DATE('2013-05-08') AS v1, DATE_ADD(year, 2, + * TO_DATE('2013-05-08')) AS v; + * | V1 | V | + * |:--------------------------|:-----------| + * | ------------+------------ | | + * | 2013-05-08 | 2015-05-08 | + * + * @since 1.15.0 + * @param start + * A date, timestamp or string. If a string, the data must be in a format that can be cast to a + * date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` + * @param days + * The number of days to add to `start`, can be negative to subtract days + * @return + * A date, or null if `start` was a string that could not be cast to a date + */ def date_add(start: Column, days: Column): Column = dateadd("day", days, start) - /** Aggregate function: returns a set of objects with duplicate elements eliminated. Returns the - * input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is returned. - * - * Example:: >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) >>> - * df.select(array_agg("a", True).alias("result")).show() ------------ \|"RESULT" | ------------ - * | [ | - * |:---| - * | 1, | - * | 2, | - * | 3 | - * | ] | - * ------------ - * @since 1.15.0 - * @param e - * The column to collect the list values - * @return - * A list with unique values - */ + /** + * Aggregate function: returns a set of objects with duplicate elements eliminated. Returns the + * input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is returned. 
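+   *
+   * A Scala usage sketch (the DataFrame `df` and the column name "a" are illustrative only):
+   * {{{
+   *   // distinct values of column "a", pivoted into one ARRAY
+   *   df.select(collect_set(col("a")).as("result")).show()
+   * }}}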
+ * + * Example:: >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) >>> + * df.select(array_agg("a", True).alias("result")).show() ------------ \|"RESULT" | ------------ + * | [ | + * |:---| + * | 1, | + * | 2, | + * | 3 | + * | ] | + * ------------ + * @since 1.15.0 + * @param e + * The column to collect the list values + * @return + * A list with unique values + */ def collect_set(e: Column): Column = sqlExpr(s"array_agg(distinct ${e.getName.get})") - /** Aggregate function: returns a set of objects with duplicate elements eliminated. Returns the - * input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is returned. - * - * Example:: >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) >>> - * df.select(array_agg("a", True).alias("result")).show() ------------ \|"RESULT" | ------------ - * | [ | - * |:---| - * | 1, | - * | 2, | - * | 3 | - * | ] | - * ------------ - * @since 1.15.0 - * @param e - * The column to collect the list values - * @return - * A list with unique values - */ + /** + * Aggregate function: returns a set of objects with duplicate elements eliminated. Returns the + * input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is returned. + * + * Example:: >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) >>> + * df.select(array_agg("a", True).alias("result")).show() ------------ \|"RESULT" | ------------ + * | [ | + * |:---| + * | 1, | + * | 2, | + * | 3 | + * | ] | + * ------------ + * @since 1.15.0 + * @param e + * The column to collect the list values + * @return + * A list with unique values + */ def collect_set(e: String): Column = sqlExpr(s"array_agg(distinct ${e})") - /** Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string - * representing the timestamp of that moment in the current system time zone in the yyyy-MM-dd - * HH:mm:ss format. - * @since 1.15.0 - * @param ut - * A number of a type that is castable to a long, such as string or integer. Can be negative - * for timestamps before the unix epoch - * @return - * A string, or null if the input was a string that could not be cast to a long - */ + /** + * Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string + * representing the timestamp of that moment in the current system time zone in the yyyy-MM-dd + * HH:mm:ss format. + * @since 1.15.0 + * @param ut + * A number of a type that is castable to a long, such as string or integer. Can be negative for + * timestamps before the unix epoch + * @return + * A string, or null if the input was a string that could not be cast to a long + */ def from_unixtime(ut: Column): Column = ut.cast(LongType).cast(TimestampType).cast(StringType) - /** Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string - * representing the timestamp of that moment in the current system time zone in the given format. - * @since 1.15.0 - * @param ut - * A number of a type that is castable to a long, such as string or integer. Can be negative - * for timestamps before the unix epoch - * @param f - * A date time pattern that the input will be formatted to - * @return - * A string, or null if `ut` was a string that could not be cast to a long or `f` was an - * invalid date time pattern - */ + /** + * Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string + * representing the timestamp of that moment in the current system time zone in the given format. 
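+   *
+   * A usage sketch (the column name "epoch_sec" and the pattern below are illustrative only):
+   * {{{
+   *   // seconds since the unix epoch, rendered with the given pattern
+   *   df.select(from_unixtime(col("epoch_sec"), "yyyy-MM-dd HH:mm:ss").as("ts_str"))
+   * }}}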
+ * @since 1.15.0 + * @param ut + * A number of a type that is castable to a long, such as string or integer. Can be negative for + * timestamps before the unix epoch + * @param f + * A date time pattern that the input will be formatted to + * @return + * A string, or null if `ut` was a string that could not be cast to a long or `f` was an invalid + * date time pattern + */ def from_unixtime(ut: Column, f: String): Column = date_format(ut.cast(LongType).cast(TimestampType), f) - /** A column expression that generates monotonically increasing 64-bit integers. Returns a - * sequence of monotonically increasing integers, with wrap-around which happens after largest - * representable integer of integer width 8 byte. - * - * Args: sign: When 0, the sequence continues at 0 after wrap-around. When 1, the sequence - * continues at smallest representable 8 byte integer. Defaults to 0. - * - * See Also: - * - :meth:`Session.generator`, which can be used to generate in tandem with `seq8` to generate - * sequences. - * - * Example:: >>> df = session.generator(seq8(0), rowcount=3) >>> df.collect() [Row(SEQ8(0)=0), - * Row(SEQ8(0)=1), Row(SEQ8(0)=2)] - * @since 1.15.0 - */ + /** + * A column expression that generates monotonically increasing 64-bit integers. Returns a sequence + * of monotonically increasing integers, with wrap-around which happens after largest + * representable integer of integer width 8 byte. + * + * Args: sign: When 0, the sequence continues at 0 after wrap-around. When 1, the sequence + * continues at smallest representable 8 byte integer. Defaults to 0. + * + * See Also: + * - :meth:`Session.generator`, which can be used to generate in tandem with `seq8` to generate + * sequences. + * + * Example:: >>> df = session.generator(seq8(0), rowcount=3) >>> df.collect() [Row(SEQ8(0)=0), + * Row(SEQ8(0)=1), Row(SEQ8(0)=2)] + * @since 1.15.0 + */ def monotonically_increasing_id(): Column = builtin("seq8")() - /** Returns number of months between dates `start` and `end`. - * - * A whole number is returned if both inputs have the same day of month or both are the last day - * of their respective months. Otherwise, the difference is calculated assuming 31 days per - * month. - * - * For example: - * {{{ - * months_between("2017-11-14", "2017-07-14") // returns 4.0 - * months_between("2017-01-01", "2017-01-10") // returns 0.29032258 - * months_between("2017-06-01", "2017-06-16 12:00:00") // returns -0.5 - * }}} - * @since 1.15.0 - * @param end - * Column name. If a string, the data must be in a format that can be cast to a timestamp, such - * as yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.SSSS - * @param start - * Column name . If a string, the data must be in a format that can cast to a timestamp, such - * as yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.SSSS - * @return - * A double, or null if either end or start were strings that could not be cast to a timestamp. - * Negative if end is before start - */ + /** + * Returns number of months between dates `start` and `end`. + * + * A whole number is returned if both inputs have the same day of month or both are the last day + * of their respective months. Otherwise, the difference is calculated assuming 31 days per month. + * + * For example: + * {{{ + * months_between("2017-11-14", "2017-07-14") // returns 4.0 + * months_between("2017-01-01", "2017-01-10") // returns 0.29032258 + * months_between("2017-06-01", "2017-06-16 12:00:00") // returns -0.5 + * }}} + * @since 1.15.0 + * @param end + * Column name. 
If a string, the data must be in a format that can be cast to a timestamp, such + * as yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.SSSS + * @param start + * Column name . If a string, the data must be in a format that can cast to a timestamp, such as + * yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.SSSS + * @return + * A double, or null if either end or start were strings that could not be cast to a timestamp. + * Negative if end is before start + */ def months_between(end: String, start: String): Column = builtin("MONTHS_BETWEEN")(col(end), col(start)) - /** Locate the position of the first occurrence of substr column in the given string. Returns null - * if either of the arguments are null. For example SELECT id, string1, REGEXP_SUBSTR(string1, - * 'nevermore\\d') AS substring, REGEXP_INSTR( string1, 'nevermore\\d') AS position FROM demo1 - * ORDER BY id; - * | ID | STRING1 | SUBSTRING | POSITION | - * |:-------------------------------------------------------------------|:------------------------------------|:-----------|:---------| - * | ----+-------------------------------------+------------+---------- | | | | - * | 1 | nevermore1, nevermore2, nevermore3. | nevermore1 | 1 | - * - * @since 1.15.0 - * @note - * The position is not zero based, but 1 based index. Returns 0 if substr could not be found in - * str. - */ + /** + * Locate the position of the first occurrence of substr column in the given string. Returns null + * if either of the arguments are null. For example SELECT id, string1, REGEXP_SUBSTR(string1, + * 'nevermore\\d') AS substring, REGEXP_INSTR( string1, 'nevermore\\d') AS position FROM demo1 + * ORDER BY id; + * | ID | STRING1 | SUBSTRING | POSITION | + * |:-------------------------------------------------------------------|:------------------------------------|:-----------|:---------| + * | ----+-------------------------------------+------------+---------- | | | | + * | 1 | nevermore1, nevermore2, nevermore3. | nevermore1 | 1 | + * + * @since 1.15.0 + * @note + * The position is not zero based, but 1 based index. Returns 0 if substr could not be found in + * str. + */ def instr(str: Column, substring: String): Column = builtin("REGEXP_INSTR")(str, substring) - /** Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders - * that time as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 - * 03:40:00.0'. ALTER SESSION SET TIMEZONE = 'America/Los_Angeles'; SELECT - * TO_TIMESTAMP_TZ('2024-04-05 01:02:03'); - * | TO_TIMESTAMP_TZ('2024-04-05 01:02:03') | - * |:---------------------------------------| - * | 2024-04-05 01:02:03.000 -0700 | - * - * @since 1.15.0 - * @param ts - * A date, timestamp or string. If a string, the data must be in a format that can be cast to a - * timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` A string detailing the time - * zone ID that the input should be adjusted to. It should be in the format of either - * region-based zone IDs or zone offsets. Region IDs must have the form 'area/city', such as - * 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH:mm', for example '-08:00' - * or '+01:00'. Also 'UTC' and 'Z' are supported as aliases of '+00:00'. Other short names are - * not recommended to use because they can be ambiguous. 
- * @return - * A timestamp, or null if `ts` was a string that could not be cast to a timestamp or `tz` was - * an invalid value - */ + /** + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders + * that time as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 + * 03:40:00.0'. ALTER SESSION SET TIMEZONE = 'America/Los_Angeles'; SELECT + * TO_TIMESTAMP_TZ('2024-04-05 01:02:03'); + * | TO_TIMESTAMP_TZ('2024-04-05 01:02:03') | + * |:---------------------------------------| + * | 2024-04-05 01:02:03.000 -0700 | + * + * @since 1.15.0 + * @param ts + * A date, timestamp or string. If a string, the data must be in a format that can be cast to a + * timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` A string detailing the time + * zone ID that the input should be adjusted to. It should be in the format of either + * region-based zone IDs or zone offsets. Region IDs must have the form 'area/city', such as + * 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH:mm', for example '-08:00' + * or '+01:00'. Also 'UTC' and 'Z' are supported as aliases of '+00:00'. Other short names are + * not recommended to use because they can be ambiguous. + * @return + * A timestamp, or null if `ts` was a string that could not be cast to a timestamp or `tz` was + * an invalid value + */ def from_utc_timestamp(ts: Column): Column = builtin("TO_TIMESTAMP_TZ")(ts) - /** Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time - * zone, and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield - * '2017-07-14 01:40:00.0'. - * @since 1.15.0 - * @param ts - * A date, timestamp or string. If a string, the data must be in a format that can be cast to a - * timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` A string detailing the time - * zone ID that the input should be adjusted to. It should be in the format of either - * region-based zone IDs or zone offsets. Region IDs must have the form 'area/city', such as - * 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH:mm', for example '-08:00' - * or '+01:00'. Also 'UTC' and 'Z' are supported as aliases of '+00:00'. Other short names are - * not recommended to use because they can be ambiguous. - * @return - * A timestamp, or null if `ts` was a string that could not be cast to a timestamp or `tz` was - * an invalid value - */ + /** + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, + * and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 + * 01:40:00.0'. + * @since 1.15.0 + * @param ts + * A date, timestamp or string. If a string, the data must be in a format that can be cast to a + * timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` A string detailing the time + * zone ID that the input should be adjusted to. It should be in the format of either + * region-based zone IDs or zone offsets. Region IDs must have the form 'area/city', such as + * 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH:mm', for example '-08:00' + * or '+01:00'. Also 'UTC' and 'Z' are supported as aliases of '+00:00'. Other short names are + * not recommended to use because they can be ambiguous. 
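// Illustrative sketch for the two TIMESTAMP_TZ helpers in this block
// (from_utc_timestamp / to_utc_timestamp, both delegating to TO_TIMESTAMP_TZ).
// The connection profile, ad-hoc SQL, and column name are assumptions made for
// the example only.
import com.snowflake.snowpark.Session
import com.snowflake.snowpark.functions.{col, from_utc_timestamp, to_utc_timestamp}

val session = Session.builder.configFile("profile.properties").create
val df = session.sql("select '2024-04-05 01:02:03' as ts")
df.select(
    from_utc_timestamp(col("ts")).as("as_tz"),
    to_utc_timestamp(col("ts")).as("as_utc"))
  .show()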
+ * @return + * A timestamp, or null if `ts` was a string that could not be cast to a timestamp or `tz` was + * an invalid value + */ def to_utc_timestamp(ts: Column): Column = builtin("TO_TIMESTAMP_TZ")(ts) - /** Formats numeric column x to a format like '#,###,###.##', rounded to d decimal places with - * HALF_EVEN round mode, and returns the result as a string column. - * @since 1.15.0 - * If d is 0, the result has no decimal point or fractional part. If d is less than 0, the - * result will be null. - * - * @param x - * numeric column to be transformed - * @param d - * Amount of decimal for the number format - * - * @return - * Number casted to the specific string format - */ + /** + * Formats numeric column x to a format like '#,###,###.##', rounded to d decimal places with + * HALF_EVEN round mode, and returns the result as a string column. + * @since 1.15.0 + * If d is 0, the result has no decimal point or fractional part. If d is less than 0, the + * result will be null. + * + * @param x + * numeric column to be transformed + * @param d + * Amount of decimal for the number format + * + * @return + * Number casted to the specific string format + */ def format_number(x: Column, d: Int): Column = { if (d < 0) { lit(null) @@ -3581,516 +3886,537 @@ object functions { */ def desc(colName: String): Column = col(colName).desc - /** Returns a Column expression with values sorted in ascending order. Example: - * {{{ - * val df = session.createDataFrame(Seq(3, 2, 1)).toDF("id") - * df.sort(asc("id")).show() - * - * -------- - * |"ID" | - * -------- - * |1 | - * |2 | - * |3 | - * -------- - * }}} - * @since 1.14.0 - * @param colName - * Column name. - * @return - * Column object ordered in an ascending manner. - */ + /** + * Returns a Column expression with values sorted in ascending order. Example: + * {{{ + * val df = session.createDataFrame(Seq(3, 2, 1)).toDF("id") + * df.sort(asc("id")).show() + * + * -------- + * |"ID" | + * -------- + * |1 | + * |2 | + * |3 | + * -------- + * }}} + * @since 1.14.0 + * @param colName + * Column name. + * @return + * Column object ordered in an ascending manner. + */ def asc(colName: String): Column = col(colName).asc - /** Returns the size of the input ARRAY. - * - * If the specified column contains a VARIANT value that contains an ARRAY, the size of the ARRAY - * is returned; otherwise, NULL is returned if the value is not an ARRAY. - * - * Example: - * {{{ - * val df = session.createDataFrame(Seq(Array(1, 2, 3))).toDF("id") - * df.select(size(col("id"))).show() - * - * ------------------------ - * |"ARRAY_SIZE(""ID"")" | - * ------------------------ - * |3 | - * ------------------------ - * }}} - * - * @since 1.14.0 - * @param c - * Column to get the size. - * @return - * Size of array column. - */ + /** + * Returns the size of the input ARRAY. + * + * If the specified column contains a VARIANT value that contains an ARRAY, the size of the ARRAY + * is returned; otherwise, NULL is returned if the value is not an ARRAY. + * + * Example: + * {{{ + * val df = session.createDataFrame(Seq(Array(1, 2, 3))).toDF("id") + * df.select(size(col("id"))).show() + * + * ------------------------ + * |"ARRAY_SIZE(""ID"")" | + * ------------------------ + * |3 | + * ------------------------ + * }}} + * + * @since 1.14.0 + * @param c + * Column to get the size. + * @return + * Size of array column. + */ def size(c: Column): Column = array_size(c) - /** Creates a [[Column]] expression from raw SQL text. 
- * - * Note that the function does not interpret or check the SQL text. - * - * Example: - * {{{ - * val df = session.createDataFrame(Seq(Array(1, 2, 3))).toDF("id") - * df.filter(expr("id > 2")).show() - * - * -------- - * |"ID" | - * -------- - * |3 | - * -------- - * }}} - * - * @since 1.14.0 - * @param s - * SQL Expression as text. - * @return - * Converted SQL Expression. - */ + /** + * Creates a [[Column]] expression from raw SQL text. + * + * Note that the function does not interpret or check the SQL text. + * + * Example: + * {{{ + * val df = session.createDataFrame(Seq(Array(1, 2, 3))).toDF("id") + * df.filter(expr("id > 2")).show() + * + * -------- + * |"ID" | + * -------- + * |3 | + * -------- + * }}} + * + * @since 1.14.0 + * @param s + * SQL Expression as text. + * @return + * Converted SQL Expression. + */ def expr(s: String): Column = sqlExpr(s) - /** Returns an ARRAY constructed from zero, one, or more inputs. - * - * Example: - * {{{ - * val df = session.createDataFrame(Seq((1, 2, 3), (4, 5, 6))).toDF("id") - * df.select(array(col("a"), col("b")).as("id")).show() - * - * -------- - * |"ID" | - * -------- - * |[ | - * | 1, | - * | 2 | - * |] | - * |[ | - * | 4, | - * | 5 | - * |] | - * -------- - * }}} - * - * @since 1.14.0 - * @param c - * Columns to build the array. - * @return - * The array. - */ + /** + * Returns an ARRAY constructed from zero, one, or more inputs. + * + * Example: + * {{{ + * val df = session.createDataFrame(Seq((1, 2, 3), (4, 5, 6))).toDF("id") + * df.select(array(col("a"), col("b")).as("id")).show() + * + * -------- + * |"ID" | + * -------- + * |[ | + * | 1, | + * | 2 | + * |] | + * |[ | + * | 4, | + * | 5 | + * |] | + * -------- + * }}} + * + * @since 1.14.0 + * @param c + * Columns to build the array. + * @return + * The array. + */ def array(c: Column*): Column = array_construct(c: _*) - /** Converts an input expression into the corresponding date in the specified date format. - * Example: - * {{{ - * val df = Seq("2023-10-10", "2022-05-15", null.asInstanceOf[String]).toDF("date") - * df.select(date_format(col("date"), "YYYY/MM/DD").as("formatted_date")).show() - * - * -------------------- - * |"FORMATTED_DATE" | - * -------------------- - * |2023/10/10 | - * |2022/05/15 | - * |NULL | - * -------------------- - * - * }}} - * - * @since 1.14.0 - * @param c - * Column to format to date. - * @param s - * Date format. - * @return - * Column object. - */ + /** + * Converts an input expression into the corresponding date in the specified date format. Example: + * {{{ + * val df = Seq("2023-10-10", "2022-05-15", null.asInstanceOf[String]).toDF("date") + * df.select(date_format(col("date"), "YYYY/MM/DD").as("formatted_date")).show() + * + * -------------------- + * |"FORMATTED_DATE" | + * -------------------- + * |2023/10/10 | + * |2022/05/15 | + * |NULL | + * -------------------- + * + * }}} + * + * @since 1.14.0 + * @param c + * Column to format to date. + * @param s + * Date format. + * @return + * Column object. + */ def date_format(c: Column, s: String): Column = builtin("to_varchar")(c.cast(TimestampType), s.replace("mm", "mi")) - /** Returns the last value of the column in a group. 
Example - * {{{ - * val df = session.createDataFrame(Seq((5, "a", 10), - * (5, "b", 20), - * (3, "d", 15), - * (3, "e", 40))).toDF("grade", "name", "score") - * val window = Window.partitionBy(col("grade")).orderBy(col("score").desc) - * df.select(last(col("name")).over(window)).show() - * - * --------------------- - * |"LAST_SCORE_NAME" | - * --------------------- - * |a | - * |a | - * |d | - * |d | - * --------------------- - * }}} - * - * @since 1.14.0 - * @param c - * Column to obtain last value. - * @return - * Column object. - */ + /** + * Returns the last value of the column in a group. Example + * {{{ + * val df = session.createDataFrame(Seq((5, "a", 10), + * (5, "b", 20), + * (3, "d", 15), + * (3, "e", 40))).toDF("grade", "name", "score") + * val window = Window.partitionBy(col("grade")).orderBy(col("score").desc) + * df.select(last(col("name")).over(window)).show() + * + * --------------------- + * |"LAST_SCORE_NAME" | + * --------------------- + * |a | + * |a | + * |d | + * |d | + * --------------------- + * }}} + * + * @since 1.14.0 + * @param c + * Column to obtain last value. + * @return + * Column object. + */ def last(c: Column): Column = builtin("LAST_VALUE")(c) - /** Computes the logarithm of the given value in base 10. Example - * {{{ - * val df = session.createDataFrame(Seq(100)).toDF("a") - * df.select(log10(col("a"))).show() - * - * ----------- - * |"LOG10" | - * ----------- - * |2.0 | - * ----------- - * }}} - * - * @since 1.14.0 - * @param c - * Column to apply logarithm operation - * @return - * log10 of the given column - */ + /** + * Computes the logarithm of the given value in base 10. Example + * {{{ + * val df = session.createDataFrame(Seq(100)).toDF("a") + * df.select(log10(col("a"))).show() + * + * ----------- + * |"LOG10" | + * ----------- + * |2.0 | + * ----------- + * }}} + * + * @since 1.14.0 + * @param c + * Column to apply logarithm operation + * @return + * log10 of the given column + */ def log10(c: Column): Column = builtin("LOG")(10, c) - /** Computes the logarithm of the given column in base 10. Example - * {{{ - * val df = session.createDataFrame(Seq(100)).toDF("a") - * df.select(log10("a"))).show() - * ----------- - * |"LOG10" | - * ----------- - * |2.0 | - * ----------- - * - * }}} - * - * @since 1.14.0 - * @param columnName - * ColumnName in String to apply logarithm operation - * @return - * log10 of the given column - */ + /** + * Computes the logarithm of the given column in base 10. Example + * {{{ + * val df = session.createDataFrame(Seq(100)).toDF("a") + * df.select(log10("a"))).show() + * ----------- + * |"LOG10" | + * ----------- + * |2.0 | + * ----------- + * + * }}} + * + * @since 1.14.0 + * @param columnName + * ColumnName in String to apply logarithm operation + * @return + * log10 of the given column + */ def log10(columnName: String): Column = builtin("LOG")(10, col(columnName)) - /** Computes the natural logarithm of the given value plus one. Example - * {{{ - * val df = session.createDataFrame(Seq(0.1)).toDF("a") - * df.select(log1p(col("a")).as("log1p")).show() - * ----------------------- - * |"LOG1P" | - * ----------------------- - * |0.09531017980432493 | - * ----------------------- - * - * }}} - * - * @since 1.14.0 - * @param c - * Column to apply logarithm operation - * @return - * the natural logarithm of the given value plus one. - */ + /** + * Computes the natural logarithm of the given value plus one. 
Example + * {{{ + * val df = session.createDataFrame(Seq(0.1)).toDF("a") + * df.select(log1p(col("a")).as("log1p")).show() + * ----------------------- + * |"LOG1P" | + * ----------------------- + * |0.09531017980432493 | + * ----------------------- + * + * }}} + * + * @since 1.14.0 + * @param c + * Column to apply logarithm operation + * @return + * the natural logarithm of the given value plus one. + */ def log1p(c: Column): Column = callBuiltin("ln", lit(1) + c) - /** Computes the natural logarithm of the given value plus one. Example - * {{{ - * val df = session.createDataFrame(Seq(0.1)).toDF("a") - * df.select(log1p("a").as("log1p")).show() - * ----------------------- - * |"LOG1P" | - * ----------------------- - * |0.09531017980432493 | - * ----------------------- - * - * }}} - * - * @since 1.14.0 - * @param columnName - * ColumnName in String to apply logarithm operation - * @return - * the natural logarithm of the given value plus one. - */ + /** + * Computes the natural logarithm of the given value plus one. Example + * {{{ + * val df = session.createDataFrame(Seq(0.1)).toDF("a") + * df.select(log1p("a").as("log1p")).show() + * ----------------------- + * |"LOG1P" | + * ----------------------- + * |0.09531017980432493 | + * ----------------------- + * + * }}} + * + * @since 1.14.0 + * @param columnName + * ColumnName in String to apply logarithm operation + * @return + * the natural logarithm of the given value plus one. + */ def log1p(columnName: String): Column = callBuiltin("ln", lit(1) + col(columnName)) - /** Computes the BASE64 encoding of a column and returns it as a string column. This is the - * reverse of unbase64. Example - * {{{ - * val df = session.createDataFrame(Seq("test")).toDF("a") - * df.select(base64(col("a")).as("base64")).show() - * ------------ - * |"BASE64" | - * ------------ - * |dGVzdA== | - * ------------ - * - * }}} - * - * @since 1.14.0 - * @param columnName - * ColumnName to apply base64 operation - * @return - * base64 encoded value of the given input column. - */ + /** + * Computes the BASE64 encoding of a column and returns it as a string column. This is the reverse + * of unbase64. Example + * {{{ + * val df = session.createDataFrame(Seq("test")).toDF("a") + * df.select(base64(col("a")).as("base64")).show() + * ------------ + * |"BASE64" | + * ------------ + * |dGVzdA== | + * ------------ + * + * }}} + * + * @since 1.14.0 + * @param columnName + * ColumnName to apply base64 operation + * @return + * base64 encoded value of the given input column. + */ def base64(col: Column): Column = callBuiltin("BASE64_ENCODE", col) - /** Decodes a BASE64 encoded string column and returns it as a column. Example - * {{{ - * val df = session.createDataFrame(Seq("dGVzdA==")).toDF("a") - * df.select(unbase64(col("a")).as("unbase64")).show() - * -------------- - * |"UNBASE64" | - * -------------- - * |test | - * -------------- - * - * }}} - * - * @since 1.14.0 - * @param columnName - * ColumnName to apply unbase64 operation - * @return - * the decoded value of the given encoded value. - */ + /** + * Decodes a BASE64 encoded string column and returns it as a column. Example + * {{{ + * val df = session.createDataFrame(Seq("dGVzdA==")).toDF("a") + * df.select(unbase64(col("a")).as("unbase64")).show() + * -------------- + * |"UNBASE64" | + * -------------- + * |test | + * -------------- + * + * }}} + * + * @since 1.14.0 + * @param columnName + * ColumnName to apply unbase64 operation + * @return + * the decoded value of the given encoded value. 
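// A hedged round-trip sketch for the BASE64 helpers in this block: unbase64 should
// recover the value produced by base64. Assumes an active Snowpark `session`; the
// sample value and column name are illustrative.
import com.snowflake.snowpark.functions.{col, base64, unbase64}

val df = session.createDataFrame(Seq("test")).toDF("a")
df.select(
    base64(col("a")).as("encoded"),
    unbase64(base64(col("a"))).as("decoded")) // "decoded" matches the original "a"
  .show()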
+ */ def unbase64(col: Column): Column = callBuiltin("BASE64_DECODE_STRING", col) - /** Locate the position of the first occurrence of substr in a string column, after position pos. - * - * @note - * The position is not zero based, but 1 based index. returns 0 if substr could not be found in - * str. This function is just leverages the SF POSITION builtin Example - * {{{ - * val df = session.createDataFrame(Seq(("b", "abcd"))).toDF("a", "b") - * df.select(locate(col("a"), col("b"), 1).as("locate")).show() - * ------------ - * |"LOCATE" | - * ------------ - * |2 | - * ------------ - * - * }}} - * @since 1.14.0 - * @param substr - * string to search - * @param str - * value where string will be searched - * @param pos - * index for starting the search - * @return - * returns the position of the first occurrence. - */ + /** + * Locate the position of the first occurrence of substr in a string column, after position pos. + * + * @note + * The position is not zero based, but 1 based index. returns 0 if substr could not be found in + * str. This function is just leverages the SF POSITION builtin Example + * {{{ + * val df = session.createDataFrame(Seq(("b", "abcd"))).toDF("a", "b") + * df.select(locate(col("a"), col("b"), 1).as("locate")).show() + * ------------ + * |"LOCATE" | + * ------------ + * |2 | + * ------------ + * + * }}} + * @since 1.14.0 + * @param substr + * string to search + * @param str + * value where string will be searched + * @param pos + * index for starting the search + * @return + * returns the position of the first occurrence. + */ def locate(substr: Column, str: Column, pos: Int): Column = if (pos == 0) lit(0) else callBuiltin("POSITION", substr, str, pos) - /** Locate the position of the first occurrence of substr in a string column, after position pos. - * - * @note - * The position is not zero based, but 1 based index. returns 0 if substr could not be found in - * str. This function is just leverages the SF POSITION builtin Example - * {{{ - * val df = session.createDataFrame(Seq("java scala python")).toDF("a") - * df.select(locate("scala", col("a")).as("locate")).show() - * ------------ - * |"LOCATE" | - * ------------ - * |6 | - * ------------ - * - * }}} - * @since 1.14.0 - * @param substr - * string to search - * @param str - * value where string will be searched - * @param pos - * index for starting the search. default to 1. - * @return - * Returns the position of the first occurrence - */ + /** + * Locate the position of the first occurrence of substr in a string column, after position pos. + * + * @note + * The position is not zero based, but 1 based index. returns 0 if substr could not be found in + * str. This function is just leverages the SF POSITION builtin Example + * {{{ + * val df = session.createDataFrame(Seq("java scala python")).toDF("a") + * df.select(locate("scala", col("a")).as("locate")).show() + * ------------ + * |"LOCATE" | + * ------------ + * |6 | + * ------------ + * + * }}} + * @since 1.14.0 + * @param substr + * string to search + * @param str + * value where string will be searched + * @param pos + * index for starting the search. default to 1. + * @return + * Returns the position of the first occurrence + */ def locate(substr: String, str: Column, pos: Int = 1): Column = if (pos == 0) lit(0) else callBuiltin("POSITION", lit(substr), str, lit(pos)) - /** Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window - * partition. 
For example, if `n` is 4, the first quarter of the rows will get value 1, the - * second quarter will get 2, the third quarter will get 3, and the last quarter will get 4. - * - * This is equivalent to the NTILE function in SQL. Example - * {{{ - * val df = Seq((5, 15), (5, 15), (5, 15), (5, 20)).toDF("grade", "score") - * val window = Window.partitionBy(col("grade")).orderBy(col("score")) - * df.select(ntile(2).over(window).as("ntile")).show() - * ----------- - * |"NTILE" | - * ----------- - * |1 | - * |1 | - * |2 | - * |2 | - * ----------- - * }}} - * - * @since 1.14.0 - * @param n - * number of groups - * @return - * returns the ntile group id (from 1 to n inclusive) in an ordered window partition. - */ + /** + * Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window + * partition. For example, if `n` is 4, the first quarter of the rows will get value 1, the second + * quarter will get 2, the third quarter will get 3, and the last quarter will get 4. + * + * This is equivalent to the NTILE function in SQL. Example + * {{{ + * val df = Seq((5, 15), (5, 15), (5, 15), (5, 20)).toDF("grade", "score") + * val window = Window.partitionBy(col("grade")).orderBy(col("score")) + * df.select(ntile(2).over(window).as("ntile")).show() + * ----------- + * |"NTILE" | + * ----------- + * |1 | + * |1 | + * |2 | + * |2 | + * ----------- + * }}} + * + * @since 1.14.0 + * @param n + * number of groups + * @return + * returns the ntile group id (from 1 to n inclusive) in an ordered window partition. + */ def ntile(n: Int): Column = callBuiltin("ntile", lit(n)) - /** Generate a column with independent and identically distributed (i.i.d.) samples from the - * standard normal distribution. Return a call to the Snowflake RANDOM function. NOTE: Snowflake - * returns integers of 17-19 digits. Example - * {{{ - * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") - * df.withColumn("randn", randn()).select("randn").show() - * ------------------------ - * |"RANDN" | - * ------------------------ - * |-2093909082984812541 | - * |-1379817492278593383 | - * |-1231198046297539927 | - * ------------------------ - * }}} - * - * @since 1.14.0 - * @return - * Random number. - */ + /** + * Generate a column with independent and identically distributed (i.i.d.) samples from the + * standard normal distribution. Return a call to the Snowflake RANDOM function. NOTE: Snowflake + * returns integers of 17-19 digits. Example + * {{{ + * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") + * df.withColumn("randn", randn()).select("randn").show() + * ------------------------ + * |"RANDN" | + * ------------------------ + * |-2093909082984812541 | + * |-1379817492278593383 | + * |-1231198046297539927 | + * ------------------------ + * }}} + * + * @since 1.14.0 + * @return + * Random number. + */ def randn(): Column = builtin("RANDOM")() - /** Generate a column with independent and identically distributed (i.i.d.) samples from the - * standard normal distribution. Calls to the Snowflake RANDOM function. NOTE: Snowflake returns - * integers of 17-19 digits. 
Example - * {{{ - * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") - * df.withColumn("randn_with_seed", randn(123L)).select("randn_with_seed").show() - * ------------------------ - * |"RANDN_WITH_SEED" | - * ------------------------ - * |5777523539921853504 | - * |-8190739547906189845 | - * |-1138438814981368515 | - * ------------------------ - * }}} - * - * @since 1.14.0 - * @param seed - * Seed to use in the random function. - * @return - * Random number. - */ + /** + * Generate a column with independent and identically distributed (i.i.d.) samples from the + * standard normal distribution. Calls to the Snowflake RANDOM function. NOTE: Snowflake returns + * integers of 17-19 digits. Example + * {{{ + * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") + * df.withColumn("randn_with_seed", randn(123L)).select("randn_with_seed").show() + * ------------------------ + * |"RANDN_WITH_SEED" | + * ------------------------ + * |5777523539921853504 | + * |-8190739547906189845 | + * |-1138438814981368515 | + * ------------------------ + * }}} + * + * @since 1.14.0 + * @param seed + * Seed to use in the random function. + * @return + * Random number. + */ def randn(seed: Long): Column = builtin("RANDOM")(seed) - /** Shift the given value numBits left. If the given value is a long value, this function will - * return a long value else it will return an integer value. Example - * {{{ - * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") - * df.select(shiftleft(col("A"), 1).as("shiftleft")).show() - * --------------- - * |"SHIFTLEFT" | - * --------------- - * |2 | - * |4 | - * |6 | - * --------------- - * }}} - * - * @since 1.14.0 - * @param c - * Column to modify. - * @param numBits - * Number of bits to shift. - * @return - * Column object. - */ + /** + * Shift the given value numBits left. If the given value is a long value, this function will + * return a long value else it will return an integer value. Example + * {{{ + * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") + * df.select(shiftleft(col("A"), 1).as("shiftleft")).show() + * --------------- + * |"SHIFTLEFT" | + * --------------- + * |2 | + * |4 | + * |6 | + * --------------- + * }}} + * + * @since 1.14.0 + * @param c + * Column to modify. + * @param numBits + * Number of bits to shift. + * @return + * Column object. + */ def shiftleft(c: Column, numBits: Int): Column = bitshiftleft(c, lit(numBits)) - /** Shift the given value numBits right. If the given value is a long value, it will return a long - * value else it will return an integer value. Example - * {{{ - * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") - * df.select(shiftright(col("A"), 1).as("shiftright")).show() - * ---------------- - * |"SHIFTRIGHT" | - * ---------------- - * |0 | - * |1 | - * |1 | - * ---------------- - * }}} - * - * @since 1.14.0 - * @param c - * Column to modify. - * @param numBits - * Number of bits to shift. - * @return - * Column object. - */ + /** + * Shift the given value numBits right. If the given value is a long value, it will return a long + * value else it will return an integer value. Example + * {{{ + * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") + * df.select(shiftright(col("A"), 1).as("shiftright")).show() + * ---------------- + * |"SHIFTRIGHT" | + * ---------------- + * |0 | + * |1 | + * |1 | + * ---------------- + * }}} + * + * @since 1.14.0 + * @param c + * Column to modify. + * @param numBits + * Number of bits to shift. + * @return + * Column object. 
+ */ def shiftright(c: Column, numBits: Int): Column = bitshiftright(c, lit(numBits)) - /** Computes hex value of the given column. Example - * {{{ - * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") - * df.withColumn("hex_col", hex(col("A"))).select("hex_col").show() - * ------------- - * |"HEX_COL" | - * ------------- - * |31 | - * |32 | - * |33 | - * ------------- - * }}} - * - * @since 1.14.0 - * @param c - * Column to encode. - * @return - * Encoded string. - */ + /** + * Computes hex value of the given column. Example + * {{{ + * val df = session.createDataFrame(Seq((1), (2), (3))).toDF("a") + * df.withColumn("hex_col", hex(col("A"))).select("hex_col").show() + * ------------- + * |"HEX_COL" | + * ------------- + * |31 | + * |32 | + * |33 | + * ------------- + * }}} + * + * @since 1.14.0 + * @param c + * Column to encode. + * @return + * Encoded string. + */ def hex(c: Column): Column = builtin("HEX_ENCODE")(c) - /** Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the - * byte representation of number. Example - * {{{ - * val df = session.createDataFrame(Seq((31), (32), (33))).toDF("a") - * df.withColumn("unhex_col", unhex(col("A"))).select("unhex_col").show() - * --------------- - * |"UNHEX_COL" | - * --------------- - * |1 | - * |2 | - * |3 | - * --------------- - * }}} - * - * @param c - * Column to encode. - * @since 1.14.0 - * @return - * Encoded string. - */ + /** + * Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the + * byte representation of number. Example + * {{{ + * val df = session.createDataFrame(Seq((31), (32), (33))).toDF("a") + * df.withColumn("unhex_col", unhex(col("A"))).select("unhex_col").show() + * --------------- + * |"UNHEX_COL" | + * --------------- + * |1 | + * |2 | + * |3 | + * --------------- + * }}} + * + * @param c + * Column to encode. + * @since 1.14.0 + * @return + * Encoded string. + */ def unhex(c: Column): Column = builtin("HEX_DECODE_STRING")(c) - /** Invokes a built-in snowflake function with the specified name and arguments. Arguments can be - * of two types - * - * a. [[Column]], or - * - * b. Basic types such as Int, Long, Double, Decimal etc. which are converted to Snowpark - * literals. - * - * @group client_func - * @since 0.1.0 - */ + /** + * Invokes a built-in snowflake function with the specified name and arguments. Arguments can be + * of two types + * + * a. [[Column]], or + * + * b. Basic types such as Int, Long, Double, Decimal etc. which are converted to Snowpark + * literals. + * + * @group client_func + * @since 0.1.0 + */ def callBuiltin(functionName: String, args: Any*): Column = internalBuiltinFunction(false, functionName, args: _*) @@ -4102,11 +4428,12 @@ object functions { session.udf.register(None, udf) } - /** Calls a user-defined function (UDF) by name. - * - * @group udf_func - * @since 0.1.0 - */ + /** + * Calls a user-defined function (UDF) by name. + * + * @group udf_func + * @since 0.1.0 + */ def callUDF(udfName: String, cols: Any*): Column = { Utils.validateObjectName(udfName) internalBuiltinFunction(false, udfName, cols: _*) @@ -4132,76 +4459,83 @@ object functions { } */ - /** Registers a Scala closure of 0 argument as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 0 argument as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. 
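// The udf(...) overloads below are documented without examples, so here is a
// minimal sketch: register a one-argument Scala closure and use it in a projection.
// Assumes an active Snowpark `session`; the data and column name are illustrative.
import com.snowflake.snowpark.functions.{col, udf}

val plusOne = udf((x: Int) => x + 1)
val df = session.createDataFrame(Seq(1, 2, 3)).toDF("a")
df.select(plusOne(col("a")).as("a_plus_one")).show()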
+ * @group udf_func + * @since 0.1.0 + */ def udf[RT: TypeTag](func: Function0[RT]): UserDefinedFunction = udf("udf") { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 1 argument as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 1 argument as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[RT: TypeTag, A1: TypeTag](func: Function1[A1, RT]): UserDefinedFunction = udf("udf") { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 2 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 2 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag](func: Function2[A1, A2, RT]): UserDefinedFunction = udf("udf") { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 3 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 3 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( func: Function3[A1, A2, A3, RT]): UserDefinedFunction = udf("udf") { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 4 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 4 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = udf("udf") { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 5 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 5 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag]( func: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = udf("udf") { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 6 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 6 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4214,12 +4548,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 7 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 7 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. 
+ * @group udf_func + * @since 0.1.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4233,12 +4568,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 8 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 8 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4253,12 +4589,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 9 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 9 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4274,12 +4611,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 10 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.1.0 - */ + /** + * Registers a Scala closure of 10 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.1.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4297,12 +4635,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 11 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 11 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4321,12 +4660,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 12 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 12 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4346,12 +4686,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 13 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 13 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4371,12 +4712,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 14 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 14 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4398,12 +4740,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 15 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. 
- * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 15 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4426,12 +4769,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 16 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 16 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4455,12 +4799,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 17 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 17 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4502,12 +4847,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 18 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 18 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4551,12 +4897,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 19 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 19 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4602,12 +4949,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 20 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 20 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4655,12 +5003,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 21 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 21 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. + * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4710,12 +5059,13 @@ object functions { registerUdf(_toUdf(func)) } - /** Registers a Scala closure of 22 arguments as a Snowflake Java UDF and returns the UDF. - * @tparam RT - * return type of UDF. - * @group udf_func - * @since 0.12.0 - */ + /** + * Registers a Scala closure of 22 arguments as a Snowflake Java UDF and returns the UDF. + * @tparam RT + * return type of UDF. 
+ * @group udf_func + * @since 0.12.0 + */ def udf[ RT: TypeTag, A1: TypeTag, @@ -4767,18 +5117,19 @@ object functions { registerUdf(_toUdf(func)) } - /** Function object to invoke a Snowflake builtin. Use this to invoke any builtins not explicitly - * listed in this object. - * - * Example - * {{{ - * val repeat = functions.builtin("repeat") - * df.select(repeat(col("col_1"), 3)) - * }}} - * - * @group client_func - * @since 0.1.0 - */ + /** + * Function object to invoke a Snowflake builtin. Use this to invoke any builtins not explicitly + * listed in this object. + * + * Example + * {{{ + * val repeat = functions.builtin("repeat") + * df.select(repeat(col("col_1"), 3)) + * }}} + * + * @group client_func + * @since 0.1.0 + */ // scalastyle:off case class builtin(functionName: String) { // scalastyle:on diff --git a/src/main/scala/com/snowflake/snowpark/internal/ClosureCleaner.scala b/src/main/scala/com/snowflake/snowpark/internal/ClosureCleaner.scala index a4f14e60..1804bc5d 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/ClosureCleaner.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/ClosureCleaner.scala @@ -71,11 +71,12 @@ private[snowpark] object ClosureCleaner extends Logging { } } - /** Try to get a serialized Lambda from the closure. - * - * @param closure - * the closure to check. - */ + /** + * Try to get a serialized Lambda from the closure. + * + * @param closure + * the closure to check. + */ private def getSerializedLambda(closure: AnyRef): Option[SerializedLambda] = { val isClosureCandidate = closure.getClass.isSynthetic && @@ -141,18 +142,19 @@ private[snowpark] object ClosureCleaner extends Logging { clone } - /** Clean the given closure in place. The mechanism is to traverse the hierarchy of enclosing - * closures and null out any references along the way that are not actually used by the starting - * closure, but are nevertheless included in the compiled anonymous classes. - * - * Closures are cleaned transitively. Does not verify whether the closure is serializable after - * cleaning. - * - * @param func - * the closure to be cleaned - * @param closureCleanerMode - * closure cleaner mode, can be always, never, repl_only. - */ + /** + * Clean the given closure in place. The mechanism is to traverse the hierarchy of enclosing + * closures and null out any references along the way that are not actually used by the starting + * closure, but are nevertheless included in the compiled anonymous classes. + * + * Closures are cleaned transitively. Does not verify whether the closure is serializable after + * cleaning. + * + * @param func + * the closure to be cleaned + * @param closureCleanerMode + * closure cleaner mode, can be always, never, repl_only. + */ private[snowpark] def clean(func: AnyRef, closureCleanerMode: ClosureCleanerMode.Value): Unit = { if (func == null || closureCleanerMode == ClosureCleanerMode.never) { return @@ -268,20 +270,22 @@ private object IndylambdaScalaClosures extends Logging { writeReplace.invoke(closure).asInstanceOf[SerializedLambda] } - /** Check if the handle represents the LambdaMetafactory that indylambda Scala closures use for - * creating the lambda class and getting a closure instance. - */ + /** + * Check if the handle represents the LambdaMetafactory that indylambda Scala closures use for + * creating the lambda class and getting a closure instance. 
+ */ def isLambdaMetafactory(bsmHandle: Handle): Boolean = { bsmHandle.getOwner == LambdaMetafactoryClassName && bsmHandle.getName == LambdaMetafactoryMethodName && bsmHandle.getDesc == LambdaMetafactoryMethodDesc } - /** Check if the handle represents a target method that is: - * - a STATIC method that implements a Scala lambda body in the indylambda style - * - captures the enclosing `this`, i.e. the first argument is a reference to the same type as - * the owning class. Returns true if both criteria above are met. - */ + /** + * Check if the handle represents a target method that is: + * - a STATIC method that implements a Scala lambda body in the indylambda style + * - captures the enclosing `this`, i.e. the first argument is a reference to the same type as + * the owning class. Returns true if both criteria above are met. + */ def isLambdaBodyCapturingOuter(handle: Handle, ownerInternalName: String): Boolean = { handle.getTag == H_INVOKESTATIC && handle.getName.contains("$anonfun$") && @@ -289,12 +293,13 @@ private object IndylambdaScalaClosures extends Logging { handle.getDesc.startsWith(s"(L$ownerInternalName;") } - /** Check if the callee of a call site is a inner class constructor. - * - A constructor has to be invoked via INVOKESPECIAL - * - A constructor's internal name is "<init>" and the return type is "V" (void) - * - An inner class' first argument in the signature has to be a reference to the enclosing - * "this", aka `$outer` in Scala. - */ + /** + * Check if the callee of a call site is a inner class constructor. + * - A constructor has to be invoked via INVOKESPECIAL + * - A constructor's internal name is "<init>" and the return type is "V" (void) + * - An inner class' first argument in the signature has to be a reference to the enclosing + * "this", aka `$outer` in Scala. + */ def isInnerClassCtorCapturingOuter( op: Int, owner: String, diff --git a/src/main/scala/com/snowflake/snowpark/internal/ErrorMessage.scala b/src/main/scala/com/snowflake/snowpark/internal/ErrorMessage.scala index fdab262f..fd1abdf8 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/ErrorMessage.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/ErrorMessage.scala @@ -414,15 +414,16 @@ private[snowpark] object ErrorMessage { def MISC_FAILED_TO_SERIALIZE_QUERY_TAG(): SnowparkClientException = createException("0428") - /** Create Snowpark client Exception. - * - * @param errorCode - * error code for the message - * @param args - * parameters for the Exception - * @return - * Snowpark client Exception - */ + /** + * Create Snowpark client Exception. 
+ * + * @param errorCode + * error code for the message + * @param args + * parameters for the Exception + * @return + * Snowpark client Exception + */ private def createException(errorCode: String, args: Any*): SnowparkClientException = { val message = allMessages(errorCode) new SnowparkClientException( diff --git a/src/main/scala/com/snowflake/snowpark/internal/FatJarBuilder.scala b/src/main/scala/com/snowflake/snowpark/internal/FatJarBuilder.scala index a94573ff..725029f9 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/FatJarBuilder.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/FatJarBuilder.scala @@ -9,17 +9,18 @@ import scala.collection.mutable class FatJarBuilder { - /** @param classFiles - * class bytes that are copied to the fat jar - * @param classDirs - * directories from which files are copied to the fat jar - * @param jars - * Jars to be copied to the fat jar - * @param funcBytesMap - * func bytes map (entry format: fileName -> funcBytes) - * @param target - * The outputstream the jar contents should be written to - */ + /** + * @param classFiles + * class bytes that are copied to the fat jar + * @param classDirs + * directories from which files are copied to the fat jar + * @param jars + * Jars to be copied to the fat jar + * @param funcBytesMap + * func bytes map (entry format: fileName -> funcBytes) + * @param target + * The outputstream the jar contents should be written to + */ def createFatJar( classFiles: List[InMemoryClassObject], classDirs: List[File], @@ -44,14 +45,15 @@ class FatJarBuilder { } } - /** This method adds a class file to target jar. - * @param classObj - * Class file that is copied to target jar - * @param target - * OutputStream for target jar - * @param trackPaths - * This tracks all the directories already added to the jar - */ + /** + * This method adds a class file to target jar. 
+ * @param classObj + * Class file that is copied to target jar + * @param target + * OutputStream for target jar + * @param trackPaths + * This tracks all the directories already added to the jar + */ private def copyFileToTargetJar( classObj: InMemoryClassObject, target: JarOutputStream, @@ -72,14 +74,15 @@ class FatJarBuilder { target.closeEntry() } - /** This method recursively adds all directories and files in root dir to the target jar - * @param root - * Root directory, all directories are added to the jar relative to root's path - * @param target - * OutputStream for target jar - * @param trackPaths - * This tracks all the directories already added to the jar - */ + /** + * This method recursively adds all directories and files in root dir to the target jar + * @param root + * Root directory, all directories are added to the jar relative to root's path + * @param target + * OutputStream for target jar + * @param trackPaths + * This tracks all the directories already added to the jar + */ private def copyDirToTargetJar( root: File, target: JarOutputStream, @@ -106,14 +109,15 @@ class FatJarBuilder { }) } - /** This method adds all entries in source jar to the target jar - * @param sourceJar - * Source directory - * @param target - * OutputStream for target jar - * @param trackPaths - * This tracks all the directories already added to the jar - */ + /** + * This method adds all entries in source jar to the target jar + * @param sourceJar + * Source directory + * @param target + * OutputStream for target jar + * @param trackPaths + * This tracks all the directories already added to the jar + */ private def copyJarToTargetJar( sourceJar: JarFile, target: JarOutputStream, @@ -130,14 +134,15 @@ class FatJarBuilder { } } - /** This method adds a file entry into the target jar - * @param entryName - * Name of entry - * @param is - * Input stream to fetch file bytes, it closes the input stream once done - * @param target - * OutputStream for target jar - */ + /** + * This method adds a file entry into the target jar + * @param entryName + * Name of entry + * @param is + * Input stream to fetch file bytes, it closes the input stream once done + * @param target + * OutputStream for target jar + */ private def addFileEntryToJar( entryName: String, is: InputStream, diff --git a/src/main/scala/com/snowflake/snowpark/internal/Implicits.scala b/src/main/scala/com/snowflake/snowpark/internal/Implicits.scala index 676a4980..e7cd6797 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/Implicits.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/Implicits.scala @@ -6,11 +6,12 @@ import scala.reflect.runtime.universe.TypeTag abstract class Implicits { protected def _session: Session - /** Converts $"col name" into a [[Column]]. - */ + /** + * Converts $"col name" into a [[Column]]. 
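// Hedged usage sketch for the interpolator documented here: with the session's
// implicits imported, $"..." builds a Column from a (possibly quoted) column name.
// Assumes an existing Snowpark `session`; the data and column name are illustrative.
import session.implicits._

val df = Seq(1, 2, 3).toDF("a")
df.select($"a").show() // equivalent to df.select(col("a"))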
+ */ implicit class ColumnFromString(val sc: StringContext) { def $(args: Any*): Column = { - Column(sc.s(args: _*)) + Column(sc.s(args.toSeq: _*)) } } diff --git a/src/main/scala/com/snowflake/snowpark/internal/JavaCodeCompiler.scala b/src/main/scala/com/snowflake/snowpark/internal/JavaCodeCompiler.scala index 74880826..fceb0002 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/JavaCodeCompiler.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/JavaCodeCompiler.scala @@ -16,15 +16,16 @@ class JavaCodeCompiler { val releaseVersionOption = Seq("--release", "11") - /** Compiles strings of java code and returns class bytes - * - * @param classSources - * A map of className and its code in java - * @param classPath - * List of paths to include in classpath - * @return - * A list of compiled classes. - */ + /** + * Compiles strings of java code and returns class bytes + * + * @param classSources + * A map of className and its code in java + * @param classPath + * List of paths to include in classpath + * @return + * A list of compiled classes. + */ def compile( classSources: Map[String, String], classPath: List[String] = List.empty): List[InMemoryClassObject] = { @@ -63,14 +64,15 @@ class JavaCodeCompiler { } } -/** A class that represents a Java source file generated from a string. This is mostly boilerplate - * for JavaCompiler API - * - * @param className - * Name of the class - * @param code - * String representation of the class code - */ +/** + * A class that represents a Java source file generated from a string. This is mostly boilerplate + * for JavaCompiler API + * + * @param className + * Name of the class + * @param code + * String representation of the class code + */ class JavaSourceFromString(className: String, code: String) extends SimpleJavaFileObject( URI.create("string:///" + className.replace(".", "/") + Kind.SOURCE.extension), @@ -78,14 +80,15 @@ class JavaSourceFromString(className: String, code: String) override def getCharContent(ignoreEncodingErrors: Boolean): CharSequence = code } -/** A class that represents a compiled class stored in memory. This is mostly boilerplate for - * JavaCompiler API - * - * @param className - * Name of class - * @param kind - * of file like .class - */ +/** + * A class that represents a compiled class stored in memory. This is mostly boilerplate for + * JavaCompiler API + * + * @param className + * Name of class + * @param kind + * of file like .class + */ class InMemoryClassObject(className: String, kind: Kind) extends SimpleJavaFileObject( URI.create("mem:///" + className.replace('.', '/') + kind.extension), @@ -106,9 +109,10 @@ class InMemoryClassObject(className: String, kind: Kind) } } -/** A handler for managing output generated by the compiler task. This is mostly boilerplate for - * JavaCompiler API - */ +/** + * A handler for managing output generated by the compiler task. 
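// Minimal sketch for the in-memory Java compiler wrapper defined above: compile one
// Java class from a source string and inspect the compiled class objects. The class
// name and source text are assumptions for illustration.
import com.snowflake.snowpark.internal.JavaCodeCompiler

val compiler = new JavaCodeCompiler
val compiled = compiler.compile(
  Map("Adder" -> "public class Adder { public static int add(int a, int b) { return a + b; } }"))
compiled.foreach(c => println(c.getName)) // one InMemoryClassObject per class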
This is mostly boilerplate for + * JavaCompiler API + */ class InMemoryClassFilesManager(fileManager: JavaFileManager) extends ForwardingJavaFileManager[JavaFileManager](fileManager) { diff --git a/src/main/scala/com/snowflake/snowpark/internal/JavaUtils.scala b/src/main/scala/com/snowflake/snowpark/internal/JavaUtils.scala index 0246f438..6406f15c 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/JavaUtils.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/JavaUtils.scala @@ -317,7 +317,7 @@ object JavaUtils { def stringArrayToStringSeq(arr: Array[String]): Seq[String] = arr def objectListToAnySeq(input: java.util.List[java.util.List[Object]]): Seq[Seq[Any]] = - input.asScala.map(list => list.asScala) + input.asScala.map(list => list.asScala.toSeq).toSeq def registerUDF( udfRegistration: UDFRegistration, @@ -364,12 +364,18 @@ object JavaUtils { case (key, value) => key -> value }.toMap - def scalaMapToJavaWithVariantConversion(map: Map[_, _]): java.util.Map[Object, Object] = - map.map { + def scalaMapToJavaWithVariantConversion(map: Map[_, _]): java.util.Map[Object, Object] = { + val result = new java.util.HashMap[Object, Object]() + map.foreach { case (key, value: com.snowflake.snowpark.types.Variant) => - key.asInstanceOf[Object] -> InternalUtils.createVariant(value) - case (key, value) => key.asInstanceOf[Object] -> value.asInstanceOf[Object] - }.asJava + result.put( + key.asInstanceOf[Object], + InternalUtils.createVariant(value).asInstanceOf[Object]) + case (key, value) => + result.put(key.asInstanceOf[Object], value.asInstanceOf[Object]) + } + result + } def serialize(obj: Any): Array[Byte] = { val bos = new ByteArrayOutputStream() diff --git a/src/main/scala/com/snowflake/snowpark/internal/ScalaFunctions.scala b/src/main/scala/com/snowflake/snowpark/internal/ScalaFunctions.scala index 34d6876f..7a40506c 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/ScalaFunctions.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/ScalaFunctions.scala @@ -415,10 +415,11 @@ object ScalaFunctions { } */ - /** Creates a Scala closure of 0 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 0 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[RT: TypeTag](func: Function0[RT]): UserDefinedFunction = { Vector().foreach(isSupported(_)) isSupported(typeOf[RT]) @@ -427,10 +428,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 1 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 1 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[RT: TypeTag, A1: TypeTag](func: Function1[A1, RT]): UserDefinedFunction = { Vector(typeOf[A1]).foreach(isSupported(_)) isSupported(typeOf[RT]) @@ -439,10 +441,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 2 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 2 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. 
+ */ def _toUdf[RT: TypeTag, A1: TypeTag, A2: TypeTag]( func: Function2[A1, A2, RT]): UserDefinedFunction = { Vector(typeOf[A1], typeOf[A2]).foreach(isSupported(_)) @@ -452,10 +455,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 3 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 3 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( func: Function3[A1, A2, A3, RT]): UserDefinedFunction = { Vector(typeOf[A1], typeOf[A2], typeOf[A3]).foreach(isSupported(_)) @@ -466,10 +470,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 4 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 4 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { Vector(typeOf[A1], typeOf[A2], typeOf[A3], typeOf[A4]).foreach(isSupported(_)) @@ -480,10 +485,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 5 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 5 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag]( func: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { Vector(typeOf[A1], typeOf[A2], typeOf[A3], typeOf[A4], typeOf[A5]).foreach(isSupported(_)) @@ -494,10 +500,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 6 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 6 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -516,10 +523,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 7 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 7 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -539,10 +547,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 8 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 8 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -570,10 +579,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 9 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 9 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. 
+ */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -604,10 +614,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 10 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 10 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -641,10 +652,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 11 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 11 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -681,10 +693,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 12 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 12 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -723,10 +736,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 13 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 13 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -767,10 +781,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 14 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 14 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -815,10 +830,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 15 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 15 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -865,10 +881,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 16 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 16 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -917,10 +934,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 17 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 17 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -989,10 +1007,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 18 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 18 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. 
+ */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -1064,10 +1083,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 19 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 19 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -1142,10 +1162,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 20 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 20 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -1224,10 +1245,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 21 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 21 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -1309,10 +1331,11 @@ object ScalaFunctions { UserDefinedFunction(func, returnColumn, inputColumns) } - /** Creates a Scala closure of 22 arguments as user-defined function (UDF). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 22 arguments as user-defined function (UDF). + * @tparam RT + * return type of UDF. + */ def _toUdf[ RT: TypeTag, A1: TypeTag, @@ -1602,10 +1625,11 @@ object ScalaFunctions { * } */ - /** Creates a Scala closure of 0 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 0 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[RT: TypeTag](sp: Function1[Session, RT]): StoredProcedure = { Vector().foreach(isSupported) isSupported(typeOf[RT]) @@ -1614,10 +1638,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 1 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 1 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[RT: TypeTag, A1: TypeTag](sp: Function2[Session, A1, RT]): StoredProcedure = { Vector(typeOf[A1]).foreach(isSupported) isSupported(typeOf[RT]) @@ -1626,10 +1651,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 2 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 2 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[RT: TypeTag, A1: TypeTag, A2: TypeTag]( sp: Function3[Session, A1, A2, RT]): StoredProcedure = { Vector(typeOf[A1], typeOf[A2]).foreach(isSupported) @@ -1639,10 +1665,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 3 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 3 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. 
+ */ def _toSP[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( sp: Function4[Session, A1, A2, A3, RT]): StoredProcedure = { Vector(typeOf[A1], typeOf[A2], typeOf[A3]).foreach(isSupported) @@ -1653,10 +1680,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 4 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 4 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( sp: Function5[Session, A1, A2, A3, A4, RT]): StoredProcedure = { Vector(typeOf[A1], typeOf[A2], typeOf[A3], typeOf[A4]).foreach(isSupported) @@ -1667,10 +1695,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 5 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 5 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag]( sp: Function6[Session, A1, A2, A3, A4, A5, RT]): StoredProcedure = { Vector(typeOf[A1], typeOf[A2], typeOf[A3], typeOf[A4], typeOf[A5]).foreach(isSupported) @@ -1681,10 +1710,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 6 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 6 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -1703,10 +1733,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 7 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 7 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -1726,10 +1757,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 8 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 8 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -1757,10 +1789,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 9 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 9 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -1792,10 +1825,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 10 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 10 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -1829,10 +1863,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 11 arguments as Stored Procedure function (SProc). 
- * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 11 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -1869,10 +1904,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 12 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 12 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -1911,10 +1947,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 13 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 13 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -1956,10 +1993,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 14 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 14 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -2004,10 +2042,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 15 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 15 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -2054,10 +2093,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 16 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 16 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -2123,10 +2163,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 17 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 17 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -2196,10 +2237,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 18 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 18 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -2272,10 +2314,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 19 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 19 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -2351,10 +2394,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 20 arguments as Stored Procedure function (SProc). 
- * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 20 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, @@ -2434,10 +2478,11 @@ object ScalaFunctions { StoredProcedure(sp, returnColumn, inputColumns) } - /** Creates a Scala closure of 21 arguments as Stored Procedure function (SProc). - * @tparam RT - * return type of UDF. - */ + /** + * Creates a Scala closure of 21 arguments as Stored Procedure function (SProc). + * @tparam RT + * return type of UDF. + */ def _toSP[ RT: TypeTag, A1: TypeTag, diff --git a/src/main/scala/com/snowflake/snowpark/internal/SchemaUtils.scala b/src/main/scala/com/snowflake/snowpark/internal/SchemaUtils.scala index 60e92ef2..13c56bdf 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/SchemaUtils.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/SchemaUtils.scala @@ -6,8 +6,9 @@ import com.snowflake.snowpark.types._ import scala.util.Random -/** All functions in this object are temporary solutions. - */ +/** + * All functions in this object are temporary solutions. + */ private[snowpark] object SchemaUtils { val CommandAttributes: Seq[Attribute] = Seq(Attribute("\"status\"", StringType)) diff --git a/src/main/scala/com/snowflake/snowpark/internal/ServerConnection.scala b/src/main/scala/com/snowflake/snowpark/internal/ServerConnection.scala index efd89676..7ac60a2e 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/ServerConnection.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/ServerConnection.scala @@ -315,7 +315,7 @@ private[snowpark] class ServerConnection( private[snowpark] def resultSetToRows(statement: Statement): Array[Row] = withValidConnection { val iterator = resultSetToIterator(statement)._1 - val buff = mutable.ArrayBuilder.make[Row]() + val buff = mutable.ArrayBuilder.make[Row] while (iterator.hasNext) { buff += iterator.next() } @@ -399,8 +399,9 @@ private[snowpark] class ServerConnection( result } - /** Close the underlying data source. - */ + /** + * Close the underlying data source. + */ override def close(): Unit = { _hasNext = false statement.close() diff --git a/src/main/scala/com/snowflake/snowpark/internal/UDXRegistrationHandler.scala b/src/main/scala/com/snowflake/snowpark/internal/UDXRegistrationHandler.scala index d460e12d..0e8da556 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/UDXRegistrationHandler.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/UDXRegistrationHandler.scala @@ -390,7 +390,7 @@ class UDXRegistrationHandler(session: Session) extends Logging { if (actionID <= session.getLastCanceledID) { throw ErrorMessage.MISC_QUERY_IS_CANCELLED() } - allUrls + allUrls.toSeq } (allImports, targetJarStageLocation) } @@ -1072,23 +1072,24 @@ class UDXRegistrationHandler(session: Session) extends Logging { byteArrayOutputStream.toByteArray } - /** This method uses the Piped{Input/Output}Stream classes to create an in-memory jar file and - * write to a snowflake stage in parallel in two threads. This design is not the most-efficient - * since the implementation of PipedInputStream puts the thread to sleep for 1 sec if it is - * waiting to read/write data. But this is still faster than writing stream to a temp file. 
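The doc comment above describes the piped, two-thread jar upload. The core JDK pattern it relies on looks like this (a standalone sketch, not the Snowpark implementation; the stage upload side is replaced here by a simple byte counter):

import java.io.{PipedInputStream, PipedOutputStream}
import java.util.jar.{JarEntry, JarOutputStream}

object PipedJarSketch extends App {
  val out = new PipedOutputStream()
  val in = new PipedInputStream(out)

  // Writer thread: builds a tiny jar and streams it into the pipe as it is produced.
  val writer = new Thread(() => {
    val jar = new JarOutputStream(out)
    jar.putNextEntry(new JarEntry("hello.txt"))
    jar.write("hello".getBytes("UTF-8"))
    jar.closeEntry()
    jar.close() // also closes the underlying PipedOutputStream
  })
  writer.start()

  // Reader side (stands in for the stage upload): consumes the pipe concurrently.
  val buffer = new Array[Byte](4096)
  var total = 0
  var read = in.read(buffer)
  while (read != -1) {
    total += read
    read = in.read(buffer)
  }
  writer.join()
  println(s"streamed $total bytes without touching a temp file")
}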
- * - * @param classDirs - * class directories that are copied to the jar - * @param stageName - * Name of stage - * @param destPrefix - * Destination prefix - * @param jarFileName - * Name of the jar file - * @param funcBytesMap - * func bytes map (entry format: fileName -> funcBytes) - * @since 0.1.0 - */ + /** + * This method uses the Piped{Input/Output}Stream classes to create an in-memory jar file and + * write to a snowflake stage in parallel in two threads. This design is not the most-efficient + * since the implementation of PipedInputStream puts the thread to sleep for 1 sec if it is + * waiting to read/write data. But this is still faster than writing stream to a temp file. + * + * @param classDirs + * class directories that are copied to the jar + * @param stageName + * Name of stage + * @param destPrefix + * Destination prefix + * @param jarFileName + * Name of the jar file + * @param funcBytesMap + * func bytes map (entry format: fileName -> funcBytes) + * @since 0.1.0 + */ private[snowpark] def createAndUploadJarToStage( classDirs: List[File], stageName: String, diff --git a/src/main/scala/com/snowflake/snowpark/internal/Utils.scala b/src/main/scala/com/snowflake/snowpark/internal/Utils.scala index 5cc54b42..e973ff03 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/Utils.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/Utils.scala @@ -219,12 +219,13 @@ object Utils extends Logging { res } - /** Parses a stage file location into stageName, path and fileName - * @param stageLocation - * a string that represent a file on a stage - * @return - * stageName, path and fileName - */ + /** + * Parses a stage file location into stageName, path and fileName + * @param stageLocation + * a string that represent a file on a stage + * @return + * stageName, path and fileName + */ private[snowpark] def parseStageFileLocation(stageLocation: String): (String, String, String) = { val normalized = normalizeStageLocation(stageLocation) if (stageLocation.endsWith("/")) { @@ -304,8 +305,9 @@ object Utils extends Logging { override def toString: String = this.getClass.getName.split("\\$").last.stripSuffix("$") } - /** Define types of temporary objects that will be created by Snowpark. - */ + /** + * Define types of temporary objects that will be created by Snowpark. 
+ */ private[snowpark] object TempObjectType { case object Table extends TempObjectType case object Stage extends TempObjectType diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/Expression.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/Expression.scala index d947e979..012f7027 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/Expression.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/Expression.scala @@ -119,12 +119,12 @@ private[snowpark] case class FlattenFunction( FlattenFunction(analyzedChildren.head, path, outer, recursive, mode) } -private[snowpark] case class TableFunction(funcName: String, args: Seq[Expression]) +private[snowpark] case class TableFunctionEx(funcName: String, args: Seq[Expression]) extends TableFunctionExpression { override def children: Seq[Expression] = args override protected def createAnalyzedExpression(analyzedChildren: Seq[Expression]): Expression = - TableFunction(funcName, analyzedChildren) + TableFunctionEx(funcName, analyzedChildren) } private[snowpark] case class NamedArgumentsTableFunction( diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlan.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlan.scala index 8760369b..18dd98d2 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlan.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlan.scala @@ -74,9 +74,9 @@ class SnowflakePlan( } val supportAsyncMode = subqueryPlans.forall(_.supportAsyncMode) SnowflakePlan( - preQueries :+ queries.last, + preQueries.toSeq :+ queries.last, newSchemaQuery, - newPostActions, + newPostActions.toSeq, session, sourcePlan, supportAsyncMode) @@ -750,13 +750,14 @@ class SnowflakePlanBuilder(session: Session) extends Logging { } } -/** Assign a place holder for all queries. replace this place holder by real uuid if necessary. for - * example, a query list - * 1. show tables , "query_id_place_holder_XXXX" 2. select * from - * table(result_scan('query_id_place_holder_XXXX')) , "query_id_place_holder_YYYY" when - * executing 1, execute query 1, and get read uuid, such as 1234567 2, replace - * uuid_place_holder_XXXXX by 1234567 in query 2, and execute it - */ +/** + * Assign a place holder for all queries. replace this place holder by real uuid if necessary. for + * example, a query list + * 1. show tables , "query_id_place_holder_XXXX" 2. select * from + * table(result_scan('query_id_place_holder_XXXX')) , "query_id_place_holder_YYYY" when + * executing 1, execute query 1, and get read uuid, such as 1234567 2, replace + * uuid_place_holder_XXXXX by 1234567 in query 2, and execute it + */ private[snowpark] class Query( val sql: String, val queryIdPlaceHolder: String, diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlanNode.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlanNode.scala index 54212a90..4f258698 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlanNode.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlanNode.scala @@ -162,16 +162,17 @@ private[snowpark] trait UnaryNode extends LogicalPlan { override val internalRenamedColumns: Map[String, String] = child.internalRenamedColumns } -/** Plan Node to sample some rows from a DataFrame. Either a fraction or a row number needs to be - * specified. 
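The sample node documented here is the plan node behind the DataFrame sampling API; each call supplies either a fraction or a fixed row count, never both. A short usage sketch (the DataFrame.sample overloads and the table name are assumed for illustration):

// Illustrative only; assumes an existing Session `session` and a table "my_table".
val df = session.table("my_table")
val tenPercent = df.sample(0.10)   // probabilityFraction between 0.0 and 1.0
val firstHundred = df.sample(100L) // fixed number of rows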
- * - * @param probabilityFraction - * the sampling fraction(0.0 - 1.0) - * @param rowCount - * the sampling row count - * @param child - * the LogicalPlan - */ +/** + * Plan Node to sample some rows from a DataFrame. Either a fraction or a row number needs to be + * specified. + * + * @param probabilityFraction + * the sampling fraction(0.0 - 1.0) + * @param rowCount + * the sampling row count + * @param child + * the LogicalPlan + */ private[snowpark] case class SnowflakeSampleNode( probabilityFraction: Option[Double], rowCount: Option[Long], diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/SqlGenerator.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/SqlGenerator.scala index 57592ad7..701a21e7 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/SqlGenerator.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/SqlGenerator.scala @@ -259,7 +259,7 @@ private object SqlGenerator extends Logging { Option(expr match { case FlattenFunction(input, path, outer, recursive, mode) => flattenExpression(expressionToSql(input), path, outer, recursive, mode) - case TableFunction(functionName, args) => + case TableFunctionEx(functionName, args) => functionExpression(functionName, args.map(expressionToSql), isDistinct = false) case NamedArgumentsTableFunction(funcName, args) => namedArgumentsFunction( diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/package.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/package.scala index 6087e0cc..10a4e372 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/package.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/package.scala @@ -699,9 +699,10 @@ package object analyzer { _LeftParenthesis + aggregate + _For + pivotColumn + _In + pivotValues.mkString(_LeftParenthesis, _Comma, _RightParenthesis) + _RightParenthesis - /** copy into from file_format = (type = ) - * - */ + /** + * copy into from file_format = (type = ) + * + */ private[snowpark] def copyIntoTable( tableName: String, filePath: String, @@ -780,11 +781,12 @@ package object analyzer { } } - /** Use this function to normalize all user input and client generated names - * - * Rule: Name with quote: Do nothing Without quote: Starts with _A-Za-z or and only contains - * _A-Za-z0-9$, upper case all letters and quote otherwise, quote without upper casing - */ + /** + * Use this function to normalize all user input and client generated names + * + * Rule: Name with quote: Do nothing Without quote: Starts with _A-Za-z or and only contains + * _A-Za-z0-9$, upper case all letters and quote otherwise, quote without upper casing + */ def quoteName(name: String): String = { val alreadyQuoted = "^(\".+\")$".r val unquotedCaseInsenstive = "^([_A-Za-z]+[_A-Za-z0-9$]*)$".r @@ -808,9 +810,10 @@ package object analyzer { } } - /** Quotes name without upper casing if not quoted NOTE: All characters in name are DATA so "c1" - * will be converted to """c1""". - */ + /** + * Quotes name without upper casing if not quoted NOTE: All characters in name are DATA so "c1" + * will be converted to """c1""". 
+ */ def quoteNameWithoutUpperCasing(name: String): String = _DoubleQuote + escapeQuotes(name) + _DoubleQuote diff --git a/src/main/scala/com/snowflake/snowpark/tableFunctions.scala b/src/main/scala/com/snowflake/snowpark/tableFunctions.scala index dbc1c278..c8a5fd20 100644 --- a/src/main/scala/com/snowflake/snowpark/tableFunctions.scala +++ b/src/main/scala/com/snowflake/snowpark/tableFunctions.scala @@ -3,180 +3,186 @@ package com.snowflake.snowpark import com.snowflake.snowpark.functions.lit // scalastyle:off -/** Provides utility functions that generate table function expressions that can be passed to - * DataFrame join method and Session tableFunction method. - * - * This object also provides functions that correspond to Snowflake - * [[https://docs.snowflake.com/en/sql-reference/functions-table.html system-defined table functions]]. - * - * The following examples demonstrate the use of some of these functions: - * {{{ - * import com.snowflake.snowpark.functions.parse_json - * - * // Creates DataFrame from Session.tableFunction - * session.tableFunction(tableFunctions.flatten, Map("input" -> parse_json(lit("[1,2]")))) - * session.tableFunction(tableFunctions.split_to_table, "split by space", " ") - * - * // DataFrame joins table function - * df.join(tableFunctions.flatten, Map("input" -> parse_json(df("a")))) - * df.join(tableFunctions.split_to_table, df("a"), ",") - * - * // Invokes any table function including user-defined table function - * df.join(tableFunctions.tableFunction("flatten"), Map("input" -> parse_json(df("a")))) - * session.tableFunction(tableFunctions.tableFunction("split_to_table"), "split by space", " ") - * }}} - * - * @since 0.4.0 - */ +/** + * Provides utility functions that generate table function expressions that can be passed to + * DataFrame join method and Session tableFunction method. + * + * This object also provides functions that correspond to Snowflake + * [[https://docs.snowflake.com/en/sql-reference/functions-table.html system-defined table functions]]. + * + * The following examples demonstrate the use of some of these functions: + * {{{ + * import com.snowflake.snowpark.functions.parse_json + * + * // Creates DataFrame from Session.tableFunction + * session.tableFunction(tableFunctions.flatten, Map("input" -> parse_json(lit("[1,2]")))) + * session.tableFunction(tableFunctions.split_to_table, "split by space", " ") + * + * // DataFrame joins table function + * df.join(tableFunctions.flatten, Map("input" -> parse_json(df("a")))) + * df.join(tableFunctions.split_to_table, df("a"), ",") + * + * // Invokes any table function including user-defined table function + * df.join(tableFunctions.tableFunction("flatten"), Map("input" -> parse_json(df("a")))) + * session.tableFunction(tableFunctions.tableFunction("split_to_table"), "split by space", " ") + * }}} + * + * @since 0.4.0 + */ object tableFunctions { // scalastyle:on - /** This table function splits a string (based on a specified delimiter) and flattens the results - * into rows. - * - * Argument List: - * - * First argument (no name): Required. Text to be split. - * - * Second argument (no name): Required. Text to split string by. 
- * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join(tableFunctions.split_to_table, df("a"), lit(",")) - * session.tableFunction( - * tableFunctions.split_to_table, - * lit("split by space"), - * lit(" ") - * ) - * }}} - * - * @since 0.4.0 - */ + /** + * This table function splits a string (based on a specified delimiter) and flattens the results + * into rows. + * + * Argument List: + * + * First argument (no name): Required. Text to be split. + * + * Second argument (no name): Required. Text to split string by. + * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join(tableFunctions.split_to_table, df("a"), lit(",")) + * session.tableFunction( + * tableFunctions.split_to_table, + * lit("split by space"), + * lit(" ") + * ) + * }}} + * + * @since 0.4.0 + */ lazy val split_to_table: TableFunction = TableFunction("split_to_table") - /** This table function splits a string (based on a specified delimiter) and flattens the results - * into rows. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join(tableFunctions.split_to_table(df("a"), lit(","))) - * }}} - * - * @since 1.10.0 - * @param str - * Text to be split. - * @param delimiter - * Text to split string by. - * @return - * The result Column reference - */ + /** + * This table function splits a string (based on a specified delimiter) and flattens the results + * into rows. + * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join(tableFunctions.split_to_table(df("a"), lit(","))) + * }}} + * + * @since 1.10.0 + * @param str + * Text to be split. + * @param delimiter + * Text to split string by. + * @return + * The result Column reference + */ def split_to_table(str: Column, delimiter: String): Column = split_to_table.apply(str, lit(delimiter)) - /** Flattens (explodes) compound values into multiple rows. - * - * Argument List: - * - * input: Required. The expression that will be unseated into rows. The expression must be of - * data type VariantType, MapType or ArrayType. - * - * path: Optional. The path to the element within a VariantType data structure which needs to be - * flattened. Can be a zero-length string (i.e. empty path) if the outermost element is to be - * flattened. Default: Zero-length string (i.e. empty path) - * - * outer: Optional boolean value. If FALSE, any input rows that cannot be expanded, either - * because they cannot be accessed in the path or because they have zero fields or entries, are - * completely omitted from the output. If TRUE, exactly one row is generated for zero-row - * expansions (with NULL in the KEY, INDEX, and VALUE columns). Default: FALSE - * - * recursive: Optional boolean value If FALSE, only the element referenced by PATH is expanded. - * If TRUE, the expansion is performed for all sub-elements recursively. Default: FALSE - * - * mode: Optional String ("object", "array", or "both") Specifies whether only objects, arrays, - * or both should be flattened. 
Default: both - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join( - * tableFunctions.flatten, - * Map("input" -> parse_json(df("a"), "outer" -> lit(true))) - * ) - * - * session.tableFunction( - * tableFunctions.flatten, - * Map("input" -> parse_json(lit("[1,2]"), "mode" -> lit("array"))) - * ) - * }}} - * - * @since 0.4.0 - */ + /** + * Flattens (explodes) compound values into multiple rows. + * + * Argument List: + * + * input: Required. The expression that will be unseated into rows. The expression must be of data + * type VariantType, MapType or ArrayType. + * + * path: Optional. The path to the element within a VariantType data structure which needs to be + * flattened. Can be a zero-length string (i.e. empty path) if the outermost element is to be + * flattened. Default: Zero-length string (i.e. empty path) + * + * outer: Optional boolean value. If FALSE, any input rows that cannot be expanded, either because + * they cannot be accessed in the path or because they have zero fields or entries, are completely + * omitted from the output. If TRUE, exactly one row is generated for zero-row expansions (with + * NULL in the KEY, INDEX, and VALUE columns). Default: FALSE + * + * recursive: Optional boolean value If FALSE, only the element referenced by PATH is expanded. If + * TRUE, the expansion is performed for all sub-elements recursively. Default: FALSE + * + * mode: Optional String ("object", "array", or "both") Specifies whether only objects, arrays, or + * both should be flattened. Default: both + * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join( + * tableFunctions.flatten, + * Map("input" -> parse_json(df("a"), "outer" -> lit(true))) + * ) + * + * session.tableFunction( + * tableFunctions.flatten, + * Map("input" -> parse_json(lit("[1,2]"), "mode" -> lit("array"))) + * ) + * }}} + * + * @since 0.4.0 + */ lazy val flatten: TableFunction = TableFunction("flatten") - /** Flattens (explodes) compound values into multiple rows. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join( - * tableFunctions.flatten(parse_json(df("a"))) - * ) - * - * }}} - * - * @since 1.10.0 - * @param input - * The expression that will be unseated into rows. The expression must be of data type - * VariantType, MapType or ArrayType. - * @return - * The result Column reference - */ + /** + * Flattens (explodes) compound values into multiple rows. + * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join( + * tableFunctions.flatten(parse_json(df("a"))) + * ) + * + * }}} + * + * @since 1.10.0 + * @param input + * The expression that will be unseated into rows. The expression must be of data type + * VariantType, MapType or ArrayType. + * @return + * The result Column reference + */ def flatten(input: Column): Column = flatten.apply(input) - /** Flattens (explodes) compound values into multiple rows. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * import com.snowflake.snowpark.tableFunctions._ - * - * df.join( - * tableFunctions.flatten(parse_json(df("a")), "path", true, true, "both") - * ) - * - * }}} - * - * @since 1.10.0 - * @param input - * The expression that will be unseated into rows. 
The expression must be of data type - * VariantType, MapType or ArrayType. - * @param path - * The path to the element within a VariantType data structure which needs to be flattened. Can - * be a zero-length string (i.e. empty path) if the outermost element is to be flattened. - * @param outer - * Optional boolean value. If FALSE, any input rows that cannot be expanded, either because - * they cannot be accessed in the path or because they have zero fields or entries, are - * completely omitted from the output. If TRUE, exactly one row is generated for zero-row - * expansions (with NULL in the KEY, INDEX, and VALUE columns). - * @param recursive - * If FALSE, only the element referenced by PATH is expanded. If TRUE, the expansion is - * performed for all sub-elements recursively. - * @param mode - * ("object", "array", or "both") Specifies whether only objects, arrays, or both should be - * flattened. - * @return - * The result Column reference - */ + /** + * Flattens (explodes) compound values into multiple rows. + * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * import com.snowflake.snowpark.tableFunctions._ + * + * df.join( + * tableFunctions.flatten(parse_json(df("a")), "path", true, true, "both") + * ) + * + * }}} + * + * @since 1.10.0 + * @param input + * The expression that will be unseated into rows. The expression must be of data type + * VariantType, MapType or ArrayType. + * @param path + * The path to the element within a VariantType data structure which needs to be flattened. Can + * be a zero-length string (i.e. empty path) if the outermost element is to be flattened. + * @param outer + * Optional boolean value. If FALSE, any input rows that cannot be expanded, either because they + * cannot be accessed in the path or because they have zero fields or entries, are completely + * omitted from the output. If TRUE, exactly one row is generated for zero-row expansions (with + * NULL in the KEY, INDEX, and VALUE columns). + * @param recursive + * If FALSE, only the element referenced by PATH is expanded. If TRUE, the expansion is + * performed for all sub-elements recursively. + * @param mode + * ("object", "array", or "both") Specifies whether only objects, arrays, or both should be + * flattened. + * @return + * The result Column reference + */ def flatten( input: Column, path: String, @@ -191,28 +197,29 @@ object tableFunctions { "recursive" -> lit(recursive), "mode" -> lit(mode))) - /** Flattens a given array or map type column into individual rows. The output column(s) in case - * of array input column is `VALUE`, and are `KEY` and `VALUE` in case of amp input column. - * - * Example - * {{{ - * import com.snowflake.snowpark.functions._ - * - * val df = Seq("""{"a":1, "b": 2}""").toDF("a") - * val df1 = df.select( - * parse_json(df("a")) - * .cast(types.MapType(types.StringType, types.IntegerType)) - * .as("a")) - * df1.select(lit(1), tableFunctions.explode(df1("a")), df1("a")("a")).show() - * }}} - * - * @since 1.10.0 - * @param input - * The expression that will be unseated into rows. The expression must be either MapType or - * ArrayType data. - * @return - * The result Column reference - */ + /** + * Flattens a given array or map type column into individual rows. The output column(s) in case of + * array input column is `VALUE`, and are `KEY` and `VALUE` in case of amp input column. 
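The Scaladoc example below covers the map case; for the array case mentioned in the sentence above, `explode` yields a single `VALUE` column with one row per element. A hedged sketch (assumes `session.implicits` are in scope and that casting a variant to `ArrayType` behaves like the `MapType` cast in the existing example):

import com.snowflake.snowpark.functions._
import com.snowflake.snowpark.{tableFunctions, types}

val df = Seq("[1, 2, 3]").toDF("a")
val arrDf = df.select(
  parse_json(df("a"))
    .cast(types.ArrayType(types.IntegerType))
    .as("a"))
// One output row per array element, in a column named VALUE.
arrDf.select(tableFunctions.explode(arrDf("a"))).show()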
+ * + * Example + * {{{ + * import com.snowflake.snowpark.functions._ + * + * val df = Seq("""{"a":1, "b": 2}""").toDF("a") + * val df1 = df.select( + * parse_json(df("a")) + * .cast(types.MapType(types.StringType, types.IntegerType)) + * .as("a")) + * df1.select(lit(1), tableFunctions.explode(df1("a")), df1("a")("a")).show() + * }}} + * + * @since 1.10.0 + * @param input + * The expression that will be unseated into rows. The expression must be either MapType or + * ArrayType data. + * @return + * The result Column reference + */ def explode(input: Column): Column = TableFunction("explode").apply(input) } diff --git a/src/main/scala/com/snowflake/snowpark/types/ArrayType.scala b/src/main/scala/com/snowflake/snowpark/types/ArrayType.scala index d7b7fc58..2561c1bd 100644 --- a/src/main/scala/com/snowflake/snowpark/types/ArrayType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/ArrayType.scala @@ -1,8 +1,9 @@ package com.snowflake.snowpark.types -/** Array data type. This maps to ARRAY data type in Snowflake. - * @since 0.1.0 - */ +/** + * Array data type. This maps to ARRAY data type in Snowflake. + * @since 0.1.0 + */ case class ArrayType(elementType: DataType) extends DataType { override def toString: String = { s"ArrayType[${elementType.toString}]" diff --git a/src/main/scala/com/snowflake/snowpark/types/BinaryType.scala b/src/main/scala/com/snowflake/snowpark/types/BinaryType.scala index d1799fa5..c2298d2c 100644 --- a/src/main/scala/com/snowflake/snowpark/types/BinaryType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/BinaryType.scala @@ -1,6 +1,7 @@ package com.snowflake.snowpark.types -/** Binary data type. Mapped to BINARY Snowflake data type. - * @since 0.1.0 - */ +/** + * Binary data type. Mapped to BINARY Snowflake data type. + * @since 0.1.0 + */ object BinaryType extends AtomicType diff --git a/src/main/scala/com/snowflake/snowpark/types/BooleanType.scala b/src/main/scala/com/snowflake/snowpark/types/BooleanType.scala index af2cc2f1..9e269348 100644 --- a/src/main/scala/com/snowflake/snowpark/types/BooleanType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/BooleanType.scala @@ -1,6 +1,7 @@ package com.snowflake.snowpark.types -/** Boolean data type. Mapped to BOOLEAN Snowflake data type. - * @since 0.1.0 - */ +/** + * Boolean data type. Mapped to BOOLEAN Snowflake data type. + * @since 0.1.0 + */ object BooleanType extends AtomicType diff --git a/src/main/scala/com/snowflake/snowpark/types/DataType.scala b/src/main/scala/com/snowflake/snowpark/types/DataType.scala index 2642fcb4..35b1f28e 100644 --- a/src/main/scala/com/snowflake/snowpark/types/DataType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/DataType.scala @@ -1,19 +1,22 @@ package com.snowflake.snowpark.types -/** The trait of Snowpark data types - * @since 0.1.0 - */ +/** + * The trait of Snowpark data types + * @since 0.1.0 + */ abstract class DataType { - /** Returns a data type name. - * @since 0.1.0 - */ + /** + * Returns a data type name. + * @since 0.1.0 + */ def typeName: String = this.getClass.getSimpleName.stripSuffix("$").stripSuffix("Type") - /** Returns a data type name. Alias of [[typeName]] - * @since 0.1.0 - */ + /** + * Returns a data type name. 
Alias of [[typeName]] + * @since 0.1.0 + */ override def toString: String = typeName private[snowpark] def schemaString: String = toString diff --git a/src/main/scala/com/snowflake/snowpark/types/DateType.scala b/src/main/scala/com/snowflake/snowpark/types/DateType.scala index 96d208d0..f1129f0f 100644 --- a/src/main/scala/com/snowflake/snowpark/types/DateType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/DateType.scala @@ -1,6 +1,7 @@ package com.snowflake.snowpark.types -/** Date data type. Mapped to DATE Snowflake data type. - * @since 0.1.0 - */ +/** + * Date data type. Mapped to DATE Snowflake data type. + * @since 0.1.0 + */ object DateType extends AtomicType diff --git a/src/main/scala/com/snowflake/snowpark/types/Geography.scala b/src/main/scala/com/snowflake/snowpark/types/Geography.scala index 9235b607..a1e30b85 100644 --- a/src/main/scala/com/snowflake/snowpark/types/Geography.scala +++ b/src/main/scala/com/snowflake/snowpark/types/Geography.scala @@ -3,34 +3,38 @@ package com.snowflake.snowpark.types import java.io.IOException import java.io.UncheckedIOException -/** Companion object of Geography class. - */ +/** + * Companion object of Geography class. + */ object Geography { - /** Creates a Geography class from a GeoJSON string - * - * @param g - * GeoJSON string - * @return - * a Geography class - * @since 0.2.0 - */ + /** + * Creates a Geography class from a GeoJSON string + * + * @param g + * GeoJSON string + * @return + * a Geography class + * @since 0.2.0 + */ def fromGeoJSON(g: String): Geography = new Geography(g) } -/** Scala representation of Snowflake Geography data. Only support GeoJSON format. - * - * @since 0.2.0 - */ +/** + * Scala representation of Snowflake Geography data. Only support GeoJSON format. + * + * @since 0.2.0 + */ class Geography private (private val stringData: String) { if (stringData == null) throwNullInputError() - /** Returns whether the Geography object equals to the input object. - * - * @return - * GeoJSON string - * @since 0.2.0 - */ + /** + * Returns whether the Geography object equals to the input object. + * + * @return + * GeoJSON string + * @since 0.2.0 + */ override def equals(obj: Any): Boolean = { obj match { case g: Geography => stringData.equals(g.stringData) @@ -38,39 +42,43 @@ class Geography private (private val stringData: String) { } } - /** Returns the hashCode of the stored GeoJSON string. - * - * @return - * hash code - * @since 0.2.0 - */ + /** + * Returns the hashCode of the stored GeoJSON string. + * + * @return + * hash code + * @since 0.2.0 + */ override def hashCode(): Int = stringData.hashCode private def throwNullInputError() = throw new UncheckedIOException( new IOException("Cannot create geography object from null input")) - /** Returns the underling string data for GeoJSON. - * - * @return - * GeoJSON string - * @since 0.2.0 - */ + /** + * Returns the underling string data for GeoJSON. + * + * @return + * GeoJSON string + * @since 0.2.0 + */ def asGeoJSON(): String = stringData - /** Returns the underling string data for GeoJSON. - * - * @return - * GeoJSON string - * @since 0.2.0 - */ + /** + * Returns the underling string data for GeoJSON. + * + * @return + * GeoJSON string + * @since 0.2.0 + */ def getString: String = stringData - /** Returns the underling string data for GeoJSON. - * - * @return - * GeoJSON string - * @since 0.2.0 - */ + /** + * Returns the underling string data for GeoJSON. 
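Taken together, the accessors above mean a Geography value is an immutable wrapper around its GeoJSON text; equality, hashing, and all getters delegate to that string. A small sketch using only the methods shown in this file:

import com.snowflake.snowpark.types.Geography

object GeographySketch extends App {
  val point = Geography.fromGeoJSON("""{"type":"Point","coordinates":[-122.35,37.55]}""")
  // asGeoJSON(), getString and toString all return the same underlying text.
  assert(point.asGeoJSON() == point.getString)
  assert(point == Geography.fromGeoJSON(point.toString))
}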
+ * + * @return + * GeoJSON string + * @since 0.2.0 + */ override def toString: String = stringData } diff --git a/src/main/scala/com/snowflake/snowpark/types/GeographyType.scala b/src/main/scala/com/snowflake/snowpark/types/GeographyType.scala index ac687c9a..e1130930 100644 --- a/src/main/scala/com/snowflake/snowpark/types/GeographyType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/GeographyType.scala @@ -1,8 +1,9 @@ package com.snowflake.snowpark.types -/** Geography data type. This maps to GEOGRAPHY data type in Snowflake. - * @since 0.2.0 - */ +/** + * Geography data type. This maps to GEOGRAPHY data type in Snowflake. + * @since 0.2.0 + */ object GeographyType extends DataType { override def toString: String = { s"GeographyType" diff --git a/src/main/scala/com/snowflake/snowpark/types/Geometry.scala b/src/main/scala/com/snowflake/snowpark/types/Geometry.scala index de7d46e4..274be6e3 100644 --- a/src/main/scala/com/snowflake/snowpark/types/Geometry.scala +++ b/src/main/scala/com/snowflake/snowpark/types/Geometry.scala @@ -2,56 +2,62 @@ package com.snowflake.snowpark.types import java.io.{IOException, UncheckedIOException} -/** Companion object of Geometry class. - * @since 1.12.0 - */ +/** + * Companion object of Geometry class. + * @since 1.12.0 + */ object Geometry { - /** Creates a Geometry class from a GeoJSON string - * - * @param g - * GeoJSON string - * @return - * a Geometry class - * @since 1.12.0 - */ + /** + * Creates a Geometry class from a GeoJSON string + * + * @param g + * GeoJSON string + * @return + * a Geometry class + * @since 1.12.0 + */ def fromGeoJSON(g: String): Geometry = new Geometry(g) } -/** Scala representation of Snowflake Geometry data. Only support GeoJSON format. - * - * @since 1.12.0 - */ +/** + * Scala representation of Snowflake Geometry data. Only support GeoJSON format. + * + * @since 1.12.0 + */ class Geometry private (private val stringData: String) { if (stringData == null) { throw new UncheckedIOException(new IOException("Cannot create geometry object from null input")) } - /** Returns whether the Geometry object equals to the input object. - * - * @return - * GeoJSON string - * @since 1.12.0 - */ + /** + * Returns whether the Geometry object equals to the input object. + * + * @return + * GeoJSON string + * @since 1.12.0 + */ override def equals(obj: Any): Boolean = obj match { case g: Geometry => stringData.equals(g.stringData) case _ => false } - /** Returns the hashCode of the stored GeoJSON string. - * - * @return - * hash code - * @since 1.12.0 - */ + /** + * Returns the hashCode of the stored GeoJSON string. + * + * @return + * hash code + * @since 1.12.0 + */ override def hashCode(): Int = stringData.hashCode - /** Returns the underling string data for GeoJSON. - * - * @return - * GeoJSON string - * @since 1.12.0 - */ + /** + * Returns the underling string data for GeoJSON. + * + * @return + * GeoJSON string + * @since 1.12.0 + */ override def toString: String = stringData } diff --git a/src/main/scala/com/snowflake/snowpark/types/GeometryType.scala b/src/main/scala/com/snowflake/snowpark/types/GeometryType.scala index acef7073..a2a64c0c 100644 --- a/src/main/scala/com/snowflake/snowpark/types/GeometryType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/GeometryType.scala @@ -1,8 +1,9 @@ package com.snowflake.snowpark.types -/** Geometry data type. This maps to GEOMETRY data type in Snowflake. - * @since 1.12.0 - */ +/** + * Geometry data type. This maps to GEOMETRY data type in Snowflake. 
+ * @since 1.12.0 + */ object GeometryType extends DataType { override def toString: String = { s"GeometryType" diff --git a/src/main/scala/com/snowflake/snowpark/types/MapType.scala b/src/main/scala/com/snowflake/snowpark/types/MapType.scala index 1a796e66..add1be96 100644 --- a/src/main/scala/com/snowflake/snowpark/types/MapType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/MapType.scala @@ -1,8 +1,9 @@ package com.snowflake.snowpark.types -/** Map data type. This maps to OBJECT data type in Snowflake. - * @since 0.1.0 - */ +/** + * Map data type. This maps to OBJECT data type in Snowflake. + * @since 0.1.0 + */ case class MapType(keyType: DataType, valueType: DataType) extends DataType { override def toString: String = { s"MapType[${keyType.toString}, ${valueType.toString}]" diff --git a/src/main/scala/com/snowflake/snowpark/types/NumericType.scala b/src/main/scala/com/snowflake/snowpark/types/NumericType.scala index a9e09048..417a76f9 100644 --- a/src/main/scala/com/snowflake/snowpark/types/NumericType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/NumericType.scala @@ -6,63 +6,74 @@ private[snowpark] abstract class IntegralType extends NumericType private[snowpark] abstract class FractionalType extends NumericType -/** Byte data type. Mapped to TINYINT Snowflake date type. - * @since 0.1.0 - */ +/** + * Byte data type. Mapped to TINYINT Snowflake date type. + * @since 0.1.0 + */ object ByteType extends IntegralType -/** Short integer data type. Mapped to SMALLINT Snowflake date type. - * @since 0.1.0 - */ +/** + * Short integer data type. Mapped to SMALLINT Snowflake date type. + * @since 0.1.0 + */ object ShortType extends IntegralType -/** Integer data type. Mapped to INT Snowflake date type. - * @since 0.1.0 - */ +/** + * Integer data type. Mapped to INT Snowflake date type. + * @since 0.1.0 + */ object IntegerType extends IntegralType -/** Long integer data type. Mapped to BIGINT Snowflake date type. - * @since 0.1.0 - */ +/** + * Long integer data type. Mapped to BIGINT Snowflake date type. + * @since 0.1.0 + */ object LongType extends IntegralType -/** Float data type. Mapped to FLOAT Snowflake date type. - * @since 0.1.0 - */ +/** + * Float data type. Mapped to FLOAT Snowflake date type. + * @since 0.1.0 + */ object FloatType extends FractionalType -/** Double data type. Mapped to DOUBLE Snowflake date type. - * @since 0.1.0 - */ +/** + * Double data type. Mapped to DOUBLE Snowflake date type. + * @since 0.1.0 + */ object DoubleType extends FractionalType -/** Decimal data type. Mapped to NUMBER Snowflake date type. - * @since 0.1.0 - */ +/** + * Decimal data type. Mapped to NUMBER Snowflake date type. + * @since 0.1.0 + */ case class DecimalType(precision: Int, scale: Int) extends FractionalType { - /** Returns Decimal Info. Decimal(precision, scale), Alias of [[toString]] - * @since 0.1.0 - */ + /** + * Returns Decimal Info. Decimal(precision, scale), Alias of [[toString]] + * @since 0.1.0 + */ override def typeName: String = toString - /** Returns Decimal Info. Decimal(precision, scale) - * @since 0.1.0 - */ + /** + * Returns Decimal Info. Decimal(precision, scale) + * @since 0.1.0 + */ override def toString: String = s"Decimal($precision, $scale)" } -/** Companion object of DecimalType. - * @since 0.9.0 - */ +/** + * Companion object of DecimalType. + * @since 0.9.0 + */ object DecimalType { private[snowpark] val MAX_PRECISION = 38 private[snowpark] val MAX_SCALE = 38 - /** Retrieve DecimalType from BigDecimal value. 
- * @since 0.9.0 - */ + /** + * Retrieve DecimalType from BigDecimal value. + * @since 0.9.0 + */ def apply(decimal: BigDecimal): DecimalType = { if (decimal.precision < decimal.scale) { // For DecimalType, Snowflake Compiler expects the precision is equal to or large than diff --git a/src/main/scala/com/snowflake/snowpark/types/StringType.scala b/src/main/scala/com/snowflake/snowpark/types/StringType.scala index 0d5938f9..153c3a29 100644 --- a/src/main/scala/com/snowflake/snowpark/types/StringType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/StringType.scala @@ -1,6 +1,7 @@ package com.snowflake.snowpark.types -/** String data type. Mapped to VARCHAR Snowflake data type. - * @since 0.1.0 - */ +/** + * String data type. Mapped to VARCHAR Snowflake data type. + * @since 0.1.0 + */ object StringType extends AtomicType diff --git a/src/main/scala/com/snowflake/snowpark/types/StructType.scala b/src/main/scala/com/snowflake/snowpark/types/StructType.scala index 8d27968a..4e26a874 100644 --- a/src/main/scala/com/snowflake/snowpark/types/StructType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/StructType.scala @@ -3,85 +3,99 @@ package com.snowflake.snowpark.types import com.snowflake.snowpark.internal.analyzer.Attribute import com.snowflake.snowpark.internal.analyzer -/** StructType data type, represents table schema. - * @since 0.1.0 - */ +/** + * StructType data type, represents table schema. + * @since 0.1.0 + */ object StructType { private[snowpark] def fromAttributes(attrs: Seq[Attribute]): StructType = StructType(attrs.map(a => StructField(a.name, a.dataType, a.nullable))) - /** Clones the given [[StructType]] object. - * @since 0.1.0 - */ + /** + * Clones the given [[StructType]] object. + * @since 0.1.0 + */ def apply(other: StructType): StructType = StructType(other.fields) - /** Creates a [[StructType]] object based on the given Seq of [[StructField]] - * @since 0.1.0 - */ + /** + * Creates a [[StructType]] object based on the given Seq of [[StructField]] + * @since 0.1.0 + */ def apply(fields: Seq[StructField]): StructType = StructType(fields.toArray) - /** Creates a [[StructType]] object based on the given [[StructField]] - * @since 0.7.0 - */ + /** + * Creates a [[StructType]] object based on the given [[StructField]] + * @since 0.7.0 + */ def apply(field: StructField, remaining: StructField*): StructType = apply(field +: remaining) } -/** StructType data type, represents table schema. - * @constructor - * Creates a new [[StructType]] object based on the given array of [[StructField]]. - * @since 0.1.0 - */ +/** + * StructType data type, represents table schema. + * @constructor + * Creates a new [[StructType]] object based on the given array of [[StructField]]. + * @since 0.1.0 + */ case class StructType(fields: Array[StructField] = Array()) extends DataType with Seq[StructField] { - /** Returns the total number of [[StructField]] - * @since 0.1.0 - */ + /** + * Returns the total number of [[StructField]] + * @since 0.1.0 + */ override def length: Int = fields.length - /** Converts this object to Iterator. - * @since 0.1.0 - */ + /** + * Converts this object to Iterator. + * @since 0.1.0 + */ override def iterator: Iterator[StructField] = fields.toIterator - /** Returns the corresponding [[StructField]] of the given index. - * @since 0.1.0 - */ + /** + * Returns the corresponding [[StructField]] of the given index. + * @since 0.1.0 + */ override def apply(idx: Int): StructField = fields(idx) - /** Returns a String values to represent this object info. 
- * @since 0.1.0 - */ + /** + * Returns a String values to represent this object info. + * @since 0.1.0 + */ override def toString: String = s"StructType[${fields.map(_.toString).mkString(", ")}]" override private[snowpark] def schemaString: String = "Struct" - /** Appends a new [[StructField]] to the end of this object. - * @since 0.1.0 - */ + /** + * Appends a new [[StructField]] to the end of this object. + * @since 0.1.0 + */ def add(field: StructField): StructType = StructType(fields :+ field) - /** Appends a new [[StructField]] to the end of this object. - * @since 0.1.0 - */ + /** + * Appends a new [[StructField]] to the end of this object. + * @since 0.1.0 + */ def add(name: String, dataType: DataType, nullable: Boolean = true): StructType = add(StructField(name, dataType, nullable)) - /** (Scala API Only) Returns a Seq of the name of [[StructField]]. - * @since 0.1.0 - */ + /** + * (Scala API Only) Returns a Seq of the name of [[StructField]]. + * @since 0.1.0 + */ def names: Seq[String] = fields.map(_.name) - /** Returns the corresponding [[StructField]] object of the given name. - * @since 0.1.0 - */ + /** + * Returns the corresponding [[StructField]] object of the given name. + * @since 0.1.0 + */ def nameToField(name: String): Option[StructField] = fields.find(_.columnIdentifier.quotedName == analyzer.quoteName(name)) - /** Return the corresponding [[StructField]] object of the given name. - * @since 0.1.0 - */ + /** + * Return the corresponding [[StructField]] object of the given name. + * @since 0.1.0 + */ def apply(name: String): StructField = nameToField(name).getOrElse( throw new IllegalArgumentException(s"$name does not exits. Names: ${names.mkString(", ")}")) @@ -95,9 +109,10 @@ case class StructType(fields: Array[StructField] = Array()) extends DataType wit map(f => Attribute(f.columnIdentifier.quotedName, f.dataType, f.nullable)) } - /** Prints the StructType content in a tree structure diagram. - * @since 0.9.0 - */ + /** + * Prints the StructType content in a tree structure diagram. + * @since 0.9.0 + */ def printTreeString(): Unit = // scalastyle:off println(treeString(0)) @@ -108,42 +123,48 @@ case class StructType(fields: Array[StructField] = Array()) extends DataType wit } -/** Constructors and Util functions of [[StructField]] - * @since 0.1.0 - */ +/** + * Constructors and Util functions of [[StructField]] + * @since 0.1.0 + */ object StructField { - /** Creates a [[StructField]] - * - * @since 0.1.0 - */ + /** + * Creates a [[StructField]] + * + * @since 0.1.0 + */ def apply(name: String, dataType: DataType, nullable: Boolean): StructField = StructField(ColumnIdentifier(name), dataType, nullable) - /** Creates a [[StructField]] - * - * @since 0.1.0 - */ + /** + * Creates a [[StructField]] + * + * @since 0.1.0 + */ def apply(name: String, dataType: DataType): StructField = StructField(ColumnIdentifier(name), dataType) } -/** Represents the content of [[StructType]]. - * @since 0.1.0 - */ +/** + * Represents the content of [[StructType]]. + * @since 0.1.0 + */ case class StructField( columnIdentifier: ColumnIdentifier, dataType: DataType, nullable: Boolean = true) { - /** Returns the column name. - * @since 0.1.0 - */ + /** + * Returns the column name. + * @since 0.1.0 + */ val name: String = columnIdentifier.name - /** Returns a String values to represent this object info. - * @since 0.1.0 - */ + /** + * Returns a String values to represent this object info. 
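// Hypothetical usage sketch of the schema-building API described above.
import com.snowflake.snowpark.types.{BooleanType, IntegerType, StringType, StructField, StructType}

object SchemaExample {
  val schema: StructType = StructType(
    StructField("word", StringType),
    StructField("count", IntegerType))
    .add("valid", BooleanType, nullable = false)

  assert(schema.length == 3)
  // Unquoted field names are normalised per the identifier rules documented below.
  assert(schema.names == Seq("WORD", "COUNT", "VALID"))
  def show(): Unit = schema.printTreeString() // prints the schema as a tree diagram
}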
+ * @since 0.1.0 + */ override def toString: String = s"StructField($name, $dataType, Nullable = $nullable)" private[types] def treeString(layer: Int): String = { @@ -158,24 +179,27 @@ case class StructField( } } -/** Constructors and Util functions of ColumnIdentifier - * @since 0.1.0 - */ +/** + * Constructors and Util functions of ColumnIdentifier + * @since 0.1.0 + */ object ColumnIdentifier { - /** Creates a [[ColumnIdentifier]] object for the giving column name. Identifier Requirement can - * be found from https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html - * @since 0.1.0 - */ + /** + * Creates a [[ColumnIdentifier]] object for the giving column name. Identifier Requirement can be + * found from https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html + * @since 0.1.0 + */ def apply(name: String): ColumnIdentifier = new ColumnIdentifier(analyzer.quoteName(name)) - /** Removes the unnecessary quotes from name - * - * Remove quotes if name starts with _A-Z and only contains _0-9A-Z$, or starts with $ and - * follows by digits - * @since 0.1.0 - */ + /** + * Removes the unnecessary quotes from name + * + * Remove quotes if name starts with _A-Z and only contains _0-9A-Z$, or starts with $ and follows + * by digits + * @since 0.1.0 + */ private def stripUnnecessaryQuotes(str: String): String = { val removeQuote = "^\"(([_A-Z]+[_A-Z0-9$]*)|(\\$\\d+))\"$".r str match { @@ -185,57 +209,64 @@ object ColumnIdentifier { } } -/** Represents Column Identifier - * @since 0.1.0 - */ +/** + * Represents Column Identifier + * @since 0.1.0 + */ class ColumnIdentifier private (normalizedName: String) { - /** Returns the name of column. Name format: - * 1. if the name quoted. - * a. starts with _A-Z and follows by _A-Z0-9$: remove quotes b. starts with $ and follows - * by digits: remove quotes c. otherwise, do nothing 2. if not quoted. - * a. starts with _a-zA-Z and follows by _a-zA-Z0-9$, upper case all letters. b. starts with - * $ and follows by digits, do nothing c. otherwise, quote name - * - * More details can be found from - * https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html - * @since 0.1.0 - */ + /** + * Returns the name of column. Name format: + * 1. if the name quoted. + * a. starts with _A-Z and follows by _A-Z0-9$: remove quotes b. starts with $ and follows by + * digits: remove quotes c. otherwise, do nothing 2. if not quoted. + * a. starts with _a-zA-Z and follows by _a-zA-Z0-9$, upper case all letters. b. starts with + * $ and follows by digits, do nothing c. otherwise, quote name + * + * More details can be found from + * https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html + * @since 0.1.0 + */ val name: String = ColumnIdentifier.stripUnnecessaryQuotes(normalizedName) - /** Returns the quoted name of this column Name Format: - * 1. if quoted, do nothing 2. if not quoted. - * a. starts with _a-zA-Z and follows by _a-zA-Z0-9$, upper case all letters and then quote - * b. otherwise, quote name - * - * It is same as [[name]], but quotes always added. It is always safe to do String comparisons - * between quoted column names - * @since 0.1.0 - */ + /** + * Returns the quoted name of this column Name Format: + * 1. if quoted, do nothing 2. if not quoted. + * a. starts with _a-zA-Z and follows by _a-zA-Z0-9$, upper case all letters and then quote + * b. otherwise, quote name + * + * It is same as [[name]], but quotes always added. 
It is always safe to do String comparisons + * between quoted column names + * @since 0.1.0 + */ def quotedName: String = normalizedName - /** Returns a copy of this [[ColumnIdentifier]]. - * @since 0.1.0 - */ + /** + * Returns a copy of this [[ColumnIdentifier]]. + * @since 0.1.0 + */ override def clone(): AnyRef = new ColumnIdentifier(normalizedName) - /** Returns the hashCode of this [[ColumnIdentifier]]. - * @since 0.1.0 - */ + /** + * Returns the hashCode of this [[ColumnIdentifier]]. + * @since 0.1.0 + */ override def hashCode(): Int = normalizedName.hashCode - /** Compares this [[ColumnIdentifier]] with the giving one, returns true if these two are - * equivalent, otherwise, returns false. - * @since 0.1.0 - */ + /** + * Compares this [[ColumnIdentifier]] with the giving one, returns true if these two are + * equivalent, otherwise, returns false. + * @since 0.1.0 + */ override def equals(obj: Any): Boolean = obj match { case other: ColumnIdentifier => normalizedName == other.quotedName case _ => false } - /** Returns the column name. Alias of [[name]] - * @since 0.1.0 - */ + /** + * Returns the column name. Alias of [[name]] + * @since 0.1.0 + */ override def toString: String = name } diff --git a/src/main/scala/com/snowflake/snowpark/types/TimeType.scala b/src/main/scala/com/snowflake/snowpark/types/TimeType.scala index fe130d30..1ff3e8c0 100644 --- a/src/main/scala/com/snowflake/snowpark/types/TimeType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/TimeType.scala @@ -1,7 +1,8 @@ package com.snowflake.snowpark.types -/** Time data type. Mapped to TIME Snowflake data type. - * - * @since 0.2.0 - */ +/** + * Time data type. Mapped to TIME Snowflake data type. + * + * @since 0.2.0 + */ object TimeType extends AtomicType diff --git a/src/main/scala/com/snowflake/snowpark/types/TimestampType.scala b/src/main/scala/com/snowflake/snowpark/types/TimestampType.scala index 91e6a367..89ff643a 100644 --- a/src/main/scala/com/snowflake/snowpark/types/TimestampType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/TimestampType.scala @@ -1,6 +1,7 @@ package com.snowflake.snowpark.types -/** Timestamp data type. Mapped to TIMESTAMP Snowflake data type. - * @since 0.1.0 - */ +/** + * Timestamp data type. Mapped to TIMESTAMP Snowflake data type. 
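// Hypothetical sketch of the identifier-normalisation rules spelled out above.
import com.snowflake.snowpark.types.ColumnIdentifier

object IdentifierExample {
  val plain = ColumnIdentifier("status")
  assert(plain.name == "STATUS")           // unquoted name: letters are upper-cased
  assert(plain.quotedName == "\"STATUS\"") // quotedName always carries the quotes

  val mixed = ColumnIdentifier("\"mixedCase\"")
  assert(mixed.name == "\"mixedCase\"")    // already quoted, mixed case: quotes must stay
}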
+ * @since 0.1.0 + */ object TimestampType extends AtomicType diff --git a/src/main/scala/com/snowflake/snowpark/types/Variant.scala b/src/main/scala/com/snowflake/snowpark/types/Variant.scala index e27285cf..ddd25ca6 100644 --- a/src/main/scala/com/snowflake/snowpark/types/Variant.scala +++ b/src/main/scala/com/snowflake/snowpark/types/Variant.scala @@ -11,6 +11,7 @@ import Variant._ import org.apache.commons.codec.binary.{Base64, Hex} import java.io.{IOException, UncheckedIOException} +import java.util.function.Consumer import scala.util.hashing.MurmurHash3 private[snowpark] object Variant { @@ -70,85 +71,97 @@ private[snowpark] object Variant { } } -/** Representation of Snowflake Variant data - * - * @since 0.2.0 - */ +/** + * Representation of Snowflake Variant data + * + * @since 0.2.0 + */ class Variant private[snowpark] ( private[snowpark] val value: JsonNode, private[snowpark] val dataType: VariantType) { - /** Creates a Variant from double value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from double value + * + * @since 0.2.0 + */ def this(num: Double) = this(JsonNodeFactory.instance.numberNode(num), VariantTypes.RealNumber) - /** Creates a Variant from float value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from float value + * + * @since 0.2.0 + */ def this(num: Float) = this(JsonNodeFactory.instance.numberNode(num), VariantTypes.RealNumber) - /** Creates a Variant from long integer value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from long integer value + * + * @since 0.2.0 + */ def this(num: Long) = this(JsonNodeFactory.instance.numberNode(num), VariantTypes.FixedNumber) - /** Creates a Variant from integer value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from integer value + * + * @since 0.2.0 + */ def this(num: Int) = this(JsonNodeFactory.instance.numberNode(num), VariantTypes.FixedNumber) - /** Creates a Variant from short integer value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from short integer value + * + * @since 0.2.0 + */ def this(num: Short) = this(JsonNodeFactory.instance.numberNode(num), VariantTypes.FixedNumber) - /** Creates a Variant from Java BigDecimal value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from Java BigDecimal value + * + * @since 0.2.0 + */ def this(num: JavaBigDecimal) = this(JsonNodeFactory.instance.numberNode(num), VariantTypes.FixedNumber) - /** Creates a Variant from Scala BigDecimal value - * - * @since 0.6.0 - */ + /** + * Creates a Variant from Scala BigDecimal value + * + * @since 0.6.0 + */ def this(num: BigDecimal) = this(num.bigDecimal) - /** Creates a Variant from Java BigInteger value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from Java BigInteger value + * + * @since 0.2.0 + */ def this(num: JavaBigInteger) = this(JsonNodeFactory.instance.numberNode(num), VariantTypes.FixedNumber) - /** Creates a Variant from Scala BigInt value - * - * @since 0.6.0 - */ + /** + * Creates a Variant from Scala BigInt value + * + * @since 0.6.0 + */ def this(num: BigInt) = this(num.bigInteger) - /** Creates a Variant from Boolean value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from Boolean value + * + * @since 0.2.0 + */ def this(bool: Boolean) = this(JsonNodeFactory.instance.booleanNode(bool), VariantTypes.Boolean) - /** Creates a Variant from String value. By default string is parsed as Json. If the parsing - * failed, the string is stored as text. - * - * @since 0.2.0 - */ + /** + * Creates a Variant from String value. By default string is parsed as Json. 
If the parsing + * failed, the string is stored as text. + * + * @since 0.2.0 + */ def this(str: String) = this( { @@ -168,36 +181,41 @@ class Variant private[snowpark] ( }, VariantTypes.String) - /** Creates a Variant from binary value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from binary value + * + * @since 0.2.0 + */ def this(bytes: Array[Byte]) = this(JsonNodeFactory.instance.binaryNode(bytes), VariantTypes.Binary) - /** Creates a Variant from time value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from time value + * + * @since 0.2.0 + */ def this(time: Time) = this(JsonNodeFactory.instance.textNode(time.toString), VariantTypes.Time) - /** Creates a Variant from date value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from date value + * + * @since 0.2.0 + */ def this(date: Date) = this(JsonNodeFactory.instance.textNode(date.toString), VariantTypes.Date) - /** Creates a Variant from timestamp value - * - * @since 0.2.0 - */ + /** + * Creates a Variant from timestamp value + * + * @since 0.2.0 + */ def this(timestamp: Timestamp) = this(JsonNodeFactory.instance.textNode(timestamp.toString), VariantTypes.Timestamp) - /** Creates a Variant from Scala Seq - * - * @since 0.6.0 - */ + /** + * Creates a Variant from Scala Seq + * + * @since 0.6.0 + */ def this(seq: Seq[Any]) = this( { @@ -207,28 +225,35 @@ class Variant private[snowpark] ( }, VariantTypes.String) - /** Creates a Variant from Java List - * - * @since 0.2.0 - */ - def this(list: JavaList[Object]) = this(list.asScala) - - /** Creates a Variant from array - * - * @since 0.2.0 - */ + /** + * Creates a Variant from Java List + * + * @since 0.2.0 + */ + def this(list: JavaList[Object]) = this(list.asScala.toSeq) + + /** + * Creates a Variant from array + * + * @since 0.2.0 + */ def this(objects: Array[Any]) = this(objects.toSeq) - /** Creates a Variant from Object - * - * @since 0.2.0 - */ + /** + * Creates a Variant from Object + * + * @since 0.2.0 + */ def this(obj: Any) = this( { def mapToNode(map: JavaMap[Object, Object]): ObjectNode = { val result = MAPPER.createObjectNode() - map.keySet().forEach(key => result.set(key.toString, objectToJsonNode(map.get(key)))) + map.keySet.forEach(new Consumer[Object] { + override def accept(key: Object): Unit = { + result.set(key.toString, objectToJsonNode(map.get(key))) + } + }) result } obj match { @@ -247,50 +272,56 @@ class Variant private[snowpark] ( }, VariantTypes.String) - /** Converts the variant as double value - * - * @since 0.2.0 - */ + /** + * Converts the variant as double value + * + * @since 0.2.0 + */ def asDouble(): Double = convert(VariantTypes.RealNumber) { value.asDouble() } - /** Converts the variant as float value - * - * @since 0.2.0 - */ + /** + * Converts the variant as float value + * + * @since 0.2.0 + */ def asFloat(): Float = convert(VariantTypes.RealNumber) { value.asDouble().toFloat } - /** Converts the variant as long value - * - * @since 0.2.0 - */ + /** + * Converts the variant as long value + * + * @since 0.2.0 + */ def asLong(): Long = convert(VariantTypes.FixedNumber) { value.asLong() } - /** Converts the variant as integer value - * - * @since 0.2.0 - */ + /** + * Converts the variant as integer value + * + * @since 0.2.0 + */ def asInt(): Int = convert(VariantTypes.FixedNumber) { value.asInt() } - /** Converts the variant as short value - * - * @since 0.2.0 - */ + /** + * Converts the variant as short value + * + * @since 0.2.0 + */ def asShort(): Short = convert(VariantTypes.FixedNumber) { value.asInt().toShort } - /** 
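// Why the `.asScala.toSeq` change above is needed: in Scala 2.13 the default `Seq`
// alias is scala.collection.immutable.Seq, while `asScala` on a java.util.List
// yields a mutable Buffer, so an explicit `.toSeq` copy is required where the
// 2.12 build compiled as-is. Minimal cross-version sketch:
import scala.collection.JavaConverters._

object CollectionConversionExample {
  val javaList: java.util.List[String] = java.util.Arrays.asList("a", "b")
  val asImmutableSeq: Seq[Any] = javaList.asScala.toSeq // compiles on both 2.12 and 2.13
}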
Converts the variant as BigDecimal value - * - * @since 0.6.0 - */ + /** + * Converts the variant as BigDecimal value + * + * @since 0.6.0 + */ def asBigDecimal(): BigDecimal = convert(VariantTypes.RealNumber) { if (value.isBoolean) { BigDecimal(value.asInt()) @@ -299,10 +330,11 @@ class Variant private[snowpark] ( } } - /** Converts the variant as Scala BigInt value - * - * @since 0.6.0 - */ + /** + * Converts the variant as Scala BigInt value + * + * @since 0.6.0 + */ def asBigInt(): BigInt = convert(VariantTypes.FixedNumber) { if (value.isBoolean) { BigInt(value.asInt()) @@ -311,18 +343,20 @@ class Variant private[snowpark] ( } } - /** Converts the variant as boolean value - * - * @since 0.2.0 - */ + /** + * Converts the variant as boolean value + * + * @since 0.2.0 + */ def asBoolean(): Boolean = convert(VariantTypes.Boolean) { value.asBoolean() } - /** Converts the variant as string value - * - * @since 0.2.0 - */ + /** + * Converts the variant as string value + * + * @since 0.2.0 + */ def asString(): String = convert(VariantTypes.String) { if (value.isBinary) { val decoded = Base64.decodeBase64(value.asText()) @@ -334,15 +368,17 @@ class Variant private[snowpark] ( } } - /** An alias of [[asString]] - * - * @since 0.2.0 - */ + /** + * An alias of [[asString]] + * + * @since 0.2.0 + */ override def toString: String = asString() - /** Converts the variant as valid Json String - * @since 0.2.0 - */ + /** + * Converts the variant as valid Json String + * @since 0.2.0 + */ def asJsonString(): String = convert(VariantTypes.String) { if (value.isBinary) { val decoded = Base64.decodeBase64(value.asText()) @@ -352,25 +388,27 @@ class Variant private[snowpark] ( } } - /** Return the variant value as a JsonNode. This function allows to read the JSON object directly - * as JsonNode from variant column rather parsing it as String Example - to get the first value - * from array for key "a" - * {{{ - * val sv = new Variant("{\"a\": [1, 2], \"b\": 3, \"c\": \"xyz\"}") - * println(sv.asJsonNode().get("a").get(0)) - * output - * 1 - * }}} - * - * @since 1.14.0 - */ + /** + * Return the variant value as a JsonNode. 
This function allows to read the JSON object directly + * as JsonNode from variant column rather parsing it as String Example - to get the first value + * from array for key "a" + * {{{ + * val sv = new Variant("{\"a\": [1, 2], \"b\": 3, \"c\": \"xyz\"}") + * println(sv.asJsonNode().get("a").get(0)) + * output + * 1 + * }}} + * + * @since 1.14.0 + */ def asJsonNode(): JsonNode = { value } - /** Converts the variant as binary value - * @since 0.2.0 - */ + /** + * Converts the variant as binary value + * @since 0.2.0 + */ def asBinary(): Array[Byte] = convert(VariantTypes.Binary) { try { value.binaryValue() @@ -388,23 +426,26 @@ class Variant private[snowpark] ( } } - /** Converts the variant as time value - * @since 0.2.0 - */ + /** + * Converts the variant as time value + * @since 0.2.0 + */ def asTime(): Time = convert(VariantTypes.Time) { Time.valueOf(value.asText()) } - /** Converts the variant as date value - * @since 0.2.0 - */ + /** + * Converts the variant as date value + * @since 0.2.0 + */ def asDate(): Date = convert(VariantTypes.Date) { Date.valueOf(value.asText()) } - /** Converts the variant as timestamp value - * @since 0.2.0 - */ + /** + * Converts the variant as timestamp value + * @since 0.2.0 + */ def asTimestamp(): Timestamp = convert(VariantTypes.Timestamp) { if (value.isNumber) { new Timestamp(value.asLong()) @@ -413,14 +454,16 @@ class Variant private[snowpark] ( } } - /** Converts the variant as Scala Seq of Variant - * @since 0.6.0 - */ + /** + * Converts the variant as Scala Seq of Variant + * @since 0.6.0 + */ def asSeq(): Seq[Variant] = asArray() - /** Converts the variant as Array of Variant - * @since 0.2.0 - */ + /** + * Converts the variant as Array of Variant + * @since 0.2.0 + */ def asArray(): Array[Variant] = value match { case null => null; case arr: ArrayNode => @@ -432,9 +475,10 @@ class Variant private[snowpark] ( result } - /** Converts the variant as Scala Map of String to Variant - * @since 0.6.0 - */ + /** + * Converts the variant as Scala Map of String to Variant + * @since 0.6.0 + */ def asMap(): Map[String, Variant] = value match { case null => null case obj: ObjectNode => @@ -447,17 +491,19 @@ class Variant private[snowpark] ( map } - /** Checks whether two Variants are equal - * @since 0.2.0 - */ + /** + * Checks whether two Variants are equal + * @since 0.2.0 + */ override def equals(obj: Any): Boolean = obj match { case v: Variant => value.equals(v.value) case _ => false } - /** Calculates hashcode of this Variant - * @since 0.6.0 - */ + /** + * Calculates hashcode of this Variant + * @since 0.6.0 + */ override def hashCode(): Int = { var h = MurmurHash3.seqSeed h = MurmurHash3.mix(h, dataType.##) diff --git a/src/main/scala/com/snowflake/snowpark/types/VariantType.scala b/src/main/scala/com/snowflake/snowpark/types/VariantType.scala index d6f79864..4f0ed568 100644 --- a/src/main/scala/com/snowflake/snowpark/types/VariantType.scala +++ b/src/main/scala/com/snowflake/snowpark/types/VariantType.scala @@ -1,6 +1,7 @@ package com.snowflake.snowpark.types -/** Variant data type. This maps to VARIANT data type in Snowflake. - * @since 0.1.0 - */ +/** + * Variant data type. This maps to VARIANT data type in Snowflake. 
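// Hypothetical round-trip sketch of the Variant conversions documented above.
import com.snowflake.snowpark.types.Variant

object VariantExample {
  val v = new Variant("{\"a\": [1, 2], \"b\": 3}") // string input is parsed as JSON
  assert(v.asMap()("b").asInt() == 3)
  assert(v.asMap()("a").asSeq().map(_.asInt()) == Seq(1, 2))
  assert(v.asJsonNode().get("a").get(0).asInt() == 1) // direct Jackson JsonNode access
}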
+ * @since 0.1.0 + */ object VariantType extends DataType diff --git a/src/main/scala/com/snowflake/snowpark/types/package.scala b/src/main/scala/com/snowflake/snowpark/types/package.scala index 362e9b2c..949962a5 100644 --- a/src/main/scala/com/snowflake/snowpark/types/package.scala +++ b/src/main/scala/com/snowflake/snowpark/types/package.scala @@ -2,9 +2,10 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal.ErrorMessage -/** This package contains all Snowpark logical types. - * @since 0.1.0 - */ +/** + * This package contains all Snowpark logical types. + * @since 0.1.0 + */ package object types { private[snowpark] def toJavaType(datatype: DataType): String = diff --git a/src/main/scala/com/snowflake/snowpark/udtf/UDTFs.scala b/src/main/scala/com/snowflake/snowpark/udtf/UDTFs.scala index 85bece5c..34aa4637 100644 --- a/src/main/scala/com/snowflake/snowpark/udtf/UDTFs.scala +++ b/src/main/scala/com/snowflake/snowpark/udtf/UDTFs.scala @@ -6,49 +6,54 @@ import com.snowflake.snowpark.types.StructType import scala.reflect.runtime.universe.TypeTag -/** The Scala UDTF (user-defined table function) trait. - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) trait. + * @since 1.2.0 + */ sealed trait UDTF extends java.io.Serializable { - /** A StructType that describes the data types of the fields in the rows returned by the process() - * and endPartition() methods. - * - * For example, if a UDTF returns rows that contain a StringType and IntegerType field, the - * outputSchema() method should construct and return the following StructType object: {{ override - * def outputSchema(): StructType = StructType(StructField("word", StringType), - * StructField("count", IntegerType)) }} - * - * Since: 1.2.0 - */ + /** + * A StructType that describes the data types of the fields in the rows returned by the process() + * and endPartition() methods. + * + * For example, if a UDTF returns rows that contain a StringType and IntegerType field, the + * outputSchema() method should construct and return the following StructType object: {{ override + * def outputSchema(): StructType = StructType(StructField("word", StringType), + * StructField("count", IntegerType)) }} + * + * Since: 1.2.0 + */ def outputSchema(): StructType - /** This method can be used to generate output rows that are based on any state information - * aggregated in process(). This method is invoked once for each partition, after all rows in - * that partition have been passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method can be used to generate output rows that are based on any state information + * aggregated in process(). This method is invoked once for each partition, after all rows in that + * partition have been passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def endPartition(): Iterable[Row] // Below are internal private functions private[snowpark] def inputColumns: Seq[UdfColumn] } -/** The Scala UDTF (user-defined table function) abstract class that has no argument. - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has no argument. + * @since 1.2.0 + */ abstract class UDTF0 extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). 
- * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(): Iterable[Row] override private[snowpark] def inputColumns: Seq[UdfColumn] = Seq.empty @@ -94,57 +99,63 @@ abstract class UDTF0 extends UDTF { */ // scalastyle:on -/** The Scala UDTF (user-defined table function) abstract class that has 1 argument. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 1 argument. + * + * @since 1.2.0 + */ abstract class UDTF1[A0: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(arg0: A0): Iterable[Row] override private[snowpark] def inputColumns: Seq[UdfColumn] = Seq(ScalaFunctions.schemaForUdfColumn[A0](1)) } -/** The Scala UDTF (user-defined table function) abstract class that has 2 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 2 arguments. + * + * @since 1.2.0 + */ abstract class UDTF2[A0: TypeTag, A1: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(arg0: A0, arg1: A1): Iterable[Row] override private[snowpark] def inputColumns: Seq[UdfColumn] = Seq(ScalaFunctions.schemaForUdfColumn[A0](1), ScalaFunctions.schemaForUdfColumn[A1](2)) } -/** The Scala UDTF (user-defined table function) abstract class that has 3 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 3 arguments. + * + * @since 1.2.0 + */ abstract class UDTF3[A0: TypeTag, A1: TypeTag, A2: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). 
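// Hedged sketch of a single-argument UDTF built on the abstract classes above; the
// word/count shape mirrors the outputSchema() example in the UDTF trait docs, and
// Row(values: Any*) from the Row companion is assumed for constructing output rows.
import com.snowflake.snowpark.Row
import com.snowflake.snowpark.types.{IntegerType, StringType, StructField, StructType}
import com.snowflake.snowpark.udtf.UDTF1

class WordSplitter extends UDTF1[String] {
  private var total = 0

  override def outputSchema(): StructType =
    StructType(StructField("word", StringType), StructField("count", IntegerType))

  // Invoked once per input row; emits one output row per word.
  override def process(line: String): Iterable[Row] = {
    val words = line.split("\\s+").filter(_.nonEmpty).toSeq
    total += words.length
    words.map(w => Row(w, 1))
  }

  // Invoked once per partition, after all rows of that partition were processed.
  override def endPartition(): Iterable[Row] = Seq(Row("__TOTAL__", total))
}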
+ * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(arg0: A0, arg1: A1, arg2: A2): Iterable[Row] override private[snowpark] def inputColumns: Seq[UdfColumn] = @@ -154,19 +165,21 @@ abstract class UDTF3[A0: TypeTag, A1: TypeTag, A2: TypeTag] extends UDTF { ScalaFunctions.schemaForUdfColumn[A2](3)) } -/** The Scala UDTF (user-defined table function) abstract class that has 4 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 4 arguments. + * + * @since 1.2.0 + */ abstract class UDTF4[A0: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(arg0: A0, arg1: A1, arg2: A2, arg3: A3): Iterable[Row] override private[snowpark] def inputColumns: Seq[UdfColumn] = @@ -177,19 +190,21 @@ abstract class UDTF4[A0: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag] extends ScalaFunctions.schemaForUdfColumn[A3](4)) } -/** The Scala UDTF (user-defined table function) abstract class that has 5 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 5 arguments. + * + * @since 1.2.0 + */ abstract class UDTF5[A0: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(arg0: A0, arg1: A1, arg2: A2, arg3: A3, arg4: A4): Iterable[Row] override private[snowpark] def inputColumns: Seq[UdfColumn] = @@ -201,20 +216,22 @@ abstract class UDTF5[A0: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: Typ ScalaFunctions.schemaForUdfColumn[A4](5)) } -/** The Scala UDTF (user-defined table function) abstract class that has 6 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 6 arguments. + * + * @since 1.2.0 + */ abstract class UDTF6[A0: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). 
+ * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(arg0: A0, arg1: A1, arg2: A2, arg3: A3, arg4: A4, arg5: A5): Iterable[Row] override private[snowpark] def inputColumns: Seq[UdfColumn] = @@ -227,10 +244,11 @@ abstract class UDTF6[A0: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: Typ ScalaFunctions.schemaForUdfColumn[A5](6)) } -/** The Scala UDTF (user-defined table function) abstract class that has 7 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 7 arguments. + * + * @since 1.2.0 + */ abstract class UDTF7[ A0: TypeTag, A1: TypeTag, @@ -241,13 +259,14 @@ abstract class UDTF7[ A6: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(arg0: A0, arg1: A1, arg2: A2, arg3: A3, arg4: A4, arg5: A5, arg6: A6): Iterable[Row] override private[snowpark] def inputColumns: Seq[UdfColumn] = @@ -261,10 +280,11 @@ abstract class UDTF7[ ScalaFunctions.schemaForUdfColumn[A6](7)) } -/** The Scala UDTF (user-defined table function) abstract class that has 8 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 8 arguments. + * + * @since 1.2.0 + */ abstract class UDTF8[ A0: TypeTag, A1: TypeTag, @@ -276,13 +296,14 @@ abstract class UDTF8[ A7: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process(arg0: A0, arg1: A1, arg2: A2, arg3: A3, arg4: A4, arg5: A5, arg6: A6, arg7: A7) : Iterable[Row] @@ -298,10 +319,11 @@ abstract class UDTF8[ ScalaFunctions.schemaForUdfColumn[A7](8)) } -/** The Scala UDTF (user-defined table function) abstract class that has 9 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 9 arguments. + * + * @since 1.2.0 + */ abstract class UDTF9[ A0: TypeTag, A1: TypeTag, @@ -314,13 +336,14 @@ abstract class UDTF9[ A8: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). 
+ * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process( arg0: A0, arg1: A1, @@ -345,10 +368,11 @@ abstract class UDTF9[ ScalaFunctions.schemaForUdfColumn[A8](9)) } -/** The Scala UDTF (user-defined table function) abstract class that has 10 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 10 arguments. + * + * @since 1.2.0 + */ abstract class UDTF10[ A0: TypeTag, A1: TypeTag, @@ -362,13 +386,14 @@ abstract class UDTF10[ A9: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ def process( arg0: A0, arg1: A1, @@ -395,10 +420,11 @@ abstract class UDTF10[ ScalaFunctions.schemaForUdfColumn[A9](10)) } -/** The Scala UDTF (user-defined table function) abstract class that has 11 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 11 arguments. + * + * @since 1.2.0 + */ abstract class UDTF11[ A0: TypeTag, A1: TypeTag, @@ -413,13 +439,14 @@ abstract class UDTF11[ A10: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -450,10 +477,11 @@ abstract class UDTF11[ ScalaFunctions.schemaForUdfColumn[A10](11)) } -/** The Scala UDTF (user-defined table function) abstract class that has 12 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 12 arguments. + * + * @since 1.2.0 + */ abstract class UDTF12[ A0: TypeTag, A1: TypeTag, @@ -469,13 +497,14 @@ abstract class UDTF12[ A11: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -508,10 +537,11 @@ abstract class UDTF12[ ScalaFunctions.schemaForUdfColumn[A11](12)) } -/** The Scala UDTF (user-defined table function) abstract class that has 13 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 13 arguments. 
+ * + * @since 1.2.0 + */ abstract class UDTF13[ A0: TypeTag, A1: TypeTag, @@ -528,13 +558,14 @@ abstract class UDTF13[ A12: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -569,10 +600,11 @@ abstract class UDTF13[ ScalaFunctions.schemaForUdfColumn[A12](13)) } -/** The Scala UDTF (user-defined table function) abstract class that has 14 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 14 arguments. + * + * @since 1.2.0 + */ abstract class UDTF14[ A0: TypeTag, A1: TypeTag, @@ -590,13 +622,14 @@ abstract class UDTF14[ A13: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -633,10 +666,11 @@ abstract class UDTF14[ ScalaFunctions.schemaForUdfColumn[A13](14)) } -/** The Scala UDTF (user-defined table function) abstract class that has 15 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 15 arguments. + * + * @since 1.2.0 + */ abstract class UDTF15[ A0: TypeTag, A1: TypeTag, @@ -655,13 +689,14 @@ abstract class UDTF15[ A14: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -700,10 +735,11 @@ abstract class UDTF15[ ScalaFunctions.schemaForUdfColumn[A14](15)) } -/** The Scala UDTF (user-defined table function) abstract class that has 16 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 16 arguments. + * + * @since 1.2.0 + */ abstract class UDTF16[ A0: TypeTag, A1: TypeTag, @@ -723,13 +759,14 @@ abstract class UDTF16[ A15: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. 
The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -770,10 +807,11 @@ abstract class UDTF16[ ScalaFunctions.schemaForUdfColumn[A15](16)) } -/** The Scala UDTF (user-defined table function) abstract class that has 17 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 17 arguments. + * + * @since 1.2.0 + */ abstract class UDTF17[ A0: TypeTag, A1: TypeTag, @@ -794,13 +832,14 @@ abstract class UDTF17[ A16: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -843,10 +882,11 @@ abstract class UDTF17[ ScalaFunctions.schemaForUdfColumn[A16](17)) } -/** The Scala UDTF (user-defined table function) abstract class that has 18 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 18 arguments. + * + * @since 1.2.0 + */ abstract class UDTF18[ A0: TypeTag, A1: TypeTag, @@ -868,13 +908,14 @@ abstract class UDTF18[ A17: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -919,10 +960,11 @@ abstract class UDTF18[ ScalaFunctions.schemaForUdfColumn[A17](18)) } -/** The Scala UDTF (user-defined table function) abstract class that has 19 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 19 arguments. + * + * @since 1.2.0 + */ abstract class UDTF19[ A0: TypeTag, A1: TypeTag, @@ -945,13 +987,14 @@ abstract class UDTF19[ A18: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -998,10 +1041,11 @@ abstract class UDTF19[ ScalaFunctions.schemaForUdfColumn[A18](19)) } -/** The Scala UDTF (user-defined table function) abstract class that has 20 arguments. 
- * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 20 arguments. + * + * @since 1.2.0 + */ abstract class UDTF20[ A0: TypeTag, A1: TypeTag, @@ -1025,13 +1069,14 @@ abstract class UDTF20[ A19: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -1080,10 +1125,11 @@ abstract class UDTF20[ ScalaFunctions.schemaForUdfColumn[A19](20)) } -/** The Scala UDTF (user-defined table function) abstract class that has 21 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 21 arguments. + * + * @since 1.2.0 + */ abstract class UDTF21[ A0: TypeTag, A1: TypeTag, @@ -1108,13 +1154,14 @@ abstract class UDTF21[ A20: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, @@ -1165,10 +1212,11 @@ abstract class UDTF21[ ScalaFunctions.schemaForUdfColumn[A20](21)) } -/** The Scala UDTF (user-defined table function) abstract class that has 22 arguments. - * - * @since 1.2.0 - */ +/** + * The Scala UDTF (user-defined table function) abstract class that has 22 arguments. + * + * @since 1.2.0 + */ abstract class UDTF22[ A0: TypeTag, A1: TypeTag, @@ -1194,13 +1242,14 @@ abstract class UDTF22[ A21: TypeTag] extends UDTF { - /** This method is invoked once for each row in the input partition. The arguments passed to the - * registered UDTF are passed to process(). - * - * The rows returned in this method must match the StructType defined in [[outputSchema]] - * - * Since: 1.2.0 - */ + /** + * This method is invoked once for each row in the input partition. The arguments passed to the + * registered UDTF are passed to process(). + * + * The rows returned in this method must match the StructType defined in [[outputSchema]] + * + * Since: 1.2.0 + */ // scalastyle:off def process( arg0: A0, diff --git a/src/test/scala/com/snowflake/code_verification/ClassUtils.scala b/src/test/scala/com/snowflake/code_verification/ClassUtils.scala index bd20025f..4e8a9c81 100644 --- a/src/test/scala/com/snowflake/code_verification/ClassUtils.scala +++ b/src/test/scala/com/snowflake/code_verification/ClassUtils.scala @@ -28,9 +28,10 @@ object ClassUtils extends Logging { .toSeq } - /** Check if two classes have same function names. It is not perfect since it can only check - * function names but not args. - */ + /** + * Check if two classes have same function names. It is not perfect since it can only check + * function names but not args. 
+ */ def containsSameFunctionNames[A: TypeTag, B: TypeTag]( class1: Class[A], class2: Class[B], diff --git a/src/test/scala/com/snowflake/code_verification/JavaScalaAPISuite.scala b/src/test/scala/com/snowflake/code_verification/JavaScalaAPISuite.scala index f71a8182..7c417f46 100644 --- a/src/test/scala/com/snowflake/code_verification/JavaScalaAPISuite.scala +++ b/src/test/scala/com/snowflake/code_verification/JavaScalaAPISuite.scala @@ -1,11 +1,11 @@ package com.snowflake.code_verification import com.snowflake.snowpark.{CodeVerification, DataFrame} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite // verify API Java and Scala API contain same functions @CodeVerification -class JavaScalaAPISuite extends FunSuite { +class JavaScalaAPISuite extends AnyFunSuite { private val scalaCaseClassFunctions = Set( "apply", "copy", diff --git a/src/test/scala/com/snowflake/code_verification/PomSuite.scala b/src/test/scala/com/snowflake/code_verification/PomSuite.scala index b5b39f4d..45535f1f 100644 --- a/src/test/scala/com/snowflake/code_verification/PomSuite.scala +++ b/src/test/scala/com/snowflake/code_verification/PomSuite.scala @@ -1,12 +1,12 @@ package com.snowflake.code_verification import com.snowflake.snowpark.CodeVerification -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import scala.collection.mutable @CodeVerification -class PomSuite extends FunSuite { +class PomSuite extends AnyFunSuite { private val pomFileName = "pom.xml" private val fipsPomFileName = "fips-pom.xml" diff --git a/src/test/scala/com/snowflake/snowpark/ErrorMessageSuite.scala b/src/test/scala/com/snowflake/snowpark/ErrorMessageSuite.scala index 211131ab..74c1f123 100644 --- a/src/test/scala/com/snowflake/snowpark/ErrorMessageSuite.scala +++ b/src/test/scala/com/snowflake/snowpark/ErrorMessageSuite.scala @@ -6,9 +6,9 @@ import com.snowflake.snowpark.internal.ParameterUtils.{ MIN_REQUEST_TIMEOUT_IN_SECONDS, SnowparkRequestTimeoutInSeconds } -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class ErrorMessageSuite extends FunSuite { +class ErrorMessageSuite extends AnyFunSuite { test("INTERNAL_TEST_MESSAGE") { val ex = ErrorMessage.INTERNAL_TEST_MESSAGE("my message: '%d $'") diff --git a/src/test/scala/com/snowflake/snowpark/ExpressionAndPlanNodeSuite.scala b/src/test/scala/com/snowflake/snowpark/ExpressionAndPlanNodeSuite.scala index 374105f4..f8dfcb73 100644 --- a/src/test/scala/com/snowflake/snowpark/ExpressionAndPlanNodeSuite.scala +++ b/src/test/scala/com/snowflake/snowpark/ExpressionAndPlanNodeSuite.scala @@ -165,7 +165,7 @@ class ExpressionAndPlanNodeSuite extends SNTestBase { binaryChecker(Like) binaryChecker(RegExp) binaryChecker((x, y) => FunctionExpression("dummy", Seq(x, y), isDistinct = false)) - binaryChecker((x, y) => internal.analyzer.TableFunction("dummy", Seq(x, y))) + binaryChecker((x, y) => internal.analyzer.TableFunctionEx("dummy", Seq(x, y))) binaryChecker((x, y) => NamedArgumentsTableFunction("dummy", Map("a" -> x, "b" -> y))) binaryChecker((x, y) => GroupingSetsExpression(Seq(Set(x, y)))) binaryChecker((x, y) => GroupingSetsExpression(Seq(Set(x), Set(y)))) @@ -386,7 +386,7 @@ class ExpressionAndPlanNodeSuite extends SNTestBase { unaryAnalyzerChecker(SubfieldInt(_, 1)) analyzerChecker(2, FunctionExpression("dummy", _, isDistinct = false)) unaryAnalyzerChecker(FlattenFunction(_, "a", outer = false, recursive = true, "b")) - analyzerChecker(2, internal.analyzer.TableFunction("dummy", _)) + analyzerChecker(2, 
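// The FunSuite -> AnyFunSuite churn in the test diffs comes from the ScalaTest
// 3.0 -> 3.2 upgrade in build.sbt: suite classes moved into style-specific
// packages. Minimal before/after sketch:
//   // ScalaTest 3.0.x
//   import org.scalatest.FunSuite
//   class MySuite extends FunSuite { ... }
// ScalaTest 3.2.x:
import org.scalatest.funsuite.AnyFunSuite

class MigrationExampleSuite extends AnyFunSuite {
  test("suite compiles against ScalaTest 3.2") {
    assert(1 + 1 == 2)
  }
}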
internal.analyzer.TableFunctionEx("dummy", _)) analyzerChecker( 2, data => NamedArgumentsTableFunction("dummy", Map("a" -> data.head, "b" -> data(1)))) @@ -607,12 +607,12 @@ class ExpressionAndPlanNodeSuite extends SNTestBase { } test("TableFunctionRelation - Analyzer") { - val exp = internal.analyzer.TableFunction("dummy", Seq.empty) + val exp = internal.analyzer.TableFunctionEx("dummy", Seq.empty) val plan = TableFunctionRelation(exp) assert(plan.aliasMap.isEmpty) assert(plan.analyzed == plan) - val exp1 = internal.analyzer.TableFunction("dummy", Seq(alias1, alias2)) + val exp1 = internal.analyzer.TableFunctionEx("dummy", Seq(alias1, alias2)) val plan1 = TableFunctionRelation(exp1) assert(plan1.aliasMap == map1) assert(plan1.analyzed.toString == plan1.toString) @@ -832,7 +832,7 @@ class ExpressionAndPlanNodeSuite extends SNTestBase { } test("Lateral - Analyzer") { - import com.snowflake.snowpark.internal.analyzer.{TableFunction => TableFunc} + import com.snowflake.snowpark.internal.analyzer.{TableFunctionEx => TableFunc} val tf = TableFunc("dummy", Seq(attr3)) val plan1 = Lateral(child1, tf) assert(plan1.aliasMap == map2) @@ -880,7 +880,7 @@ class ExpressionAndPlanNodeSuite extends SNTestBase { } test("TableFunctionJoin - Analyzer") { - import com.snowflake.snowpark.internal.analyzer.{TableFunction => TableFunc} + import com.snowflake.snowpark.internal.analyzer.{TableFunctionEx => TableFunc} val tf = TableFunc("dummy", Seq(attr3)) val plan1 = TableFunctionJoin(child1, tf, None) assert(plan1.aliasMap == map2) @@ -1087,7 +1087,7 @@ class ExpressionAndPlanNodeSuite extends SNTestBase { simplifierChecker(2, data => func(data.head, data(1))) test("simplifier") { - val tableFunction = internal.analyzer.TableFunction("dummy", Seq.empty) + val tableFunction = internal.analyzer.TableFunctionEx("dummy", Seq.empty) leafSimplifierChecker(TableFunctionRelation(tableFunction)) leafSimplifierChecker(Range(1, 1, 2)) leafSimplifierChecker(Generator(Seq(tableFunction), 1)) diff --git a/src/test/scala/com/snowflake/snowpark/FatJarBuilderSuite.scala b/src/test/scala/com/snowflake/snowpark/FatJarBuilderSuite.scala index c3007066..28d94199 100644 --- a/src/test/scala/com/snowflake/snowpark/FatJarBuilderSuite.scala +++ b/src/test/scala/com/snowflake/snowpark/FatJarBuilderSuite.scala @@ -3,14 +3,13 @@ package com.snowflake.snowpark import java.io.{File, FileOutputStream} import java.util.jar.{JarFile, JarOutputStream} import java.util.zip.ZipException - import com.snowflake.snowpark.internal.{FatJarBuilder, JavaCodeCompiler} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import scala.collection.mutable.ArrayBuffer import scala.util.Random -class FatJarBuilderSuite extends FunSuite with FileUtils { +class FatJarBuilderSuite extends AnyFunSuite with FileUtils { test("Check that fat jar is built correctly") { val className = "HelloWorld" diff --git a/src/test/scala/com/snowflake/snowpark/JavaAPISuite.scala b/src/test/scala/com/snowflake/snowpark/JavaAPISuite.scala index 98365cac..ede3bb5f 100644 --- a/src/test/scala/com/snowflake/snowpark/JavaAPISuite.scala +++ b/src/test/scala/com/snowflake/snowpark/JavaAPISuite.scala @@ -1,10 +1,11 @@ package com.snowflake.snowpark -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite + import java.io.ByteArrayOutputStream @JavaAPITest -class JavaAPISuite extends FunSuite { +class JavaAPISuite extends AnyFunSuite { // some tests can't be implemented in Java are listed below // console redirect doesn't work in Java since run JUnit 
from Scala diff --git a/src/test/scala/com/snowflake/snowpark/JavaCodeCompilerSuite.scala b/src/test/scala/com/snowflake/snowpark/JavaCodeCompilerSuite.scala index 4e3faa54..f9e7bd4e 100644 --- a/src/test/scala/com/snowflake/snowpark/JavaCodeCompilerSuite.scala +++ b/src/test/scala/com/snowflake/snowpark/JavaCodeCompilerSuite.scala @@ -1,9 +1,9 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal.{InMemoryClassObject, JavaCodeCompiler, UDFClassPath} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class JavaCodeCompilerSuite extends FunSuite { +class JavaCodeCompilerSuite extends AnyFunSuite { test("Compile a class that requires scala in classpath") { val className = "HelloWorld" diff --git a/src/test/scala/com/snowflake/snowpark/LoggingSuite.scala b/src/test/scala/com/snowflake/snowpark/LoggingSuite.scala index d3a31d9e..0ea52e39 100644 --- a/src/test/scala/com/snowflake/snowpark/LoggingSuite.scala +++ b/src/test/scala/com/snowflake/snowpark/LoggingSuite.scala @@ -1,9 +1,9 @@ package com.snowflake.snowpark import com.snowflake.snowpark.internal.Logging -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class LoggingSuite extends FunSuite { +class LoggingSuite extends AnyFunSuite { test("log name") { val a = new LoggingTestA diff --git a/src/test/scala/com/snowflake/snowpark/NewColumnReferenceSuite.scala b/src/test/scala/com/snowflake/snowpark/NewColumnReferenceSuite.scala index 3281a32c..2a501bb2 100644 --- a/src/test/scala/com/snowflake/snowpark/NewColumnReferenceSuite.scala +++ b/src/test/scala/com/snowflake/snowpark/NewColumnReferenceSuite.scala @@ -232,7 +232,7 @@ class NewColumnReferenceSuite extends SNTestBase { // internal renamed columns tests test("internal renamed columns") { - val tableExp = internal.analyzer.TableFunction("dummy", Seq.empty) + val tableExp = internal.analyzer.TableFunctionEx("dummy", Seq.empty) val att = internal.analyzer.Attribute("a", IntegerType) // Project val p1 = Project(Seq(Alias(Attribute("c", IntegerType), "d", isInternal = true)), project1) diff --git a/src/test/scala/com/snowflake/snowpark/SNTestBase.scala b/src/test/scala/com/snowflake/snowpark/SNTestBase.scala index e1245569..2a5ab872 100644 --- a/src/test/scala/com/snowflake/snowpark/SNTestBase.scala +++ b/src/test/scala/com/snowflake/snowpark/SNTestBase.scala @@ -8,14 +8,15 @@ import com.snowflake.snowpark.internal.{ParameterUtils, ServerConnection, UDFCla import com.snowflake.snowpark.types._ import com.snowflake.snowpark_test.TestFiles import org.mockito.Mockito.{doReturn, spy, when} -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite import scala.collection.mutable.ArrayBuffer import scala.concurrent.{Await, Future} import scala.concurrent.duration._ import scala.concurrent.ExecutionContext.Implicits.global -trait SNTestBase extends FunSuite with BeforeAndAfterAll with SFTestUtils with SnowTestFiles { +trait SNTestBase extends AnyFunSuite with BeforeAndAfterAll with SFTestUtils with SnowTestFiles { protected val defaultProfile: String = TestUtils.defaultProfile diff --git a/src/test/scala/com/snowflake/snowpark/SnowparkSFConnectionHandlerSuite.scala b/src/test/scala/com/snowflake/snowpark/SnowparkSFConnectionHandlerSuite.scala index 042a8146..cd11837a 100644 --- a/src/test/scala/com/snowflake/snowpark/SnowparkSFConnectionHandlerSuite.scala +++ b/src/test/scala/com/snowflake/snowpark/SnowparkSFConnectionHandlerSuite.scala @@ -1,9 +1,9 @@ 
 package com.snowflake.snowpark
 
-import org.scalatest.FunSuite
 import com.snowflake.snowpark.internal.SnowparkSFConnectionHandler
+import org.scalatest.funsuite.AnyFunSuite
 
-class SnowparkSFConnectionHandlerSuite extends FunSuite {
+class SnowparkSFConnectionHandlerSuite extends AnyFunSuite {
 
   test("version") {
     assert(SnowparkSFConnectionHandler.extractValidVersionNumber("0.1.0-snapshot").equals("0.1.0"))
diff --git a/src/test/scala/com/snowflake/snowpark/TestUtils.scala b/src/test/scala/com/snowflake/snowpark/TestUtils.scala
index 46b22227..0221fa10 100644
--- a/src/test/scala/com/snowflake/snowpark/TestUtils.scala
+++ b/src/test/scala/com/snowflake/snowpark/TestUtils.scala
@@ -29,6 +29,7 @@ import net.snowflake.client.jdbc.{
   SnowflakeConnectString,
   SnowflakeConnectionV1
 }
+import org.scalatest.funsuite.AnyFunSuite
 
 import java.security.Provider
 import scala.collection.JavaConverters._
@@ -142,6 +143,8 @@ object TestUtils extends Logging {
     List(
       classOf[BeforeAndAfterAll], // scala test jar
+      classOf[AnyFunSuite],
+      classOf[org.scalatest.compatible.Assertion],
      classOf[org.scalactic.TripleEquals], // scalactic jar
      classOf[io.opentelemetry.exporters.inmemory.InMemorySpanExporter],
      classOf[io.opentelemetry.sdk.trace.export.SpanExporter])
diff --git a/src/test/scala/com/snowflake/snowpark_test/DataFrameAggregateSuite.scala b/src/test/scala/com/snowflake/snowpark_test/DataFrameAggregateSuite.scala
index 7418701a..dd7b748e 100644
--- a/src/test/scala/com/snowflake/snowpark_test/DataFrameAggregateSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/DataFrameAggregateSuite.scala
@@ -3,7 +3,6 @@ package com.snowflake.snowpark_test
 import com.snowflake.snowpark.functions._
 import com.snowflake.snowpark._
 import net.snowflake.client.jdbc.SnowflakeSQLException
-import org.scalatest.Matchers.the
 
 import java.sql.ResultSet
 
@@ -306,9 +305,7 @@ class DataFrameAggregateSuite extends TestData {
   // Used temporary VIEW which is not supported by owner's mode stored proc yet
   test("Window functions inside aggregate functions", JavaStoredProcExcludeOwner) {
     def checkWindowError(df: => DataFrame): Unit = {
-      the[SnowflakeSQLException] thrownBy {
-        df.collect()
-      }
+      assertThrows[SnowflakeSQLException](df.collect())
     }
     checkWindowError(testData2.select(min(avg($"b").over(Window.partitionBy($"a")))))
     checkWindowError(testData2.agg(sum($"b"), max(rank().over(Window.orderBy($"a")))))
diff --git a/src/test/scala/com/snowflake/snowpark_test/IndependentClassSuite.scala b/src/test/scala/com/snowflake/snowpark_test/IndependentClassSuite.scala
index 25b772db..5d02f598 100644
--- a/src/test/scala/com/snowflake/snowpark_test/IndependentClassSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/IndependentClassSuite.scala
@@ -1,14 +1,14 @@
 package com.snowflake.snowpark_test
 
-import org.scalatest.FunSuite
 import org.scalatest.exceptions.TestFailedException
 import com.snowflake.snowpark.internal.Utils
+import org.scalatest.funsuite.AnyFunSuite
 
 import scala.language.postfixOps
 import sys.process._
 
 // verify those classes do not depend on Snowpark package
-class IndependentClassSuite extends FunSuite {
+class IndependentClassSuite extends AnyFunSuite {
   lazy val pathPrefix = s"target/scala-${Utils.ScalaCompatVersion}/"
   private def generatePath(path: String): String = pathPrefix + path
   test("scala variant") {
diff --git a/src/test/scala/com/snowflake/snowpark_test/JavaUtilsSuite.scala b/src/test/scala/com/snowflake/snowpark_test/JavaUtilsSuite.scala
index 04228c55..c8eb59b5 100644
--- a/src/test/scala/com/snowflake/snowpark_test/JavaUtilsSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/JavaUtilsSuite.scala
@@ -1,16 +1,17 @@
 package com.snowflake.snowpark_test
 
-import org.scalatest.FunSuite
 import com.snowflake.snowpark.internal.JavaUtils._
 import com.snowflake.snowpark.types.Variant
 import com.snowflake.snowpark_java.types.{Variant => JavaVariant}
+import org.scalatest.funsuite.AnyFunSuite
+
 import scala.collection.JavaConverters._
 import java.util
 
 // test UDF utils functions
 // those functions work on server side.
 // can't be detected by test coverage report.
-class JavaUtilsSuite extends FunSuite {
+class JavaUtilsSuite extends AnyFunSuite {
 
   test("geography to string") {
     val data = "{\"type\":\"Point\",\"coordinates\":[125.6, 10.1]}"
diff --git a/src/test/scala/com/snowflake/snowpark_test/ScalaVariantSuite.scala b/src/test/scala/com/snowflake/snowpark_test/ScalaVariantSuite.scala
index 1f03c3f6..9d84ae77 100644
--- a/src/test/scala/com/snowflake/snowpark_test/ScalaVariantSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/ScalaVariantSuite.scala
@@ -1,12 +1,12 @@
 package com.snowflake.snowpark_test
 
 import com.snowflake.snowpark.types.{Geography, Variant}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
 
 import java.io.UncheckedIOException
 import java.sql.{Date, Time, Timestamp}
 
-class ScalaVariantSuite extends FunSuite {
+class ScalaVariantSuite extends AnyFunSuite {
   test("scala constructor and extension functions") {
     assert(new Variant(1.1).asDouble() == 1.1)
     assert(new Variant(1.2f).asFloat() == 1.2f)
diff --git a/src/test/scala/com/snowflake/snowpark_test/WindowSpecSuite.scala b/src/test/scala/com/snowflake/snowpark_test/WindowSpecSuite.scala
index 00100566..d2cb2b14 100644
--- a/src/test/scala/com/snowflake/snowpark_test/WindowSpecSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/WindowSpecSuite.scala
@@ -3,7 +3,6 @@ package com.snowflake.snowpark_test
 import com.snowflake.snowpark.functions._
 import com.snowflake.snowpark.{DataFrame, Row, TestData, Window}
 import net.snowflake.client.jdbc.SnowflakeSQLException
-import org.scalatest.Matchers.the
 
 import scala.reflect.ClassTag
 
@@ -55,44 +54,41 @@ class WindowSpecSuite extends TestData {
   }
 
   test("Window functions inside WHERE and HAVING clauses") {
-    def checkAnalysisError[T: ClassTag](df: => DataFrame): Unit = {
-      the[T] thrownBy {
+    def checkAnalysisError(df: => DataFrame): Unit = {
+      assertThrows[SnowflakeSQLException] {
         df.collect()
       }
     }
-    checkAnalysisError[SnowflakeSQLException](
-      testData2.select("a").where(rank().over(Window.orderBy($"b")) === 1))
-    checkAnalysisError[SnowflakeSQLException](
-      testData2.where($"b" === 2 && rank().over(Window.orderBy($"b")) === 1))
-    checkAnalysisError[SnowflakeSQLException](
+    checkAnalysisError(testData2.select("a").where(rank().over(Window.orderBy($"b")) === 1))
+    checkAnalysisError(testData2.where($"b" === 2 && rank().over(Window.orderBy($"b")) === 1))
+    checkAnalysisError(
       testData2
         .groupBy($"a")
         .agg(avg($"b").as("avgb"))
         .where($"a" > $"avgb" && rank().over(Window.orderBy($"a")) === 1))
-    checkAnalysisError[SnowflakeSQLException](
+    checkAnalysisError(
       testData2
         .groupBy($"a")
         .agg(max($"b").as("maxb"), sum($"b").as("sumb"))
         .where(rank().over(Window.orderBy($"a")) === 1))
-    checkAnalysisError[SnowflakeSQLException](
+    checkAnalysisError(
       testData2
         .groupBy($"a")
         .agg(max($"b").as("maxb"), sum($"b").as("sumb"))
         .where($"sumb" === 5 && rank().over(Window.orderBy($"a")) === 1))
 
     testData2.createOrReplaceTempView("testData2")
-    checkAnalysisError[SnowflakeSQLException](
-      session.sql("SELECT a FROM testData2 WHERE RANK() OVER(ORDER BY b) = 1"))
-    checkAnalysisError[SnowflakeSQLException](
+    checkAnalysisError(session.sql("SELECT a FROM testData2 WHERE RANK() OVER(ORDER BY b) = 1"))
+    checkAnalysisError(
       session.sql("SELECT * FROM testData2 WHERE b = 2 AND RANK() OVER(ORDER BY b) = 1"))
-    checkAnalysisError[SnowflakeSQLException](
+    checkAnalysisError(
       session.sql(
         "SELECT * FROM testData2 GROUP BY a HAVING a > AVG(b) AND RANK() OVER(ORDER BY a) = 1"))
-    checkAnalysisError[SnowflakeSQLException](
+    checkAnalysisError(
       session.sql(
         "SELECT a, MAX(b), SUM(b) FROM testData2 GROUP BY a HAVING RANK() OVER(ORDER BY a) = 1"))
-    checkAnalysisError[SnowflakeSQLException](session.sql(s"""SELECT a, MAX(b)
+    checkAnalysisError(session.sql(s"""SELECT a, MAX(b)
       |FROM testData2
       |GROUP BY a
      |HAVING SUM(b) = 5 AND RANK() OVER(ORDER BY a) = 1