From a33c0ba3766162bf0d3a3cdd0b4202008aca824b Mon Sep 17 00:00:00 2001 From: Gokul R Date: Thu, 17 Oct 2024 20:08:58 +0530 Subject: [PATCH 1/5] Implement Cryptographic hash functions Signed-off-by: Gokul R --- docs/ppl-lang/functions/ppl-cryptographic.md | 73 +++++++++++++++++++ .../src/main/antlr4/OpenSearchPPLLexer.g4 | 5 ++ .../src/main/antlr4/OpenSearchPPLParser.g4 | 8 ++ .../function/BuiltinFunctionName.java | 5 ++ .../ppl/utils/BuiltinFunctionTranslator.java | 28 ++----- ...ographicFunctionsTranslatorTestSuite.scala | 69 ++++++++++++++++++ 6 files changed, 165 insertions(+), 23 deletions(-) create mode 100644 docs/ppl-lang/functions/ppl-cryptographic.md create mode 100644 ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanCryptographicFunctionsTranslatorTestSuite.scala diff --git a/docs/ppl-lang/functions/ppl-cryptographic.md b/docs/ppl-lang/functions/ppl-cryptographic.md new file mode 100644 index 000000000..7843893ec --- /dev/null +++ b/docs/ppl-lang/functions/ppl-cryptographic.md @@ -0,0 +1,73 @@ +## PPL Cryptographic Functions + +### `MD5` + +**Description** + + +Usage: `md5('hello')` calculates the Cryptographic hash value of `hello` using md5 algorithm. + +**Argument type:** +- STRING +- Return type: **STRING** + +Example: + + os> source=people | eval `MD5('hello')` = MD5('hello') | fields `MD5('hello')` + fetched rows / total rows = 1/1 + +----------------------------------+ + | MD5('hello') | + |----------------------------------| + | 5d41402abc4b2a76b9719d911017c592 | + +----------------------------------+ + +### `SHA1` + +**Description** + + +Usage: `sha1('hello')` calculates the Cryptographic hash value of `hello` using sha1 algorithm. + +**Argument type:** +- STRING +- Return type: **STRING** + +Example: + + os> source=people | eval `SHA1('hello')` = SHA1('hello') | fields `SHA1('hello')` + fetched rows / total rows = 1/1 + +------------------------------------------+ + | SHA1('hello') | + |------------------------------------------| + | aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d | + +------------------------------------------+ + +### `SHA2` + +**Description** + + +Usage: `sha2('hello',256)` calculates the Cryptographic hash value of `hello` using sha256 algorithm. +Usage: `sha2('hello',512)` calculates the Cryptographic hash value of `hello` using sha512 algorithm. 
+ +**Argument type:** +- STRING +- Return type: **STRING** + +Example: + + os> source=people | eval `SHA2('hello',256)` = SHA2('hello',256) | fields `SHA2('hello',256)` + fetched rows / total rows = 1/1 + +------------------------------------------------------------------+ + | SHA2('hello',256) | + |------------------------------------------------------------------| + | 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 | + +------------------------------------------------------------------+ + + os> source=people | eval `SHA2('hello',512)` = SHA2('hello',512) | fields `SHA2('hello',512)` + fetched rows / total rows = 1/1 + +----------------------------------------------------------------------------------------------------------------------------------+ + | SHA2('hello',512) | + |----------------------------------------------------------------------------------------------------------------------------------| + | 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043 | + +----------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index 2b916a245..3a0a8d1fb 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -280,6 +280,11 @@ RADIANS: 'RADIANS'; SIN: 'SIN'; TAN: 'TAN'; +// CRYPTOGRAPHIC FUNCTIONS +MD5: 'MD5'; +SHA1: 'SHA1'; +SHA2: 'SHA2'; + // DATE AND TIME FUNCTIONS ADDDATE: 'ADDDATE'; ADDTIME: 'ADDTIME'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 7a6f14839..c20ed7a24 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -498,6 +498,7 @@ evalFunctionName | systemFunctionName | positionFunctionName | coalesceFunctionName + | cryptographicFunctionName ; functionArgs @@ -613,6 +614,12 @@ trigonometricFunctionName | TAN ; +cryptographicFunctionName + : MD5 + | SHA1 + | SHA2 + ; + dateTimeFunctionName : ADDDATE | ADDTIME @@ -943,6 +950,7 @@ keywordsCanBeId | textFunctionName | mathematicalFunctionName | positionFunctionName + | cryptographicFunctionName // commands | SEARCH | DESCRIBE diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 6b549663a..1e61272bc 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -52,6 +52,11 @@ public enum BuiltinFunctionName { SIN(FunctionName.of("sin")), TAN(FunctionName.of("tan")), + /** Cryptographic Functions. */ + MD5(FunctionName.of("md5")), + SHA1(FunctionName.of("sha1")), + SHA2(FunctionName.of("sha2")), + /** Date and Time Functions. 
*/ ADDDATE(FunctionName.of("adddate")), ADDTIME(FunctionName.of("addtime")), diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java index d817305a9..b0c8befaf 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java @@ -13,29 +13,7 @@ import java.util.List; import java.util.Map; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADD; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDDATE; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATEDIFF; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_MONTH; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.COALESCE; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBTRACT; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.MODULUS; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_WEEK; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_YEAR; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.HOUR_OF_DAY; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.LENGTH; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOCALTIME; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_HOUR; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH_OF_YEAR; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.SECOND_OF_MINUTE; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBDATE; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.TRIM; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.*; import static org.opensearch.sql.ppl.utils.DataTypeTransformer.seq; import static scala.Option.empty; @@ -66,6 +44,10 @@ public interface BuiltinFunctionTranslator { .put(ADDDATE, "date_add") // only maps adddate(date, days) .put(DATEDIFF, "datediff") .put(LOCALTIME, "localtimestamp") + // Cryptographic functions + .put(MD5, "md5") + .put(SHA1, "sha1") + .put(SHA2, "sha2") //condition functions .put(IS_NULL, "isnull") .put(IS_NOT_NULL, "isnotnull") diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanCryptographicFunctionsTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanCryptographicFunctionsTranslatorTestSuite.scala new file mode 100644 index 000000000..a3f163de9 --- /dev/null +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanCryptographicFunctionsTranslatorTestSuite.scala @@ -0,0 +1,69 
@@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.ppl + +import org.opensearch.flint.spark.ppl.PlaneUtils.plan +import org.opensearch.sql.ppl.{CatalystPlanContext, CatalystQueryPlanVisitor} +import org.opensearch.sql.ppl.utils.DataTypeTransformer.seq +import org.scalatest.matchers.should.Matchers + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{Alias, EqualTo, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, Literal, Not} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{Filter, Project} + +class PPLLogicalPlanCryptographicFunctionsTranslatorTestSuite + extends SparkFunSuite + with PlanTest + with LogicalPlanTestUtils + with Matchers { + + private val planTransformer = new CatalystQueryPlanVisitor() + private val pplParser = new PPLSyntaxParser() + + test("test md5") { + val context = new CatalystPlanContext + val logPlan = planTransformer.visit(plan(pplParser, "source=t a = md5(b)"), context) + + val table = UnresolvedRelation(Seq("t")) + val filterExpr = EqualTo( + UnresolvedAttribute("a"), + UnresolvedFunction("md5", seq(UnresolvedAttribute("b")), isDistinct = false)) + val filterPlan = Filter(filterExpr, table) + val projectList = Seq(UnresolvedStar(None)) + val expectedPlan = Project(projectList, filterPlan) + comparePlans(expectedPlan, logPlan, false) + } + + test("test sha1") { + val context = new CatalystPlanContext + val logPlan = planTransformer.visit(plan(pplParser, "source=t a = sha1(b)"), context) + + val table = UnresolvedRelation(Seq("t")) + val filterExpr = EqualTo( + UnresolvedAttribute("a"), + UnresolvedFunction("sha1", seq(UnresolvedAttribute("b")), isDistinct = false)) + val filterPlan = Filter(filterExpr, table) + val projectList = Seq(UnresolvedStar(None)) + val expectedPlan = Project(projectList, filterPlan) + comparePlans(expectedPlan, logPlan, false) + } + + test("test sha2") { + val context = new CatalystPlanContext + val logPlan = planTransformer.visit(plan(pplParser, "source=t a = sha2(b,256)"), context) + + val table = UnresolvedRelation(Seq("t")) + val filterExpr = EqualTo( + UnresolvedAttribute("a"), + UnresolvedFunction("sha2", seq(UnresolvedAttribute("b"), Literal(256)), isDistinct = false)) + val filterPlan = Filter(filterExpr, table) + val projectList = Seq(UnresolvedStar(None)) + val expectedPlan = Project(projectList, filterPlan) + comparePlans(expectedPlan, logPlan, false) + } +} From 8558278589172e3667a36e8f602fe7ca6441a3aa Mon Sep 17 00:00:00 2001 From: Gokul R Date: Fri, 18 Oct 2024 14:52:28 +0530 Subject: [PATCH 2/5] update documentation Signed-off-by: Gokul R --- docs/ppl-lang/PPL-Example-Commands.md | 4 ++++ docs/ppl-lang/functions/ppl-cryptographic.md | 14 +++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index 8e6cbaae9..8a98e30f9 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -92,6 +92,10 @@ Assumptions: `a`, `b`, `c` are existing fields in `table` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one', a = 2, 'two', a = 3, 'three', a = 4, 'four', a = 5, 'five', a = 6, 'six', a = 7, 'se7en', a = 8, 'eight', a = 9, 'nine')` - `source = table | eval f = case(a = 0, 'zero', a = 
1, 'one' else 'unknown')` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else concat(a, ' is an incorrect binary digit'))` +- `source = table | eval digest = md5(fieldName) | fields digest` +- `source = table | eval digest = sha1(fieldName) | fields digest` +- `source = table | eval digest = sha2(fieldName,256) | fields digest` +- `source = table | eval digest = sha2(fieldName,512) | fields digest` #### Fillnull Assumptions: `a`, `b`, `c`, `d`, `e` are existing fields in `table` diff --git a/docs/ppl-lang/functions/ppl-cryptographic.md b/docs/ppl-lang/functions/ppl-cryptographic.md index 7843893ec..ecabc624c 100644 --- a/docs/ppl-lang/functions/ppl-cryptographic.md +++ b/docs/ppl-lang/functions/ppl-cryptographic.md @@ -4,8 +4,9 @@ **Description** +Calculates the MD5 digest and returns the value as a 32 character hex string. -Usage: `md5('hello')` calculates the Cryptographic hash value of `hello` using md5 algorithm. +Usage: `md5('hello')` **Argument type:** - STRING @@ -25,8 +26,9 @@ Example: **Description** +Returns the hex string result of SHA-1 -Usage: `sha1('hello')` calculates the Cryptographic hash value of `hello` using sha1 algorithm. +Usage: `sha1('hello')` **Argument type:** - STRING @@ -46,12 +48,14 @@ Example: **Description** +Returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). The numBits indicates the desired bit length of the result, which must have a value of 224, 256, 384, 512 -Usage: `sha2('hello',256)` calculates the Cryptographic hash value of `hello` using sha256 algorithm. -Usage: `sha2('hello',512)` calculates the Cryptographic hash value of `hello` using sha512 algorithm. +Usage: `sha2('hello',256)` + +Usage: `sha2('hello',512)` **Argument type:** -- STRING +- STRING, INTEGER - Return type: **STRING** Example: From cba75ac29e24e57da055fb33992b90964082de6d Mon Sep 17 00:00:00 2001 From: Gokul R Date: Mon, 21 Oct 2024 13:28:48 +0530 Subject: [PATCH 3/5] added integration tests and updated readme file Signed-off-by: Gokul R --- docs/ppl-lang/README.md | 2 + .../FlintSparkPPLBuiltinFunctionITSuite.scala | 69 +++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md index 2ddceca0a..a840a9f28 100644 --- a/docs/ppl-lang/README.md +++ b/docs/ppl-lang/README.md @@ -79,6 +79,8 @@ For additional examples see the next [documentation](PPL-Example-Commands.md). 
- [`Type Conversion Functions`](functions/ppl-conversion.md) + - [`Cryptographic Functions`](functions/ppl-cryptographic.md) + --- ### PPL On Spark diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala index 67e799c00..215751236 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala @@ -785,6 +785,75 @@ class FlintSparkPPLBuiltinFunctionITSuite assert(results.sameElements(expectedResults)) } + test("test cryptographic hash functions - md5") { + val frame = sql(s""" + | source = $testTable digest=md5('Spark') | fields digest + | """.stripMargin) + + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array(Row("8cde774d6f7333752ed72cacddb05126")) + assert(results.sameElements(expectedResults)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) + val filterExpr = EqualTo( + UnresolvedAttribute("digest"), + UnresolvedFunction( + "md5", + seq(Literal("Spark")), + isDistinct = false)) + val filterPlan = Filter(filterExpr, table) + val projectList = Seq(UnresolvedAttribute("digest")) + val expectedPlan = Project(projectList, filterPlan) + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test cryptographic hash functions - sha1") { + val frame = sql(s""" + | source = $testTable digest=sha1('Spark') | fields digest + | """.stripMargin) + + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array(Row("85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c")) + assert(results.sameElements(expectedResults)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) + val filterExpr = EqualTo( + UnresolvedAttribute("digest"), + UnresolvedFunction( + "sha1", + seq(Literal("Spark")), + isDistinct = false)) + val filterPlan = Filter(filterExpr, table) + val projectList = Seq(UnresolvedAttribute("digest")) + val expectedPlan = Project(projectList, filterPlan) + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test cryptographic hash functions - sha2") { + val frame = sql(s""" + | source = $testTable digest=sha2('Spark',256) | fields digest + | """.stripMargin) + + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array(Row("529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b")) + assert(results.sameElements(expectedResults)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) + val filterExpr = EqualTo( + UnresolvedAttribute("digest"), + UnresolvedFunction( + "sha2", + seq(Literal("Spark"), Literal(256)), + isDistinct = false)) + val filterPlan = Filter(filterExpr, table) + val projectList = Seq(UnresolvedAttribute("digest")) + val expectedPlan = Project(projectList, filterPlan) + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + // Todo // +---------------------------------------+ // | Below tests are not supported (cast) | From 02b67ada9e0f3235a5d4ba7fc33da50c1b4a1f30 Mon Sep 17 00:00:00 2001 From: Gokul R Date: Mon, 21 Oct 2024 
22:17:08 +0530 Subject: [PATCH 4/5] format the code Signed-off-by: Gokul R --- .../FlintSparkPPLBuiltinFunctionITSuite.scala | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala index 215751236..a923d75fb 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala @@ -798,10 +798,7 @@ class FlintSparkPPLBuiltinFunctionITSuite val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) val filterExpr = EqualTo( UnresolvedAttribute("digest"), - UnresolvedFunction( - "md5", - seq(Literal("Spark")), - isDistinct = false)) + UnresolvedFunction("md5", seq(Literal("Spark")), isDistinct = false)) val filterPlan = Filter(filterExpr, table) val projectList = Seq(UnresolvedAttribute("digest")) val expectedPlan = Project(projectList, filterPlan) @@ -821,10 +818,7 @@ class FlintSparkPPLBuiltinFunctionITSuite val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) val filterExpr = EqualTo( UnresolvedAttribute("digest"), - UnresolvedFunction( - "sha1", - seq(Literal("Spark")), - isDistinct = false)) + UnresolvedFunction("sha1", seq(Literal("Spark")), isDistinct = false)) val filterPlan = Filter(filterExpr, table) val projectList = Seq(UnresolvedAttribute("digest")) val expectedPlan = Project(projectList, filterPlan) @@ -837,17 +831,15 @@ class FlintSparkPPLBuiltinFunctionITSuite | """.stripMargin) val results: Array[Row] = frame.collect() - val expectedResults: Array[Row] = Array(Row("529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b")) + val expectedResults: Array[Row] = + Array(Row("529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b")) assert(results.sameElements(expectedResults)) val logicalPlan: LogicalPlan = frame.queryExecution.logical val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) val filterExpr = EqualTo( UnresolvedAttribute("digest"), - UnresolvedFunction( - "sha2", - seq(Literal("Spark"), Literal(256)), - isDistinct = false)) + UnresolvedFunction("sha2", seq(Literal("Spark"), Literal(256)), isDistinct = false)) val filterPlan = Filter(filterExpr, table) val projectList = Seq(UnresolvedAttribute("digest")) val expectedPlan = Project(projectList, filterPlan) From 9cbf7e76ab46f8c273b131e529d291492e297b86 Mon Sep 17 00:00:00 2001 From: Gokul R Date: Tue, 22 Oct 2024 00:14:35 +0530 Subject: [PATCH 5/5] fix integration tests Signed-off-by: Gokul R --- .../FlintSparkPPLBuiltinFunctionITSuite.scala | 53 +++++-------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala index a923d75fb..763c2411b 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala @@ -787,63 +787,38 @@ class FlintSparkPPLBuiltinFunctionITSuite test("test cryptographic hash functions - md5") { val frame = sql(s""" - 
| source = $testTable digest=md5('Spark') | fields digest + | source = $testTable | eval a = md5('Spark') = '8cde774d6f7333752ed72cacddb05126' | fields age, a | """.stripMargin) val results: Array[Row] = frame.collect() - val expectedResults: Array[Row] = Array(Row("8cde774d6f7333752ed72cacddb05126")) - assert(results.sameElements(expectedResults)) - - val logicalPlan: LogicalPlan = frame.queryExecution.logical - val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) - val filterExpr = EqualTo( - UnresolvedAttribute("digest"), - UnresolvedFunction("md5", seq(Literal("Spark")), isDistinct = false)) - val filterPlan = Filter(filterExpr, table) - val projectList = Seq(UnresolvedAttribute("digest")) - val expectedPlan = Project(projectList, filterPlan) - comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + val expectedResults: Array[Row] = + Array(Row(70, true), Row(30, true), Row(25, true), Row(20, true)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) } test("test cryptographic hash functions - sha1") { val frame = sql(s""" - | source = $testTable digest=sha1('Spark') | fields digest + | source = $testTable | eval a = sha1('Spark') = '85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c' | fields age, a | """.stripMargin) val results: Array[Row] = frame.collect() - val expectedResults: Array[Row] = Array(Row("85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c")) - assert(results.sameElements(expectedResults)) - - val logicalPlan: LogicalPlan = frame.queryExecution.logical - val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) - val filterExpr = EqualTo( - UnresolvedAttribute("digest"), - UnresolvedFunction("sha1", seq(Literal("Spark")), isDistinct = false)) - val filterPlan = Filter(filterExpr, table) - val projectList = Seq(UnresolvedAttribute("digest")) - val expectedPlan = Project(projectList, filterPlan) - comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + val expectedResults: Array[Row] = + Array(Row(70, true), Row(30, true), Row(25, true), Row(20, true)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) } test("test cryptographic hash functions - sha2") { val frame = sql(s""" - | source = $testTable digest=sha2('Spark',256) | fields digest + | source = $testTable | eval a = sha2('Spark',256) = '529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b' | fields age, a | """.stripMargin) val results: Array[Row] = frame.collect() val expectedResults: Array[Row] = - Array(Row("529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b")) - assert(results.sameElements(expectedResults)) - - val logicalPlan: LogicalPlan = frame.queryExecution.logical - val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) - val filterExpr = EqualTo( - UnresolvedAttribute("digest"), - UnresolvedFunction("sha2", seq(Literal("Spark"), Literal(256)), isDistinct = false)) - val filterPlan = Filter(filterExpr, table) - val projectList = Seq(UnresolvedAttribute("digest")) - val expectedPlan = Project(projectList, filterPlan) - comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + Array(Row(70, true), Row(30, true), Row(25, true), Row(20, true)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) } // 
Todo
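
The expected digests hard-coded in the integration tests above are the standard MD5, SHA-1, and SHA-256 values of the string `Spark`, matching the examples in the new `ppl-cryptographic.md` documentation. A minimal standalone Scala sketch that re-derives those constants with the JDK's `MessageDigest` (assuming UTF-8 input encoding; `DigestConstantsCheck` is an illustrative name and not part of the patch series):

```scala
import java.nio.charset.StandardCharsets
import java.security.MessageDigest

object DigestConstantsCheck extends App {

  // Hex-encode the digest of `input` computed with the given JCA algorithm name.
  private def hexDigest(algorithm: String, input: String): String =
    MessageDigest
      .getInstance(algorithm)
      .digest(input.getBytes(StandardCharsets.UTF_8))
      .map("%02x".format(_))
      .mkString

  // Same values as the expected results in FlintSparkPPLBuiltinFunctionITSuite.
  assert(hexDigest("MD5", "Spark") == "8cde774d6f7333752ed72cacddb05126")
  assert(hexDigest("SHA-1", "Spark") == "85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c")
  assert(
    hexDigest("SHA-256", "Spark") ==
      "529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b")

  println("all digest constants match")
}
```

Spark's built-in `md5`, `sha1`, and `sha2` functions return the same lowercase hex encoding, which is why the string-equality comparisons in the `eval` expressions of the final test revision evaluate to `true`.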