Skip to content

Commit

Permalink
add document
Browse files Browse the repository at this point in the history
Signed-off-by: Lantao Jin <[email protected]>
  • Loading branch information
LantaoJin committed Aug 5, 2024
1 parent 941155a commit f53a3ec
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ class FlintSparkPPLDedupITSuite
val ex = intercept[UnsupportedOperationException](sql(s"""
| source = $testTable | dedup 1 name CONSECUTIVE=true | fields name
| """.stripMargin))
assert(ex.getMessage.contains("Consecutive duplicate events are not supported"))
assert(ex.getMessage.contains("Consecutive deduplication is not supported"))
}

test("test 1 name KEEPEMPTY=true CONSECUTIVE=true") {
val ex = intercept[UnsupportedOperationException](sql(s"""
| source = $testTable | dedup 1 name KEEPEMPTY=true CONSECUTIVE=true | fields name
| """.stripMargin))
assert(ex.getMessage.contains("Consecutive duplicate events are not supported"))
assert(ex.getMessage.contains("Consecutive deduplication is not supported"))
}

ignore("test dedupe 2 name") {
Expand Down Expand Up @@ -298,13 +298,13 @@ class FlintSparkPPLDedupITSuite
val ex = intercept[UnsupportedOperationException](sql(s"""
| source = $testTable | dedup 2 name CONSECUTIVE=true | fields name
| """.stripMargin))
assert(ex.getMessage.contains("Consecutive duplicate events are not supported"))
assert(ex.getMessage.contains("Consecutive deduplication is not supported"))
}

test("test 2 name KEEPEMPTY=true CONSECUTIVE=true") {
val ex = intercept[UnsupportedOperationException](sql(s"""
| source = $testTable | dedup 2 name KEEPEMPTY=true CONSECUTIVE=true | fields name
| """.stripMargin))
assert(ex.getMessage.contains("Consecutive duplicate events are not supported"))
assert(ex.getMessage.contains("Consecutive deduplication is not supported"))
}
}
18 changes: 16 additions & 2 deletions ppl-spark-integration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,21 @@ Limitation: Overriding existing field is unsupported, following queries throw ex

> For additional details, review [FlintSparkPPLTimeWindowITSuite.scala](../integ-test/src/test/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLTimeWindowITSuite.scala)
**Dedup**

- `source = table | dedup a | fields a,b,c`
- `source = table | dedup a,b | fields a,b,c`
- `source = table | dedup a keepempty=true | fields a,b,c`
- `source = table | dedup a,b keepempty=true | fields a,b,c`
- `source = table | dedup 1 a | fields a,b,c`
- `source = table | dedup 1 a,b | fields a,b,c`
- `source = table | dedup 1 a keepempty=true | fields a,b,c`
- `source = table | dedup 1 a,b keepempty=true | fields a,b,c`
- `source = table | dedup 1 a consecutive=true| fields a,b,c` (Unsupported)
- `source = table | dedup 2 a | fields a,b,c` (Unsupported)

> For additional details, review [FlintSparkPPLDedupITSuite.scala](../integ-test/src/test/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLDedupITSuite.scala)
#### Supported Commands:
- `search` - [See details](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/search.rst)
- `where` - [See details](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/where.rst)
Expand All @@ -283,6 +298,7 @@ Limitation: Overriding existing field is unsupported, following queries throw ex
- `stats` - [See details](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/stats.rst) (supports AVG, COUNT, DISTINCT_COUNT, MAX, MIN and SUM aggregation functions)
- `sort` - [See details](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/sort.rst)
- `correlation` - [See details](../docs/PPL-Correlation-command.md)
- `dedup` - [See details](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/dedup.rst)

> For additional details, review [Integration Tests](../integ-test/src/test/scala/org/opensearch/flint/spark/)
Expand All @@ -298,5 +314,3 @@ Limitation: Overriding existing field is unsupported, following queries throw ex
- add [conditions](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/condition.rst) support
- add [top](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/top.rst) support
- add [cast](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/conversion.rst) support
- add [math](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/functions/math.rst) support
- add [deduplicate](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/dedup.rst) support
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ public LogicalPlan visitDedupe(Dedupe node, CatalystPlanContext context) {
}
if (consecutive) {
// Spark is not able to remove only consecutive events
throw new UnsupportedOperationException("Consecutive duplicate events are not supported");
throw new UnsupportedOperationException("Consecutive deduplication is not supported");
}
visitFieldList(node.getFields(), context);
// Columns to deduplicate
Expand Down Expand Up @@ -315,7 +315,7 @@ public LogicalPlan visitDedupe(Dedupe node, CatalystPlanContext context) {
return context.getPlan();
} else {
// Deduplicate ['a, 'b]
// +- Filter (isnotnull('a) AND isnotnull('b)
// +- Filter (isnotnull('a) AND isnotnull('b))
// +- Project
// +- UnresolvedRelation

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class PPLLogicalPlanDedupTranslatorTestSuite
plan(pplParser, "source=table | dedup a consecutive=true | fields a", false),
context)
}
assert(ex.getMessage === "Consecutive duplicate events are not supported")
assert(ex.getMessage === "Consecutive deduplication is not supported")
}

test("test dedup a keepempty=true consecutive=true") {
Expand All @@ -118,7 +118,7 @@ class PPLLogicalPlanDedupTranslatorTestSuite
false),
context)
}
assert(ex.getMessage === "Consecutive duplicate events are not supported")
assert(ex.getMessage === "Consecutive deduplication is not supported")
}

test("test dedup 1 a") {
Expand Down Expand Up @@ -203,7 +203,7 @@ class PPLLogicalPlanDedupTranslatorTestSuite
plan(pplParser, "source=table | dedup 1 a consecutive=true | fields a", false),
context)
}
assert(ex.getMessage === "Consecutive duplicate events are not supported")
assert(ex.getMessage === "Consecutive deduplication is not supported")
}

test("test dedup 1 a keepempty=true consecutive=true") {
Expand All @@ -216,7 +216,7 @@ class PPLLogicalPlanDedupTranslatorTestSuite
false),
context)
}
assert(ex.getMessage === "Consecutive duplicate events are not supported")
assert(ex.getMessage === "Consecutive deduplication is not supported")
}

test("test dedup 0") {
Expand Down Expand Up @@ -272,7 +272,7 @@ class PPLLogicalPlanDedupTranslatorTestSuite
plan(pplParser, "source=table | dedup 2 a consecutive=true | fields a | fields a", false),
context)
}
assert(ex.getMessage === "Consecutive duplicate events are not supported")
assert(ex.getMessage === "Consecutive deduplication is not supported")
}

test("test dedup 2 a keepempty=true consecutive=true") {
Expand All @@ -285,6 +285,6 @@ class PPLLogicalPlanDedupTranslatorTestSuite
false),
context)
}
assert(ex.getMessage === "Consecutive duplicate events are not supported")
assert(ex.getMessage === "Consecutive deduplication is not supported")
}
}

0 comments on commit f53a3ec

Please sign in to comment.