diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index 228b785514205..48fd009d6e70f 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -23,6 +23,7 @@
 import java.sql.Timestamp;
 import java.time.*;
 import java.util.*;
+import java.util.stream.Collectors;
 import javax.annotation.Nonnull;
 
 import org.apache.spark.api.java.Optional;
@@ -535,6 +536,75 @@ public void testJoin() {
       joined.collectAsList());
   }
 
+  /**
+   * Orders tuples by their first element, breaking ties with the second element.
+   */
+  private static final Comparator<Tuple2<String, Integer>> comparatorStringAndIntTuple =
+    Comparator.comparing((Tuple2<String, Integer> t) -> t._1())
+      .thenComparing(t -> t._2());
+
+  /**
+   * Asserts that both lists hold the same tuples, ignoring element order.
+   * Each list is sorted with {@link #comparatorStringAndIntTuple} before being compared.
+   */
+  private void assertEqualsUnorderly(
+      List<Tuple2<String, Integer>> expected,
+      List<Tuple2<String, Integer>> actual) {
+    Assert.assertEquals(
+        expected.stream().sorted(comparatorStringAndIntTuple).collect(Collectors.toList()),
+        actual.stream().sorted(comparatorStringAndIntTuple).collect(Collectors.toList())
+    );
+  }
+
+  @Test
+  public void testDropDuplicates() {
+    List<Tuple2<String, Integer>> data = Arrays.asList(
+      new Tuple2<>("a", 1), new Tuple2<>("a", 2),
+      new Tuple2<>("b", 1), new Tuple2<>("a", 1)
+    );
+    Dataset<Tuple2<String, Integer>> ds = spark.createDataset(data,
+      Encoders.tuple(Encoders.STRING(), Encoders.INT()));
+
+    // With no column names, only the repeated ("a", 1) row is expected to disappear.
+    assertEqualsUnorderly(
+      Arrays.asList(tuple2("a", 1), tuple2("a", 2), tuple2("b", 1)),
+      ds.dropDuplicates().collectAsList()
+    );
+
+    // Single-column cases are called with a plain string and with an explicit String[].
+    // The expected rows assume the row appearing first in "data" for each key is retained.
+    assertEqualsUnorderly(
+      Arrays.asList(tuple2("a", 1), tuple2("b", 1)),
+      ds.dropDuplicates("_1").collectAsList()
+    );
+
+    assertEqualsUnorderly(
+      Arrays.asList(tuple2("a", 1), tuple2("b", 1)),
+      ds.dropDuplicates(new String[] { "_1" }).collectAsList()
+    );
+
+    assertEqualsUnorderly(
+      Arrays.asList(tuple2("a", 1), tuple2("a", 2)),
+      ds.dropDuplicates("_2").collectAsList()
+    );
+
+    assertEqualsUnorderly(
+      Arrays.asList(tuple2("a", 1), tuple2("a", 2)),
+      ds.dropDuplicates(new String[] { "_2" }).collectAsList()
+    );
+
+    // Naming both columns is expected to give the same rows as the no-argument call.
+    assertEqualsUnorderly(
+      Arrays.asList(tuple2("a", 1), tuple2("a", 2), tuple2("b", 1)),
+      ds.dropDuplicates("_1", "_2").collectAsList()
+    );
+
+    assertEqualsUnorderly(
+      Arrays.asList(tuple2("a", 1), tuple2("a", 2), tuple2("b", 1)),
+      ds.dropDuplicates(new String[] { "_1", "_2" }).collectAsList()
+    );
+  }
+
   @Test
   public void testTupleEncoder() {
     Encoder<Tuple2<Integer, String>> encoder2 = Encoders.tuple(Encoders.INT(), Encoders.STRING());