diff --git a/src/Set.php b/src/Set.php index 7f5a8ae..082929c 100644 --- a/src/Set.php +++ b/src/Set.php @@ -44,6 +44,35 @@ public static function distinct(iterable $data, bool $strict = true): \Generator } } + /** + * Iterate only the distinct elements, keeping the first element seen for each comparable value returned by $compareBy. + * + * Supports only strict-type comparisons: + * - scalars: compares strictly by type + * - objects: always treats different instances as not equal to each other + * - arrays: compares serialized + * + * @template T + * + * @param iterable<T> $data elements to filter + * @param callable(T): mixed $compareBy maps an element to the value it is compared by + * + * @return \Generator<T> the elements of $data, each yielded unchanged + */ + public static function distinctBy(iterable $data, callable $compareBy): \Generator + { + $map = []; + + foreach ($data as $datum) { + $hash = UniqueExtractor::getString($compareBy($datum), true); + + if (!isset($map[$hash])) { + $map[$hash] = true; + yield $datum; + } + } + } + /** * Iterates the intersection of iterables in strict type mode. * diff --git a/src/Stream.php b/src/Stream.php index 6ad121a..de663b4 100644 --- a/src/Stream.php +++ b/src/Stream.php @@ -667,6 +667,28 @@ public function distinct(bool $strict = true): self return $this; } + /** + * Filter out elements from the iterable source only returning unique elements. + * + * Using $compareBy function for getting comparable value. 
+ * + * Supports only strict-type comparisons: + * - scalars: compares strictly by type + * - objects: always treats different instances as not equal to each other + * - arrays: compares serialized + * + * @param callable $compareBy maps an element to the value it is compared by + * + * @return $this + * + * @see Set::distinctBy() + */ + public function distinctBy(callable $compareBy): self + { + $this->iterable = Set::distinctBy($this->iterable, $compareBy); + return $this; + } + /** * Cycle through the elements of iterable source sequentially forever * diff --git a/tests/Set/DistinctByTest.php b/tests/Set/DistinctByTest.php new file mode 100644 index 0000000..a1fc95d --- /dev/null +++ b/tests/Set/DistinctByTest.php @@ -0,0 +1,659 @@ +assertEquals($expected, $result); + } + + public function dataProviderForArray(): array + { + return [ + [ + [], + fn ($item) => $item, + [], + ], + [ + [], + fn ($item) => $item['name'], + [], + ], + [ + [1], + fn ($item) => $item, + [1], + ], + [ + [1, 1], + fn ($item) => $item, + [1], + ], + [ + [1, '1'], + fn ($item) => $item, + [1, '1'], + ], + [ + ['1', 1], + fn ($item) => $item, + ['1', 1], + ], + [ + ['aa', 'bb', 'aa'], + fn ($item) => $item, + ['aa', 'bb'], + ], + [ + [1, 2, 1, 2, 3], + fn ($item) => $item, + [1, 2, 3], + ], + [ + ['1', 2, '1', '2', 3], + fn ($item) => $item, + ['1', 2, '2', 3], + ], + [ + ['1', 2, '1', '2', 3], + fn ($item) => (int)$item, + ['1', 2, 3], // elements are yielded unchanged: int 3 stays an int + ], + [ + [[1], [1], [1, 2]], + fn ($item) => $item, + [[1], [1, 2]], + ], + [ + [[1], [1], [1, 2]], + fn ($item) => $item[0], + [[1]], + ], + [ + [[1], 'a' => [1]], + fn ($item) => $item, + [[1]], + ], + [ + [false, null, 0, 0.0, ''], + fn ($item) => $item, + [false, null, 0, 0.0, ''], + ], + [ + [true, 1, '1', 1.0, '1.0'], + fn ($item) => $item, + [true, 1, '1', 1.0, '1.0'], + ], + [ + [['a' => 1, 'b' => 2], ['a' => 1], ['a' => 1], 2, 2], + fn ($item) => $item, + [['a' => 1, 'b' => 2], ['a' => 1], 2], + ], + [ + [['a' => 1, 'b' => 2], ['a' => 1], ['a' => 1], ['a' => 2]], + fn ($item) => 
$item['a'], + [['a' => 1, 'b' => 2], ['a' => 2]], + ], + [ + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + fn ($item) => $item, + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + ], + [ + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + fn ($item) => $item['id'], + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + ], + [ + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + fn ($item) => $item['name'], + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Jane', 'id' => 5], + ], + ], + ]; + } + + /** + * @dataProvider dataProviderForGenerators + * @param \Generator $data elements to deduplicate, supplied as a generator + * @param callable $compareBy maps an element to the value it is compared by + * @param array $expected elements expected from Set::distinctBy(), in iteration order + */ + public function testGenerators(\Generator $data, callable $compareBy, array $expected): void + { + // Given + $result = []; + + // When + foreach (Set::distinctBy($data, $compareBy) as $datum) { + $result[] = $datum; + } + + // Then + $this->assertEquals($expected, $result); + } + + public function dataProviderForGenerators(): array + { + $gen = fn ($data) => GeneratorFixture::getGenerator($data); + + return [ + [ + $gen([]), + fn ($item) => $item, + [], + ], + [ + $gen([]), + fn ($item) => $item['name'], + [], + ], + [ + $gen([1]), + fn ($item) => $item, + [1], + ], + [ + $gen([1, 1]), + fn ($item) => $item, + [1], + ], + [ + $gen([1, '1']), + fn ($item) => $item, + [1, '1'], + ], + [ + 
$gen(['1', 1]), + fn ($item) => $item, + ['1', 1], + ], + [ + $gen(['aa', 'bb', 'aa']), + fn ($item) => $item, + ['aa', 'bb'], + ], + [ + $gen([1, 2, 1, 2, 3]), + fn ($item) => $item, + [1, 2, 3], + ], + [ + $gen(['1', 2, '1', '2', 3]), + fn ($item) => $item, + ['1', 2, '2', 3], + ], + [ + $gen(['1', 2, '1', '2', 3]), + fn ($item) => (int)$item, + ['1', 2, 3], // elements are yielded unchanged: int 3 stays an int + ], + [ + $gen([[1], [1], [1, 2]]), + fn ($item) => $item, + [[1], [1, 2]], + ], + [ + $gen([[1], [1], [1, 2]]), + fn ($item) => $item[0], + [[1]], + ], + [ + $gen([[1], 'a' => [1]]), + fn ($item) => $item, + [[1]], + ], + [ + $gen([false, null, 0, 0.0, '']), + fn ($item) => $item, + [false, null, 0, 0.0, ''], + ], + [ + $gen([true, 1, '1', 1.0, '1.0']), + fn ($item) => $item, + [true, 1, '1', 1.0, '1.0'], + ], + [ + $gen([['a' => 1, 'b' => 2], ['a' => 1], ['a' => 1], 2, 2]), + fn ($item) => $item, + [['a' => 1, 'b' => 2], ['a' => 1], 2], + ], + [ + $gen([['a' => 1, 'b' => 2], ['a' => 1], ['a' => 1], ['a' => 2]]), + fn ($item) => $item['a'], + [['a' => 1, 'b' => 2], ['a' => 2]], + ], + [ + $gen([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn ($item) => $item, + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + ], + [ + $gen([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn ($item) => $item['id'], + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + ], + [ + $gen([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 
'Jane', 'id' => 5], + ]), + fn ($item) => $item['name'], + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Jane', 'id' => 5], + ], + ], + ]; + } + + /** + * @dataProvider dataProviderForIterators + * @param \Iterator $data elements to deduplicate, supplied as an iterator + * @param callable $compareBy maps an element to the value it is compared by + * @param array $expected elements expected from Set::distinctBy(), in iteration order + */ + public function testIterators(\Iterator $data, callable $compareBy, array $expected): void + { + // Given + $result = []; + + // When + foreach (Set::distinctBy($data, $compareBy) as $datum) { + $result[] = $datum; + } + + // Then + $this->assertEquals($expected, $result); + } + + public function dataProviderForIterators(): array + { + $iter = fn ($data) => new ArrayIteratorFixture($data); + + return [ + [ + $iter([]), + fn ($item) => $item, + [], + ], + [ + $iter([]), + fn ($item) => $item['name'], + [], + ], + [ + $iter([1]), + fn ($item) => $item, + [1], + ], + [ + $iter([1, 1]), + fn ($item) => $item, + [1], + ], + [ + $iter([1, '1']), + fn ($item) => $item, + [1, '1'], + ], + [ + $iter(['1', 1]), + fn ($item) => $item, + ['1', 1], + ], + [ + $iter(['aa', 'bb', 'aa']), + fn ($item) => $item, + ['aa', 'bb'], + ], + [ + $iter([1, 2, 1, 2, 3]), + fn ($item) => $item, + [1, 2, 3], + ], + [ + $iter(['1', 2, '1', '2', 3]), + fn ($item) => $item, + ['1', 2, '2', 3], + ], + [ + $iter(['1', 2, '1', '2', 3]), + fn ($item) => (int)$item, + ['1', 2, 3], // elements are yielded unchanged: int 3 stays an int + ], + [ + $iter([[1], [1], [1, 2]]), + fn ($item) => $item, + [[1], [1, 2]], + ], + [ + $iter([[1], [1], [1, 2]]), + fn ($item) => $item[0], + [[1]], + ], + [ + $iter([[1], 'a' => [1]]), + fn ($item) => $item, + [[1]], + ], + [ + $iter([false, null, 0, 0.0, '']), + fn ($item) => $item, + [false, null, 0, 0.0, ''], + ], + [ + $iter([true, 1, '1', 1.0, '1.0']), + fn ($item) => $item, + [true, 1, '1', 1.0, '1.0'], + ], + [ + $iter([['a' => 1, 'b' => 2], ['a' => 1], ['a' => 1], 2, 2]), + fn ($item) => $item, + [['a' => 1, 'b' => 2], ['a' => 1], 2], + ], + [ + $iter([['a' => 1, 'b' => 2], ['a' => 1], ['a' => 
1], ['a' => 2]]), + fn ($item) => $item['a'], + [['a' => 1, 'b' => 2], ['a' => 2]], + ], + [ + $iter([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn ($item) => $item, + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + ], + [ + $iter([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn ($item) => $item['id'], + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + ], + [ + $iter([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn ($item) => $item['name'], + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Jane', 'id' => 5], + ], + ], + ]; + } + + /** + * @dataProvider dataProviderForTraversables + * @param \Traversable $data elements to deduplicate, supplied as a traversable + * @param callable $compareBy maps an element to the value it is compared by + * @param array $expected elements expected from Set::distinctBy(), in iteration order + */ + public function testTraversables(\Traversable $data, callable $compareBy, array $expected): void + { + // Given + $result = []; + + // When + foreach (Set::distinctBy($data, $compareBy) as $datum) { + $result[] = $datum; + } + + // Then + $this->assertEquals($expected, $result); + } + + public function dataProviderForTraversables(): array + { + $iter = fn ($data) => new IteratorAggregateFixture($data); + + return [ + [ + $iter([]), + fn ($item) => $item, + [], + ], + [ + $iter([]), + fn ($item) => $item['name'], + [], + ], + [ + $iter([1]), + fn ($item) => $item, + [1], + ], + [ + $iter([1, 1]), + fn ($item) => $item, + [1], + ], + [ + 
$iter([1, '1']), + fn ($item) => $item, + [1, '1'], + ], + [ + $iter(['1', 1]), + fn ($item) => $item, + ['1', 1], + ], + [ + $iter(['aa', 'bb', 'aa']), + fn ($item) => $item, + ['aa', 'bb'], + ], + [ + $iter([1, 2, 1, 2, 3]), + fn ($item) => $item, + [1, 2, 3], + ], + [ + $iter(['1', 2, '1', '2', 3]), + fn ($item) => $item, + ['1', 2, '2', 3], + ], + [ + $iter(['1', 2, '1', '2', 3]), + fn ($item) => (int)$item, + ['1', 2, 3], // elements are yielded unchanged: int 3 stays an int + ], + [ + $iter([[1], [1], [1, 2]]), + fn ($item) => $item, + [[1], [1, 2]], + ], + [ + $iter([[1], [1], [1, 2]]), + fn ($item) => $item[0], + [[1]], + ], + [ + $iter([[1], 'a' => [1]]), + fn ($item) => $item, + [[1]], + ], + [ + $iter([false, null, 0, 0.0, '']), + fn ($item) => $item, + [false, null, 0, 0.0, ''], + ], + [ + $iter([true, 1, '1', 1.0, '1.0']), + fn ($item) => $item, + [true, 1, '1', 1.0, '1.0'], + ], + [ + $iter([['a' => 1, 'b' => 2], ['a' => 1], ['a' => 1], 2, 2]), + fn ($item) => $item, + [['a' => 1, 'b' => 2], ['a' => 1], 2], + ], + [ + $iter([['a' => 1, 'b' => 2], ['a' => 1], ['a' => 1], ['a' => 2]]), + fn ($item) => $item['a'], + [['a' => 1, 'b' => 2], ['a' => 2]], + ], + [ + $iter([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn ($item) => $item, + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + ], + [ + $iter([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn ($item) => $item['id'], + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + ], + [ + $iter([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + 
['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn ($item) => $item['name'], + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Jane', 'id' => 5], + ], + ], + ]; + } +} diff --git a/tests/Stream/SetTest.php b/tests/Stream/SetTest.php index b0c7942..f97c332 100644 --- a/tests/Stream/SetTest.php +++ b/tests/Stream/SetTest.php @@ -51,6 +51,23 @@ public function dataProviderForArray(): array ->toArray(), [1, 2, 3, '1', '2', '3'], ], + [ + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ], + fn (iterable $iterable) => Stream::of($iterable) + ->distinctBy(fn ($item) => $item['name']) + ->toArray(), + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Jane', 'id' => 5], + ], + ], [ [ [1, 2, 3, 4, 5], @@ -482,6 +499,23 @@ public function dataProviderForGenerators(): array ->toArray(), [1, 2, 3, '1', '2', '3'], ], + [ + $gen([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn (iterable $iterable) => Stream::of($iterable) + ->distinctBy(fn ($item) => $item['name']) + ->toArray(), + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Jane', 'id' => 5], + ], + ], [ [ $gen([1, 2, 3, 4, 5]), @@ -915,6 +949,23 @@ public function dataProviderForIterators(): array ->toArray(), [1, 2, 3, '1', '2', '3'], ], + [ + $iter([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn (iterable $iterable) => Stream::of($iterable) + ->distinctBy(fn ($item) => $item['name']) + ->toArray(), + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Jane', 'id' => 
5], + ], + ], [ [ $iter([1, 2, 3, 4, 5]), @@ -1348,6 +1399,23 @@ public function dataProviderForTraversables(): array ->toArray(), [1, 2, 3, '1', '2', '3'], ], + [ + $trav([ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Mary', 'id' => 3], + ['name' => 'John', 'id' => 4], + ['name' => 'Jane', 'id' => 5], + ]), + fn (iterable $iterable) => Stream::of($iterable) + ->distinctBy(fn ($item) => $item['name']) + ->toArray(), + [ + ['name' => 'John', 'id' => 1], + ['name' => 'Mary', 'id' => 2], + ['name' => 'Jane', 'id' => 5], + ], + ], [ [ $trav([1, 2, 3, 4, 5]),